# Function Demonstrations

In [69]:
include("./RG_functions.jl")

train_neural_network

### Logistic Regression

In [91]:
# Create data
X = rand(500,2)
y = zeros(500)
pos_ind = []
neg_ind = []
for i=1:size(X,1)
    if X[i,2] > 1.5*X[i,1] - .25
        y[i] = 1
        push!(pos_ind,i)
    else
        y[i] = 0
        push!(neg_ind,i)
    end
end

#class_pos = X[pos_ind,:]
#class_neg = X[neg_ind,:]

#scatter(class_pos[:,1], class_pos[:,2], color="blue", label="Class 1")
#scatter!(class_neg[:,1], class_neg[:,2], color="red", label="Class 2")

#x = LinRange(0,1,100)
f(x) = 1.5*x - .25
#plot!(x, f.(x),xlims=(0,1),ylims=(0,1))

x = LinRange(1/5,4/5,10)
x_noise_class1 = f.(x) + 1/5 * (rand(10) .- 0.5)
x_noise_class2 = f.(x) + 1/5 * (rand(10) .- 0.5)
#scatter!(x, x_noise_class1, color="blue")
#scatter!(x, x_noise_class2, color="red")
Xx = vcat(X,hcat(x,x_noise_class1), hcat(x,x_noise_class2))
X = hcat(ones(size(Xx,1),1),Xx)   # append columns of 1's
y = vcat(y,ones(10),zeros(10));

In [92]:
# Solve
n,p = size(X)
b = zeros(p)
b = solve_LR_coef(X,y,b)
# Report error
prob(x, β) = 1 / (1 + exp(-(dot(x,β) )))

# Prediction function
pred(x,b) = round(prob(x,b))

Number of iterations: 8.


pred (generic function with 2 methods)

In [93]:
misclass = 0
for i = 1:n
    if pred(X[i,:],b) == y[i]
        misclass += 1
    end
end
println("$misclass out of $n training observations misclassified.")

0 out of 520 training observations misclassified.


### Kernel Ridge

In [94]:
X = transpose(BostonHousing.features())
y = transpose(BostonHousing.targets())
n,p = size(X)

(506, 13)

In [95]:
# Radial Basis Function
s = 10
K_rbf(x,z) = exp(-(norm(x-z)^2) / (2*(s^2)))
# Polynomial
c = 1; d = 2
K_poly(x,z) = (c + dot(x,z))^d

λ = 1
α_rbf = kernel_ridge(X,y,λ,K_rbf)
α_poly = kernel_ridge(X,y,λ,K_poly)

# Prediction function
pred_kernel_ridge(x,K,X,α) = sum( K(x,X[i,:]) * α[i] for i = 1:size(X,1))

pred_kernel_ridge (generic function with 1 method)

In [96]:
# Mean Squared Error
y_preds_rbf = zeros(n)
y_preds_poly = zeros(n)
for i = 1:n
    y_preds_rbf[i] = pred_kernel_ridge(X[i,:],K_rbf,X,α_rbf)
    y_preds_poly[i] = pred_kernel_ridge(X[i,:],K_poly,X,α_poly)
end
println("Mean square error with RBF kernel: $(Statistics.mean( (y_preds_rbf .- y).^2))")
println("Mean square error with polynomial kernel: $(Statistics.mean( (y_preds_poly .- y).^2))")

Mean square error with RBF kernel: 48.58030241248382
Mean square error with polynomial kernel: 6.363275739611623


### Proximal Gradient Descent

In [97]:
# Load data
X = transpose(BostonHousing.features())
y = transpose(BostonHousing.targets())
n,p = size(X)
# Center y and estimate β_0
y_centered = y .- Statistics.mean(y)
β_0 = Statistics.mean(y)
# Create matrix of centered X columns
X_centered = zeros(n,p)
for j in 1:p
    X_centered[:,j] = X[:,j] .- Statistics.mean(X[:,j])
end

In [98]:
# Initialize β
β_init = zeros(size(X_centered,2))
λ = 10000

β = prox_grad_desc(X_centered, y_centered, β_init, λ)
println("$β")

# Prediction function
pred(X,β,β_0) = β_0 .+ X * β

Number of iterations: 1000.
Max iterations reached.
Real[0; 0.0381765577924749; 0; 0; 0; 0; 0; 0; 0; -0.016327374889328842; 0; 0.010874369828524572; -0.1857394401823319]


pred (generic function with 2 methods)

In [99]:
y_pred = pred(X,β,β_0)

MSE = Statistics.mean((y - y_pred).^2)
println("Mean squared error: $MSE")

Mean squared error: 74.18408330593503


### Elastic Net

In [100]:
# Load data
X = transpose(BostonHousing.features())
y = transpose(BostonHousing.targets())
n,p = size(X)
# Center y and estimate β_0
y_centered = y .- Statistics.mean(y)
β_0 = Statistics.mean(y)
# Create matrix of centered X columns
X_centered = zeros(n,p)
for j in 1:p
    X_centered[:,j] = X[:,j] .- Statistics.mean(X[:,j])
end

In [101]:
# Initialize β
β_init = zeros(size(X_centered,2))
λ = 10000
α = 0.1   # 1 means lasso, 0 means ridge

β = elastic_net(X_centered, y_centered, β_init, λ, α)
println("$β")

# Prediction function
pred(X,β,β_0) = β_0 .+ X * β

Number of iterations: 1000.
Max iterations reached.
Real[-0.041753603120333466; 0.05502475975765802; -0.019075219745038403; 0; 0; 0.015521427968780603; 0.0007620910771608188; -0.009899700036271004; 0.03733115888685481; -0.012845543597233947; -0.04954615426492346; 0.010518427530508552; -0.32525422243992397]


pred (generic function with 2 methods)

In [102]:
y_pred = pred(X,β,β_0)

MSE = Statistics.mean((y - y_pred).^2)
println("Mean squared error: $MSE")

Mean squared error: 78.37089939829339


### (Linear) Support Vector Machines

In [104]:
# Create linear boundary data
X = rand(500,2)
y = zeros(500)
pos_ind = []
neg_ind = []
for i=1:size(X,1)
    if X[i,2] > 1.5*X[i,1] - .25
        y[i] = 1
        push!(pos_ind,i)
    else
        y[i] = -1
        push!(neg_ind,i)
    end
end

#class_pos = X[pos_ind,:]
#class_neg = X[neg_ind,:]

#scatter(class_pos[:,1], class_pos[:,2], color="blue", label="Class 1")
#scatter!(class_neg[:,1], class_neg[:,2], color="red", label="Class 2")

#x = LinRange(0,1,100)
f(x) = 1.5*x - .25
#plot!(x, f.(x),xlims=(0,1),ylims=(0,1))

x = LinRange(1/5,4/5,10)
x_noise_class1 = f.(x) + 1/5 * (rand(10) .- 0.5)
x_noise_class2 = f.(x) + 1/5 * (rand(10) .- 0.5)
#scatter!(x, x_noise_class1, color="blue")
#scatter!(x, x_noise_class2, color="red")
Xx = vcat(X,hcat(x,x_noise_class1), hcat(x,x_noise_class2))
X = hcat(ones(size(Xx,1),1),Xx)   # append columns of 1's
y = vcat(y,ones(10),-ones(10));


In [105]:
# Set parameter
C = 40
# Solve
(β_0, β, ξ) = SVM(X,y,C)

# Prediction function
SVM_classifier(x,β_0,β) = sign(x'*β + β_0)

Academic license - for non-commercial use only - expires 2022-08-24
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 521 rows, 524 columns and 3120 nonzeros
Model fingerprint: 0x345c63d8
Model has 3 quadratic objective terms
Coefficient statistics:
  Matrix range     [1e-03, 1e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [2e+00, 2e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+00, 4e+01]
Presolve time: 0.00s
Presolved: 521 rows, 524 columns, 3120 nonzeros
Presolved model has 3 quadratic objective terms
Ordering time: 0.00s

Barrier statistics:
 Dense cols : 4
 Free vars  : 4
 AA' NZ     : 2.600e+03
 Factor NZ  : 3.513e+03
 Factor Ops : 3.024e+04 (less than 1 second per iteration)
 Threads    : 1

                  Objective                Residual
Iter       Primal          Dual         Primal    Dual     Compl     Time
   0   7.

SVM_classifier (generic function with 1 method)

In [106]:
# Report error
misclass = 0
for i = 1:n
    if SVM_classifier(X[i,:], β_0, β) != y[i]
        misclass += 1
    end
end
println("$misclass out of $n training observations misclassified.")

3 out of 506 training observations misclassified.


### Kernel Support Vector Machines

In [109]:
# Quadratic Boundary Data
X = rand(500,2)
y = zeros(500)
pos_ind = []
neg_ind = []
for i=1:size(X,1)
    if X[i,2] > -(X[i,1]+.5)*(X[i,1]-1)
        y[i] = 1
        push!(pos_ind,i)
    else
        y[i] = -1
        push!(neg_ind,i)
    end
end

#class_pos = X[pos_ind,:]
#class_neg = X[neg_ind,:]

#scatter(class_pos[:,1], class_pos[:,2], color="blue", label="Class 1")
#scatter!(class_neg[:,1], class_neg[:,2], color="red", label="Class 2")

#x = LinRange(0,1,100)
f(x) = -(x+0.5)*(x-1)
#plot!(x, f.(x),xlims=(0,1),ylims=(0,1))

x = LinRange(0,1,10)
x_noise_class1 = f.(x) + 1/5 * (rand(10) .- 0.5)
x_noise_class2 = f.(x) + 1/5 * (rand(10) .- 0.5)
#scatter!(x, x_noise_class1, color="blue")
#scatter!(x, x_noise_class2, color="red")
X = vcat(X,hcat(x,x_noise_class1), hcat(x,x_noise_class2))
y = vcat(y,ones(10),-ones(10));

In [114]:
# Radial Basis Function
s = 100
K_rbf(x,z) = exp(-(norm(x-z)^2) / (2*(s^2)))
# Polynomial
c = 0; d = 2
K_poly(x,z) = (c + dot(x,z))^d

C = 10
α_rbf = kernel_SVM(X,y,C,K_rbf)
α_poly = kernel_SVM(X,y,C,K_poly)

# find index of max α component, let that be k 
k_rbf = argmax(α_rbf)
k_poly = argmax(α_poly)

b_rbf = y[k_rbf] - sum( α_rbf[i]*y[i]*K_rbf(X[k_rbf,:],X[i,:]) for i = 1:n)
b_poly = y[k_poly] - sum( α_poly[i]*y[i]*K_poly(X[k_poly,:],X[i,:]) for i = 1:n)

# Prediction function
kernel_SVM_classifier(x,K,X,α,b) = sign(sum( α[i]*y[i]*K(x,X[i,:]) for i = 1:size(X,1)) + b)

Academic license - for non-commercial use only - expires 2022-08-24
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 1 rows, 520 columns and 520 nonzeros
Model fingerprint: 0xc0bf3455
Model has 135460 quadratic objective terms
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  QObjective range [1e+00, 2e+00]
  Bounds range     [1e+01, 1e+01]
  RHS range        [0e+00, 0e+00]
Presolve time: 0.01s
Presolved: 1 rows, 520 columns, 520 nonzeros
Presolved model has 135460 quadratic objective terms
Ordering time: 0.00s

Barrier statistics:
 Free vars  : 6
 AA' NZ     : 2.100e+01
 Factor NZ  : 2.800e+01
 Factor Ops : 1.400e+02 (less than 1 second per iteration)
 Threads    : 1

                  Objective                Residual
Iter       Primal          Dual         Primal    Dual     Compl     Time
   0   2.08086560e+06 

kernel_SVM_classifier (generic function with 1 method)

In [115]:
misclass_rbf = 0
misclass_poly = 0
for i = 1:n
    if kernel_SVM_classifier(X[i,:], K_rbf,X, α_rbf, b_rbf) != y[i]
        misclass_rbf += 1
    end
    if kernel_SVM_classifier(X[i,:], K_poly,X, α_poly, b_poly) != y[i]
        misclass_poly += 1
    end
end
println("RBF kernel: $misclass_rbf out of $n training observations misclassified.")
println("Polynomail kernel: $misclass_poly out of $n training observations misclassified.")

RBF kernel: 318 out of 506 training observations misclassified.
Polynomail kernel: 12 out of 506 training observations misclassified.


### Bagging Procedure

In [None]:
X = [ones(5)'; 2*ones(5)'; 3*ones(5)'; 4*ones(5)']
y = [1; 2; 3; 4]
b_samples, ind_used, ind_not_used = bootstrapper(X,y)
X_b, y_b = b_samples[1]
X_b
y_b
ind_used[1]
ind_not_used[1]

### Neural Network Classification

In [None]:
# Classication

# load partial training set
tr_size = 5000
train_x, train_y = MNIST.traindata(1:tr_size)
# load partial test set
te_size = 200
test_x,  test_y  = MNIST.testdata(1:te_size)

X = zeros(tr_size,784)
for i = 1:tr_size
    X[i,:] = reshape(train_x[:,:,i],1,784)
end
y = train_y[1:tr_size]

X_test = zeros(te_size,784)
for i = 1:te_size
    X_test[i,:] = reshape(test_x[:,:,i],1,784)
end
y_test = test_y

In [None]:
num_hidden_layers = 5
size_hidden_layers = 100
misclass = train_neural_network(X,y,num_hidden_layers,size_hidden_layers,activation="sigmoid",problem_type="classification",num_classes=10,num_epochs=2)

### Neural Network Regression

In [None]:
# Regression
X = transpose(BostonHousing.features())
y = transpose(BostonHousing.targets())

In [None]:
num_hidden_layers = 5
size_hidden_layers = 100
ave_loss = train_neural_network(X,y,num_hidden_layers,size_hidden_layers,activation="ReLU",problem_type="regression",num_epochs=5)