# References:

### https://github.com/homefish/edX_Learning_From_Data_2017/blob/master/homework_1/homework_1_PLA.ipynb
### https://github.com/homefish/edX_Learning_From_Data_2017/blob/master/homework_2/homework_2_problem_5_6_linear_regression.ipynb
### https://github.com/homefish/edX_Learning_From_Data_2017/blob/master/homework_2/homework_2_problem_8_9_10_Nonlinear_Transformation.ipynb

In [13]:
import numpy as np
import matplotlib.pyplot as plt

In [14]:
def random_points(lower_bound, upper_bound, dimension):
    """Draw uniform random samples between lower_bound and upper_bound.

    `dimension` is forwarded unchanged as NumPy's `size` argument, so it
    determines how many values (or what shape of array) is returned.
    """
    samples = np.random.uniform(low=lower_bound, high=upper_bound, size=dimension)
    return samples

def target_function(lower_bound, upper_bound, dimension):
    """Build a random target line and return it as a weight vector.

    Two points are drawn with random_points(lower_bound, upper_bound,
    dimension); the line through them is written as y = m*x + b.

    Returns:
        np.array([b, m, -1]), so that the dot product with a feature row
        (1, x, y) equals b + m*x - y.
    """
    # Two random points of the domain define the line.
    first = random_points(lower_bound, upper_bound, dimension)
    second = random_points(lower_bound, upper_bound, dimension)

    # Slope from the two points: m = (y2 - y1) / (x2 - x1).
    rise = second[1] - first[1]
    run = second[0] - first[0]
    slope = rise / run

    # Intercept from the point-slope form: b = y - m*x, evaluated at `second`.
    intercept = second[1] - slope * second[0]

    return np.array([intercept, slope, -1])

In [15]:
# Experiment configuration

# Target function f(x): number of coordinates per random point and the
# bounds of the interval the coordinates are drawn from
dimension = 2
lower_bound, upper_bound = -1, 1

# Number of independent experiments and training-set size used in each one
runs = 1000
training_points = 10

# Running totals accumulated over all runs (averaged once the loop is done)
iterations_total = ratio_mismatch_total = 0

In [22]:
#Loop of <runs> experiments
# Each run: draw a random target line f, label a small training set with it,
# train a perceptron h with the PLA, then estimate P(f(x) != h(x)) on fresh points.

# Number of fresh points used to estimate the out-of-sample disagreement.
# Loop-invariant, so it is set once instead of on every run.
N_outside = 1000

for run in range(runs):
    #"In each run choose a random line in the plane as your target function f (...)"
    f = target_function(lower_bound, upper_bound, dimension)
    print("Target function f(x) = ", f[1], "x +", f[0])

    #"(...) Choose the inputs x of the data set as random points uniformly in X and
    # evaluate the target function on each x to get the corresponding output y (...)"
    # Each row of X is (1, x1, x2); the leading 1 multiplies the bias weight f[0].
    X = np.transpose(np.array([np.ones(training_points), random_points(lower_bound, upper_bound, training_points), random_points(lower_bound, upper_bound, training_points)]))
    print(X)

    # np.dot(X, f) evaluates f[0] + f[1]*x1 + f[2]*x2 for every row at once;
    # np.sign then maps one side of the line to -1 and the other to +1,
    # giving the output y for each input x
    y = np.sign(np.dot(X, f))

    #"(...) Start the PLA with the weight vector being all zeros (...) so all points are initially misclassified points. (...)"
    # With h = 0 every prediction is sign(0) = 0, which differs from any +/-1 label.
    h = np.zeros(3)

    iterations_count = 0

    #Start the Perceptron Learning Algorithm (PLA)
    while True:
        y_h = np.sign(np.dot(X, h))       # classification by hypothesis
        comp = (y_h != y)                 # compare classification with actual data from target function
        wrong = np.where(comp)[0]         # indices of points with wrong classification by hypothesis h

        # Stop as soon as h classifies every training point correctly
        if wrong.size == 0:
            break

        rnd_choice = np.random.choice(wrong)        # pick a random misclassified point

        # update weight vector (new hypothesis): h <- h + y_n * x_n
        # (X[rnd_choice] is 1-D, so no transpose is needed)
        h = h + y[rnd_choice] * X[rnd_choice]
        iterations_count += 1

    iterations_total += iterations_count

    #Out-of-sample Data

    # Calculate error
    # Create data "outside" of training data, drawn from the same domain as
    # the training inputs (lower_bound/upper_bound rather than hard-coded -1, 1)
    test_x0 = random_points(lower_bound, upper_bound, N_outside)
    test_x1 = random_points(lower_bound, upper_bound, N_outside)

    # NOTE: X is rebound here; the training inputs are no longer needed.
    X = np.array([np.ones(N_outside), test_x0, test_x1]).T

    y_target = np.sign(X.dot(f))
    y_hypothesis = np.sign(X.dot(h))

    # Fraction of fresh points on which target and hypothesis disagree
    ratio_mismatch = ((y_target != y_hypothesis).sum()) / N_outside
    ratio_mismatch_total += ratio_mismatch

0.94819557 -0.61699252]
 [ 1.         -0.39850297  0.475566  ]
 [ 1.          0.97943618 -0.98002365]
 [ 1.          0.39821033  0.49961137]
 [ 1.          0.02823206 -0.0106774 ]]
Target function f(x) =  -0.8358566521216493 x + 0.162055362679908
[[ 1.          0.43432334  0.56287124]
 [ 1.          0.59052139  0.64756499]
 [ 1.          0.85209989  0.02623911]
 [ 1.          0.84482204 -0.25808625]
 [ 1.         -0.73152846 -0.12701026]
 [ 1.          0.61165844 -0.65049902]
 [ 1.         -0.14233098 -0.67255778]
 [ 1.          0.51395555  0.15301493]
 [ 1.          0.89780635 -0.38329929]
 [ 1.         -0.13333057 -0.87435516]]
Target function f(x) =  -3.605836079400278 x + 0.5578144397297238
[[ 1.         -0.97408852  0.83027684]
 [ 1.         -0.84939962 -0.10169081]
 [ 1.         -0.02967128 -0.78019123]
 [ 1.          0.98053246  0.93634729]
 [ 1.         -0.78570492  0.88195762]
 [ 1.         -0.01295448 -0.63476776]
 [ 1.         -0.01984862  0.28350929]
 [ 1.          0.661499

In [None]:
# Summary of the PLA experiment: averages over all runs.
# Uses `training_points` and `runs` (the names the experiment above defines)
# rather than `N` / `RUNS`, which only exist in later cells.
print("Size of training data: N = ", training_points, "points")

iterations_avg = iterations_total / runs
print("\nAverage number of PLA iterations over", runs, "runs: t_avg = ", iterations_avg)

ratio_mismatch_avg = ratio_mismatch_total / runs
print("\nAverage ratio for the mismatch between f(x) and h(x) outside of the training data:")
print("P(f(x)!=h(x)) = ", ratio_mismatch_avg)

In [None]:
# Visualise one random target line together with N sample points coloured
# by the side of the line they fall on.

# Build the target line inside this cell so it does not rely on A, B and w_f
# being left over from another cell: the line passes through two random
# points A and B and is written as y = slope*x + intercept.
A = random_points(lower_bound, upper_bound, dimension)
B = random_points(lower_bound, upper_bound, dimension)
slope = (B[1] - A[1]) / (B[0] - A[0])
intercept = B[1] - slope * B[0]
w_f = np.array([intercept, slope, -1])       # w_f . (1, x, y) = intercept + slope*x - y

# Pick N data points (x, y) uniformly from the box
# [lower_bound, upper_bound] x [lower_bound, upper_bound]
N = 100
X = np.transpose(np.array([np.ones(N), random_points(lower_bound, upper_bound, N), random_points(lower_bound, upper_bound, N)]))           # input

# Classify these points
y_f = np.sign(np.dot(X, w_f))


# plot points and color them according to their classification
plt.plot(X[:,1][y_f == 1], X[:,2][y_f == 1], 'ro')
plt.plot(X[:,1][y_f == -1], X[:,2][y_f == -1], 'bo')


# plot line
# create some data points on the line (for the plot) using the parametric vector form of a line
# line(t) = A + t * d,  where A is a point on the line, d the directional vector and t the parameter
d = B - A
line_x = [A[0] + t * d[0] for t in range(-10,10)]
line_y = [A[1] + t * d[1] for t in range(-10,10)]
plt.plot(line_x, line_y)

# plot the two points that define the line
plt.plot(A[0], A[1], 'go')
plt.plot(B[0], B[1], 'go')


# set the ranges for the x and y axis to display the [-1,1] x [-1,1] box
plt.ylim(-1,1)
plt.xlim(-1,1)
plt.show()


In [None]:
# LINEAR REGRESSION
# One-shot least-squares fit w = X_dagger . y, where X_dagger is the
# pseudo-inverse of X. np.linalg.pinv computes it directly and avoids
# explicitly inverting X^T X, which is numerically less stable and fails
# outright when X^T X is singular.
X_dagger = np.linalg.pinv(X)
w_lr = np.dot(X_dagger, y_f)


# plot classification according to w found by linear regression
# it shows that some of the points are misclassified
y_lr = np.sign(np.dot(X, w_lr))
print("check dimensions of y_lr: ", y_lr.shape)

# plot points and color them according to their classification
plt.plot(X[:,1][y_lr == 1], X[:,2][y_lr == 1], 'ro')
plt.plot(X[:,1][y_lr == -1], X[:,2][y_lr == -1], 'bo')

# plot the correct classification line (target function)
plt.plot(line_x, line_y, 'g')
plt.ylim(-1,1)
plt.xlim(-1,1)

plt.show()

In [None]:
#Problems 5 and 6
# START actual HOMEWORK now
import numpy as np
import matplotlib.pyplot as plt

def rnd(n):
    """Return uniform random samples between -1 and 1.

    `n` is forwarded as NumPy's `size` argument.
    """
    return np.random.uniform(low=-1, high=1, size=n)


# Problems 5 and 6: average in-sample and out-of-sample classification error
# of linear regression over many random target lines.

# repeat the experiment 1000 times
RUNS = 1000
N_sample = 100      # training points per run
N_test = 1000       # fresh points per run used to estimate E_out
E_in_total = 0
E_out_total = 0

for run in range(RUNS):
    # choose two random points A, B in [-1,1] x [-1,1]
    A = rnd(2)
    B = rnd(2)

    # the line can be described by y = m*x + b where m is the slope
    m = (B[1] - A[1]) / (B[0] - A[0])
    b = B[1] - m * B[0]
    w_f = np.array([b, m, -1])      # w_f . (1, x, y) = b + m*x - y

    #-----------------------

    # Create N data points (x, y) from the target function
    X = np.transpose(np.array([np.ones(N_sample), rnd(N_sample), rnd(N_sample)]))           # input
    y_f = np.sign(np.dot(X, w_f))

    #-----------------------

    # LINEAR REGRESSION
    # pinv gives the pseudo-inverse directly, without explicitly inverting X^T X
    X_dagger = np.linalg.pinv(X)
    w_lr = np.dot(X_dagger, y_f)

    # classification according to w found by linear regression
    y_lr = np.sign(np.dot(X, w_lr))

    #------------------------

    # Error E_in: fraction of training points the regression misclassifies.
    # np.mean on the boolean mask avoids the builtin sum's element-by-element
    # Python iteration over the array.
    E_in = np.mean(y_lr != y_f)
    E_in_total += E_in

    #------------------------
    # Problem 6: Take 1000 test points (out of sample points) and count disagreement
    # between y_f_test and y_lr_test
    X_test = np.transpose(np.array([np.ones(N_test), rnd(N_test), rnd(N_test)]))
    y_f_test = np.sign(np.dot(X_test, w_f))
    y_lr_test = np.sign(np.dot(X_test, w_lr))

    E_out = np.mean(y_lr_test != y_f_test)
    E_out_total += E_out


# Average of E_in over RUNS
E_in_avg = E_in_total / RUNS
print("Average of E_in over", RUNS, " runs:", E_in_avg)

# Average of E_out over RUNS
E_out_avg = E_out_total / RUNS
print("Average of E_out over", RUNS, " runs:", E_out_avg)

In [None]:
#Nonlinear Transformation
import numpy as np
import matplotlib.pyplot as plt

# Single run of the noisy nonlinear-target experiment: label points with
# sign(x1^2 + x2^2 - 0.6), flip 10% of the labels, fit a plain linear
# regression on (1, x1, x2) and report its in-sample error.

# create 1000 random points
N_train = 1000

def rnd(n):
    """Return uniform random samples between -1 and 1 (`n` is NumPy's `size`)."""
    return np.random.uniform(-1, 1, size=n)

# matrix consisting of feature vectors; each row is (1, x1, x2)
X_train = np.transpose(np.array([np.ones(N_train), rnd(N_train), rnd(N_train)]))
y_f_train = np.sign(X_train[:,1] * X_train[:,1] + X_train[:,2] * X_train[:,2] - 0.6)
print(X_train.shape)
print(y_f_train.shape)


# pick 10% = 100 random indices (a random permutation has no repeats,
# so each selected label is flipped exactly once)
indices = np.random.permutation(N_train)
random_indices = indices[:(N_train // 10)]


# flip sign in y_f_train vector (vectorised instead of a Python loop)
y_f_train[random_indices] *= -1

# linear regression
# pinv gives the pseudo-inverse directly, without explicitly inverting X^T X
X_dagger = np.linalg.pinv(X_train)
w_lr_train = np.dot(X_dagger, y_f_train)

# calculate E_in: fraction of (noisy) training labels the fit gets wrong
y_lr_train = np.sign(np.dot(X_train, w_lr_train))
E_in = np.mean(y_lr_train != y_f_train)
print("In sample error: ", E_in)


# Create a plot of the classified points (coloured by the noisy labels)
plt.plot(X_train[:,1][y_f_train == 1], X_train[:,2][y_f_train == 1], 'ro')
plt.plot(X_train[:,1][y_f_train == -1], X_train[:,2][y_f_train == -1], 'bo')
plt.xlim(-1,1)
plt.ylim(-1,1)
plt.show()

In [None]:
#Problem 8
# Now do this 1000 times to take average
import numpy as np
import matplotlib.pyplot as plt

def rnd(n):
    """Sample uniformly between -1 and 1; `n` is passed on as NumPy's `size`."""
    draws = np.random.uniform(low=-1, high=1, size=n)
    return draws


# Problem 8: average in-sample error of plain linear regression on the noisy
# nonlinear target sign(x1^2 + x2^2 - 0.6), taken over RUNS independent runs.
RUNS = 1000
N_train = 1000
E_in_total = 0

for run in range(RUNS):

    # create 1000 random points
    # matrix consisting of feature vectors; each row is (1, x1, x2)
    X_train = np.transpose(np.array([np.ones(N_train), rnd(N_train), rnd(N_train)]))
    y_f_train = np.sign(X_train[:,1] * X_train[:,1] + X_train[:,2] * X_train[:,2] - 0.6)

    # pick 10% = 100 random indices (a permutation has no repeats, so each
    # selected label is flipped exactly once)
    indices = np.random.permutation(N_train)
    random_indices = indices[:(N_train // 10)]

    # flip sign in y_f_train vector (vectorised instead of a Python loop)
    y_f_train[random_indices] *= -1

    # linear regression
    # pinv gives the pseudo-inverse directly, without explicitly inverting X^T X
    X_dagger = np.linalg.pinv(X_train)
    w_lr_train = np.dot(X_dagger, y_f_train)

    # calculate E_in: fraction of (noisy) training labels the fit gets wrong
    y_lr_train = np.sign(np.dot(X_train, w_lr_train))
    E_in = np.mean(y_lr_train != y_f_train)
    E_in_total += E_in
    #print("In sample error: ", E_in)


E_in_avg = E_in_total / RUNS
print("The average error E_in over", RUNS, "runs is: E_in_avg = ", E_in_avg)

In [None]:
# Problem 9 :  transform the N = 1000 training data into the following nonlinear feature
# vector: (1, x1, x2, x1*x2, x1*x1, x2*x2)
#
# NOTE: this cell reuses X_train, y_f_train and N_train from whichever cell
# assigned them last, so run the notebook top to bottom (restart the kernel
# first) to avoid picking up stale values.

# Short alias for the raw (1, x1, x2) feature matrix
X = X_train

# new feature matrix: the extra columns are built from the raw coordinates
X_trans = np.transpose(np.array([np.ones(N_train), X[:,1], X[:,2], X[:,1]*X[:,2], X[:,1]*X[:,1], X[:,2]*X[:,2]]))


# linear regression on the new "feature matrix"
# pinv gives the pseudo-inverse directly, without explicitly inverting X^T X
X_dagger_trans = np.linalg.pinv(X_trans)
w_lr_trans = np.dot(X_dagger_trans, y_f_train)

# try the different hypotheses that are given
w_a = np.array([-1, -0.05, 0.08, 0.13, 1.5, 1.5])
w_b = np.array([-1, -0.05, 0.08, 0.13, 1.5, 15])
w_c = np.array([-1, -0.05, 0.08, 0.13, 15, 1.5])
w_d = np.array([-1, -1.5, 0.08, 0.13, 0.05, 0.05])
w_e = np.array([-1, -0.05, 0.08, 1.5, 0.15, 0.15])

# compute classifications made by each hypothesis
y_lr_trans = np.sign(np.dot(X_trans, w_lr_trans))
y_a = np.sign(np.dot(X_trans, w_a))
y_b = np.sign(np.dot(X_trans, w_b))
y_c = np.sign(np.dot(X_trans, w_c))
y_d = np.sign(np.dot(X_trans, w_d))
y_e = np.sign(np.dot(X_trans, w_e))

# Fraction of training points on which each candidate disagrees with the
# regression hypothesis (np.mean of the boolean mask, no Python-level loop)
mismatch_lr_and_a = np.mean(y_a != y_lr_trans)
mismatch_lr_and_b = np.mean(y_b != y_lr_trans)
mismatch_lr_and_c = np.mean(y_c != y_lr_trans)
mismatch_lr_and_d = np.mean(y_d != y_lr_trans)
mismatch_lr_and_e = np.mean(y_e != y_lr_trans)

print("mismatch between LR and a = ", mismatch_lr_and_a)
print("mismatch between LR and b = ", mismatch_lr_and_b)
print("mismatch between LR and c = ", mismatch_lr_and_c)
print("mismatch between LR and d = ", mismatch_lr_and_d)
print("mismatch between LR and e = ", mismatch_lr_and_e)

print("The weight vector of my hypothesis is: w_LR = ", w_lr_trans)
# Use that weight vector for problem 10


# compare predictions made by w_lr_trans with the (noisy) target labels
print("Sanity check: E_in = ", np.mean(y_f_train != y_lr_trans))

In [None]:
# Problem 10
# Estimate the out-of-sample error of the Problem 9 hypothesis (w_lr_trans)
# by averaging its disagreement with fresh noisy labels over RUNS test sets.

RUNS = 1000
N_test = 1000
E_out_total = 0

for run in range(RUNS):

    # create N_test random points
    # matrix consisting of feature vectors; sized with N_test (not N_train)
    # so it matches the label flips and transformed features built below
    X_test = np.transpose(np.array([np.ones(N_test), rnd(N_test), rnd(N_test)]))
    y_f_test = np.sign(X_test[:,1] * X_test[:,1] + X_test[:,2] * X_test[:,2] - 0.6)

    # pick 10% = 100 random indices (a permutation has no repeats, so each
    # selected label is flipped exactly once)
    indices = np.random.permutation(N_test)
    random_indices = indices[:(N_test // 10)]

    # flip sign in y_f_test vector (vectorised instead of a Python loop)
    y_f_test[random_indices] *= -1

    # Compute classification made by my hypothesis from Problem 9
    # first create transformed feature matrix (1, x1, x2, x1*x2, x1*x1, x2*x2)
    X = X_test
    X_trans_test = np.transpose(np.array([np.ones(N_test), X[:,1], X[:,2], X[:,1]*X[:,2], X[:,1]*X[:,1], X[:,2]*X[:,2]]))
    y_lr_trans_test = np.sign(np.dot(X_trans_test, w_lr_trans))

    # Compute disagreement between hypothesis and target function as a
    # fraction of the N_test test points
    E_out = np.mean(y_lr_trans_test != y_f_test)
    E_out_total += E_out

E_out_avg = E_out_total / RUNS
print("The average error E_out over", RUNS, "runs is: E_out_avg = ", E_out_avg)