In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [9]:
def predict(alpha, X, kernel, x):
    '''
     alpha:  vector of shape (n,) where n is the number of samples
     X:      matrix of shape (n, 2) 
     kernel: a kernel function
     x:      vector of shape (2,)

    returns:
     the result of evaluating f_w(x)
    '''
    K=[]
    for i in range(len(X)):
        K.append(kernel(X[i],x))
    K=np.array(K)
    result = np.dot(alpha,K)
    return result

def loss(alpha, X, kernel, x, y):
    '''
     alpha:  vector of shape (n,) where n is the number of samples
     X:      matrix of shape (n, 2), training input samples
     kernel: a kernel function
     x:      vector of shape (2,), input sample 
     y:      scalar, target output sample
    returns:
     the result of evaluating the loss function for a sample (x, y)
    '''
    #loss function for all elements 
    E=np.abs(y-predict(alpha,X,kernel,x))
    return  E

In [43]:
def num_de_dalpha(alpha, X, kernel, x, y, epsilon):
    h = 1E-7
    return (loss(alpha+h, X, kernel, x, y) - loss(alpha-h, X, kernel, x, y))/(2.0*h) 
            # return the vector of the derivatives
 # Hint use the central difference to compute the derivative

In [44]:
def k1(x, y):
    return np.dot(x, y)

def k2(x, y):
    return (np.dot(x, y) + 1) ** 2

def de_dalpha(alpha, X, kernel, x, y):
    K=[]
    for i in range(len(X)):
        K.append(kernel(X[i],x))
    K=np.array(K)
    de = -np.sign(y-predict(alpha, X, kernel, x))*K
    return de

def test_de_dalpha(kernel):
    num_tests = 100
    epsilon = 0.0001
    X = [[-2, -1],
         [-1, 3],
         [2.5, -1.5],
         [4, 2]]
    for i in range(num_tests):
        talpha = np.random.randn(len(X))
        tx = np.random.randn(2)
        ty = np.random.randn(1)
        if np.linalg.norm(de_dalpha(talpha, X, kernel, tx, ty) - 
                          num_de_dalpha(talpha, X, kernel, tx, ty, epsilon)) > epsilon:
            return False
    return True

def test2():
    return test_de_dalpha(k1) and test_de_dalpha(k2)

In [45]:
test2()

False

In [46]:
def train(X, Y, epochs, eta, alpha_ini, kernel):
    '''
     X:      matrix of shape (n, 2), training input samples
     Y:      vector of shape (n, ), training output samples
     epochs: number of epochs
     eta:    learning rate
     alpha_ini:  vector of shape (n,), initial values of alpha
     kernel: a kernel function
    returns:
     a tuple (alpha, losses) where:
       alpha: vector of shape (n, ) with resulting alpha values
       losses: a vector of shape (epochs, ) with the loss values for each epoch
    '''
    losses = []
    alpha = alpha_ini
    for epoch in range(epochs):
        loss_tmp=0
        for j in range(len(X)):
            #forward propagation
            prediction=predict(alpha,X,kernel,X[j]) # Here is the prediction function we show at the begining
            #Back propagation
            alpha-=  num_de_dalpha(alpha, X, kernel, X[j], Y[j], epsilon)*eta - alpha*num_de_dalpha(alpha, X, kernel, X[j], Y[j], epsilon) # Here we compute the derivatives times the learning rate and we subtract the value of alpha times its derivative
        for j in range(len(x)):
            loss_tmp+=loss(alpha, X, kernel, X[j], Y[j])
        losses.append(loss_tmp)   
    return alpha, losses

In [47]:
X = [[-2, -1],
     [-1, 3],
     [2.5, -1.5],
     [4, 2]]
Y = [1, 1, 1, -1]