In [24]:
import numpy as np
import pandas as pd

<u> **Setting up:** </u>

* data: $\tilde{X} \in \mathbb{R}^{n \times d}$ (n data points and d features)

* labels: $\tilde{Y} \in \mathbb{R}^{n}$

**Random linear classifier (w/ 0-1 loss):**

In [15]:
def random_linear_classifier(data, label, k, rng=None): # k is the number of random classifiers to try
    """Return the best of ``k`` randomly drawn linear classifiers under 0-1 loss.

    Each candidate is a pair ``(theta, theta_null)`` drawn from a standard
    normal distribution (so weights of either sign can occur).  A candidate
    predicts ``sign(data @ theta + theta_null)``; its 0-1 loss is the fraction
    of rows whose prediction differs from the corresponding label.

    Parameters
    ----------
    data : array-like, shape (n, d)
        One data point per row.
    label : array-like with n entries
        Target label of every row (compared against the output of ``np.sign``).
    k : int
        Number of random candidates to draw; must be at least 1.
    rng : optional
        Object exposing ``standard_normal(size)`` (for example
        ``np.random.default_rng(seed)``).  When omitted, the global
        ``np.random`` state is used.

    Returns
    -------
    (theta, theta_null)
        ``theta`` has shape (d, 1) and ``theta_null`` has shape (1,); this is
        the first candidate that achieved the lowest training loss.

    Raises
    ------
    ValueError
        If ``k < 1`` or ``data`` has no rows.
    """
    data = np.asarray(data, dtype=float)
    n, d = data.shape
    if k < 1:
        raise ValueError("k must be at least 1")
    if n == 0:
        raise ValueError("data must contain at least one data point")

    targets = np.asarray(label).reshape(n, 1)
    sampler = np.random if rng is None else rng

    best_loss = np.inf
    best_theta = None
    best_theta_null = None
    for _ in range(k):
        theta = sampler.standard_normal((d, 1))
        theta_null = sampler.standard_normal(1)

        # Predictions for all n points at once, shape (n, 1).
        predictions = np.sign(data @ theta + theta_null)
        loss = np.mean(predictions != targets)

        # Strict "<" keeps the earliest candidate when several tie.
        if loss < best_loss:
            best_loss = loss
            best_theta = theta
            best_theta_null = theta_null

    return best_theta, best_theta_null
        

**Perceptrons w/ offset term:**

In [13]:
def perceptron_with_offset(data, labels, tau):      # tau is the number of iterations
    """Run the perceptron algorithm with an explicit offset term.

    Starting from ``theta = 0`` and ``theta_0 = 0``, the data set is swept up
    to ``tau`` times.  Whenever ``y * (theta . x + theta_0) <= 0`` for a point
    ``(x, y)``, the parameters are updated with ``theta += y * x`` and
    ``theta_0 += y``.  Training stops early after a full sweep without any
    update.

    Parameters
    ----------
    data : array-like, shape (n, d)
        One data point per row.
    labels : array-like with at least n entries
        Label of every row; flattened before use.
    tau : int
        Maximum number of sweeps over the data.

    Returns
    -------
    (theta, theta_0)
        ``theta`` has shape (d, 1) and ``theta_0`` has shape (1,).

    A short summary is printed before and after training.
    """
    data = np.asarray(data)
    labels = np.asarray(labels).ravel()
    n, d = data.shape
    theta = np.zeros((d, 1))
    theta_0 = np.zeros(1)
    no_mistakes = 0
    # Stays None when no instance is ever visited (tau == 0 or empty data), so
    # the final summary below cannot fail on an unbound loop variable.
    i = None

    print("The data set has {} features, {} data points.\n".format(d,n))

    for t in range(tau):
        changed = False
        for i in range(n):
            y = labels[i]
            x = data[i, :].reshape(d, 1)
            # Signed margin; non-positive means misclassified or on the boundary.
            margin = (y * (theta.T @ x + theta_0)).item()
            if margin <= 0:
                no_mistakes += 1
                theta = theta + y * x
                theta_0 = theta_0 + y
                changed = True
        if not changed:
            # A full sweep without an update: every point has positive margin.
            break
    print("After {} mistake(s), the Perceptron Algorithm (w/ offset) yields theta = {}, theta_0 = {} on the {}th instance\n".format(no_mistakes, theta, theta_0, i))
    return theta, theta_0

**Perceptron through the origin:**

In [12]:
def perceptron_through_origin(data, labels, tau):      # tau is the number of iterations
    """Run the through-the-origin perceptron on data augmented with a constant 1.

    A column of ones is appended to ``data``, so the learned vector has
    ``d + 1`` entries: the first ``d`` weight the original features and the
    last one multiplies the constant column (i.e. it acts as the offset).
    Starting from ``theta = 0``, the data set is swept up to ``tau`` times;
    whenever ``y * (theta . x) <= 0`` the update ``theta += y * x`` is applied.
    Training stops early after a full sweep without any update.

    Parameters
    ----------
    data : array-like, shape (n, d)
        One data point per row.
    labels : array-like with at least n entries
        Label of every row; flattened before use.
    tau : int
        Maximum number of sweeps over the data.

    Returns
    -------
    theta : ndarray, shape (d + 1, 1)

    A short summary is printed before and after training.
    """
    data = np.asarray(data)
    labels = np.asarray(labels).ravel()
    n, d = data.shape
    data_new = np.hstack([data, np.ones((n, 1))])
    theta = np.zeros((d + 1, 1))
    no_mistakes = 0
    # Stays None when no instance is ever visited (tau == 0 or empty data), so
    # the final summary below cannot fail on an unbound loop variable.
    i = None

    print("The data set has {} features and {} data points.\n".format(d,n))

    for t in range(tau):
        changed = False
        for i in range(n):
            y = labels[i]
            x = data_new[i, :].reshape(d + 1, 1)
            # Signed margin; non-positive means misclassified or on the boundary.
            margin = (y * (theta.T @ x)).item()
            if margin <= 0:
                theta = theta + y * x
                no_mistakes += 1
                changed = True
        if not changed:
            # A full sweep without an update: every point has positive margin.
            break
    print("After {} mistake(s), the Perceptron Algorithm (through the origin) yields theta = {} on the {}th instance".format(no_mistakes, theta, i))
    return theta

**Averaged Perceptron:**

* Instead of using all weight vectors, use the average weight vector (i.e., longer-surviving weight vectors get more say)

* More practical alternative and widely used

In [72]:
def averaged_perceptron(data, labels, tau):
    """Run the perceptron with offset and return the average of its iterates.

    The usual perceptron updates (``theta += y * x``, ``theta_0 += y`` whenever
    ``y * (theta . x + theta_0) <= 0``) are applied for up to ``tau`` sweeps
    over the data.  After every visited instance the current ``theta`` and
    ``theta_0`` are added to running sums, and the result is those sums
    divided by ``n * tau``.

    If a full sweep produces no update, the parameters can no longer change,
    so the loop stops early and the contribution of the skipped instances is
    added to the sums in closed form.  The returned average is therefore the
    same as if all ``n * tau`` steps had been executed.

    Parameters
    ----------
    data : array-like, shape (n, d)
        One data point per row.
    labels : array-like with at least n entries
        Label of every row; flattened before use.
    tau : int
        Number of sweeps over the data.

    Returns
    -------
    (theta_avg, theta_0_avg)
        ``theta_avg`` has shape (d, 1) and ``theta_0_avg`` has shape (1,).
        Both are all zeros when ``n * tau == 0`` (nothing to average).

    A short summary is printed before and after training.
    """
    data = np.asarray(data)
    labels = np.asarray(labels).ravel()
    n, d = data.shape
    theta = np.zeros((d,1))
    theta_0 = np.zeros(1)
    ths = np.zeros((d,1))
    th0s = np.zeros(1)
    no_mistakes = 0
    # Stays None when no instance is ever visited (tau == 0 or empty data), so
    # the final summary below cannot fail on an unbound loop variable.
    i = None
    print("The data set has {} features and {} data points \n".format(d,n))

    for t in range(tau):
        changed = False
        for i in range(n):
            y = labels[i]
            x = data[i, :].reshape(d, 1)

            # Signed margin; non-positive means misclassified or on the boundary.
            if (y * (theta.T @ x + theta_0)).item() <= 0:
                theta = theta + y * x
                theta_0 = theta_0 + y
                no_mistakes += 1
                changed = True

            ths += theta
            th0s += theta_0
        if not changed:
            # Converged: each of the remaining (tau - t - 1) sweeps would add
            # the same theta / theta_0 n times, so account for them directly.
            remaining = (tau - t - 1) * n
            ths += remaining * theta
            th0s += remaining * theta_0
            break

    total_steps = n * tau
    if total_steps > 0:
        theta_avg = ths / total_steps
        theta_0_avg = th0s / total_steps
    else:
        # No step was taken; avoid dividing by zero and return the zero sums.
        theta_avg, theta_0_avg = ths, th0s

    print("After {} mistake(s), the Averaged Perceptron algorithm yields theta = {}, theta_0 = {} on the {}th instance".format(no_mistakes, theta_avg, theta_0_avg, i))
    return (theta_avg, theta_0_avg)

**Cross-validation:**

**Gradient Descent:**

- $\theta_{init} \in \mathbb{R}^{m}$

- $\theta \in \mathbb{R}^{m}$



In [23]:
def feval(f, x, y):
    """Evaluate ``f`` at ``(x, y)``.

    ``f`` is either a callable taking ``(x, y)`` or a string containing a
    Python expression written in terms of ``x`` and ``y``.
    """
    if callable(f):
        return f(x, y)
    # SECURITY: eval() executes arbitrary Python code. Only pass expression
    # strings that come from a trusted source (e.g. typed by the notebook user).
    return eval(f)


def _lookup_or_prompt(mapping, keys, prompt):
    """Return the value of the first key of ``keys`` found in ``mapping``; prompt only if none is."""
    for key in keys:
        if key in mapping:
            return mapping[key]
    return input(prompt)


def gradient_descent(theta_init, eta, epsilon, T, func=None, derivative=None):            # enter theta_init as a list
    """Gradient descent for functions of one or two variables.

    Parameters
    ----------
    theta_init : list
        Starting point; its length (1 or 2) selects the branch below.
    eta : float
        Step size.
    epsilon : float
        One-dimensional case only: iteration stops once two consecutive
        function values differ by less than ``epsilon``.
    T : int
        Two-dimensional case only: iteration stops once the step counter
        exceeds ``T`` (so ``T + 1`` updates are performed for ``T >= 0``).
    func : dict, optional
        ``{'f': ...}``, the objective as accepted by ``feval``.  If the key
        is missing the user is prompted for an expression.
    derivative : dict, optional
        One-dimensional case: ``{'df': ...}``.  Two-dimensional case:
        ``{'df_x': ..., 'df_y': ...}``; a single ``'df'`` entry is still
        accepted as a fallback for either partial derivative.  Missing
        entries are prompted for.

    Returns
    -------
    (theta, f_)
        One-dimensional case: two 1-D arrays with the visited points and the
        function values.  Two-dimensional case: ``theta`` has shape
        (2, number_of_points) with one visited point per column, and ``f_``
        is the 1-D array of function values.

    Raises
    ------
    ValueError
        If ``theta_init`` does not have exactly 1 or 2 entries.
    """
    func = {} if func is None else func
    derivative = {} if derivative is None else derivative
    m = len(theta_init)

    # One-dimension case:
    if m == 1:
        f = _lookup_or_prompt(func, ('f',), 'Enter the function f(x) =')
        df = _lookup_or_prompt(derivative, ('df',), 'Enter the function df(x) =')

        points = [np.asarray(theta_init).ravel()[0]]
        values = [feval(f, points[0], 0)]

        while True:
            new_point = points[-1] - eta * feval(df, points[-1], 0)
            points.append(new_point)
            values.append(feval(f, new_point, 0))
            if not np.isfinite(values[-1]):
                # Diverged: the difference test below can never succeed on
                # inf/nan, so stop instead of looping forever.
                break
            if abs(values[-1] - values[-2]) < epsilon:
                break

        theta = np.array(points)
        f_ = np.array(values)
    # Two-dimension case:
    elif m == 2:
        f = _lookup_or_prompt(func, ('f',), 'Enter the function f(x, y) =')
        df_x = _lookup_or_prompt(derivative, ('df_x', 'df'), 'Enter the function df(x) =')
        df_y = _lookup_or_prompt(derivative, ('df_y', 'df'), 'Enter the function df(y) =')

        x0, y0 = np.asarray(theta_init).ravel()
        points = [(x0, y0)]
        values = [feval(f, x0, y0)]

        t = 0
        while True:
            t += 1
            x_prev, y_prev = points[-1]
            # Both partial derivatives are evaluated at the full current point
            # so that they may depend on either coordinate.
            x_new = x_prev - eta * feval(df_x, x_prev, y_prev)
            y_new = y_prev - eta * feval(df_y, x_prev, y_prev)
            points.append((x_new, y_new))
            values.append(feval(f, x_new, y_new))
            if t > T:
                break

        theta = np.array(points).T      # one visited point per column
        f_ = np.array(values)
    else:
        raise ValueError("gradient_descent supports 1 or 2 parameters, got {}".format(m))

    return theta, f_

**Linear Regression Gradient Descent:**

**Stochastic Gradient Descent:**

In [None]:
def sgd(theta_init):
    