In [None]:
import numpy as np
import pandas as pd
import scipy
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import time
import tqdm
import sklearn
from sklearn import datasets
from sklearn import svm

# Linear Regression


*   Dataset: Boston house prices
*   Objective: implement both the numerical and the analytical solutions
*   Plot the loss and check for convergence
*   Compare with Linear Regression from sklearn


1.   Analytical solution: $\beta = (X^TX)^{-1}X^T y$
2.   Numerical solution:

      *   Loss function (MSE): $\mathcal{L} (y, \hat{y}) = \frac{1}{N}\sum_{i=1}^N (y_i-\hat{y}_i)^2$
      *   Update rule: $\beta_{t+1} = \beta_t - \gamma \nabla \mathcal{L} (\beta_t)$
      *   Momentum: $\Delta \beta_{t+1} = -\gamma \nabla \mathcal{L}(\beta_{t}) + \alpha\Delta \beta_{t}$, with $\Delta \beta_0 = 0$
      *   Update rule for momentum:  $\beta_{t+1} = \beta_t + \Delta \beta_{t+1}$









In [None]:
# LOAD DATA
# NOTE(review): load_boston appears to have been removed from recent
# scikit-learn releases -- confirm the installed version still provides it.
X, y = datasets.load_boston(return_X_y=True)
# Prepend a constant column of ones so the first coefficient is the intercept.
X = np.hstack((np.ones((X.shape[0], 1)), X))

In [None]:
# IMPLEMENT ANALYTICAL SOLUTION HERE
start_time = time.time()
def coef(X, y):
  """Return the least-squares coefficients beta = (X^T X)^{-1} X^T y.

  The normal equations (X^T X) beta = X^T y are solved directly instead of
  forming the explicit inverse, which is cheaper and numerically more stable.

  X is the design matrix (one row per sample, including any intercept
  column) and y the target vector.
  """
  return np.linalg.solve(X.T.dot(X), X.T.dot(y))
def predict(X, y):
  """Return the fitted values X.dot(beta), with beta estimated from (X, y)."""
  return X.dot(coef(X, y))
def cost_function(X, y):
  """Return the mean squared error of the analytical fit on (X, y)."""
  # Divide by the actual number of samples rather than a hard-coded count.
  return np.linalg.norm(predict(X, y) - y)**2 / X.shape[0]
# Time the computation itself: the clock is restarted here because the
# timestamp taken above the definitions only brackets the `def` statements.
start_time = time.time()
beta_analytical = coef(X, y)
mse_analytical = cost_function(X, y)
end_time = time.time()

print(beta_analytical)
print(mse_analytical)
print('Required time for analytical solution: ', end_time - start_time)

[ 3.64594884e+01 -1.08011358e-01  4.64204584e-02  2.05586264e-02
  2.68673382e+00 -1.77666112e+01  3.80986521e+00  6.92224640e-04
 -1.47556685e+00  3.06049479e-01 -1.23345939e-02 -9.52747232e-01
  9.31168327e-03 -5.24758378e-01]
21.8948311817292
Required time for analytical solution:  0.0001404285430908203


In [None]:
# IMPLEMENT GRADIENT DESCENT 
class LinearRegression:
  """Least-squares linear regression fitted by gradient descent.

  After ``fit`` or ``fit_momentum`` the instance exposes two aligned lists:
    beta -- coefficient vector of every iterate (random initial guess first)
    loss -- mean squared error of every iterate
  """

  def loss_function(self, X, y, beta):
    """Return the mean squared error of the predictions ``X.dot(beta)``."""
    N = X.shape[0]
    return np.linalg.norm(y - X.dot(beta))**2 / N

  def gradient(self, X, y, beta):
    """Return the gradient of the mean squared error with respect to beta."""
    N = X.shape[0]
    return 2*X.T.dot(X.dot(beta) - y) / N

  def fit(self, X, y, tau = 1e-4, gamma = 1e-6, max_iter = None):
    """Run plain gradient descent and return the final coefficient vector.

    tau      -- stop once the loss changes by less than this between iterates
    gamma    -- learning rate
    max_iter -- optional cap on the number of updates (None = no cap)

    The starting point is drawn with np.random.rand, so seed NumPy's global
    generator beforehand for reproducible results.
    """
    beta = [np.random.rand(X.shape[1])]
    loss = [self.loss_function(X, y, beta[-1])]
    cost_dif = np.inf             # guarantees at least one update
    n_iter = 0
    while cost_dif >= tau and (max_iter is None or n_iter < max_iter):
      beta_new = beta[-1] - gamma*self.gradient(X, y, beta[-1])
      cost_new = self.loss_function(X, y, beta_new)
      cost_dif = abs(cost_new - loss[-1])
      loss.append(cost_new)
      beta.append(beta_new)
      n_iter += 1
    self.beta = beta
    self.loss = loss
    return beta[-1]

  def fit_momentum(self, X, y, tau = 1e-4, gamma = 1e-6, alpha = 1e-3, max_iter = None):
    """Run gradient descent with momentum and return the final coefficients.

    Each step is ``delta = alpha*delta - gamma*gradient(beta)`` followed by
    ``beta = beta + delta``, starting from ``delta = 0``.

    tau      -- stop once the loss changes by less than this between iterates
    gamma    -- learning rate
    alpha    -- momentum coefficient applied to the previous step
    max_iter -- optional cap on the number of updates (None = no cap)
    """
    beta = [np.random.rand(X.shape[1])]
    loss = [self.loss_function(X, y, beta[-1])]
    delta = np.zeros_like(beta[-1])   # no previous step before the first update
    cost_dif = np.inf                 # guarantees at least one update
    n_iter = 0
    while cost_dif >= tau and (max_iter is None or n_iter < max_iter):
      # Build new arrays on every iteration: updating an iterate in place
      # would make all stored entries alias one buffer, which both corrupts
      # the history and cancels the momentum term.
      delta = alpha*delta - gamma*self.gradient(X, y, beta[-1])
      beta_new = beta[-1] + delta
      cost_new = self.loss_function(X, y, beta_new)
      cost_dif = abs(cost_new - loss[-1])
      loss.append(cost_new)
      beta.append(beta_new)
      n_iter += 1
    self.beta = beta
    self.loss = loss
    return beta[-1]


# Fit with the default tolerance and learning rate and print the final
# coefficient vector returned by fit().
# NOTE(review): the coefficients recorded in the output below are far from the
# analytical solution printed earlier -- with the default gamma/tau on these
# unscaled features the descent probably stops long before convergence;
# consider standardizing the features or tuning gamma/tau.
print(LinearRegression().fit(X, y))



[ 0.29587213 -0.10279402  0.1148915   0.17073693  0.51503422  0.6224361
  0.77536162  0.10091108  0.25746408  0.24423899 -0.01300213  0.42668563
  0.02989604 -0.76293644]


In [None]:
# SOLUTION FROM SKLEARN
# Imported under an alias so the hand-written LinearRegression class defined
# earlier in this notebook is not shadowed.
from sklearn.linear_model import LinearRegression as SklearnLinearRegression
from sklearn.metrics import mean_squared_error
start_time = time.time()
model = SklearnLinearRegression().fit(X, y)
# Equivalent to model.intercept_ + np.sum(model.coef_ * X, axis=1)
y_pred = model.predict(X)
mse = mean_squared_error(y, y_pred)
end_time = time.time()

# Build the comparison vector on a copy so the fitted model is not modified:
# slot 0 (the constant column of X) is replaced by the estimator's own
# intercept so the layout matches the analytical solution.
beta_sklearn = model.coef_.copy()
beta_sklearn[0] = model.intercept_
print(beta_sklearn)
print(mse)
print('Required time for sklearn: ', end_time - start_time) 

[ 3.64594884e+01 -1.08011358e-01  4.64204584e-02  2.05586264e-02
  2.68673382e+00 -1.77666112e+01  3.80986521e+00  6.92224640e-04
 -1.47556685e+00  3.06049479e-01 -1.23345939e-02 -9.52747232e-01
  9.31168327e-03 -5.24758378e-01]
21.8948311817292
Required time for sklearn:  0.0041391849517822266


# Logistic Regression

*   Dataset: breast cancer dataset
*   Objective: implement the numerical solution (gradient descent)
*   Plot the loss and check for convergence
*   Compare with Logistic Regression from sklearn


#   Numerical solution:

*   Loss function (negative log-likelihood, i.e. binary cross-entropy), with $\hat{y}_i = \sigma(x_i^T\beta)$: $\mathcal{L} (y,\hat{y}) = -\frac{1}{N}\sum_{i=1}^N \left[y_i\cdot\log(\hat{y}_i)+(1-y_i)\log(1-\hat{y}_i)\right]$
      

In [None]:
# Load the breast-cancer features and binary labels.
X, y = datasets.load_breast_cancer(return_X_y=True)
# Prepend a constant column of ones so the first coefficient is the intercept.
X = np.hstack((np.ones((X.shape[0], 1)), X))

In [None]:
class LogisticRegression:
    """Binary logistic regression fitted by batch gradient descent.

    After ``fit`` the instance exposes two aligned lists:
      beta -- coefficient vector of every iterate (random initial guess first)
      loss -- cross-entropy loss of every iterate
    """

    def logistic_function(self, t):
        """Return the sigmoid 1 / (1 + exp(-t)) without overflowing."""
        # exp(-log(1 + exp(-t))) equals the sigmoid, and logaddexp evaluates
        # the inner logarithm without exponentiating large values.
        return np.exp(-np.logaddexp(0, -t))

    def cost_function(self, X, y, beta):
        """Return the mean cross-entropy loss of the model on (X, y)."""
        z = X.dot(beta)
        # -[y*log(s) + (1-y)*log(1-s)] with s = sigmoid(z) simplifies to
        # log(1 + exp(z)) - y*z, which stays finite when s rounds to 0 or 1.
        return np.sum(np.logaddexp(0, z) - y*z) / X.shape[0]

    def gradient(self, X, y, beta):
        """Return the gradient of the cross-entropy loss with respect to beta."""
        yhat = self.logistic_function(X.dot(beta))
        return np.dot(X.T, yhat - y) / X.shape[0]

    def fit(self, X, y, tau = 1e-3, lr = 1e-6, max_iter = None):
        """Run gradient descent and return the final coefficient vector.

        tau      -- stop once the loss changes by less than this between iterates
        lr       -- learning rate
        max_iter -- optional cap on the number of updates (None = no cap)

        The starting point is drawn with np.random.rand, so seed NumPy's
        global generator beforehand for reproducible results.
        """
        beta = [np.random.rand(X.shape[1])]
        loss = [self.cost_function(X, y, beta[-1])]
        cost_dif = np.inf             # guarantees at least one update
        n_iter = 0
        while cost_dif >= tau and (max_iter is None or n_iter < max_iter):
            beta_new = beta[-1] - lr*self.gradient(X, y, beta[-1])
            cost_new = self.cost_function(X, y, beta_new)
            cost_dif = abs(cost_new - loss[-1])
            loss.append(cost_new)
            beta.append(beta_new)
            n_iter += 1
        self.beta = beta
        self.loss = loss
        return beta[-1]

# Fit the logistic model defined above (not the linear one) on the
# breast-cancer data and print the final coefficient vector.
print(LogisticRegression().fit(X,y))

[ 0.95275592 -0.13419094  0.12339747  0.14793811  0.16405975  0.63696411
  0.73637154  0.10416876  0.11083533  0.38636659 -0.01871853  0.49343743
  0.03047657 -0.78361108]


# Support Vector Machine


*   Dataset: breast cancer
*   Objective: understand how SVMs work and how the choice of kernel affects the decision boundary on this dataset



In [None]:
X, y = datasets.load_breast_cancer(return_X_y = True)
# Keep only the first two feature columns so the boundary can be drawn in 2-D.
X = X[:,:2]

# fit the model, don't regularize for illustration purposes
# (the kernel name must be lowercase: 'Linear' is not a valid SVC kernel)
clf = svm.SVC(kernel='linear', C=1000)
clf.fit(X, y)

fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)

# plot the decision function over the area covered by the data
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# create grid to evaluate model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)

# plot decision boundary (level 0) and margins (levels -1 and 1)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])
# plot support vectors
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
ax.set(xlabel='feature 0', ylabel='feature 1',
       title='Linear SVM: decision boundary, margins and support vectors')
plt.show()

NameError: ignored