In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 1.1 Kernel Ridge Regression

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
data = pd.read_csv('Regression_dataset.csv')
data

Unnamed: 0,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,X_9,X_10,X_11,X_12,X_13,Y
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [4]:
y = data['Y']
data = data.drop(columns='Y')

In [5]:
X = data.loc[:,]
X

Unnamed: 0,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,X_9,X_10,X_11,X_12,X_13
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48


In [6]:
# X = StandardScaler().fit_transform(X)
X = np.asarray(X,dtype=np.float)
y = np.asarray(y,dtype=np.float)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print(X_train.shape)
print(X_test.shape)

(404, 13)
(102, 13)


In [8]:
def linear_kernel(X,Y):
    assert X.shape[1] == Y.shape[1]
    K = np.matmul(X,Y.T)
    return K

def polynomial_kernel(X,Y,degree=3,gamma=None,a=1):
    assert X.shape[1] == Y.shape[1]
    if gamma is None:
        gamma = 1./X.shape[1]
    K = np.matmul(X,Y.T)
    K *= gamma
    K += a
    K **= degree
    return K

In [9]:
X_train

array([[3.47428e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.54550e+02, 5.29000e+00],
       [2.73397e+00, 0.00000e+00, 1.95800e+01, ..., 1.47000e+01,
        3.51850e+02, 2.14500e+01],
       [2.53870e-01, 0.00000e+00, 6.91000e+00, ..., 1.79000e+01,
        3.96900e+02, 3.08100e+01],
       ...,
       [4.22239e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.53040e+02, 1.46400e+01],
       [7.90410e-01, 0.00000e+00, 9.90000e+00, ..., 1.84000e+01,
        3.96900e+02, 5.98000e+00],
       [8.64476e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.96900e+02, 1.51700e+01]])

In [10]:
def train_and_test(X_train,y_train,X_test,y_test,\
                   kernel='linear',\
                   degree=3,\
                   gamma=None,\
                   a=1,\
                   lambda_=1):
    if kernel == 'linear':
        K = linear_kernel(X_train,X_train)
    elif kernel == 'poly':
        K = polynomial_kernel(X_train,X_train,degree=degree,gamma=gamma,a=a)
    else:
        print('Invalid Kernel !')
        return
    alpha = np.matmul(np.linalg.inv(K+lambda_*np.eye(K.shape[0])),y_train)
    if kernel == 'linear':
        h = linear_kernel(X_train,X_test)
    elif kernel == 'poly':
        h = polynomial_kernel(X_train,X_test,degree=degree,gamma=gamma,a=a)

    y_preds = np.matmul(alpha.T,h)
    y_train_preds = np.matmul(alpha.T,K)
    return y_preds, np.linalg.norm(y_test-y_preds)**2/y_test.shape[0],np.linalg.norm(y_train-y_train_preds)**2/y_train.shape[0]

In [11]:
lambda_list = np.logspace(-4,4,9)
for lambda_ in lambda_list:
    y_pred, te_error, tr_error = train_and_test(X_train,y_train,X_test,y_test,kernel='linear',lambda_=lambda_)
    print('lambda: {}, Test Error:{:.6f}, Train Error:{:.6f}'.format(lambda_,te_error, tr_error))

lambda: 0.0001, Test Error:34.886863, Train Error:21.819300
lambda: 0.001, Test Error:34.887980, Train Error:21.819233
lambda: 0.01, Test Error:34.889015, Train Error:21.819232
lambda: 0.1, Test Error:34.897987, Train Error:21.819290
lambda: 1.0, Test Error:34.978330, Train Error:21.822057
lambda: 10.0, Test Error:35.529227, Train Error:21.865420
lambda: 100.0, Test Error:39.401669, Train Error:23.480819
lambda: 1000.0, Test Error:55.976666, Train Error:35.413605
lambda: 10000.0, Test Error:71.653994, Train Error:49.507592


In [12]:
lambda_list = np.logspace(-4,4,9)
degree_list = np.arange(5)
for degree in degree_list:
    for lambda_ in lambda_list:
        y_pred, te_error, tr_error = train_and_test(X_train,y_train,X_test,y_test,\
                                                    kernel='poly',lambda_=lambda_,degree=degree)
        print('degree: {}, lambda: {}, Test Error:{:.6f}, Train Error:{:.6f}'.format(degree,lambda_,te_error, tr_error))
    print('-'*50)

degree: 0, lambda: 0.0001, Test Error:102.599013, Train Error:79.954353
degree: 0, lambda: 0.001, Test Error:102.599168, Train Error:79.954353
degree: 0, lambda: 0.01, Test Error:102.600717, Train Error:79.954354
degree: 0, lambda: 0.1, Test Error:102.616235, Train Error:79.954383
degree: 0, lambda: 1.0, Test Error:102.773712, Train Error:79.957363
degree: 0, lambda: 10.0, Test Error:104.566733, Train Error:80.242347
degree: 0, lambda: 100.0, Test Error:135.829117, Train Error:99.386578
degree: 0, lambda: 1000.0, Test Error:402.538591, Train Error:330.363039
degree: 0, lambda: 10000.0, Test Error:625.458993, Train Error:535.973331
--------------------------------------------------
degree: 1, lambda: 0.0001, Test Error:34.029277, Train Error:19.264437
degree: 1, lambda: 0.001, Test Error:34.026447, Train Error:19.264570
degree: 1, lambda: 0.01, Test Error:34.010227, Train Error:19.275494
degree: 1, lambda: 0.1, Test Error:34.189107, Train Error:19.575142
degree: 1, lambda: 1.0, Test Err

# Section 1.2 

In [13]:
data = pd.read_csv('Dataset_3_Team_32.csv')
data

Unnamed: 0,# x_1,x_2,Class_label
0,0.035054,-0.716012,1
1,-0.196325,0.525494,1
2,0.178779,0.941999,0
3,0.888123,-0.115124,0
4,-0.272846,-0.925358,0
...,...,...,...
995,-1.007131,0.164826,0
996,-0.336599,0.509190,1
997,0.518864,-0.933628,0
998,-1.105949,0.092383,0


In [14]:
y = data['Class_label']
X = data.drop(columns='Class_label')
X

Unnamed: 0,# x_1,x_2
0,0.035054,-0.716012
1,-0.196325,0.525494
2,0.178779,0.941999
3,0.888123,-0.115124
4,-0.272846,-0.925358
...,...,...
995,-1.007131,0.164826
996,-0.336599,0.509190
997,0.518864,-0.933628
998,-1.105949,0.092383


In [16]:
X = np.asarray(X,dtype=np.float)
y = np.asarray(y,dtype=np.float)

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print(X_train.shape)
print(X_test.shape)

(800, 2)
(200, 2)


# Kernel SVM

In [39]:
from sklearn.svm import SVC
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)
print(clf.score(X_train,y_train))
print(clf.score(X_test,y_test))

0.54875
0.46


array([390, 390], dtype=int32)

In [40]:
for i in range(10):
    clf = SVC(gamma='auto',kernel='poly',degree=i)
    clf.fit(X_train, y_train)
    print(i, clf.score(X_train,y_train), clf.score(X_test,y_test), clf.n_support_)

0 0.50875 0.465 [393 393]
1 0.545 0.46 [390 390]
2 0.985 0.98 [115 114]
3 0.62875 0.585 [392 392]
4 0.96625 0.945 [186 187]
5 0.51125 0.465 [393 393]
6 0.85 0.83 [325 324]
7 0.50875 0.465 [393 393]
8 0.57 0.545 [388 388]
9 0.50875 0.465 [393 393]


# Kernel Logistic Regression