In [None]:
#Q1
"""
We use our custom function to approximate the sine function.
"""

import torch
import math


class CustomFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return grad_output


dtype = torch.float
device = torch.device("cuda:0")

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x) # We approximate this sine function.

# In our model, we have 4 weights to train: y = a + b * P3(c + d * x).
# These weights need to be initialized.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    P3 = CustomFunction.apply

    # Forward pass: predict y.
    # P3 using our custom backward function.
    y_pred = a + b * P3(c + d * x)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()

    # Update weights using gradient descent
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually zero the gradients after updating weights
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None        

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 1051.36669921875
199 891.9246826171875
299 869.5606079101562
399 856.9406127929688
499 848.6029663085938
599 842.5750122070312
699 848.6878662109375
799 842.6385498046875
899 849.3232421875
999 843.1123657226562
1099 849.3018188476562
1199 843.0963745117188
1299 849.290771484375
1399 843.0882568359375
1499 849.9043579101562
1599 843.5435791015625
1699 850.6546630859375
1799 844.0972900390625
1899 851.34814453125
1999 844.60595703125
Result: y = -8.150967401032716e-11 + -0.02794724516570568 * P3(-2.2004557898025467e-10 + -0.7375721335411072 x)


In [285]:
def custom_kernel1(X, Y):
  # Polynomial(homogeneous)
  # (X * Y)^d
  out = (np.dot(X, Y.T)) ** 2
  return out

In [286]:
def custom_kernel2(X, Y):
  # Polynomial(inhomogeneous)
  # (X * Y + c)^d
  out = (np.dot(X, Y.T) + 5) ** 2
  return out

In [287]:
def custom_kernel3(X, Y):
  # Gaussian radial basis function(RBF)
  # exp(-gamma * (magnitude(x-y))^2)
  gamma = 1/2
  temp = np.square(X[:, np.newaxis] - Y).sum(axis=2)
  out = np.exp(-gamma * temp)
  return out

In [288]:
def custom_kernel4(X, Y):
  # Hyperbolic tangent(sigmoid)
  # tanh(k * X * Y + c)
  out = np.tanh(1 / 250 * np.dot(X,Y.T) - 2)
  return out

In [289]:
def custom_kernel5(X, Y):
  # Linear
  # X * Y + c
  out = np.dot(X, Y.T) + 1
  return out

In [290]:
def custom_kernel6(X, Y):
  # Cosine kernel
  # (X * Y) / (magnitude(X) * magnitude(Y))
  out = np.dot(X, Y.T) / (np.linalg.norm(X, axis=1)[:,np.newaxis] * np.linalg.norm(Y, axis=1))
  return out

In [291]:
def custom_kernel7(X, Y):
  # Multiquadric kernel
  # sqrt(magnitude(X-Y)^2 + c^2)
  temp = np.square(X[:, np.newaxis] - Y).sum(axis=2)
  out = np.sqrt(temp + 500 ** 2)
  return out 

In [292]:
def custom_kernel8(X, Y):
  # Log kernel
  # -log((X-Y)^d + 1)
  temp = ((X[:, np.newaxis] - Y) ** 2).sum(axis=2) + 1
  out = -np.log(temp)
  return out

In [293]:
def custom_kernel9(X, Y):
  # Cauchy kernel
  # 1 / (1 + magnitude((X-Y)^2) / sigma^2)
  temp = np.square(X[:, np.newaxis] - Y).sum(axis=2)
  out = 1 / (1 + temp / (13**2))
  return out

In [294]:
def custom_kernel10(X, Y):
  # Tstudent kernel
  # 1 / (1 + (magnitude(X-Y))^d)
  temp = ((X[:, np.newaxis] - Y) ** 3).sum(axis=2)
  out = 1 / (1 + temp)
  return out

In [295]:
#Q2
"""
We will implement many custom kernels. Try to improve the classification accuracy and F-1 scores.
"""

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
import sklearn
import pandas as pd
import numpy as np
#You must use a random state of 2011 for this homework.
X = (pd.read_csv("trainX.csv")).values
Y = (pd.read_csv("trainY.csv")).values
XTe = (pd.read_csv("testX.csv")).values
YTe = (pd.read_csv("testY.csv")).values

#clf = SVC(random_state=2011)
clf = SVC(random_state=2011, kernel=custom_kernel6)
clf.fit(X, Y)
yp = clf.predict(XTe)
print(accuracy_score(YTe, yp))
print(f1_score(YTe, yp, average='macro'))

# The version of sklearn should be "0.22.2.post1" for reproducibility.
print(sklearn.__version__)

0.5128205128205128
0.4914207275223061
0.22.2.post1


  y = column_or_1d(y, warn=True)
