In [None]:
import os

import requests

def _download_if_missing(url, path, done_msg, skip_msg):
  """Download ``url`` to ``path`` unless ``path`` already exists.

  The request is made *before* the file is created, so a failed download
  cannot leave an empty file behind that later runs would mistake for data.

  Args:
    url: address to fetch with ``requests.get``.
    path: local file to write the response text to.
    done_msg: printed after the file has been written.
    skip_msg: printed when ``path`` already exists and nothing is fetched.

  Raises:
    requests.HTTPError: if the server answers with an error status.
  """
  if os.path.exists(path):
    print(skip_msg)
    return

  response = requests.get(url, timeout=30)
  # Refuse to store an HTTP error page as if it were the dataset.
  response.raise_for_status()

  # The context manager closes (and flushes) the file before anything reads it.
  with open(path, "w") as datafile:
    datafile.write(response.text)
  print(done_msg)


_download_if_missing(
  "http://work.caltech.edu/data/in.dta",
  "in.dta",
  "Downloaded training data",
  "Training data already downloaded",
)
_download_if_missing(
  "http://work.caltech.edu/data/out.dta",
  "out.dta",
  "Downloaded test data",
  "Test data already downloaded",
)

Downloaded training data
Downloaded test data


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Parse the two whitespace-separated data files into 2-D arrays.
train_data = np.loadtxt("in.dta")
test_data = np.loadtxt("out.dta")

# Columns 0-1 become the X arrays, column 2 becomes the Y arrays.
trainX = train_data[:, :2]
trainY = train_data[:, 2]
testX = test_data[:, :2]
testY = test_data[:, 2]

# Number of rows in each split.
trainN = len(trainX)
testN = len(testX)

def transform(X):
  """Map each row (x1, x2) of X to an 8-column feature row.

  The columns are, in order:
  1, x1, x2, x1^2, x2^2, x1*x2, |x1 - x2|, |x1 + x2|.

  Args:
    X: 2-D array; only columns 0 and 1 are read.

  Returns:
    Array with one row per row of X and eight columns.
  """
  a = X[:, 0]
  b = X[:, 1]
  columns = (
    np.ones_like(a),  # constant term, same dtype as the inputs
    a,
    b,
    np.square(a),
    np.square(b),
    a * b,
    np.abs(a - b),
    np.abs(a + b),
  )
  return np.column_stack(columns)

def error(X, Y, w, N):
  """Return the fraction of the first N points that w misclassifies.

  Each row of X is passed through transform(); the prediction for row i is
  sign(w . z_i), and it counts as an error whenever it differs from Y[i].

  Args:
    X: input rows, forwarded to transform().
    Y: targets, indexed in step with the rows of X.
    w: weight vector applied to the transformed rows.
    N: how many leading rows to score; also the denominator.

  Returns:
    Number of mismatches divided by N.
  """
  z = transform(X)
  mismatches = sum(
    1
    for row in range(N)
    if np.sign(np.dot(w.T, z[row])) != Y[row]
  )
  return mismatches / N

def train_and_find_error(k):
  """Fit regularized linear regression with lambda = 10**k and print both errors.

  With Z = transform(trainX) and y = trainY (both read from the notebook's
  globals), this solves the regularized normal equations

      (Z^T Z + lambda * I) w = Z^T y

  and prints the value of error() on the training set and on the test set.

  Args:
    k: exponent of the regularization coefficient (lambda = 10 ** k).

  Returns:
    None; the results are printed.
  """
  lam = 10 ** k

  Z = transform(trainX)
  # Take the identity's size from Z itself so it cannot drift out of sync
  # with the number of columns transform() produces.
  n_features = Z.shape[1]
  regularized_gram = Z.T @ Z + lam * np.eye(n_features)

  # Solve the linear system directly rather than forming an explicit inverse.
  w = np.linalg.solve(regularized_gram, Z.T @ trainY)

  print("k:", k)
  print("In Sample Error:", error(trainX, trainY, w, trainN))
  print("Out Sample Error:", error(testX, testY, w, testN))
  print("______________________")

In [None]:
# Baseline: linear regression on the transformed inputs with no regularization
# term added to Z^T Z.
transform_len = 8  # not read anywhere in this cell

trainX_transform = transform(trainX)
X_transpose = trainX_transform.T
X_transpose_X = X_transpose @ trainX_transform
X_inverse = np.linalg.inv(X_transpose_X)
# (Z^T Z)^-1 Z^T applied to the training targets gives the weights.
X_dagger = X_inverse @ X_transpose
w = X_dagger @ trainY

print("In Sample Error:", error(trainX, trainY, w, trainN))
print("Out Sample Error:", error(testX, testY, w, testN))

In Sample Error: 0.02857142857142857
Out Sample Error: 0.084


In [None]:
# Regularized fit with k = -3, i.e. a regularization coefficient of 10**-3.
train_and_find_error(-3)

k: -3
In Sample Error: 0.02857142857142857
Out Sample Error: 0.08
______________________


In [None]:
# Regularized fit with k = 3, i.e. a regularization coefficient of 10**3.
train_and_find_error(3)

k: 3
In Sample Error: 0.37142857142857144
Out Sample Error: 0.436
______________________


In [None]:
# Sweep the integer exponents k = -2, ..., 2 (range's upper bound is exclusive).
for i in range(-2, 3):
  train_and_find_error(i)

k: -2
In Sample Error: 0.02857142857142857
Out Sample Error: 0.084
______________________
k: -1
In Sample Error: 0.02857142857142857
Out Sample Error: 0.056
______________________
k: 0
In Sample Error: 0.0
Out Sample Error: 0.092
______________________
k: 1
In Sample Error: 0.05714285714285714
Out Sample Error: 0.124
______________________
k: 2
In Sample Error: 0.2
Out Sample Error: 0.228
______________________


Since $k = -3$ under-regularizes and $k = 3$ over-regularizes, we test the integer values of $k$ strictly between them ($-2 \le k \le 2$), where the regularization coefficient is $\lambda = 10^k$.

In [None]:
# Evaluate every integer k from -2 through 2 (range's upper bound is exclusive).
for i in range(-2, 3):
  train_and_find_error(i)

k: -2
In Sample Error: 0.02857142857142857
Out Sample Error: 0.084
______________________
k: -1
In Sample Error: 0.02857142857142857
Out Sample Error: 0.056
______________________
k: 0
In Sample Error: 0.0
Out Sample Error: 0.092
______________________
k: 1
In Sample Error: 0.05714285714285714
Out Sample Error: 0.124
______________________
k: 2
In Sample Error: 0.2
Out Sample Error: 0.228
______________________
