In [1]:
import numpy as np

In [6]:
# Training feature matrix: CSV columns 1-13 of train.csv, header row skipped
phi = np.loadtxt(
    'train.csv',
    delimiter=',',
    skiprows=1,
    usecols=tuple(range(1, 14)),
    dtype='float',
)
phi

array([[9.55770e-01, 0.00000e+00, 8.14000e+00, ..., 2.10000e+01,
        3.06380e+02, 1.72800e+01],
       [2.87500e-02, 2.80000e+01, 1.50400e+01, ..., 1.82000e+01,
        3.96330e+02, 6.21000e+00],
       [1.22358e+00, 0.00000e+00, 1.95800e+01, ..., 1.47000e+01,
        3.63430e+02, 4.59000e+00],
       ...,
       [6.65492e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.96900e+02, 1.39900e+01],
       [1.31170e-01, 0.00000e+00, 8.56000e+00, ..., 2.09000e+01,
        3.87690e+02, 1.40900e+01],
       [6.46600e-02, 7.00000e+01, 2.24000e+00, ..., 1.48000e+01,
        3.68240e+02, 4.97000e+00]])

In [7]:
# Training targets: CSV column 14 of train.csv, header row skipped.
# ndmin=2 keeps the result two-dimensional for the matrix products below.
y = np.loadtxt(
    'train.csv',
    delimiter=',',
    skiprows=1,
    usecols=14,
    ndmin=2,
    dtype='float',
)

In [8]:
# Test feature matrix: same CSV columns (1-13) as phi, read from test.csv
phi_test = np.loadtxt(
    'test.csv',
    delimiter=',',
    skiprows=1,
    usecols=tuple(range(1, 14)),
    dtype='float',
)
phi_test

array([[1.06120e-01, 3.00000e+01, 4.93000e+00, ..., 1.66000e+01,
        3.94620e+02, 1.24000e+01],
       [3.41090e-01, 0.00000e+00, 7.38000e+00, ..., 1.96000e+01,
        3.96900e+02, 6.12000e+00],
       [1.22472e+01, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        2.46500e+01, 1.56900e+01],
       ...,
       [5.36000e-02, 2.10000e+01, 5.64000e+00, ..., 1.68000e+01,
        3.96900e+02, 5.28000e+00],
       [1.04690e-01, 4.00000e+01, 6.41000e+00, ..., 1.76000e+01,
        3.89250e+02, 6.05000e+00],
       [4.55587e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.54700e+02, 7.12000e+00]])

In [9]:
# Add a column of 1s to the right of phi and phi_test (bias / intercept term).
# The number of rows is taken from each array instead of being hard-coded, so
# the cell keeps working when the CSV files change size.
# NOTE: re-running this cell appends another column each time.
phi_test = np.concatenate((phi_test, np.ones((phi_test.shape[0], 1))), axis=1)
phi = np.concatenate((phi, np.ones((phi.shape[0], 1))), axis=1)

In [10]:
# Min Max scaling for phi and phi_test (Feature Engineering).
# The minimum and range are computed on the training data only and then
# applied to both matrices, so the test set is scaled consistently with phi.
# The last column is the bias column of 1s appended above and is left as-is.
n_features = phi.shape[1] - 1
col_min = phi[:, :n_features].min(axis=0)
col_max = phi[:, :n_features].max(axis=0)
col_range = col_max - col_min
# A constant feature has zero range; divide by 1 instead so the column
# becomes all zeros rather than NaN/inf.
col_range[col_range == 0] = 1.0
phi[:, :n_features] = (phi[:, :n_features] - col_min) / col_range
phi_test[:, :n_features] = (phi_test[:, :n_features] - col_min) / col_range

In [11]:
# Log scaling on y: the model is fitted to log(y) instead of y.
# NOTE: this reassigns y, so running the cell a second time takes the log of
# values that were already log-scaled.
y = np.log(y)

In [12]:
# Function to calculate change in error function
def delta_w(p, phi, w, targets=None, reg=None):
    """Gradient of the L_p-regularised squared error with respect to w.

    Returns
        2 * (phi^T phi w - phi^T targets) + reg * p * |w|^(p-1) * sign(w)

    Parameters
    ----------
    p : float
        Exponent of the penalty term; must satisfy 1 < p <= 2.
    phi : ndarray
        Design matrix.
    w : ndarray
        Current weights.
    targets : ndarray, optional
        Target values. Defaults to the notebook-level variable ``y``.
    reg : float, optional
        Regularisation strength. Defaults to the notebook-level ``lambd``.

    Raises
    ------
    ValueError
        If p is outside the supported interval (1, 2].
    """
    if not 1 < p <= 2:
        raise ValueError("p must satisfy 1 < p <= 2, got %r" % (p,))
    # Fall back to the notebook globals so existing calls delta_w(p, phi, w)
    # keep working unchanged.
    if targets is None:
        targets = y
    if reg is None:
        reg = lambd
    phi_t = np.transpose(phi)
    data_grad = 2 * (np.dot(np.dot(phi_t, phi), w) - np.dot(phi_t, targets))
    # d/dw |w|^p = p * |w|^(p-1) * sign(w). The sign factor is required for
    # p == 2 as well: without it negative weights are pushed the wrong way.
    penalty_grad = reg * p * np.power(np.absolute(w), (p - 1)) * np.sign(w)
    return data_grad + penalty_grad

In [13]:
# Output CSV file name -> exponent p passed to delta_w for that run
filenames = {
    'output.csv': 2.0,
    'output_p1.csv': 1.75,
    'output_p2.csv': 1.5,
    'output_p3.csv': 1.3,
}

In [14]:
# Hyperparameter lambda value (read as a global by delta_w), defined before
# the first call to delta_w so the notebook runs top to bottom.
lambd = 0.2

# Maximum step size
t = 0.00012

# Stop once consecutive weight vectors differ by no more than this norm
tol = 10 ** -10

# Load values of id
id_test = np.loadtxt('test.csv', dtype='int', delimiter=',',
                     skiprows=1, usecols=0, ndmin=2)

# For each item in the filenames dictionary, fit the weights by gradient
# descent, predict on the test set and save the result. All of these steps
# live inside the loop so that every (fname, p) pair produces its own file,
# not only the last one.
for (fname, p) in filenames.items():
    # Set initial w to zeros, one weight per column of phi
    w = np.zeros((phi.shape[1], 1))

    # Calculate new value of w
    w_new = w - t * delta_w(p, phi, w)
    # Repeat steps until error between consecutive w is less than threshold
    while np.linalg.norm(w_new - w) > tol:
        w = w_new
        w_new = w - t * delta_w(p, phi, w)

    # Calculate y for test data; exp undoes the log scaling applied to y
    y_test = np.exp(np.dot(phi_test, w_new))

    # Save the ids and y
    np.savetxt(fname, np.concatenate((id_test, y_test), axis=1),
               delimiter=',', fmt=['%d', '%f'], header='ID,MEDV', comments='')