In [2]:
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [45]:
class Validation:
    def __init__(self, X_train,y_train,X_test = None,y_test = None,w_vect = None):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        
        # in the non-linear case, will define Z_mat
        self.Z_mat = None
        
        w_vect = np.array([0.,0.,0.]).T
    
    def transform(self,data_set):
        arr = []
        for point in data_set:
            x1,x2 = point[1],point[2]
            arr.append([1,x1,x2,x1**2,x2**2,x1*x2,np.abs(x1-x2),np.abs(x1+x2)])
        return arr
    
    def lr_train(self,k):
        y_vect = np.array(self.y_train)
        
        self.Z_train = self.transform(self.X_train)
        self.w_vect = np.dot(np.linalg.pinv(self.Z_train),y_vect)
        
        # add regularization (weight decay)
        if k is not None:
            lmbda = 10**(k)
#             w_sqrd = (np.linalg.norm(np.array(self.w_vect)))**2
            w_sqrd = np.dot(np.array(self.w_vect).T,np.array(self.w_vect))
            
            N = len(y_vect)
            decay = (lmbda / float(N)) * w_sqrd 
            
            regularization = np.linalg.pinv(self.Z_train + decay)
            print(regularization)
            self.w_vect = np.dot(regularization, y_vect)
                        
    def test(self):
        error = 0
        self.Z_test = np.array(self.transform(self.X_test))
        y_pred = np.sign(np.dot(self.Z_test, self.w_vect))
        print(np.dot(self.Z_test, self.w_vect))
        # count all different members
        for i in range(len(y_pred)-1):
            if y_pred[i][0] != self.y_test[i]:
                error += 1
        return error

    
def run(e_in=False,k=None,validation = False):
    
    # load training and testing data
    df1 = pd.read_table("in.dta.txt",delim_whitespace=True,header=None)
    df2 = pd.read_table("out.dta.txt",delim_whitespace=True, header = None)
    
    X_train = [[1,df1[0][i],df1[1][i]] for i in range(25)]
    X_val = [[1,df1[0][i],df1[1][i]] for i in range(25,35)]
    
    y_train = [[df1[2][i]] for i in range(25)]
    y_val = [df1[2][j] for j in range(25,35)]
    
    if not e_in:
        # load testing data

        X_test = [[1,df2[1][i],df2[0][i]] for i in range(len(df2)-1)]
        y_test = [df2[2][i] for i in range(len(df2)-1)]
   
    else:
        # training data is used for testing
        X_test = X_train
        y_test = y_train
    
        if validation:
            X_test = X_val
            y_test = y_val
    
    

    k_vals = (3,4,5,6,7)
    val = Validation(X_train,y_train,X_test,y_test)

    for k in k_vals:
        val.lr_train(k = k)
        error = val.test()
        print("for k = " + str(k) + " E_val = " + str(error/ float(len(X_val)))) 
        
#     return error / float(len(X_test))
        
if __name__ == "__main__":
    run(True,validation = True)
    

[[  2.34368118e-01  -4.63206496e-01   3.44328998e-02  -9.12764718e-02
    9.89821557e-01   2.82949398e-01   5.70628516e-02  -1.61327496e-01
    3.13018437e-01  -1.07318997e+00   8.73466815e-02   3.82786931e-01
   -1.00318760e+00  -1.66960496e-01  -1.09417312e+00  -2.13660852e-01
    1.27378387e+00   8.01750170e-01   9.99357295e-01  -5.83197171e-02
    5.40493171e-01   1.95504771e-01  -1.41611342e+00  -4.97868725e-01
    4.72079764e-02]
 [ -3.46584847e-02   3.96400493e-03  -7.32910018e-02   1.17822745e-02
   -4.46889631e-02   9.20114800e-02  -5.58832656e-02   1.03592706e-01
    5.92659304e-02   1.46837516e-01   6.81479410e-02  -5.60151771e-02
    5.35499612e-02  -1.25616457e-03  -1.72175857e-01  -4.28938210e-02
   -7.60963200e-02  -1.11333696e-01   5.25756630e-02   1.01927559e-01
    3.86996119e-02   2.45152303e-02  -1.03850168e-01  -7.23225067e-02
    8.75955488e-02]
 [  7.63109132e-02   5.84198395e-02  -1.22657365e-01   5.66804812e-02
   -5.03702171e-02  -1.04652742e-01  -8.45913452e-