In [36]:
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [54]:
class Validation:
    def __init__(self, X_train,y_train,X_test = None,y_test = None,w_vect = None):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        
        # in the non-linear case, will define Z_mat
        self.Z_mat = None
        
        if w_vect is None:
            w_vect = np.array([0.,0.,0.]).T
        else: 
            w_vect = w_vect
    
    def transform(self,data_set):
        arr = []
        for point in data_set:
            x1,x2 = point[1],point[2]
            arr.append([1,x1,x2,x1**2,x2**2,x1*x2,np.abs(x1-x2),np.abs(x1+x2)])
        return arr
    
    def lr_train(self,k = None):
        y_vect = np.array([self.y_train]).T

        self.Z_train = self.transform(self.X_train)
        self.w_vect = np.dot(np.linalg.pinv(self.Z_train),y_vect)
        
        # add regularization (weight decay)
        if k is not None:
            lmbda = 10**(k)
            w_sqrd = (np.linalg.norm(self.w_vect))**2
            N = len(y_vect)
            decay = (lmbda / float(N)) * w_sqrd 
            
            regularization = np.linalg.pinv(self.Z_train + decay)
            self.w_vect = np.dot(regularization, y_vect)

             
    def test(self):
        error = 0
        self.Z_test = self.transform(self.X_test)
        y_pred = np.sign(np.dot(self.Z_test, self.w_vect))

        # count all different members
        for i in range(len(y_pred)-1):
            if y_pred[i][0] != self.y_test[i]:
                error += 1
        return error

    
def run(e_in=False,k=None,validation = False):
    
    # load training and testing data
    df1 = pd.read_table("in.dta.txt",delim_whitespace=True,header=None)
    df2 = pd.read_table("out.dta.txt",delim_whitespace=True, header = None)

    X_train = [[1,df1[1][i],df1[0][i]] for i in range(25)]
    X_val = [[1,df1[1][i],df1[0][i]] for i in range(25,35)]

    y_train = [df1[2][i] for i in range(25)]
    y_val = [df1[2][i] for i in range(25,35)]
    
    if not e_in:
        # load testing data

        X_test = [[1,df2[1][i],df2[0][i]] for i in range(len(df2)-1)]
        y_test = [df2[2][i] for i in range(len(df2)-1)]
   
    else:
        # training data is used for testing
        X_test = X_train
        y_test = y_train
    
        if validation:
            X_test = X_val
            y_test = y_val
    
#     print(np.array_equal(X_val,X_test), np.array_equal(y_val,y_test))
    
    val = Validation(X_train,y_train,X_val,y_val)
    

    k_vals = (3,4,5,6,7)
    
    for k in k_vals:
        val.lr_train(k = k)
        error = val.test()
        print("for k = " + str(k) + " E_val = " + str(error/ float(len(X_val)))) 
        
#     return error / float(len(X_test))
        
if __name__ == "__main__":
    run(True,validation = True)
    

for k = 3 E_val = 0.5
for k = 4 E_val = 0.5
for k = 5 E_val = 0.5
for k = 6 E_val = 0.5
for k = 7 E_val = 0.5
