In [38]:
import numpy as np
import math
from sklearn.cross_validation import KFold
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error

In [39]:
def read_input(filename):
    """Read a whitespace-delimited text file into a list of token lists.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    list of list of str
        One inner list per non-blank line, holding that line's
        whitespace-separated tokens (still as strings).
    """
    input_data = []
    # The context manager guarantees the handle is closed, even if reading
    # fails part-way through.
    with open(filename) as f:
        for line in f:
            fields = line.split()
            # Blank lines would yield an empty row and make the rows ragged,
            # which breaks the matrix construction done by the callers.
            if fields:
                input_data.append(fields)
    return input_data

def create_feature_matrix(input_data):
    """Build the feature matrix from parsed rows.

    Every column except the last one is treated as a feature; the column
    count is taken from the first row.

    Parameters
    ----------
    input_data : list of list
        Parsed rows (e.g. as returned by ``read_input``); entries must be
        convertible to float.

    Returns
    -------
    numpy.matrix
        Float matrix with one row per input row and one column per feature.
    """
    # The width is decided by the first row; guard the empty case so an
    # empty input still yields an empty matrix instead of an IndexError.
    n_features = len(input_data[0]) - 1 if input_data else 0
    x_list = [row[0:n_features] for row in input_data]
    x = np.matrix(x_list)
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; the builtin gives the same float64 result.
    return x.astype(float)

def create_y_matrix(input_data):
    """Build the target column vector from parsed rows.

    The target is the last column, where "last" is determined by the length
    of the first row.

    Parameters
    ----------
    input_data : list of list
        Parsed rows (e.g. as returned by ``read_input``); entries must be
        convertible to float.

    Returns
    -------
    numpy.matrix
        Float column vector with one row per input row.
    """
    # Index of the target column, taken from the first row; guarded so an
    # empty input does not raise.
    target_col = len(input_data[0]) - 1 if input_data else 0
    y_list = [row[target_col] for row in input_data]
    # np.matrix of a flat list is a single row; transpose to a column.
    y = np.transpose(np.matrix(y_list))
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; the builtin gives the same float64 result.
    return y.astype(float)

def mean_square_error(predicted_y, training_data_y):
    """Return the sum of squared errors divided by the number of rows.

    For single-column targets this is the ordinary mean squared error.

    Parameters
    ----------
    predicted_y : array-like (ndarray or numpy.matrix)
        Predicted values.
    training_data_y : array-like (ndarray or numpy.matrix)
        Reference values, same shape as ``predicted_y``.

    Returns
    -------
    numpy.float64
        Sum over all entries of the squared difference, divided by the
        number of rows in ``predicted_y``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Work on plain ndarrays: element access via x[i][j] on a numpy.matrix
    # only works for j == 0, so the element-wise loop failed for anything
    # but a single column.
    predicted = np.asarray(predicted_y, dtype=float)
    actual = np.asarray(training_data_y, dtype=float)
    if predicted.shape != actual.shape:
        # Refuse to broadcast silently, e.g. (n, 1) against (1, n).
        raise ValueError(
            "shape mismatch: predicted %s vs actual %s"
            % (predicted.shape, actual.shape)
        )
    squared_error_total = np.square(predicted - actual).sum()
    MSE = squared_error_total / predicted.shape[0]
    return MSE

def predict(testing_data_x, theta, degree):
    """Apply a fitted weight vector to new samples.

    The samples are expanded with ``PolynomialFeatures(degree)`` and the
    expanded matrix is multiplied by ``theta``.

    Parameters
    ----------
    testing_data_x : array-like
        Samples to predict for, one per row.
    theta : array-like
        Weights, one per expanded polynomial feature.
    degree : int
        Polynomial degree used for the feature expansion.

    Returns
    -------
    The product of the expanded feature matrix and ``theta``.
    """
    expanded = PolynomialFeatures(degree).fit_transform(testing_data_x)
    return np.dot(expanded, theta)

In [47]:
def gradient(train_x, train_y, learning_rate, poly_degree, n_iterations=1000):
    """Fit polynomial-regression weights with batch gradient descent.

    The inputs are expanded with ``PolynomialFeatures(poly_degree)`` into a
    matrix ``Z``. Starting from a weight vector of all ones, each iteration
    applies ``theta <- theta - learning_rate * Z^T (Z theta - y) / n``,
    where ``n`` is the number of training rows.

    Parameters
    ----------
    train_x : array-like
        Training samples, one per row.
    train_y : array-like
        Training targets as a column vector.
    learning_rate : float
        Step size applied to every update.
    poly_degree : int
        Polynomial degree used for the feature expansion.
    n_iterations : int, optional
        Number of descent steps to run (default 1000, the value that was
        previously hard-coded).

    Returns
    -------
    The weight vector after ``n_iterations`` updates, one row per expanded
    feature.
    """
    Z = PolynomialFeatures(poly_degree).fit_transform(train_x)
    # Loop-invariant quantities, computed once.
    Z_t = np.transpose(Z)
    n_samples = train_x.shape[0]

    theta = np.ones((Z.shape[1], 1))
    for _ in range(n_iterations):
        residual = np.dot(Z, theta) - train_y
        # Named "step" rather than "gradient" so the local does not shadow
        # this function's own name.
        step = np.dot(Z_t, residual) / n_samples
        theta = theta - (learning_rate * step)
    return theta
    
    
def do_cross_validation(data_x, data_y, degree,  n_folds, learning_rate):
    """Run k-fold cross-validation of the gradient-descent model.

    For every fold the weights are fitted on the training part with
    ``gradient``, then the error from ``mean_square_error`` is computed on
    both the training part and the held-out part and printed.

    Parameters
    ----------
    data_x : matrix
        All samples, one per row; must support indexing by an index array.
    data_y : matrix
        All targets, row-aligned with ``data_x``.
    degree : int
        Polynomial degree passed to ``gradient`` and ``predict``.
    n_folds : int
        Number of folds.
    learning_rate : float
        Step size passed to ``gradient``.

    Returns
    -------
    tuple
        ``(mean held-out error, mean training error)`` over all folds.
    """
    # NOTE: uses the KFold(n_samples, n_folds) form, which is iterated
    # directly to obtain (train_idx, test_idx) pairs.
    cv = KFold(len(data_y), n_folds)
    error_mean = []
    error_mean_train = []
    for fold_no, (train_idx, test_idx) in enumerate(cv, 1):
        theta = gradient(data_x[train_idx], data_y[train_idx], learning_rate, degree)

        predicted_y_train = predict(data_x[train_idx], theta, degree)
        MSE_train = mean_square_error(predicted_y_train, data_y[train_idx])

        predicted_y = predict(data_x[test_idx], theta, degree)
        MSE = mean_square_error(predicted_y, data_y[test_idx])

        # A parenthesised single argument prints the same text under both
        # Python 2 and Python 3.
        print("Testing Error(Custom Model) For fold: %d MSE = %f" % (fold_no, MSE))
        print("Training Error For fold: %d MSE = %f" % (fold_no, MSE_train))

        error_mean.append(MSE)
        error_mean_train.append(MSE_train)

    avg_custom = np.mean(error_mean)
    avg_custom_train = np.mean(error_mean_train)
    return avg_custom, avg_custom_train

In [48]:
def run_gradient_iterative(filename, fold, degree, learning_rate):
    """Load a data file, cross-validate the model and print average errors.

    Parameters
    ----------
    filename : str
        Path of the whitespace-delimited data file; the last column is used
        as the target and the remaining columns as features.
    fold : int
        Number of cross-validation folds.
    degree : int
        Polynomial degree of the feature expansion.
    learning_rate : float
        Gradient-descent step size.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    input_data = read_input(filename)
    data_x = create_feature_matrix(input_data)
    data_y = create_y_matrix(input_data)
    avg_custom, avg_custom_train = do_cross_validation(data_x, data_y, degree, fold, learning_rate)
    # A parenthesised single argument prints the same text under both
    # Python 2 and Python 3.
    print("Testing Error (Custom Model) Average MSE: %f" % (avg_custom))
    print("Training Error Average MSE: %f" % (avg_custom_train))

In [68]:
# Arguments are (filename, fold, degree, learning_rate): 10 folds, degree-3 polynomial, learning rate 0.01.
run_gradient_iterative("mvar-set1.dat", 10, 3, 0.01)

Testing Error(Custom Model) For fold: 1 MSE = 0.268468
Training Error For fold: 1 MSE = 0.258976
Testing Error(Custom Model) For fold: 2 MSE = 0.264439
Training Error For fold: 2 MSE = 0.259578
Testing Error(Custom Model) For fold: 3 MSE = 0.235111
Training Error For fold: 3 MSE = 0.262601
Testing Error(Custom Model) For fold: 4 MSE = 0.254510
Training Error For fold: 4 MSE = 0.260823
Testing Error(Custom Model) For fold: 5 MSE = 0.272545
Training Error For fold: 5 MSE = 0.258215
Testing Error(Custom Model) For fold: 6 MSE = 0.291181
Training Error For fold: 6 MSE = 0.257053
Testing Error(Custom Model) For fold: 7 MSE = 0.271902
Training Error For fold: 7 MSE = 0.259030
Testing Error(Custom Model) For fold: 8 MSE = 0.213760
Training Error For fold: 8 MSE = 0.264496
Testing Error(Custom Model) For fold: 9 MSE = 0.286373
Training Error For fold: 9 MSE = 0.257448
Testing Error(Custom Model) For fold: 10 MSE = 0.269508
Training Error For fold: 10 MSE = 0.259127
Testing Error (Custom Model)

In [69]:
# Arguments are (filename, fold, degree, learning_rate): 10 folds, degree-3 polynomial, learning rate 0.01.
run_gradient_iterative("mvar-set2.dat", 10, 3, 0.01)

Testing Error(Custom Model) For fold: 1 MSE = 0.012119
Training Error For fold: 1 MSE = 0.012962
Testing Error(Custom Model) For fold: 2 MSE = 0.012920
Training Error For fold: 2 MSE = 0.013030
Testing Error(Custom Model) For fold: 3 MSE = 0.014040
Training Error For fold: 3 MSE = 0.012979
Testing Error(Custom Model) For fold: 4 MSE = 0.014028
Training Error For fold: 4 MSE = 0.012839
Testing Error(Custom Model) For fold: 5 MSE = 0.012428
Training Error For fold: 5 MSE = 0.012898
Testing Error(Custom Model) For fold: 6 MSE = 0.011890
Training Error For fold: 6 MSE = 0.012875
Testing Error(Custom Model) For fold: 7 MSE = 0.013048
Training Error For fold: 7 MSE = 0.012975
Testing Error(Custom Model) For fold: 8 MSE = 0.012210
Training Error For fold: 8 MSE = 0.012899
Testing Error(Custom Model) For fold: 9 MSE = 0.014704
Training Error For fold: 9 MSE = 0.012946
Testing Error(Custom Model) For fold: 10 MSE = 0.013499
Training Error For fold: 10 MSE = 0.013023
Testing Error (Custom Model)

In [51]:
# Arguments are (filename, fold, degree, learning_rate): 10 folds, degree-2 polynomial, learning rate 0.01.
run_gradient_iterative("mvar-set3.dat", 10, 2, 0.01)

Testing Error(Custom Model) For fold: 1 MSE = 0.248989
Training Error For fold: 1 MSE = 0.253424
Testing Error(Custom Model) For fold: 2 MSE = 0.253653
Training Error For fold: 2 MSE = 0.252877
Testing Error(Custom Model) For fold: 3 MSE = 0.254279
Training Error For fold: 3 MSE = 0.252797
Testing Error(Custom Model) For fold: 4 MSE = 0.254499
Training Error For fold: 4 MSE = 0.252862
Testing Error(Custom Model) For fold: 5 MSE = 0.256500
Training Error For fold: 5 MSE = 0.252603
Testing Error(Custom Model) For fold: 6 MSE = 0.252571
Training Error For fold: 6 MSE = 0.252911
Testing Error(Custom Model) For fold: 7 MSE = 0.252700
Training Error For fold: 7 MSE = 0.253019
Testing Error(Custom Model) For fold: 8 MSE = 0.250606
Training Error For fold: 8 MSE = 0.253246
Testing Error(Custom Model) For fold: 9 MSE = 0.252032
Training Error For fold: 9 MSE = 0.253124
Testing Error(Custom Model) For fold: 10 MSE = 0.255158
Training Error For fold: 10 MSE = 0.252820
Testing Error (Custom Model)