In [38]:
import numpy as np
import math
from sklearn.cross_validation import KFold
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
%matplotlib inline
import timeit

In [41]:
def read_input(filename):
    """Read a whitespace-delimited text file into a list of token lists.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    list of list of str
        One entry per non-blank line, each entry being the result of
        ``line.split()`` (tokens are left as strings).
    """
    input_data = []
    # The context manager closes the file even if reading raises.
    with open(filename) as data_file:
        for line in data_file:
            fields = line.split()
            # Skip blank lines: an empty row would make the row lengths
            # ragged and break the matrix construction done downstream.
            if fields:
                input_data.append(fields)
    return input_data

def create_feature_matrix(input_data):
    """Build the feature matrix from every column except the last one.

    Parameters
    ----------
    input_data : list of list
        Rows of values (strings or numbers). The width of the first row
        decides how many leading columns are taken from every row.

    Returns
    -------
    numpy.matrix
        Float matrix with one row per input row and
        ``len(input_data[0]) - 1`` columns.
    """
    n_features = len(input_data[0]) - 1
    feature_rows = [row[:n_features] for row in input_data]
    # The builtin ``float`` replaces the ``np.float`` alias, which newer
    # NumPy releases no longer provide; both mean float64 here.
    return np.matrix(feature_rows).astype(float)

def create_y_matrix(input_data):
    """Build the target column vector from the last column of the data.

    Parameters
    ----------
    input_data : list of list
        Rows of values (strings or numbers). The width of the first row
        decides which column index is read from every row.

    Returns
    -------
    numpy.matrix
        Float matrix of shape ``(len(input_data), 1)``.
    """
    target_column = len(input_data[0]) - 1
    targets = [row[target_column] for row in input_data]
    # np.matrix turns the flat list into a single row; transpose it into a
    # column. The builtin ``float`` replaces the ``np.float`` alias, which
    # newer NumPy releases no longer provide.
    return np.transpose(np.matrix(targets)).astype(float)

def mean_square_error(predicted_y, training_data_y):
    """Return the summed squared error divided by the number of rows.

    Parameters
    ----------
    predicted_y : array-like
        Predicted values (``ndarray`` or ``np.matrix``).
    training_data_y : array-like
        Reference values; must have the same shape as ``predicted_y``.

    Returns
    -------
    numpy.float64
        Sum over all elements of ``(predicted - reference) ** 2`` divided
        by the number of rows of ``predicted_y``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Work on plain ndarrays: chained ``m[i][j]`` indexing on np.matrix
    # does not address element (i, j) and fails for more than one column.
    predicted = np.asarray(predicted_y, dtype=float)
    reference = np.asarray(training_data_y, dtype=float)
    if predicted.shape != reference.shape:
        # Refuse to broadcast, e.g. (n, 1) against (n,) would silently
        # produce an (n, n) difference.
        raise ValueError(
            "shape mismatch: predicted %r vs reference %r"
            % (predicted.shape, reference.shape)
        )
    residual = predicted - reference
    return np.sum(residual * residual) / predicted.shape[0]

def _as_samples(points):
    """Return ``points`` as a 2-D float ndarray with one sample per row."""
    samples = np.asarray(points, dtype=float)
    if samples.ndim == 1:
        # A flat vector is read as n one-dimensional samples.
        samples = samples.reshape(-1, 1)
    return samples


def _gaussian_kernel(left_x, right_x, sigma):
    """Evaluate exp(-||a - b||^2 / (2 * sigma^2)) for every row pair (a, b)."""
    if sigma == 0:
        raise ValueError("sigma must be non-zero")
    left = _as_samples(left_x)
    right = _as_samples(right_x)
    # Explicit pairwise differences keep the diagonal of a Gram matrix at
    # exactly 1; this costs O(rows_left * rows_right * features) memory.
    difference = left[:, np.newaxis, :] - right[np.newaxis, :, :]
    squared_distance = np.sum(difference * difference, axis=-1)
    return np.exp(-squared_distance / (2.0 * sigma ** 2))


def create_gram_matrix(train_x, sigma):
    """Build the Gaussian (RBF) Gram matrix of the training samples.

    Entry ``(i, j)`` is ``exp(-||x_i - x_j||^2 / (2 * sigma^2))``. The
    bandwidth divides the squared distance inside the exponential, so
    ``sigma`` changes the shape of the kernel and not only its scale.

    Parameters
    ----------
    train_x : array-like
        Training samples, one per row (``ndarray`` or ``np.matrix``).
    sigma : float
        Kernel bandwidth; must be non-zero.

    Returns
    -------
    numpy.ndarray
        Square array with one row and one column per training sample.

    Raises
    ------
    ValueError
        If ``sigma`` is zero.
    """
    return _gaussian_kernel(train_x, train_x, sigma)


def get_alpha(gram_matrix, train_y):
    """Solve ``gram_matrix * alpha = train_y`` for the dual weights.

    Parameters
    ----------
    gram_matrix : array-like
        Square kernel matrix of the training samples.
    train_y : array-like
        Training targets, one row per training sample.

    Returns
    -------
    array-like
        Solution ``alpha`` as returned by ``np.linalg.solve``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``gram_matrix`` is singular.
    """
    # Solving the linear system avoids forming the explicit inverse.
    return np.linalg.solve(gram_matrix, train_y)


def predict(alpha, train_x, test_x, sigma):
    """Predict targets for ``test_x`` from the fitted dual weights.

    Each prediction is ``sum_j alpha[j] * k(test_x[i], train_x[j])``, using
    the same Gaussian kernel as ``create_gram_matrix``.

    Parameters
    ----------
    alpha : array-like
        Dual weights, one per training sample.
    train_x : array-like
        Training samples the weights were fitted on, one per row.
    test_x : array-like
        Samples to predict, one per row.
    sigma : float
        Kernel bandwidth; must match the value used to build the Gram
        matrix and must be non-zero.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number of test samples, 1)``.

    Raises
    ------
    ValueError
        If ``sigma`` is zero or ``alpha`` does not have one weight per
        training sample.
    """
    cross_kernel = _gaussian_kernel(test_x, train_x, sigma)
    # Flatten to a plain column so np.matrix weights still give an ndarray.
    weights = np.asarray(alpha, dtype=float).reshape(-1, 1)
    return np.dot(cross_kernel, weights)

def do_cross_validation(data_x, data_y, sigma,  n_folds=10):
    """Run k-fold cross-validation of the kernel model and average the error.

    For every fold the Gram matrix and weights are computed on the training
    part, the held-out part is predicted, and the error returned by
    ``mean_square_error`` is printed and collected.

    Parameters
    ----------
    data_x : array-like
        Feature rows; must support indexing with an array of row indices.
    data_y : array-like
        Targets, indexable the same way as ``data_x``.
    sigma : float
        Kernel bandwidth forwarded to ``create_gram_matrix`` and ``predict``.
    n_folds : int, optional
        Number of folds (default 10).

    Returns
    -------
    float
        Mean of the per-fold errors.
    """
    # Uses the KFold imported at the top of the file
    # (sklearn.cross_validation): built from the sample count and iterated
    # directly to get (train, test) index arrays.
    cv = KFold(len(data_y), n_folds)
    fold_errors = []
    for fold_number, (train_idx, test_idx) in enumerate(cv):
        gram_matrix = create_gram_matrix(data_x[train_idx], sigma)
        alpha = get_alpha(gram_matrix, data_y[train_idx])
        predicted_y = predict(alpha, data_x[train_idx], data_x[test_idx], sigma)
        fold_mse = mean_square_error(predicted_y, data_y[test_idx])
        # The value is a mean squared error, so it is labelled MSE (no
        # square root is taken anywhere). The single parenthesised argument
        # prints identically under Python 2 and Python 3.
        print("(Custom Model) For fold: %d MSE = %f" % (fold_number, fold_mse))
        fold_errors.append(fold_mse)
    return np.mean(fold_errors)

def run_gaussian_kernel(filename, sigma, fold):
    """Load a data file, cross-validate the kernel model and print a report.

    Parameters
    ----------
    filename : str
        Path of the whitespace-delimited data file; the last column is the
        target and the preceding columns are the features.
    sigma : float
        Kernel bandwidth passed to the cross-validation.
    fold : int
        Number of cross-validation folds.

    Returns
    -------
    None
        The elapsed time and the average error are printed.
    """
    # Use the caller's arguments: hard-coded values here would make every
    # run identical regardless of what is passed in.
    input_data = read_input(filename)
    data_x = create_feature_matrix(input_data)
    data_y = create_y_matrix(input_data)
    # Only the cross-validation is timed, not the file loading.
    start_time = timeit.default_timer()
    avg_custom = do_cross_validation(data_x, data_y, sigma, fold)
    elapsed = timeit.default_timer() - start_time
    # Single parenthesised arguments print identically on Python 2 and 3.
    print("The total time taken by Gaussian Kernel is: %.15f" % (elapsed))
    print("(Custom Model) Average MSE: %f" % (avg_custom))

In [None]:
# Invoke run_gaussian_kernel on mvar-set1.dat, passing sigma=1 and fold=10.
run_gaussian_kernel("mvar-set1.dat", 1, 10)

In [None]:
# Invoke run_gaussian_kernel on mvar-set1.dat, passing sigma=0.5 and fold=10.
run_gaussian_kernel("mvar-set1.dat", 0.5, 10)

In [None]:
# Invoke run_gaussian_kernel on mvar-set1.dat, passing sigma=2 and fold=10.
run_gaussian_kernel("mvar-set1.dat", 2, 10)