In [4]:
import numpy as np
import kaggle
from sklearn.metrics import accuracy_score
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import cross_val_score, KFold
from operator import itemgetter
from itertools import product
from sklearn.kernel_ridge import KernelRidge

# Load the synthetic train/test splits from disk
def read_synthetic_data():
	"""Load the synthetic dataset from comma-delimited text files.

	Returns:
		A 4-tuple ``(train_x, train_y, test_x, test_y)`` of float arrays,
		read in that order.
	"""
	print('Reading synthetic data ...')
	# Order matters: it fixes the positions in the returned tuple.
	sources = (
		'../../Data/Synthetic/data_train.txt',
		'../../Data/Synthetic/label_train.txt',
		'../../Data/Synthetic/data_test.txt',
		'../../Data/Synthetic/label_test.txt',
	)
	return tuple(np.loadtxt(src, delimiter=',', dtype=float) for src in sources)

# Load the credit card train/test splits from disk
def read_creditcard_data():
	"""Load the credit card dataset from comma-delimited text files.

	Returns:
		A 3-tuple ``(train_x, train_y, test_x)`` of float arrays, read in
		that order. No test labels are loaded.
	"""
	print('Reading credit card data ...')
	# Order matters: it fixes the positions in the returned tuple.
	sources = (
		'../../Data/CreditCard/data_train.txt',
		'../../Data/CreditCard/label_train.txt',
		'../../Data/CreditCard/data_test.txt',
	)
	return tuple(np.loadtxt(src, delimiter=',', dtype=float) for src in sources)

# Load the tumor train/test splits from disk
def read_tumor_data():
	"""Load the tumor dataset from comma-delimited text files.

	Returns:
		A 3-tuple ``(train_x, train_y, test_x)`` of float arrays, read in
		that order. No test labels are loaded.
	"""
	print('Reading tumor data ...')
	# Order matters: it fixes the positions in the returned tuple.
	sources = (
		'../../Data/Tumor/data_train.txt',
		'../../Data/Tumor/label_train.txt',
		'../../Data/Tumor/data_test.txt',
	)
	return tuple(np.loadtxt(src, delimiter=',', dtype=float) for src in sources)

def parse_param_grid(param_grid):
    """Lazily enumerate every parameter combination described by a grid.

    Args:
        param_grid: Either a single dict mapping parameter name to a
            sequence of candidate values, or an iterable of such dicts.
            Each dict is expanded independently.

    Yields:
        One dict per combination, mapping each parameter name to a single
        chosen value. Within a grid, names are processed in sorted order
        and the last name varies fastest. A grid with no parameters yields
        a single empty dict; a grid in which any parameter has no
        candidates yields nothing.
    """
    # Accept a bare dict as shorthand for a one-element list of grids;
    # iterating a dict directly would walk its keys and fail on .items().
    if isinstance(param_grid, dict):
        param_grid = [param_grid]
    for grid in param_grid:
        items = sorted(grid.items())
        if not items:
            # zip(*[]) cannot be unpacked into (keys, values); an empty grid
            # describes exactly one setting, namely "no overrides".
            yield {}
            continue
        keys, values = zip(*items)
        for combo in product(*values):
            yield dict(zip(keys, combo))


def gridsearch(X, y, model, paramgridIterator, cv=5, scoring='accuracy'):
    """Pick the parameter setting with the highest mean cross-validated score.

    For every candidate in ``paramgridIterator`` the model is reconfigured
    with ``set_params`` and scored with ``cross_val_score``; the mean of the
    fold scores is printed together with ``1 - mean``.

    Args:
        X: Training inputs, passed through to ``cross_val_score``.
        y: Training targets, passed through to ``cross_val_score``.
        model: Estimator exposing ``set_params(**params)``. It is
            reconfigured in place for each candidate.
        paramgridIterator: Iterable of parameter dicts to evaluate.
        cv: Cross-validation setting forwarded to ``cross_val_score``.
        scoring: Scorer forwarded to ``cross_val_score``. The printed
            'Error' value is ``1 - mean score``, so it is only an error
            rate for accuracy-like scorers.

    Returns:
        ``(bestparam, maxacc)``: the first candidate that achieved the
        highest mean score, and that score. If no candidate was evaluated
        the result is ``({}, 0)``.
    """
    bestparam = {}
    maxacc = None  # None means "nothing evaluated yet"
    for param in paramgridIterator:
        model = model.set_params(**param)
        fold_scores = cross_val_score(model, X, y, scoring=scoring, cv=cv)
        acc = np.mean(fold_scores)
        # Strictly-greater comparison: keep a candidate only when it beats
        # the best seen so far (ties keep the earlier candidate).
        if maxacc is None or acc > maxacc:
            maxacc = acc
            bestparam = param
        print(str(param)+"Scores="+str(acc))
        print('Error', 1-acc)
    if maxacc is None:
        return bestparam, 0
    return bestparam, maxacc


# Mean squared error between targets and predictions
def compute_MSE(y, y_hat):
	"""Return the mean of the squared differences between ``y`` and ``y_hat``."""
	residual = y - y_hat
	return np.mean(np.power(residual, 2))

# Load the credit card split and echo the array shapes as a quick sanity check.
(train_x, train_y, test_x) = read_creditcard_data()
print('Train=', np.shape(train_x))
print('Test=', np.shape(test_x))

Reading credit card data ...
Train= (511, 8)
Test= (256, 8)


In [None]:
# Candidate regularisation strengths for KernelRidge. The list must be
# non-empty, otherwise the search evaluates nothing. These log-spaced values
# are a starting point; adjust to taste.
param_grid = {'alpha' : [1e-4, 1e-3, 1e-2, 1e-1, 1.0]}
paramgridIterator = parse_param_grid([param_grid])
model = KernelRidge()
# NOTE(review): gridsearch scores candidates with scoring='accuracy'; confirm
# that this suits KernelRidge's real-valued predictions on these labels.
bestparam = gridsearch(train_x, train_y, model, paramgridIterator)[0]
print('Best parameters',bestparam)
# Rebuild the estimator from exactly the parameters that were searched
# (KernelRidge has no `max_depth`), then fit it: predict() needs a fitted model.
bestmodel = KernelRidge(**bestparam)
bestmodel.fit(train_x, train_y)
test_y = bestmodel.predict(test_x)
file_name = '../Predictions/KernelCredit.csv'
# Writing output in Kaggle format
print('Writing output to ', file_name)
kaggle.kaggleize(test_y, file_name)