In [1]:
import numpy as np
import matplotlib.pyplot as plt
import mltools as ml

from sklearn.metrics import accuracy_score, roc_auc_score,mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor,AdaBoostRegressor
from sklearn.svm import SVC,LinearSVC

# Seed NumPy's global RNG before anything stochastic runs, so a
# Restart-and-Run-All reproduces the same shuffle and therefore the same
# train/validation split and scores.
# NOTE(review): this assumes ml.shuffleData draws from np.random's global
# state -- confirm against the mltools source; if it does not, the call is
# harmless.
SEED = 0
np.random.seed(SEED)

# Load the labelled training set and the unlabelled test features.
# delimiter=None makes genfromtxt split columns on any whitespace.
X = np.genfromtxt("X_train.txt", delimiter=None)
Y = np.genfromtxt("Y_train.txt", delimiter=None)
Xtest = np.genfromtxt("X_test.txt", delimiter=None)

# Shuffle rows of X and Y together, then split into a training part
# (Xtr, Ytr) and a held-out part (Xte, Yte) using the 0.75 fraction.
# NOTE(review): 0.75 is presumably the training fraction -- verify against
# ml.splitData.
X, Y = ml.shuffleData(X, Y)
Xtr, Xte, Ytr, Yte = ml.splitData(X, Y, 0.75)

In [None]:
# Sweep the SVC penalty parameter C (RBF kernel, default gamma) and record
# the mean squared error of the predicted labels on the held-out split
# (svcVMse) and on the training split (svcTMse), one entry per value of C.
C = [0.01, 0.1, 0.5, 0.8, 1, 1.3, 10, 100]
svcVMse = np.zeros(len(C))
svcTMse = np.zeros(len(C))
for idx in range(len(C)):
    # A fresh classifier per C value; fit() trains it in place.
    model = SVC(kernel='rbf', C=C[idx])
    model.fit(Xtr, Ytr)

    held_out_pred = model.predict(Xte)
    train_pred = model.predict(Xtr)

    svcVMse[idx] = mean_squared_error(Yte, held_out_pred)
    svcTMse[idx] = mean_squared_error(Ytr, train_pred)
    

In [None]:
# Report the held-out and training MSE recorded for each C in the sweep.
# print(...) with a single parenthesised argument emits the same text under
# Python 2 and also runs on Python 3, where the bare print statement is a
# SyntaxError.
for c_value, v_mse, t_mse in zip(C, svcVMse, svcTMse):
    print("c = {:}: VMse = {:05f}, TMse = {:05f}".format(c_value, v_mse, t_mse))

In [None]:
# Sweep the RBF kernel coefficient gamma with C held at 0.8, recording the
# mean squared error of the predicted labels on the held-out split
# (svcVMse2) and on the training split (svcTMse2) for every gamma in G.
G = [0.01, 0.1, 0.5, 0.8, 1, 1.3, 10, 100]
n_gammas = len(G)
svcVMse2 = np.zeros(n_gammas)
svcTMse2 = np.zeros(n_gammas)
for slot, gamma in enumerate(G):
    # fit() returns the estimator itself, so construction and training chain.
    fitted = SVC(C=0.8, kernel='rbf', gamma=gamma).fit(Xtr, Ytr)
    svcVMse2[slot] = mean_squared_error(Yte, fitted.predict(Xte))
    svcTMse2[slot] = mean_squared_error(Ytr, fitted.predict(Xtr))

In [None]:
# Report the held-out and training MSE recorded for each gamma in the sweep.
# print(...) with a single parenthesised argument emits the same text under
# Python 2 and also runs on Python 3, where the bare print statement is a
# SyntaxError.
for g_value, v_mse, t_mse in zip(G, svcVMse2, svcTMse2):
    print("g = {:}: VMse = {:05f}, TMse = {:05f}".format(g_value, v_mse, t_mse))

In [15]:
from sklearn.cluster import KMeans,MiniBatchKMeans,AffinityPropagation
from sklearn.linear_model import LinearRegression, ARDRegression

# Base learners whose predictions become the input columns of the stacked
# linear model fitted further down.
# random_state is pinned on the two estimators that use randomness at fit
# time (KMeans initialisation, gradient-boosting internals) so the stacked
# features -- and the reported AUCs -- are reproducible across runs.
# NOTE(review): KMeans is unsupervised; its predict() returns a cluster
# index whose numeric value is arbitrary, so its worth as a numeric feature
# for a linear model is questionable -- confirm this is intended.
clfs = [KMeans(n_clusters=8, random_state=0),
        SVC(C=0.8, kernel='rbf', gamma=0.1),
        GradientBoostingRegressor(n_estimators=16000, random_state=0)
       ]

In [16]:
# Build the level-1 feature matrices: one column per base learner, one row
# per sample, for the training split, the held-out split and the test set.
n_models = len(clfs)
temp_results_t = np.zeros((len(Ytr), n_models))
temp_results_v = np.zeros((len(Yte), n_models))
test1 = np.zeros((Xtest.shape[0], n_models))

for col, model in enumerate(clfs):
    # Every base learner is trained on the training split only.
    # NOTE(review): KMeans accepts Ytr here but, being unsupervised,
    # presumably ignores it -- confirm against the scikit-learn docs.
    model.fit(Xtr, Ytr)

    # Same prediction order as before: held-out, training, then test.
    for features, sink in ((Xte, temp_results_v),
                           (Xtr, temp_results_t),
                           (Xtest, test1)):
        sink[:, col] = model.predict(features)

# The combiner is fitted on the held-out predictions, not the training ones.
lr = LinearRegression(normalize=True)
lr.fit(temp_results_v, Yte)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)

In [17]:
# Score the stacked linear model by ROC AUC on both splits.
# NOTE(review): lr was fitted on (temp_results_v, Yte), so the "Valid"
# figure is an in-sample score for the combiner, not an independent estimate.
train_auc = roc_auc_score(Ytr, lr.predict(temp_results_t))
valid_auc = roc_auc_score(Yte, lr.predict(temp_results_v))

# Single-argument print(...) works on Python 2 and 3; the two spaces after
# the colon reproduce the spacing the original comma-separated print emitted.
print("AUC on Train data:  {}".format(train_auc))
print("AUC on Valid data:  {}".format(valid_auc))

AUC on Train data:  0.947482712235
AUC on Valid data:  0.756948110858


In [18]:
# Predict on the stacked test-set features and write the submission file:
# a header line "ID,Prob1" followed by one "<row index>, <prediction>" line
# per test sample, with predictions rounded to two decimals.
Ypred_lr = lr.predict(test1)

row_ids = np.arange(len(Ypred_lr))
submission = np.column_stack((row_ids, Ypred_lr))

# fmt is a complete row template, so it fully determines each output line.
np.savetxt('Yhat_svm_lr_6.txt',
           submission,
           fmt='%d, %.2f',
           header='ID,Prob1',
           comments='',
           delimiter=',')