## Imports and Data

In [37]:
import numpy as np
import matplotlib.pyplot as plt
import mltools as ml
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_curve, auc
from sklearn import tree
from sklearn import gaussian_process
from sklearn import linear_model

# Load the labeled training data and the unlabeled test features from text
# files (np.genfromtxt splits on whitespace by default).
X = np.genfromtxt("data/X_train.txt")
Y = np.genfromtxt("data/Y_train.txt")
Xte = np.genfromtxt("data/X_test.txt")

# Split fraction handed to ml.splitData.
# NOTE(review): presumably the share of rows kept for training, with the rest
# returned as the validation set -- confirm against mltools.
TRAIN_FRACTION = 0.80

# For quicker experiments a fixed subset can be used instead of the full
# split: rows 10000:20000 of X/Y for training and rows 20000:30000 for
# validation.
Xtr, Xva, Ytr, Yva = ml.splitData(X, Y, TRAIN_FRACTION)

## Combining Classifiers

In [44]:
# Seed shared by every randomized estimator below so that refitting after a
# kernel restart yields the same ensemble (and therefore the same AUC).
SEED = 0

#1) Random Forest Classifier
rf_clf = RandomForestClassifier(n_estimators=20, max_features=10, bootstrap=True,
                                max_depth=20, random_state=SEED)

#2) K Nearest Neighbors Classifier (Minkowski metric with p=1, i.e. Manhattan distance)
knn_clf = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                               metric_params=None, n_jobs=1, n_neighbors=5, p=1,
                               weights='uniform')

#3) AdaBoost Classifier boosting depth-3 decision trees
dt = tree.DecisionTreeClassifier(max_depth=3, random_state=SEED)
ada_clf = AdaBoostClassifier(base_estimator=dt, n_estimators=50, random_state=SEED)

#List of classifiers
listOfClf = [rf_clf, knn_clf, ada_clf]

# Features the ensemble is scored on. Keep Xva to evaluate the AUC against
# Yva; set this to Xte to produce predictions for the Kaggle test set.
X_eval = Xva

# Fit every classifier on the training split and collect its per-class
# probability estimates for X_eval.
listOfPredictions = []
for clf in listOfClf:
    clf.fit(Xtr, Ytr)
    listOfPredictions.append(clf.predict_proba(X_eval))

# Unweighted average of the class probabilities over ALL classifiers in the
# list, so adding or removing a model needs no change here.
predictions = np.mean(listOfPredictions, axis=0)



In [46]:
#Testing out the AUC Curve
# Column 1 of `predictions` is the averaged probability of the second class;
# it is used as the score for the ROC curve against the validation labels.
false_positive_rate, true_positive_rate, thresholds = roc_curve(Yva, predictions[:, 1])
roc_auc = auc(false_positive_rate, true_positive_rate)
# Function-call form prints identically on Python 2 and also runs on Python 3.
print(roc_auc)

0.74273934707


In [47]:
#Writing to Kaggle

# The submission must contain one row per TEST example, so the probabilities
# are computed here from the already-fitted classifiers on Xte instead of
# reusing `predictions`, which may hold validation-set scores.
test_predictions = np.mean([clf.predict_proba(Xte) for clf in listOfClf], axis=0)

# Now output a file with two columns, a row ID and a confidence in class 1:
# (fmt is a multi-format string, so it alone determines the "ID, prob" layout
# of each row.)
submission = np.vstack((np.arange(len(test_predictions)), test_predictions[:, 1])).T
np.savetxt('classifiers_sklearn.txt',
           submission,
           '%d, %.2f', header='ID,Prob1', comments='', delimiter=',')