In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier, 
                              GradientBoostingClassifier, ExtraTreesClassifier)
from sklearn.svm import SVC
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score
from IPython.core.debugger import Tracer



In [2]:
df = pd.read_csv('train_data.csv')

In [3]:
# Drop final call since we need it later
df.drop(["Final Call"], axis=1)

Unnamed: 0,Problem Solving,Design,CS Skills,Test Enumeration,Communication,intervieweeId,interviewerId,Score,Interviewer Call
0,7,3,-1,2,3,1,51,4.217391,0
1,-1,4,7,-1,3,1,67,4.750000,0
2,8,9,-1,9,6,1,77,8.260870,1
3,5,2,5,9,2,1,7,4.392857,0
4,3,3,2,6,-1,2,39,3.280000,0
5,5,4,3,-1,6,2,8,4.375000,0
6,3,9,7,9,-1,2,54,6.680000,1
7,-1,-1,5,7,-1,2,23,5.888889,1
8,-1,2,9,-1,5,3,99,4.750000,0
9,3,-1,7,-1,9,3,80,5.375000,0


In [4]:
df = pd.get_dummies(data=df, columns=['intervieweeId', 'interviewerId'])

In [5]:
# Some useful parameters which will come in handy later on
ntrain = df.shape[0]
SEED = 0 # for reproducibility
NFOLDS = 5 # set folds for out-of-fold prediction
kf = KFold(ntrain, n_folds= NFOLDS, random_state=SEED)

# Class to extend the Sklearn classifier
class SklearnHelper(object):
    def __init__(self, clf, seed=0, params=None):
        params['random_state'] = seed
        self.clf = clf(**params)

    def train(self, x_train, y_train):
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        return self.clf.predict(x)
    
    def predict_proba(self, x):
        return self.clf.predict_proba(x)
    
    def fit(self,x,y):
        return self.clf.fit(x,y)
    
    def feature_importances(self,x,y):
        print(self.clf.fit(x,y).feature_importances_)

In [6]:
def get_oof(clf, x_train, y_train):
    oof_train = np.zeros((ntrain,))
    meanAccuracyScore = 0.0
    
    for i, (train_index, valid_index) in enumerate(kf):
        x_tr = x_train[train_index]
        y_tr = y_train[train_index]
        x_va = x_train[valid_index]
        y_va = y_train[valid_index]
        
        clf.train(x_tr, y_tr)
        oof_train[valid_index] = clf.predict(x_va)
        print(clf.predict_proba(x_va))
        accuracy = accuracy_score(y_va, oof_train[valid_index])
        #print(accuracy)
        meanAccuracyScore = meanAccuracyScore + accuracy

    print("Accuracy Score", meanAccuracyScore / NFOLDS)
    #oof_test[:] = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1)

In [7]:
# Put in our parameters for said classifiers
# Random Forest parameters
rf_params = {
    'n_jobs': -1,
    'n_estimators': 500,
     'warm_start': True, 
     #'max_features': 0.2,
    'max_depth': 6,
    'min_samples_leaf': 2,
    'max_features' : 'sqrt',
    'verbose': 0
}

# Extra Trees Parameters
et_params = {
    'n_jobs': -1,
    'n_estimators':500,
    #'max_features': 0.5,
    'max_depth': 8,
    'min_samples_leaf': 2,
    'verbose': 0
}

# AdaBoost parameters
ada_params = {
    'n_estimators': 500,
    'learning_rate' : 0.75
}

# Gradient Boosting parameters
gb_params = {
    'n_estimators': 500,
     #'max_features': 0.2,
    'max_depth': 5,
    'min_samples_leaf': 2,
    'verbose': 0
}

In [8]:
# Create 5 objects that represent our 4 models
rf = SklearnHelper(clf=RandomForestClassifier, seed=SEED, params=rf_params)
et = SklearnHelper(clf=ExtraTreesClassifier, seed=SEED, params=et_params)
ada = SklearnHelper(clf=AdaBoostClassifier, seed=SEED, params=ada_params)
gb = SklearnHelper(clf=GradientBoostingClassifier, seed=SEED, params=gb_params)

In [9]:
y_train = df['Interviewer Call'].ravel()
train = df.drop(['Interviewer Call','Score'], axis=1)
x_train = df.values # Creates an array of the train data

# Create our OOF train and test predictions. These base results will be used as new features
print("Extra Trees")
et_oof_train = get_oof(et, x_train, y_train) # Extra Trees
print("Random Forest")
rf_oof_train = get_oof(rf,x_train, y_train) # Random Forest
print("Ada boost")
ada_oof_train = get_oof(ada, x_train, y_train) # AdaBoost 
print("Gradient Boost")
gb_oof_train = get_oof(gb,x_train, y_train) # Gradient Boost

print("Training is complete")

Extra Trees
[[0.79736258 0.20263742]
 [0.79104294 0.20895706]
 [0.29783744 0.70216256]
 [0.76757764 0.23242236]
 [0.81310166 0.18689834]
 [0.79883497 0.20116503]
 [0.33902328 0.66097672]
 [0.41196098 0.58803902]
 [0.77221272 0.22778728]
 [0.76868723 0.23131277]
 [0.76918893 0.23081107]
 [0.80300018 0.19699982]
 [0.80005568 0.19994432]
 [0.34909333 0.65090667]
 [0.75138048 0.24861952]
 [0.77868398 0.22131602]
 [0.74714706 0.25285294]
 [0.37307578 0.62692422]
 [0.82500424 0.17499576]
 [0.74338534 0.25661466]
 [0.80046006 0.19953994]
 [0.77320494 0.22679506]
 [0.78855679 0.21144321]
 [0.74534057 0.25465943]
 [0.78724954 0.21275046]
 [0.78421062 0.21578938]
 [0.39708915 0.60291085]
 [0.38094386 0.61905614]
 [0.30825159 0.69174841]
 [0.36560406 0.63439594]
 [0.75282061 0.24717939]
 [0.76398208 0.23601792]
 [0.25494982 0.74505018]
 [0.32354074 0.67645926]
 [0.76166    0.23834   ]
 [0.30935588 0.69064412]
 [0.42754061 0.57245939]
 [0.74676    0.25324   ]
 [0.39524598 0.60475402]
 [0.80251133 

[[0.79892967 0.20107033]
 [0.3898391  0.6101609 ]
 [0.80152813 0.19847187]
 [0.34377296 0.65622704]
 [0.77160141 0.22839859]
 [0.78476211 0.21523789]
 [0.34410154 0.65589846]
 [0.40981872 0.59018128]
 [0.40780989 0.59219011]
 [0.74679628 0.25320372]
 [0.79101602 0.20898398]
 [0.37594175 0.62405825]
 [0.78282394 0.21717606]
 [0.79344708 0.20655292]
 [0.72063256 0.27936744]
 [0.76922289 0.23077711]
 [0.38340077 0.61659923]
 [0.77552437 0.22447563]
 [0.77231078 0.22768922]
 [0.77794194 0.22205806]
 [0.82434131 0.17565869]
 [0.36102802 0.63897198]
 [0.78364531 0.21635469]
 [0.748673   0.251327  ]
 [0.28677613 0.71322387]
 [0.68237989 0.31762011]
 [0.25213372 0.74786628]
 [0.27368656 0.72631344]
 [0.40913669 0.59086331]
 [0.76352725 0.23647275]
 [0.45626711 0.54373289]
 [0.81382922 0.18617078]
 [0.81582442 0.18417558]
 [0.79624965 0.20375035]
 [0.79451791 0.20548209]
 [0.74669006 0.25330994]
 [0.33205412 0.66794588]
 [0.3304554  0.6695446 ]
 [0.27690692 0.72309308]
 [0.69509891 0.30490109]


[[0.73509852 0.26490148]
 [0.36980134 0.63019866]
 [0.38509003 0.61490997]
 [0.7400334  0.2599666 ]
 [0.32475374 0.67524626]
 [0.7805212  0.2194788 ]
 [0.4109643  0.5890357 ]
 [0.77296113 0.22703887]
 [0.36943131 0.63056869]
 [0.76412028 0.23587972]
 [0.37753295 0.62246705]
 [0.76368845 0.23631155]
 [0.79100923 0.20899077]
 [0.78269906 0.21730094]
 [0.74520507 0.25479493]
 [0.37155884 0.62844116]
 [0.40190336 0.59809664]
 [0.75778361 0.24221639]
 [0.33765876 0.66234124]
 [0.79970081 0.20029919]
 [0.70623109 0.29376891]
 [0.28107999 0.71892001]
 [0.29209177 0.70790823]
 [0.28377361 0.71622639]
 [0.78879318 0.21120682]
 [0.80532411 0.19467589]
 [0.33294344 0.66705656]
 [0.79969521 0.20030479]
 [0.76595418 0.23404582]
 [0.77303862 0.22696138]
 [0.36986024 0.63013976]
 [0.34071733 0.65928267]
 [0.36404509 0.63595491]
 [0.75569694 0.24430306]
 [0.34265897 0.65734103]
 [0.80534899 0.19465101]
 [0.29719471 0.70280529]
 [0.28456886 0.71543114]
 [0.28715487 0.71284513]
 [0.25950506 0.74049494]


  warn("Warm-start fitting without increasing n_estimators does not "


[[0.78122735 0.21877265]
 [0.22655626 0.77344374]
 [0.19625852 0.80374148]
 [0.25763461 0.74236539]
 [0.81663048 0.18336952]
 [0.79474501 0.20525499]
 [0.78916124 0.21083876]
 [0.24986033 0.75013967]
 [0.22197131 0.77802869]
 [0.23257068 0.76742932]
 [0.19824901 0.80175099]
 [0.76670603 0.23329397]
 [0.2929991  0.7070009 ]
 [0.78281259 0.21718741]
 [0.83054109 0.16945891]
 [0.27048615 0.72951385]
 [0.21962996 0.78037004]
 [0.29485198 0.70514802]
 [0.84789781 0.15210219]
 [0.83569176 0.16430824]
 [0.8448779  0.1551221 ]
 [0.84985706 0.15014294]
 [0.81690864 0.18309136]
 [0.81983651 0.18016349]
 [0.7737536  0.2262464 ]
 [0.27798826 0.72201174]
 [0.31900489 0.68099511]
 [0.81094293 0.18905707]
 [0.81687422 0.18312578]
 [0.29591617 0.70408383]
 [0.84730978 0.15269022]
 [0.22698319 0.77301681]
 [0.26561905 0.73438095]
 [0.85403002 0.14596998]
 [0.85395403 0.14604597]
 [0.73416964 0.26583036]
 [0.31559039 0.68440961]
 [0.29200371 0.70799629]
 [0.84473805 0.15526195]
 [0.79472697 0.20527303]


[[0.8501746  0.1498254 ]
 [0.85417093 0.14582907]
 [0.85384051 0.14615949]
 [0.85151027 0.14848973]
 [0.85234121 0.14765879]
 [0.85157342 0.14842658]
 [0.81987906 0.18012094]
 [0.81984762 0.18015238]
 [0.84460436 0.15539564]
 [0.31202736 0.68797264]
 [0.8249912  0.1750088 ]
 [0.79683563 0.20316437]
 [0.32701126 0.67298874]
 [0.34580723 0.65419277]
 [0.8154722  0.1845278 ]
 [0.82670305 0.17329695]
 [0.85227869 0.14772131]
 [0.85434392 0.14565608]
 [0.81817298 0.18182702]
 [0.7916456  0.2083544 ]
 [0.21793869 0.78206131]
 [0.77245972 0.22754028]
 [0.23486604 0.76513396]
 [0.288851   0.711149  ]
 [0.82593021 0.17406979]
 [0.26280248 0.73719752]
 [0.22424649 0.77575351]
 [0.84393199 0.15606801]
 [0.24796116 0.75203884]
 [0.21743392 0.78256608]
 [0.23368699 0.76631301]
 [0.77660476 0.22339524]
 [0.29647972 0.70352028]
 [0.84793197 0.15206803]
 [0.82588315 0.17411685]
 [0.85174996 0.14825004]
 [0.84329653 0.15670347]
 [0.8139972  0.1860028 ]
 [0.84891169 0.15108831]
 [0.34576449 0.65423551]


[[9.99771804e-01 2.28195613e-04]
 [3.60696292e-04 9.99639304e-01]
 [3.60696292e-04 9.99639304e-01]
 [3.60696292e-04 9.99639304e-01]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [3.60696292e-04 9.99639304e-01]
 [3.60696292e-04 9.99639304e-01]
 [3.60696292e-04 9.99639304e-01]
 [3.60696292e-04 9.99639304e-01]
 [9.99771804e-01 2.28195613e-04]
 [3.60696292e-04 9.99639304e-01]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [3.60696292e-04 9.99639304e-01]
 [3.60696292e-04 9.99639304e-01]
 [3.60696292e-04 9.99639304e-01]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [3.60696292e-04 9.99639304e-01]
 [3.60696292e-04 9.99639304e-01]
 [9.99771804e-01 2.28195613e-04]
 [9.99771804e-01 2.28195613e-04]
 [3.60696292e-04 9.99639304e-01]
 [9.997718

[[9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [3.43997071e-04 9.99656003e-01]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [3.43997071e-04 9.99656003e-01]
 [3.43997071e-04 9.99656003e-01]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [9.99754709e-01 2.45291273e-04]
 [3.43997071e-04 9.99656003e-01]
 [9.99754709e-01 2.45291273e-04]
 [3.43997071e-04 9.99656003e-01]
 [3.43997071e-04 9.99656003e-01]
 [9.99754709e-01 2.45291273e-04]
 [3.43997071e-04 9.99656003e-01]
 [3.43997071e-04 9.99656003e-01]
 [9.99754709e-01 2.45291273e-04]
 [3.43997071e-04 9.99656003e-01]
 [3.43997071e-04 9.99656003e-01]
 [3.439970

In [10]:
import pickle
pickle_out = open("extra_trees.pickle", "wb")
pickle.dump(forest, pickle_out)
pickle_out.close()

NameError: name 'forest' is not defined

In [11]:
from sklearn.ensemble import ExtraTreesClassifier
forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
forest.fit(x_train, y_train)

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=250, n_jobs=1,
           oob_score=False, random_state=0, verbose=0, warm_start=False)

In [12]:
import pickle
pickle_out = open("extra_trees.pickle", "wb")
pickle.dump(forest, pickle_out)
pickle_out.close()

In [13]:
pickle_in = open("extra_trees.pickle", "rb")
classifier = pickle.load(pickle_in)
classifier.predict_proba(x_train[1].reshape(1, -1))[0][0]

1.0