In [None]:
import numpy as np
import mltools as ml
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import accuracy_score

In [None]:
# Read the training features, training labels and test features from
# whitespace-delimited text files (delimiter=None), in that order.
irisXt, irisYt, irisXv = (
    np.genfromtxt(path, delimiter=None)
    for path in ("data/X_train.txt", "data/Y_train.txt", "data/X_test.txt")
)

# Short aliases used by the model cells below.
Xtr = irisXt
Ytr = irisYt


In [None]:
print("Random Forest")

class randomForest(ml.base.classifier):
    """Ensemble classifier that averages the soft predictions of its members."""

    def __init__(self, learners):
        """Store the member learners and adopt the class list of the first one.

        learners: sequence of trained learners, each exposing `classes` and
                  `predictSoft(X)`.
        """
        self.learners = learners
        self.classes = learners[0].classes

    def predictSoft(self, X):
        """Return the mean of every member's `predictSoft(X)` output.

        The accumulator has one row per row of X and one column per class.
        """
        total = np.zeros((X.shape[0], len(self.classes)))
        for member in self.learners:
            total += member.predictSoft(X)
        return total / len(self.learners)

#*********************
# Initialize Learner *
#*********************

# Train each tree on its own bootstrap resample of the training data.
numEnsemble = 500
ensemble = []
for treeIdx in range(numEnsemble):
    Xb, Yb = ml.bootstrapData(Xtr, Ytr, n_boot=66000)
    ensemble.append(
        ml.dtree.treeClassify(Xb, Yb, maxDepth=50, minLeaf=4, nFeatures=4)
    )

rf = randomForest(ensemble)
# Disabled validation check; Xva / Yva are not defined in the visible cells.
# rfAUC = rf.auc(Xva, Yva) - 0.02
# print(rfAUC)

#******************
# Make Prediction *
#******************

# Keep the second column of the averaged soft predictions.
yPredictRF = rf.predictSoft(irisXv)[:, 1]

# Two-column submission file: row index, predicted value.
rfSubmission = np.column_stack((np.arange(len(yPredictRF)), yPredictRF))
np.savetxt(
    'Yhat_dtree_bags.txt',
    rfSubmission,
    '%d, %.2f',
    header='ID,Prob1',
    comments='',
    delimiter=',',
)
print("Finish Saving Random Forest Result")

In [None]:
print("Gradient Boosting")

#*********************
# Initialize Learner *
#*********************

# Gradient-boosted trees fitted on the full training set.
learner = GradientBoostingClassifier(
    learning_rate=0.02,
    n_estimators=3000,
    max_depth=9,
    min_samples_leaf=32,
    max_features="log2",
)

learner.fit(Xtr, Ytr)

# Disabled validation check. Xva / Yva are not defined in the visible cells,
# and `learner.score` reports mean accuracy (not AUC), hence the variable name.
# gbAccuracy = learner.score(Xva, Yva)
# print(gbAccuracy)

#******************
# Make Prediction *
#******************

# Keep the second column of predict_proba (written out under the 'Prob1' header).
yPredictGB = learner.predict_proba(irisXv)[:, 1]

# Two-column submission file: row index, predicted probability.
np.savetxt(
    'Yhat_gradient_boost.txt',
    np.vstack((np.arange(len(yPredictGB)), yPredictGB)).T,
    '%d, %.2f',
    header='ID,Prob1',
    comments='',
    delimiter=',',
)
print("Finish Saving Gradient Boost Result")

In [None]:
print("AdaBoost")

#*********************
# Initialize Learner *
#*********************

# Weak learner boosted by AdaBoost.
weakTree = DecisionTreeClassifier(max_depth=12, min_samples_leaf=8, max_features="log2")

# NOTE(review): newer scikit-learn releases reportedly rename `base_estimator`
# to `estimator` and deprecate 'SAMME.R' -- confirm against the installed version.
learner = AdaBoostClassifier(
    base_estimator=weakTree,
    n_estimators=2500,
    learning_rate=0.005,
    algorithm='SAMME.R',
)

learner.fit(Xtr, Ytr)

# Disabled validation check; Xva / Yva are not defined in the visible cells.
# abAUC = learner.score(Xva, Yva)
# print(abAUC)

#******************
# Make Prediction *
#******************

# Keep the second column of predict_proba (written out under the 'Prob1' header).
yPredictAB = learner.predict_proba(irisXv)[:, 1]

# Two-column submission file: row index, predicted probability.
abSubmission = np.column_stack((np.arange(len(yPredictAB)), yPredictAB))
np.savetxt(
    'Yhat_adaboost.txt',
    abSubmission,
    '%d, %.2f',
    header='ID,Prob1',
    comments='',
    delimiter=',',
)
print("Finish Saving adaBoost Result")


In [None]:
# Weighted blend of the random-forest and gradient-boosting predictions.
# The AdaBoost predictions (yPredictAB) are not part of this blend.
rfWeight, gbWeight = 0.2, 0.8
yPredict = rfWeight * yPredictRF + gbWeight * yPredictGB

# Two-column submission file: row index, blended prediction.
finalSubmission = np.column_stack((np.arange(len(yPredict)), yPredict))
np.savetxt(
    'Yhat_final.txt',
    finalSubmission,
    '%d, %.2f',
    header='ID,Prob1',
    comments='',
    delimiter=',',
)
print("Finish Saving final Result")