In [1]:
import numpy as np
# Seed NumPy's global random generator before anything else runs.
# NOTE(review): presumably this is what makes the mltools shuffling/bootstrapping
# below repeatable -- confirm that mltools draws from np.random.
np.random.seed(0)
import mltools as ml
import matplotlib.pyplot as plt 

In [2]:
# Read the comma-separated training features and labels, then pass both through
# ml.shuffleData before any splitting happens.
X_path, Y_path = 'data/X_train.txt', 'data/Y_train.txt'
X, Y = ml.shuffleData(
    np.genfromtxt(X_path, delimiter=','),
    np.genfromtxt(Y_path, delimiter=','),
)

In [3]:
# Hold-out split: the first 3710 shuffled rows are used for training,
# every remaining row for validation.
n_train = 3710
Xtr, Xva = X[:n_train, :], X[n_train:, :]
Ytr, Yva = Y[:n_train], Y[n_train:]

In [4]:
class BaggedTree(ml.base.classifier):
    """Ensemble classifier whose soft prediction is the mean of its members' soft predictions."""

    def __init__(self, learners):
        """Keep the given collection of learners as the ensemble members."""
        self.learners = learners

    def predictSoft(self, X):
        """Call predictSoft(X) on every member and return the element-wise average."""
        member_outputs = [member.predictSoft(X) for member in self.learners]
        # axis=0 averages across members, leaving each member's output shape intact.
        return np.mean(member_outputs, axis=0)

In [12]:

a = []               # one BaggedTree per entry of b, in the same order
b = [10,20,40,80]    # ensemble sizes to compare
for ensemble_size in b:
    members = []
    for _ in range(ensemble_size):
        # Resample the training split (same number of rows), then fit one tree on it.
        Xboot, Yboot = ml.bootstrapData(Xtr, Ytr, Xtr.shape[0])
        members.append(ml.dtree.treeClassify(Xboot, Yboot, maxDepth = 20, nFeatures=20))
    ensemble = BaggedTree(members)
    # BaggedTree.__init__ does not set classes, so assign them from the training labels.
    ensemble.classes = np.unique(Ytr)
    a.append(ensemble)

In [13]:

# Report err() on the validation split for every ensemble, in the order they were built.
for ensemble in a:
    print(ensemble.err(Xva, Yva))

0.37462967950444387
0.3528144357662268
0.34527336385671964
0.33638567196337193


In [14]:
# Final bagged model: 80 trees, each fit on a bootstrap resample of the full
# labelled data set (X, Y) rather than only the training split.
bags = []
for _ in range(80):
    Xboot, Yboot = ml.bootstrapData(X, Y, X.shape[0])
    bags.append(ml.dtree.treeClassify(Xboot, Yboot, maxDepth = 20, nFeatures=20))
bt = BaggedTree(bags)
# BaggedTree.__init__ does not set classes, so assign them from the labels.
bt.classes = np.unique(Y)

In [17]:
# Load the test features and keep column 1 of the bagged ensemble's soft predictions.
Xte = np.genfromtxt('data/X_test.txt', delimiter=',')
bagged_soft = bt.predictSoft(Xte)
x1 = bagged_soft[:, 1]
print(x1)

[0.34262116 0.20234897 0.41517511 ... 0.4291518  0.85459643 0.52882896]


In [18]:
M = X.shape[0]
nEns = 100                  # number of boosting rounds / regression trees
en = [None]*nEns            # en[l] holds the tree fit in round l
YHat = np.zeros((M,nEns))   # NOTE(review): allocated but never read in the cells shown
def sigma(z):
    """Return 1/(1+exp(z)) element-wise, i.e. exp(-z)/(1+exp(-z)).

    Evaluated as exp(-logaddexp(0, z)): the literal quotient overflows to
    inf/inf = nan once -z is large enough for exp to overflow, whereas this
    form returns 1.0 there and 0.0 for very large positive z.
    """
    return np.exp(-np.logaddexp(0., z))
f = np.zeros(Y.shape)       # running ensemble score; sigma(f) is what gets compared with Y
alpha = 0.5                 # step size applied to every tree's output
for l in range(nEns): # this is a lot faster than the bagging loop:
    # Residual between the labels and the current sigma(f).
    dJ = 1.*Y - sigma(f)
    en[l] = ml.dtree.treeRegress(X,dJ, maxDepth=3) # train and save learner
    # sigma is decreasing in its argument, so subtracting the fitted residual
    # moves sigma(f) toward Y.
    f -= alpha*en[l].predict(X)

In [19]:
# Only the first 39 entries of en contribute to the test prediction.
# NOTE(review): the training cell fits more learners than this; presumably 39 was
# picked deliberately (e.g. by validation) -- confirm.
n_rounds_used = 39
a = np.zeros((Xte.shape[0], n_rounds_used))
for col, learner in enumerate(en[:n_rounds_used]):
    # Same signed step that was applied to the training score.
    a[:, col] = -alpha * learner.predict(Xte)
# Sum the per-round contributions for each test row, then map through sigma.
preds = sigma(a.sum(axis=1))

In [21]:
# Keep the boosted-ensemble test predictions under the name x2 and show them.
x2 = preds
print(x2)

[0.35828508 0.20386771 0.5047032  ... 0.47998709 0.9209455  0.4204067 ]


In [26]:
# Blend the two prediction vectors with fixed weights: 0.2 on x1 and 0.8 on x2.
c = (0.2*x1+0.8*x2)
print(c)

[0.3551523  0.20356396 0.48679758 ... 0.46982003 0.90767569 0.44209116]


In [27]:
# Write the submission file: an 'ID,Predicted' header followed by one row per
# test example holding its 0-based row index and the blended prediction c.
# The '%.2f' in the format string rounds each prediction to two decimals.
row_ids = np.arange(Xte.shape[0])
Yte = np.column_stack((row_ids, c))
np.savetxt('Y_submit.txt',Yte,'%d, %.2f',header='ID,Predicted',comments='',delimiter=',')