In [89]:
import sklearn.datasets as ds
from sklearn.model_selection import cross_val_score as cv_score
import sklearn.tree as tr
from sklearn.ensemble import BaggingClassifier as bc
from sklearn.ensemble import RandomForestClassifier as rf
import math

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [90]:
# Load the scikit-learn digits dataset; later cells read its 'data' and 'target' entries.
df = ds.load_digits()

In [91]:
# Pull the two entries used by every experiment below out of the loaded dataset:
# X is passed to cross_val_score as the samples, y as the targets.
X, y = df['data'], df['target']

In [37]:
def quality(model):
    """Return the mean cross-validation score of ``model`` on the notebook-level ``X`` and ``y``.

    The estimator is evaluated with ``cross_val_score`` using ``cv=10`` and
    ``n_jobs=-2``; the per-fold scores are averaged into a single number.
    """
    fold_scores = cv_score(model, X, y, cv=10, n_jobs=-2)
    return fold_scores.mean()

In [60]:
def answer(question, answer):
    """Print an answer and save it to a text file.

    Parameters
    ----------
    question : str or path-like
        Path of the file to create or overwrite (opened in ``"w"`` mode).
    answer : object
        Value to report. It is converted with ``str`` once, so both
        pre-formatted strings and raw numbers are accepted; the same text
        is printed and written to the file (no trailing newline is added).
    """
    # Convert up front: file.write() only accepts str, and doing it once
    # guarantees the printed text and the file contents are identical.
    text = str(answer)
    print(text)
    # Explicit encoding keeps the output file independent of the OS locale.
    with open(question, "w", encoding="utf-8") as f:
        f.write(text)

In [77]:
%%time
# Baseline: cross-validated score of a single decision tree with default settings.
# random_state is fixed so that re-running the cell reproduces the same score.
ans1 = quality(tr.DecisionTreeClassifier(random_state=42))

CPU times: user 54.6 ms, sys: 3.98 ms, total: 58.6 ms
Wall time: 89.3 ms


In [61]:
# Print the single-tree score and store it in the file for question 1.
answer(question='1.txt', answer=str(ans1))

0.8286013766652399


In [76]:
%%time
# Bagging over 100 default base estimators.
# random_state is fixed so that the bootstrap samples, and hence the score,
# are the same on every run.
ans2 = quality(bc(n_estimators=100, random_state=42, n_jobs=-2))

CPU times: user 70.9 ms, sys: 8.52 ms, total: 79.4 ms
Wall time: 2.72 s


In [62]:
# Print the plain-bagging score and store it in the file for question 2.
answer(question='2.txt', answer=str(ans2))

0.922193838474484


In [75]:
%%time
# Bagging where max_features is set to int(sqrt(number of columns of X)).
# random_state is fixed so that the sampled rows/features are reproducible.
ans3 = quality(
    bc(
        n_estimators=100,
        max_features=int(math.sqrt(X.shape[1])),
        random_state=42,
        n_jobs=-2,
    )
)

CPU times: user 70.3 ms, sys: 3.83 ms, total: 74.1 ms
Wall time: 1.01 s


In [64]:
# Print the feature-restricted bagging score and store it in the file for question 3.
answer(question='3.txt', answer=str(ans3))

0.9365589185725612


In [74]:
%%time
# Bagging over decision trees that use max_features='sqrt'.
# The base tree is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in 1.4),
# while the first positional parameter is the base estimator in both old and
# new releases, so this form works regardless of the installed version.
# random_state is fixed so the score is reproducible across runs.
ans4 = quality(
    bc(
        tr.DecisionTreeClassifier(max_features='sqrt'),
        n_estimators=100,
        random_state=42,
        n_jobs=-2,
    )
)

CPU times: user 51.1 ms, sys: 31.8 ms, total: 82.9 ms
Wall time: 1.18 s


In [65]:
# Print the score of bagging over sqrt-feature trees and store it in the file for question 4.
answer(question='4.txt', answer=str(ans4))

0.9505601646228452


In [84]:
%%time
# Sweep the number of trees in a random forest from 5 to 100 in steps of 5.
# A fixed random_state keeps the printed scores reproducible, so differences
# between rows come from n_estimators rather than from run-to-run noise.
n_tree = range(5, 101, 5)
for n in n_tree:
    print(f"n = {n}:", quality(rf(n_estimators=n, random_state=42, n_jobs=-2)))

n = 5: 0.8831373715201598
n = 10: 0.9193089281297551
n = 15: 0.933758322211361
n = 20: 0.9422531850820162
n = 25: 0.9461248619015784
n = 30: 0.9460792380717743
n = 35: 0.9438127938649524
n = 40: 0.951027112188733
n = 45: 0.9494390156957454
n = 50: 0.9489175855536448
n = 55: 0.9544595907732283
n = 60: 0.9539006264703683
n = 65: 0.9483275990639367
n = 70: 0.9494342696852485
n = 75: 0.9499957728121899
n = 80: 0.9533762523357006
n = 85: 0.9516262282978
n = 90: 0.9483213568666832
n = 95: 0.9466345895304901
n = 100: 0.9522613822650812
CPU times: user 2.34 s, sys: 230 ms, total: 2.57 s
Wall time: 16.4 s


In [82]:
%%time
# Sweep max_features for a 100-tree random forest.
# A fixed random_state keeps the printed scores reproducible, so differences
# between rows come from max_features rather than from run-to-run noise.
n_features = [5, 10, 40, 50, 60]
for n in n_features:
    print(f"n = {n}:", quality(rf(n_estimators=100, max_features=n, random_state=42, n_jobs=-2)))

n = 5: 0.9522207852793263
n = 10: 0.9494239990759551
n = 40: 0.941640778716829
n = 50: 0.9344188755827109
n = 60: 0.929944572419599
CPU times: user 512 ms, sys: 39.7 ms, total: 552 ms
Wall time: 8.22 s


In [87]:
%%time
# Compare max_depth=None against max_depth 5 and 6 for a 100-tree random forest.
# A fixed random_state keeps the printed scores reproducible, so differences
# between rows come from max_depth rather than from run-to-run noise.
max_depths = [None, 5, 6]
for n in max_depths:
    print(f"n = {n}:", quality(rf(n_estimators=100, max_depth=n, random_state=42, n_jobs=-2)))

n = None: 0.9572250195748229
n = 5: 0.9125941004721698
n = 6: 0.925433463735132
CPU times: user 297 ms, sys: 31.6 ms, total: 328 ms
Wall time: 2.92 s


In [88]:
# Print the hand-picked statement numbers and store them in the file for question 5.
answer(question='5.txt', answer='2 3 4 7')

2 3 4 7
