In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [5]:
X, y = make_moons(n_samples = 500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

In [6]:
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

voting_clf = VotingClassifier(
    estimators = [('lr', log_clf),('rf', rnd_clf),('svm', svm_clf)],
    voting = 'hard'
)

In [7]:
voting_clf.fit(X_train, y_train)



VotingClassifier(estimators=[('lr', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)), ('rf', RandomFo...f', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))],
         flatten_transform=None, n_jobs=None, voting='hard', weights=None)

In [8]:
from sklearn.metrics import accuracy_score

In [9]:
for clf in (log_clf, svm_clf, rnd_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_predict = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_predict))

LogisticRegression 0.864
SVC 0.888
RandomForestClassifier 0.912
VotingClassifier 0.888




In [10]:
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)

voting_clf = VotingClassifier(
    estimators = [('lr', log_clf),('rf', rnd_clf),('svm', svm_clf)],
    voting = 'soft'
)

for clf in (log_clf, svm_clf, rnd_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_predict = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_predict))


LogisticRegression 0.864
SVC 0.888
RandomForestClassifier 0.888
VotingClassifier 0.92




In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier


In [12]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, max_samples=100, bootstrap=True, n_jobs=-1)
bag_clf.fit(X_train, y_train)
y_predict = bag_clf.predict(X_test)
print(bag_clf.__class__.__name__, accuracy_score(y_test, y_predict))

BaggingClassifier 0.912


In [18]:
bag_clf2 = BaggingClassifier(DecisionTreeClassifier(),n_estimators=500, bootstrap=True, n_jobs=-1, oob_score=True)
bag_clf2.fit(X_train, y_train)
y_predict = bag_clf2.predict(X_test)
print(bag_clf2.oob_score_)
print(bag_clf2.__class__.__name__, accuracy_score(y_test, y_predict))

0.8986666666666666
BaggingClassifier 0.912


In [20]:
from sklearn.ensemble import RandomForestClassifier
rnd_clf = RandomForestClassifier(n_estimators = 500, n_jobs=-1, max_leaf_nodes=16 )
rnd_clf.fit(X_train, y_train)
y_predict = rnd_clf.predict(X_test)
print(rnd_clf.__class__.__name__, accuracy_score(y_test, y_predict))

RandomForestClassifier 0.912


In [22]:
bag_clf3 = BaggingClassifier(
        DecisionTreeClassifier(splitter='random', max_leaf_nodes=16), n_estimators=500, n_jobs=-1, max_samples=1.0, bootstrap=True)
bag_clf3.fit(X_train, y_train)
y_predict = bag_clf3.predict(X_test)
print(bag_clf3.__class__.__name__, accuracy_score(y_test, y_predict))

BaggingClassifier 0.912


In [23]:
from sklearn.ensemble import AdaBoostClassifier
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200, algorithm='SAMME.R', learning_rate=0.5)
ada_clf.fit(X_train, y_train)
y_predict = ada_clf.predict(X_test)
print(ada_clf.__class__.__name__, accuracy_score(y_test, y_predict))

AdaBoostClassifier 0.896


In [24]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0)
gbrt.fit(X_train, y_train)
y_predict = gbrt.predict(X_test)
print(mean_squared_error(y_test, y_predict))

0.08239669753072186


In [29]:
import numpy as np
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y)

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=12)
gbrt.fit(X_train, y_train)
errors = [mean_squared_error(y_val,y_pred) for y_pred in gbrt.staged_predict(X_val)]
print(errors)
err_n = np.argmin(errors)

gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=err_n)
gbrt_best.fit(X_train, y_train)
y_predict = gbrt_best.predict(X_val)
print(mean_squared_error(y_val, y_predict))


[0.22535136085701532, 0.20464249946397559, 0.18619003481916896, 0.1729699538388438, 0.16275993123567267, 0.1528150048606917, 0.1465039859755851, 0.14006856124133513, 0.13630300630792605, 0.13219457560859418, 0.13004174699639753, 0.1278676956951201]
0.13004174699639756


In [32]:
grbt = GradientBoostingRegressor(max_depth=2, warm_start=True)

min_err = float('inf')
going = 0
for n_estimators in range(1,1000):
    grbt.n_estimators = n_estimators
    grbt.fit(X_train, y_train)
    y_predict = grbt.predict(X_test)
    error = mean_squared_error(y_test, y_predict)
    if error < min_err:
        min_err = error
        going = 0
    else:
        going += 1
        if going > 5:
            break;
print(grbt.n_estimators)
# y_p = grbt
    
    

359
