In [1]:
import numpy as np
import pandas as pd
from sklearn import tree
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.datasets import make_regression
from sklearn.datasets import make_classification
from sklearn.datasets import make_gaussian_quantiles
from sklearn import ensemble

In [4]:
X,Y = make_classification(n_samples = 10000, n_features = 12, 
                     n_informative = 10, n_classes = 5, random_state = 0 )
print(X.shape)
print(Y.shape)

(10000, 12)
(10000,)


In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size = 0.3)

In [7]:
score = np.array([])
for j in np.arange(20) + 2:
    mod_reg = tree.DecisionTreeClassifier(max_depth = j)
    mod_cv = cross_val_score(mod_reg, X_train,Y_train, 
                             cv = 10)
    score = np.append(score, np.mean(mod_cv))
    print("j->", j, "Mean->", np.mean(mod_cv))
param = np.argmax(score) + 2
param

j-> 2 Mean-> 0.3940098381622313
j-> 3 Mean-> 0.4485724697769314
j-> 4 Mean-> 0.49014219306690654
j-> 5 Mean-> 0.5251411108690611
j-> 6 Mean-> 0.5544099339671922
j-> 7 Mean-> 0.5854243866959938
j-> 8 Mean-> 0.6122720275281148
j-> 9 Mean-> 0.6322778683513348
j-> 10 Mean-> 0.6356998950596029
j-> 11 Mean-> 0.6425607705623447
j-> 12 Mean-> 0.640419339953011
j-> 13 Mean-> 0.6369882996142036
j-> 14 Mean-> 0.6399848293195973
j-> 15 Mean-> 0.6387065100524938
j-> 16 Mean-> 0.635843980801529
j-> 17 Mean-> 0.6379882996142034
j-> 18 Mean-> 0.6358540368087813
j-> 19 Mean-> 0.6337070687195632
j-> 20 Mean-> 0.6357023522577412
j-> 21 Mean-> 0.6287051994687393


11

In [8]:
mod_reg = tree.DecisionTreeClassifier(max_depth = param)
mod_reg_fit = mod_reg.fit(X_train,Y_train)
error = mod_reg_fit.score(X_test,Y_test)
print(param)
print("Error - > ", error)

#mod = tree.DecisionTreeRegressor(max_depth = 3)
#mod.fit(X_train, Y_train)
#plt.figure(figsize = (12,12))
#tree.plot_tree(mod)
#plt.show()  

11
Error - >  0.659


In [15]:
Y_predict = mod_reg_fit.predict(X_train)
print(classification_report(Y_train, Y_predict))

              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1409
           1       0.92      0.90      0.91      1409
           2       0.86      0.88      0.87      1394
           3       0.90      0.85      0.87      1399
           4       0.90      0.88      0.89      1389

    accuracy                           0.89      7000
   macro avg       0.89      0.89      0.89      7000
weighted avg       0.89      0.89      0.89      7000



In [16]:
Y_predict = mod_reg_fit.predict(X_test)
print(classification_report(Y_test, Y_predict))

              precision    recall  f1-score   support

           0       0.65      0.73      0.69       586
           1       0.68      0.64      0.66       598
           2       0.64      0.62      0.63       614
           3       0.66      0.64      0.65       592
           4       0.66      0.67      0.67       610

    accuracy                           0.66      3000
   macro avg       0.66      0.66      0.66      3000
weighted avg       0.66      0.66      0.66      3000



In [17]:
score = np.array([])
for j in np.arange(8) + 2:
    mod_reg = ensemble.GradientBoostingClassifier(max_depth = j)
    mod_cv = cross_val_score(mod_reg, X_train,Y_train, 
                             cv = 10)
    score = np.append(score, np.mean(mod_cv))
    print("j->", j, "Mean->", np.mean(mod_cv))
param = np.argmax(score) + 2
param

j-> 2 Mean-> 0.675305917654582
j-> 3 Mean-> 0.7420147314767316
j-> 4 Mean-> 0.7853088082003425
j-> 5 Mean-> 0.8067253852694602
j-> 6 Mean-> 0.8157235804351746
j-> 7 Mean-> 0.8220081071141625
j-> 8 Mean-> 0.826296290310113
j-> 9 Mean-> 0.8261503528855096


8

In [19]:
mod_reg_boost = ensemble.GradientBoostingClassifier(max_depth = param)
mod_reg_boost_fit = mod_reg_boost.fit(X_train,Y_train)
error = mod_reg_boost_fit.score(X_test,Y_test)
print(param)
print("Error - > ", error)

8
Error - >  0.8396666666666667


In [22]:
Y_predict = mod_reg_boost_fit.predict(X_train)
print(classification_report(Y_train, Y_predict))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1409
           1       1.00      1.00      1.00      1409
           2       1.00      1.00      1.00      1394
           3       1.00      1.00      1.00      1399
           4       1.00      1.00      1.00      1389

    accuracy                           1.00      7000
   macro avg       1.00      1.00      1.00      7000
weighted avg       1.00      1.00      1.00      7000



In [23]:
Y_predict = mod_reg_boost_fit.predict(X_test)
print(classification_report(Y_test, Y_predict))

              precision    recall  f1-score   support

           0       0.84      0.85      0.85       586
           1       0.85      0.87      0.86       598
           2       0.86      0.82      0.84       614
           3       0.81      0.82      0.82       592
           4       0.84      0.84      0.84       610

    accuracy                           0.84      3000
   macro avg       0.84      0.84      0.84      3000
weighted avg       0.84      0.84      0.84      3000



In [30]:
score = np.array([])
for j in np.arange(20) + 2:
    mod_reg = ensemble.RandomForestClassifier(max_depth = j,
                                             max_features = 4,
                                             n_estimators = 200)
    mod_cv = cross_val_score(mod_reg, X_train,Y_train, 
                             cv = 10)
    score = np.append(score, np.mean(mod_cv))
    print("j->", j, "Mean->", np.mean(mod_cv))
param = np.argmax(score) + 2
param

j-> 2 Mean-> 0.48802804227598856
j-> 3 Mean-> 0.5388736861954186
j-> 4 Mean-> 0.5957394596974999
j-> 5 Mean-> 0.6435935351562042
j-> 6 Mean-> 0.6907367951952289
j-> 7 Mean-> 0.7194372582915797
j-> 8 Mean-> 0.7474316458542687
j-> 9 Mean-> 0.7688588785900508
j-> 10 Mean-> 0.7834328229201071
j-> 11 Mean-> 0.7932945135848624
j-> 12 Mean-> 0.8023003017036417
j-> 13 Mean-> 0.8068651679866201
j-> 14 Mean-> 0.8090135743214596
j-> 15 Mean-> 0.8115735437763083
j-> 16 Mean-> 0.814720308075023
j-> 17 Mean-> 0.8142933728265275
j-> 18 Mean-> 0.8167201101405686
j-> 19 Mean-> 0.8135749761658808


KeyboardInterrupt: 

In [27]:
mod_reg_rf = ensemble.RandomForestClassifier(max_depth = param,
                                             max_features = 4,
                                             n_estimators = 100)
mod_reg_rf_fit = mod_reg_rf.fit(X_train,Y_train)
error = mod_reg_rf_fit.score(X_test,Y_test)
print(param)
print("Error - > ", error)

20
Error - >  0.823


In [28]:
Y_predict = mod_reg_rf_fit.predict(X_train)
print(classification_report(Y_train, Y_predict))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1409
           1       1.00      1.00      1.00      1409
           2       1.00      1.00      1.00      1394
           3       1.00      1.00      1.00      1399
           4       1.00      1.00      1.00      1389

    accuracy                           1.00      7000
   macro avg       1.00      1.00      1.00      7000
weighted avg       1.00      1.00      1.00      7000



In [29]:
Y_predict = mod_reg_rf_fit.predict(X_test)
print(classification_report(Y_test, Y_predict))

              precision    recall  f1-score   support

           0       0.82      0.86      0.84       586
           1       0.84      0.85      0.85       598
           2       0.83      0.78      0.81       614
           3       0.80      0.78      0.79       592
           4       0.83      0.84      0.83       610

    accuracy                           0.82      3000
   macro avg       0.82      0.82      0.82      3000
weighted avg       0.82      0.82      0.82      3000



In [31]:
score = np.array([])
for j in np.arange(20) + 2:
    mod_reg = ensemble.AdaBoostClassifier(tree.DecisionTreeClassifier(max_depth = j), 
                                         n_estimators = 100)
    mod_cv = cross_val_score(mod_reg, X_train,Y_train, 
                             cv = 10)
    score = np.append(score, np.mean(mod_cv))
    print("j->", j, "Mean->", np.mean(mod_cv))
param = np.argmax(score) + 2
param

j-> 2 Mean-> 0.630723634310825
j-> 3 Mean-> 0.6761464187918269
j-> 4 Mean-> 0.6977023827091956
j-> 5 Mean-> 0.7167348685551254
j-> 6 Mean-> 0.74812932498495
j-> 7 Mean-> 0.7771606115120553
j-> 8 Mean-> 0.8008672000356046
j-> 9 Mean-> 0.8167207156560299
j-> 10 Mean-> 0.8225889568971372
j-> 11 Mean-> 0.8249991051957208
j-> 12 Mean-> 0.8347212052217217


KeyboardInterrupt: 

In [None]:
mod_reg_ada = ensemble.AdaBoostClassifier(tree.DecisionTreeClassifier(max_depth = param), 
                                         n_estimators = 100)
mod_reg_ada_fit = mod_reg_ada.fit(X_train,Y_train)
error = mod_reg_ada_fit.score(X_test,Y_test)
print(param)
print("Error - > ", error)