In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, f1_score

import numpy as np

In [None]:
def runBaggingClassifier(i, j):
    TRAIN_PATH = f'all_data/train_c{str(i)}_d{str(j)}.csv'
    TEST_PATH = f'all_data/test_c{str(i)}_d{str(j)}.csv'
    VAL_PATH = f'all_data/valid_c{str(i)}_d{str(j)}.csv'

    train_data = np.genfromtxt(TRAIN_PATH, delimiter = ',')
    val_data = np.genfromtxt(VAL_PATH, delimiter = ',')
    test_data = np.genfromtxt(TEST_PATH, delimiter = ',')

    X_train = train_data[:, :-1]
    y_train = train_data[:, -1]

    X_val = val_data[:, :-1]
    y_val = val_data[:, -1]

    X_test = test_data[:, :-1]
    y_test = test_data[:, -1]

    param_grid = {
    'n_estimators': [10, 20, 30, 40],
    'max_samples': [0.5, 0.7, 0.9],
    'max_features': [0.5, 0.7, 0.9]
    }

    dt = DecisionTreeClassifier()

    clf = BaggingClassifier(dt, bootstrap=True, oob_score=True, n_jobs=-1)

    grid_search = GridSearchCV(clf, param_grid=param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X_val, y_val)

    print('Best Parameters found -', grid_search.best_params_)

    X_trainval = np.concatenate((X_train, X_val), axis = 0)
    y_trainval = np.concatenate((y_train, y_val), axis = 0)
    
    best_params = grid_search.best_params_

    dt = DecisionTreeClassifier()

    clf = BaggingClassifier(
    dt, n_estimators=best_params['n_estimators'],
    max_samples=best_params['max_samples'],
    max_features=best_params['max_features']
    )

    clf.fit(X_trainval, y_trainval)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print('Test set accuracy =', accuracy)
    print('Test set f1 score =', f1)

In [None]:
runBaggingClassifier(300, 100)

Best Parameters found - {'max_features': 0.7, 'max_samples': 0.7, 'n_estimators': 40}
Test set accuracy = 0.785
Test set f1 score = 0.7724867724867724


In [None]:
runBaggingClassifier(300, 1000)

Best Parameters found - {'max_features': 0.7, 'max_samples': 0.9, 'n_estimators': 40}
Test set accuracy = 0.8435
Test set f1 score = 0.8429503261414952


In [None]:
runBaggingClassifier(300, 5000)



Best Parameters found - {'max_features': 0.7, 'max_samples': 0.9, 'n_estimators': 40}
Test set accuracy = 0.9233
Test set f1 score = 0.9238860772055175


In [None]:
runBaggingClassifier(500, 100)

Best Parameters found - {'max_features': 0.9, 'max_samples': 0.5, 'n_estimators': 40}
Test set accuracy = 0.815
Test set f1 score = 0.8159203980099502


In [None]:
runBaggingClassifier(500, 1000)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.9, 'n_estimators': 40}
Test set accuracy = 0.9085
Test set f1 score = 0.9064895247828308


In [None]:
runBaggingClassifier(500, 5000)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.9, 'n_estimators': 40}
Test set accuracy = 0.9438
Test set f1 score = 0.9437775110044017


In [None]:
runBaggingClassifier(1000, 100)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.7, 'n_estimators': 40}
Test set accuracy = 0.965
Test set f1 score = 0.964102564102564


In [None]:
runBaggingClassifier(1000, 1000)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.5, 'n_estimators': 40}
Test set accuracy = 0.9615
Test set f1 score = 0.961824491819534


In [None]:
runBaggingClassifier(1000, 5000)



Best Parameters found - {'max_features': 0.5, 'max_samples': 0.7, 'n_estimators': 40}
Test set accuracy = 0.9814
Test set f1 score = 0.9813776531838205


In [None]:
runBaggingClassifier(1500, 100)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.9, 'n_estimators': 40}
Test set accuracy = 0.995
Test set f1 score = 0.9950248756218906


In [None]:
runBaggingClassifier(1500, 1000)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.7, 'n_estimators': 40}
Test set accuracy = 0.997
Test set f1 score = 0.997002997002997


In [None]:
runBaggingClassifier(1500, 5000)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.7, 'n_estimators': 40}
Test set accuracy = 0.9981
Test set f1 score = 0.9981001899810018


In [None]:
runBaggingClassifier(1800, 100)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.5, 'n_estimators': 30}
Test set accuracy = 0.985
Test set f1 score = 0.9850746268656716


In [None]:
runBaggingClassifier(1800, 1000)

Best Parameters found - {'max_features': 0.5, 'max_samples': 0.9, 'n_estimators': 40}
Test set accuracy = 0.9995
Test set f1 score = 0.9995002498750626


In [None]:
runBaggingClassifier(1800, 5000)



Best Parameters found - {'max_features': 0.5, 'max_samples': 0.9, 'n_estimators': 20}
Test set accuracy = 0.9997
Test set f1 score = 0.9996999699969997
