
#### Load the dataset.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors  import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble  import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

# Columns parsed as 32-bit floats (presumably EEG electrode channels, judging by
# their names — TODO confirm against the dataset description).
float_columns = ['CZ', 'FZ', 'Fp1', 'F3', 'FC1', 'FC5', 'FT9', 'T7', 'CP5', 'P3',
                 'P7', 'PO9', 'PZ', 'O2', 'P4', 'CP6', 'FT10', 'FC6', 'F8', 'Fp2']

# Explicit narrow dtypes: float32 for the listed columns, int8 for the
# 'Scale' column that the later cells use as the classification target.
column_dtypes = {name: 'float32' for name in float_columns}
column_dtypes['Scale'] = 'int8'

# The preprocessed file is pipe-separated.
df = pd.read_csv('../dataset/preprocessed-sam-dataset.csv', sep = '|', dtype = column_dtypes)


#### Display the dataset size.

In [2]:
# Bare last expression so the notebook displays the (rows, columns) tuple of the loaded frame.
df.shape

(140800, 21)

#### GridSearch to evaluate KNN hyperparameters.

In [3]:
# 'Scale' is the label; every other column is a feature.
y = df['Scale']
X = df.drop(columns = 'Scale')

# 4 neighbour counts x 2 Minkowski powers x 2 leaf sizes = 16 candidates.
# NOTE(review): scikit-learn documents leaf_size as a speed/memory setting of the
# tree-based neighbour search — confirm it is worth spending search budget on.
params = dict(
    n_neighbors = [3, 7, 11, 21],
    p = [1, 2],
    leaf_size = [1, 5],
)

# Shuffled, seeded 10-fold stratified split so every candidate sees the same folds.
strat_k_fold = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)

# NOTE(review): both the estimator and the search ask for 4 jobs; verify this
# does not oversubscribe the machine's cores.
model = KNeighborsClassifier(n_jobs = 4)
grid_search_cv = GridSearchCV(
    estimator = model,
    param_grid = params,
    cv = strat_k_fold,
    n_jobs = 4,
    verbose = 5,
)

# Run the exhaustive search, then report the best-scoring combination.
grid_search_cv.fit(X, y)
print(grid_search_cv.best_params_)


Fitting 10 folds for each of 16 candidates, totalling 160 fits
{'leaf_size': 1, 'n_neighbors': 21, 'p': 2}


#### GridSearch to evaluate Decision Tree hyperparameters.

In [10]:
# 'Scale' is the label; every other column is a feature.
y = df['Scale']
X = df.drop(columns = 'Scale')

# 2 split criteria x 5 depth limits = 10 candidates.
params = dict(
    criterion = ['gini', 'entropy'],
    max_depth = [3, 7, 21, 51, 71],
)

# Shuffled, seeded 10-fold stratified split so every candidate sees the same folds.
strat_k_fold = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)

# The tree itself is seeded as well, so reruns are repeatable.
model = DecisionTreeClassifier(random_state = 42)
grid_search_cv = GridSearchCV(
    estimator = model,
    param_grid = params,
    cv = strat_k_fold,
    n_jobs = 4,
    verbose = 5,
)

# Run the exhaustive search, then report the best-scoring combination.
grid_search_cv.fit(X, y)
print(grid_search_cv.best_params_)

Fitting 10 folds for each of 10 candidates, totalling 100 fits
{'criterion': 'entropy', 'max_depth': 7}


#### GridSearch to evaluate Logistic Regression hyperparameters.

In [8]:
# 'Scale' is the label; every other column is a feature.
y = df['Scale']
X = df.drop(columns = 'Scale')

# 3 solvers x 2 multi-class strategies = 6 candidates.
params = dict(
    solver = ['newton-cg', 'lbfgs', 'saga'],
    multi_class = ['ovr', 'multinomial'],
)

# Shuffled, seeded 10-fold stratified split so every candidate sees the same folds.
strat_k_fold = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)

# max_iter is left at the library default.
# NOTE(review): check the fit logs for convergence warnings before trusting the ranking.
model = LogisticRegression(random_state = 42, n_jobs = 4)
grid_search_cv = GridSearchCV(
    estimator = model,
    param_grid = params,
    cv = strat_k_fold,
    n_jobs = 4,
    verbose = 5,
)

# Run the exhaustive search, then report the best-scoring combination.
grid_search_cv.fit(X, y)
print(grid_search_cv.best_params_)

Fitting 10 folds for each of 6 candidates, totalling 60 fits
{'multi_class': 'ovr', 'solver': 'newton-cg'}


#### GridSearch to evaluate Random Forest hyperparameters.

In [11]:
# 'Scale' is the label; every other column is a feature.
y = df['Scale']
X = df.drop(columns = 'Scale')

# 2 forest sizes x 4 depth limits x 2 split criteria = 16 candidates.
params = dict(
    n_estimators = [100, 300],
    max_depth = [7, 21, 41, 71],
    criterion = ['gini', 'entropy'],
)

# Shuffled, seeded 10-fold stratified split so every candidate sees the same folds.
strat_k_fold = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)

# NOTE(review): both the forest and the search ask for 4 jobs; verify this
# does not oversubscribe the machine's cores.
model = RandomForestClassifier(random_state = 42, n_jobs = 4)
grid_search_cv = GridSearchCV(
    estimator = model,
    param_grid = params,
    cv = strat_k_fold,
    n_jobs = 4,
    verbose = 5,
)

# Run the exhaustive search, then report the best-scoring combination.
grid_search_cv.fit(X, y)
print(grid_search_cv.best_params_)

Fitting 10 folds for each of 16 candidates, totalling 160 fits
{'criterion': 'gini', 'max_depth': 21, 'n_estimators': 300}


#### GridSearch to evaluate Gradient Boosting hyperparameters.

In [None]:
# 'Scale' is the label; every other column is a feature.
y = df['Scale']
X = df.drop(columns = 'Scale')

# 2 ensemble sizes x 3 depth limits x 5 learning rates = 30 candidates.
params = dict(
    n_estimators = [100, 300],
    max_depth = [3, 7, 21],
    learning_rate = [0.01, 0.1, 0.2, 0.3, 1],
)

# Shuffled, seeded 10-fold stratified split so every candidate sees the same folds.
strat_k_fold = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)

# This search runs with 2 parallel jobs, unlike the 4 used by the other searches.
model = GradientBoostingClassifier(random_state = 42)
grid_search_cv = GridSearchCV(
    estimator = model,
    param_grid = params,
    cv = strat_k_fold,
    n_jobs = 2,
    verbose = 5,
)

# Run the exhaustive search, then report the best-scoring combination.
grid_search_cv.fit(X, y)
print(grid_search_cv.best_params_)

Fitting 10 folds for each of 30 candidates, totalling 300 fits


#### GridSearch to evaluate XGBoost hyperparameters.

In [3]:
# 'Scale' is the label; every other column is a feature.
X = df.drop('Scale', axis = 1)
y = df['Scale']

# GPU histogram tree builder. Seeded with 42 like the other seedable estimators
# in this notebook so that reruns are comparable.
model = XGBClassifier(tree_method = 'gpu_hist', random_state = 42)

# 4 depth limits = 4 candidates.
# NOTE(review): the saved output of this cell mentions 6 candidates, so it was
# produced by an earlier version of this grid — rerun to refresh it.
params = {
    'max_depth': [3, 7, 21, 51]
}

# Shuffled, seeded 10-fold stratified split so every candidate sees the same folds.
strat_k_fold = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)

# n_jobs = 1: every fit already trains on the GPU, so parallel search workers
# would all compete for the same device. The saved run with n_jobs = 4 reported
# 30 of 60 fits failing and scored as NaN.
# NOTE(review): the recorded traceback is truncated, so the root cause is not
# confirmed; if fits still fail, rerun with error_score = 'raise' and check
# whether the deep max_depth candidates exceed GPU memory.
grid_search_cv = GridSearchCV(model, cv = strat_k_fold, param_grid = params, n_jobs = 1, verbose = 5)

# Run the exhaustive search, then report the best-scoring combination.
grid_search_cv.fit(X, y)
print(grid_search_cv.best_params_)

Fitting 10 folds for each of 6 candidates, totalling 60 fits


30 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\DeveloperTools\anaconda3\envs\opencv\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\DeveloperTools\anaconda3\envs\opencv\lib\site-packages\xgboost\core.py", line 620, in inner_f
    return func(**kwargs)
  File "C:\DeveloperTools\anaconda3\envs\opencv\lib\site-packages\xgboost\sklearn.py", line 1516, in fit
    self._Booster = train(
  File "C:\DeveloperTools\anaconda3\envs\opencv\lib\site-packages\xgboost\core.py", line 620, in inner_f
    return func(**kwargs)
  File "

{'max_depth': 7}
