# Libraries Imported

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn import metrics


# Training Data

In [2]:
train = pd.read_csv('dataset/train.csv')

X_train = train.copy()
y_train = X_train.pop('target')

std = StandardScaler()
std.fit(X_train)
X_train = pd.DataFrame(std.transform(X_train), columns=X_train.columns)

# Testing Data

In [3]:
test = pd.read_csv('dataset/test.csv')
X_test = test.copy()
y_test = X_test.pop('target')

std = StandardScaler()
std.fit(X_test)
X_test = pd.DataFrame(std.transform(X_test), columns=X_test.columns)

# Random Forest GridSearch

In [4]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=42)

param_grid = { 
    'n_estimators': [400, 500],
    'max_features': ['auto'],
    'max_depth' : [6],
    'criterion' :['entropy']
}

CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
             param_grid={'criterion': ['entropy'], 'max_depth': [6],
                         'max_features': ['auto'], 'n_estimators': [400, 500]})

# Random Forest Best Parameters

In [5]:
CV_rfc.best_params_

{'criterion': 'entropy',
 'max_depth': 6,
 'max_features': 'auto',
 'n_estimators': 500}

# Random Forest Classifier w/ Results

In [6]:
rfc = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 500, max_depth=6, criterion='entropy')

rfc = rfc.fit(X_train,y_train)
y_pred = rfc.predict(X_test)
rfc.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.769231  0.945946  0.848485   74.000000
normal         0.826087  0.475000  0.603175   40.000000
accuracy       0.780702  0.780702  0.780702    0.780702
macro avg      0.797659  0.710473  0.725830  114.000000
weighted avg   0.789180  0.780702  0.762411  114.000000


# Decision Tree GridSearch

In [7]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=42)

param_grid = { 
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [0,1,2],
    'criterion' :['gini','entropy'],
    'splitter' :['best', 'random']
}

CV_dtc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_dtc.fit(X_train, y_train)

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

 0.51813776 0.55557286 0.51813776 0.55557286 0.51074766 0.57038768
 0.56123226 0.53123918 0.56123226 0.53123918 0.5778297  0.54814815
        nan        nan        nan        nan        nan        nan
 0.51813776 0.55557286 0.51813776 0.55557286 0.54624438 0.57038768
 0.56678782 0.53123918 0.56678782 0.53123918 0.58319488 0.56851852]


GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=42),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [0, 1, 2],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'splitter': ['best', 'random']})

# Decision Tree Best Parameters

In [8]:
CV_dtc.best_params_

{'criterion': 'entropy',
 'max_depth': 2,
 'max_features': 'log2',
 'splitter': 'best'}

# Decision Tree Classifier w/ Results

In [9]:
dtc = DecisionTreeClassifier(random_state = 42, criterion = 'entropy', max_depth = 2, max_features = 'log2', splitter = 'best')

dtc = dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)
dtc.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.711864  0.567568  0.631579   74.000000
normal         0.418182  0.575000  0.484211   40.000000
accuracy       0.570175  0.570175  0.570175    0.570175
macro avg      0.565023  0.571284  0.557895  114.000000
weighted avg   0.608818  0.570175  0.579871  114.000000


# SVC GridSearch

In [10]:
from sklearn import svm

clf = svm.SVC(random_state = 42)

param_grid = { 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2,3,4],
    'gamma': ['scale', 'auto'],
    'tol': [1e-2, 1e-3, 1e-4],
    'cache_size': [100,200,300],
    'coef0':[0,0.5,1,2]
}

CV_svc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(random_state=42),
             param_grid={'cache_size': [100, 200, 300], 'coef0': [0, 0.5, 1, 2],
                         'degree': [2, 3, 4], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                         'tol': [0.01, 0.001, 0.0001]})

# SVC Best Parameters

In [11]:
CV_svc.best_params_

{'cache_size': 100,
 'coef0': 2,
 'degree': 4,
 'gamma': 'scale',
 'kernel': 'poly',
 'tol': 0.01}

# SVC Classifier w/ Results

In [12]:
clf = svm.SVC(random_state = 42, coef0 = 2, cache_size = 100, degree = 3, gamma = 'auto',
                             kernel = 'poly', tol = 0.01)

clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
clf.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.779412  0.716216  0.746479   74.000000
normal         0.543478  0.625000  0.581395   40.000000
accuracy       0.684211  0.684211  0.684211    0.684211
macro avg      0.661445  0.670608  0.663937  114.000000
weighted avg   0.696628  0.684211  0.688555  114.000000


# KNN Grid Search


In [13]:
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier()

param_grid = { 
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'n_neighbors': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
}

CV_knnc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_knnc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'metric': ['minkowski', 'euclidean'],
                         'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15],
                         'weights': ['uniform', 'distance']})

# KNN Best Parameters

In [14]:
CV_knnc.best_params_

{'algorithm': 'auto',
 'metric': 'minkowski',
 'n_neighbors': 2,
 'weights': 'uniform'}

# KNN Classifier w/ Results

In [15]:
knnc = KNeighborsClassifier(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=10, n_neighbors=2, p=3,
                     weights='uniform')
knnc = knnc.fit(X_train,y_train)
y_pred = knnc.predict(X_test)
knnc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.687500  0.891892  0.776471   74.000000
normal         0.555556  0.250000  0.344828   40.000000
accuracy       0.666667  0.666667  0.666667    0.666667
macro avg      0.621528  0.570946  0.560649  114.000000
weighted avg   0.641204  0.666667  0.625017  114.000000


# ANN Grid Search

# ANN Best Parameters

# ANN Classifier w/ Results

# Naive Bayes Grid Search


In [28]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
import sklearn

clf = GaussianNB()

param_grid = { 
    'var_smoothing': np.logspace(0,-9, num=100)
}

cv_method = RepeatedStratifiedKFold(n_splits=5, 
                                    n_repeats=3, 
                                    random_state=900)

CV_nbc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= cv_method, scoring='accuracy')
CV_nbc.fit(X_train, y_train)

GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=10, n_splits=10, random_state=900),
             estimator=GaussianNB(),
             param_grid={'var_smoothing': array([1.00000000e+00, 8.11130831e-01, 6.57933225e-01, 5.33669923e-01,
       4.32876128e-01, 3.51119173e-01, 2.84803587e-01, 2.31012970e-01,
       1.87381742e-01, 1.51991108e-01, 1.23284674e-01, 1.00000000e-01,
       8.11130831e-02, 6.57933225e-02...
       1.23284674e-07, 1.00000000e-07, 8.11130831e-08, 6.57933225e-08,
       5.33669923e-08, 4.32876128e-08, 3.51119173e-08, 2.84803587e-08,
       2.31012970e-08, 1.87381742e-08, 1.51991108e-08, 1.23284674e-08,
       1.00000000e-08, 8.11130831e-09, 6.57933225e-09, 5.33669923e-09,
       4.32876128e-09, 3.51119173e-09, 2.84803587e-09, 2.31012970e-09,
       1.87381742e-09, 1.51991108e-09, 1.23284674e-09, 1.00000000e-09])},
             scoring='accuracy')

In [23]:

sklearn.metrics.SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_wei

# Naive Bayes Best Parameters

In [29]:
CV_nbc.best_params_

{'var_smoothing': 0.0657933224657568}

# Naive Bayes Classifier w/ Results

In [18]:
nbc = GaussianNB(var_smoothing=0.01873817422860384)
nbc = nbc.fit(X_train,y_train)
y_pred = nbc.predict(X_test)
nbc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.647059  0.445946  0.528000   74.000000
normal         0.349206  0.550000  0.427184   40.000000
accuracy       0.482456  0.482456  0.482456    0.482456
macro avg      0.498133  0.497973  0.477592  114.000000
weighted avg   0.542549  0.482456  0.492626  114.000000


In [30]:
nbc = GaussianNB(var_smoothing=0.0657933224657568)
nbc = nbc.fit(X_train,y_train)
y_pred = nbc.predict(X_test)
nbc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.641509  0.459459  0.535433   74.000000
normal         0.344262  0.525000  0.415842   40.000000
accuracy       0.482456  0.482456  0.482456    0.482456
macro avg      0.492886  0.492230  0.475637  114.000000
weighted avg   0.537212  0.482456  0.493471  114.000000
