# Libraries Imported

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

# Training Data

In [2]:
train = pd.read_csv('dataset/train.csv')

X_train = train.copy()
y_train = X_train.pop('target')

std = StandardScaler()
std.fit(X_train)
X_train = pd.DataFrame(std.transform(X_train), columns=X_train.columns)

# Testing Data

In [3]:
test = pd.read_csv('dataset/test.csv')
X_test = test.copy()
y_test = X_test.pop('target')

std = StandardScaler()
std.fit(X_test)
X_test = pd.DataFrame(std.transform(X_test), columns=X_test.columns)

# Random Forest GridSearch

In [4]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=42)

param_grid = { 
    'n_estimators': [400, 500],
    'max_features': ['auto'],
    'max_depth' : [6],
    'criterion' :['entropy']
}

CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
             param_grid={'criterion': ['entropy'], 'max_depth': [6],
                         'max_features': ['auto'], 'n_estimators': [400, 500]})

# Random Forest Best Parameters

In [5]:
CV_rfc.best_params_

{'criterion': 'entropy',
 'max_depth': 6,
 'max_features': 'auto',
 'n_estimators': 500}

# Random Forest Classifier w/ Results

In [6]:
rfc = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 500, max_depth=6, criterion='entropy')

rfc = rfc.fit(X_train,y_train)
y_pred = rfc.predict(X_test)
rfc.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.769231  0.945946  0.848485   74.000000
normal         0.826087  0.475000  0.603175   40.000000
accuracy       0.780702  0.780702  0.780702    0.780702
macro avg      0.797659  0.710473  0.725830  114.000000
weighted avg   0.789180  0.780702  0.762411  114.000000


# Decision Tree GridSearch

In [7]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=42)

param_grid = { 
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [0,1,2],
    'criterion' :['gini','entropy'],
    'splitter' :['best', 'random']
}

CV_dtc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_dtc.fit(X_train, y_train)

Traceback (most recent call last):
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero

Traceback (most recent call last):
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\lenovo\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero

 0.51813776 0.55557286 0.51813776 0.55557286 0.51074766 0.57038768
 0.56123226 0.53123918 0.56123226 0.53123918 0.5778297  0.54814815
        nan        nan        nan        nan        nan        nan
 0.51813776 0.55557286 0.51813776 0.55557286 0.54624438 0.57038768
 0.56678782 0.53123918 0.56678782 0.53123918 0.58319488 0.56851852]


GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=42),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [0, 1, 2],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'splitter': ['best', 'random']})

# Decision Tree Best Parameters

In [8]:
CV_dtc.best_params_

{'criterion': 'entropy',
 'max_depth': 2,
 'max_features': 'log2',
 'splitter': 'best'}

# Decision Tree Classifier w/ Results

In [9]:
dtc = DecisionTreeClassifier(random_state = 42, criterion = 'entropy', max_depth = 2, max_features = 'log2', splitter = 'best')

dtc = dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)
dtc.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.711864  0.567568  0.631579   74.000000
normal         0.418182  0.575000  0.484211   40.000000
accuracy       0.570175  0.570175  0.570175    0.570175
macro avg      0.565023  0.571284  0.557895  114.000000
weighted avg   0.608818  0.570175  0.579871  114.000000


# SVC GridSearch

In [16]:
from sklearn import svm

clf = svm.SVC(random_state = 42)

param_grid = { 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2,3,4],
    'gamma': ['scale', 'auto'],
    'tol': [1e-2, 1e-3, 1e-4],
    'cache_size': [100,200,300],
    'coef0':[0,0.5,1,2]
}

CV_svc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(random_state=42),
             param_grid={'cache_size': [100, 200, 300], 'coef0': [0, 0.5, 1, 2],
                         'degree': [2, 3, 4], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                         'tol': [0.01, 0.001, 0.0001]})

# SVC Best Parameters

In [17]:
CV_svc.best_params_

{'cache_size': 100,
 'coef0': 2,
 'degree': 4,
 'gamma': 'scale',
 'kernel': 'poly',
 'tol': 0.01}

# SVC Classifier w/ Results

In [19]:
clf = svm.SVC(random_state = 42, coef0 = 2, cache_size = 100, degree = 3, gamma = 'auto',
                             kernel = 'poly', tol = 0.01)

clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
clf.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.779412  0.716216  0.746479   74.000000
normal         0.543478  0.625000  0.581395   40.000000
accuracy       0.684211  0.684211  0.684211    0.684211
macro avg      0.661445  0.670608  0.663937  114.000000
weighted avg   0.696628  0.684211  0.688555  114.000000
