In [78]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, confusion_matrix
from prettytable import PrettyTable
from sklearn.decomposition import PCA

In [79]:
df = pd.read_excel('Dry_Bean_Dataset.xlsx', sheet_name='Dry_Beans_Dataset', engine = 'openpyxl')
df.head()

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724,SEKER
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.27275,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843,SEKER
2,29380,624.11,212.82613,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066,SEKER
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199,SEKER
4,30140,620.134,201.847882,190.279279,1.060798,0.33368,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166,SEKER


In [80]:
print(df.shape)

(13611, 17)


In [81]:
labels = df['Class'].value_counts().index.tolist()
labels

['DERMASON', 'SIRA', 'SEKER', 'HOROZ', 'CALI', 'BARBUNYA', 'BOMBAY']

In [82]:
df_train_val, df_test = train_test_split(df, test_size = 0.2, random_state = 23)


In [83]:
print(df_train_val.shape)
print(df_test.shape)

(10888, 17)
(2723, 17)


In [84]:
X_train_val = df_train_val.drop(['Class'], axis = 1)
y_train_val = df_train_val['Class']
X_test = df_test.drop(['Class'], axis = 1)
y_test = df_test['Class']

In [85]:
pca = PCA(n_components = 2) #mle be auto n_components

---

In [86]:
model = RandomForestClassifier(n_estimators=20, max_depth=50, min_samples_leaf=5)
acc_sum = 0
pre_sum = 0
rec_sum = 0
cm_sum = 0

K = 5
kf = KFold(n_splits=K, shuffle=True)
for train_id, val_id in kf.split(df_train_val):
    X_train, X_val = X_train_val.iloc[train_id], X_train_val.iloc[val_id]
    y_train, y_val = y_train_val.iloc[train_id], y_train_val.iloc[val_id]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    acc_sum += accuracy_score(y_val, y_pred)
    pre_sum += precision_score(y_val, y_pred, average = None)
    rec_sum += recall_score(y_val, y_pred, average = None)
    cm_sum += confusion_matrix(y_val, y_pred)

acc_avg = acc_sum / K
pre_avg = pre_sum / K
rec_avg = rec_sum / K
cm_avg = cm_sum / K
# print(acc_sum / K)
# print(pre_sum / K)
# print(rec_sum / K)
# print(cm_sum / K)
print("##### Validation ##########################################################")
print("##### {}-Fold ##############################################################".format(K))
print("##### Random Forest #######################################################")
print("##### n_estimator = 20, max_depth = 50, min_samples_leaf = 5 ###")
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg])
myTable.add_column("Precision", pre_avg)
myTable.add_column("Recall", rec_avg)
print(myTable)
print(pd.DataFrame(cm_avg))
print("##### Test ################################################################")
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc, acc, acc, acc, acc, acc, acc,])
myTable.add_column("Precision", precision_score(y_test, y_pred, average=None))
myTable.add_column("Recall", recall_score(y_test, y_pred, average=None))
print(myTable)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))


##### Validation ##########################################################
##### 5-Fold ##############################################################
##### Random Forest #######################################################
##### n_estimator = 20, max_depth = 50, min_samples_leaf = 5 ###
+----------+-------------------+--------------------+--------------------+
|  Class   |      Accuracy     |     Precision      |       Recall       |
+----------+-------------------+--------------------+--------------------+
| DERMASON | 0.917156068135314 | 0.9424483219112363 | 0.9028322952090285 |
|   SIRA   | 0.917156068135314 | 0.9976190476190476 | 0.9943661971830986 |
|  SEKER   | 0.917156068135314 | 0.9206450758522854 | 0.9399018428135862 |
|  HOROZ   | 0.917156068135314 | 0.9035473974640891 | 0.9212796620526678 |
|   CALI   | 0.917156068135314 | 0.9558980313869803 | 0.9417347489619317 |
| BARBUNYA | 0.917156068135314 | 0.9395123614914528 | 0.9420067084795605 |
|  BOMBAY  | 0.917156068135314 |

In [87]:
model = RandomForestClassifier(n_estimators=20, max_depth=50, min_samples_leaf=5)
acc_sum = 0
pre_sum = 0
rec_sum = 0
cm_sum = 0

K = 2
kf = KFold(n_splits=K, shuffle=True)
for train_id, val_id in kf.split(df_train_val):
    X_train, X_val = X_train_val.iloc[train_id], X_train_val.iloc[val_id]
    y_train, y_val = y_train_val.iloc[train_id], y_train_val.iloc[val_id]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    acc_sum += accuracy_score(y_val, y_pred)
    pre_sum += precision_score(y_val, y_pred, average = None)
    rec_sum += recall_score(y_val, y_pred, average = None)
    cm_sum += confusion_matrix(y_val, y_pred)

acc_avg = acc_sum / K
pre_avg = pre_sum / K
rec_avg = rec_sum / K
cm_avg = cm_sum / K
# print(acc_sum / K)
# print(pre_sum / K)
# print(rec_sum / K)
# print(cm_sum / K)
print("##### Validation ##########################################################")
print("##### {}-Fold ##############################################################".format(K))
print("##### Random Forest #######################################################")
print("##### n_estimator = 20, max_depth = 50, min_samples_leaf = 5 ##############")
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg])
myTable.add_column("Precision", pre_avg)
myTable.add_column("Recall", rec_avg)
print(myTable)
print(pd.DataFrame(cm_avg))
print("##### Test ################################################################")
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc, acc, acc, acc, acc, acc, acc,])
myTable.add_column("Precision", precision_score(y_test, y_pred, average=None))
myTable.add_column("Recall", recall_score(y_test, y_pred, average=None))
print(myTable)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))

##### Validation ##########################################################
##### 2-Fold ##############################################################
##### Random Forest #######################################################
##### n_estimator = 20, max_depth = 50, min_samples_leaf = 5 ##############
+----------+--------------------+--------------------+--------------------+
|  Class   |      Accuracy      |     Precision      |       Recall       |
+----------+--------------------+--------------------+--------------------+
| DERMASON | 0.9142174871418075 | 0.9369855111051943 | 0.900652612157037  |
|   SIRA   | 0.9142174871418075 |        1.0         |      0.99375       |
|  SEKER   | 0.9142174871418075 | 0.9232110980309433 | 0.937303606164896  |
|  HOROZ   | 0.9142174871418075 | 0.901124690303372  | 0.9188708136476069 |
|   CALI   | 0.9142174871418075 | 0.9590335683063209 | 0.9392664425580042 |
| BARBUNYA | 0.9142174871418075 |  0.93456373703303  | 0.9362608584568217 |
|  BOMBAY  |

In [88]:
K = 5
n_estimator = 5
max_depth = 50
min_samples_leaf = 5

model = RandomForestClassifier(n_estimators=n_estimator, max_depth=max_depth, min_samples_leaf=min_samples_leaf)
acc_sum = 0
pre_sum = 0
rec_sum = 0
cm_sum = 0


kf = KFold(n_splits=K, shuffle=True)
for train_id, val_id in kf.split(df_train_val):
    X_train, X_val = X_train_val.iloc[train_id], X_train_val.iloc[val_id]
    y_train, y_val = y_train_val.iloc[train_id], y_train_val.iloc[val_id]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    acc_sum += accuracy_score(y_val, y_pred)
    pre_sum += precision_score(y_val, y_pred, average = None)
    rec_sum += recall_score(y_val, y_pred, average = None)
    cm_sum += confusion_matrix(y_val, y_pred)

acc_avg = acc_sum / K
pre_avg = pre_sum / K
rec_avg = rec_sum / K
cm_avg = cm_sum / K
# print(acc_sum / K)
# print(pre_sum / K)
# print(rec_sum / K)
# print(cm_sum / K)
print("##### Validation ##########################################################")
print("##### {}-Fold ##############################################################".format(K))
print("##### Random Forest #######################################################")
print("##### n_estimator = %s, max_depth = %s, min_samples_leaf = %s #############" %(n_estimator, max_depth, min_samples_leaf))
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg])
myTable.add_column("Precision", pre_avg)
myTable.add_column("Recall", rec_avg)
print(myTable)
print(pd.DataFrame(cm_avg))
print("##### Test ################################################################")
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc, acc, acc, acc, acc, acc, acc,])
myTable.add_column("Precision", precision_score(y_test, y_pred, average=None))
myTable.add_column("Recall", recall_score(y_test, y_pred, average=None))
print(myTable)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))

##### Validation ##########################################################
##### 5-Fold ##############################################################
##### Random Forest #######################################################
##### n_estimator = 5, max_depth = 50, min_samples_leaf = 5 #############
+----------+--------------------+--------------------+--------------------+
|  Class   |      Accuracy      |     Precision      |       Recall       |
+----------+--------------------+--------------------+--------------------+
| DERMASON | 0.9128395914715703 | 0.9210052446523906 | 0.9017841042435514 |
|   SIRA   | 0.9128395914715703 |        1.0         | 0.9956272401433692 |
|  SEKER   | 0.9128395914715703 | 0.9265334841212391 | 0.9193961402086277 |
|  HOROZ   | 0.9128395914715703 | 0.8992315718171211 | 0.9180603134931873 |
|   CALI   | 0.9128395914715703 | 0.9500359252851315 | 0.9456399744101083 |
| BARBUNYA | 0.9128395914715703 | 0.9371052070189381 | 0.9430715977874156 |
|  BOMBAY  | 0

In [89]:
K = 5
n_estimator = 5
max_depth = 5
min_samples_leaf = 5

model = RandomForestClassifier(n_estimators=n_estimator, max_depth=max_depth, min_samples_leaf=min_samples_leaf)
acc_sum = 0
pre_sum = 0
rec_sum = 0
cm_sum = 0


kf = KFold(n_splits=K, shuffle=True)
for train_id, val_id in kf.split(df_train_val):
    X_train, X_val = X_train_val.iloc[train_id], X_train_val.iloc[val_id]
    y_train, y_val = y_train_val.iloc[train_id], y_train_val.iloc[val_id]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    acc_sum += accuracy_score(y_val, y_pred)
    pre_sum += precision_score(y_val, y_pred, average = None)
    rec_sum += recall_score(y_val, y_pred, average = None)
    cm_sum += confusion_matrix(y_val, y_pred)

acc_avg = acc_sum / K
pre_avg = pre_sum / K
rec_avg = rec_sum / K
cm_avg = cm_sum / K
# print(acc_sum / K)
# print(pre_sum / K)
# print(rec_sum / K)
# print(cm_sum / K)
print("##### Validation ##########################################################")
print("##### {}-Fold ##############################################################".format(K))
print("##### Random Forest #######################################################")
print("##### n_estimator = %s, max_depth = %s, min_samples_leaf = %s #############" %(n_estimator, max_depth, min_samples_leaf))
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg])
myTable.add_column("Precision", pre_avg)
myTable.add_column("Recall", rec_avg)
print(myTable)
print(pd.DataFrame(cm_avg))
print("##### Test ################################################################")
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc, acc, acc, acc, acc, acc, acc,])
myTable.add_column("Precision", precision_score(y_test, y_pred, average=None))
myTable.add_column("Recall", recall_score(y_test, y_pred, average=None))
print(myTable)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))

##### Validation ##########################################################
##### 5-Fold ##############################################################
##### Random Forest #######################################################
##### n_estimator = 5, max_depth = 5, min_samples_leaf = 5 #############
+----------+--------------------+--------------------+--------------------+
|  Class   |      Accuracy      |     Precision      |       Recall       |
+----------+--------------------+--------------------+--------------------+
| DERMASON | 0.8823475073109683 | 0.8625150113216886 | 0.6829030512398309 |
|   SIRA   | 0.8823475073109683 | 0.9931653712141518 | 0.9907991681521093 |
|  SEKER   | 0.8823475073109683 | 0.7688648465131175 | 0.9026491135574115 |
|  HOROZ   | 0.8823475073109683 | 0.8997217988040764 | 0.9189144618008672 |
|   CALI   | 0.8823475073109683 | 0.9792972268851333 | 0.8975493772212608 |
| BARBUNYA | 0.8823475073109683 | 0.9462442455087563 | 0.9232804990384176 |
|  BOMBAY  | 0.

In [90]:
K = 5
n_estimator = 5
max_depth = 50
min_samples_leaf = 1

model = RandomForestClassifier(n_estimators=n_estimator, max_depth=max_depth, min_samples_leaf=min_samples_leaf)
acc_sum = 0
pre_sum = 0
rec_sum = 0
cm_sum = 0


kf = KFold(n_splits=K, shuffle=True)
for train_id, val_id in kf.split(df_train_val):
    X_train, X_val = X_train_val.iloc[train_id], X_train_val.iloc[val_id]
    y_train, y_val = y_train_val.iloc[train_id], y_train_val.iloc[val_id]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    acc_sum += accuracy_score(y_val, y_pred)
    pre_sum += precision_score(y_val, y_pred, average = None)
    rec_sum += recall_score(y_val, y_pred, average = None)
    cm_sum += confusion_matrix(y_val, y_pred)

acc_avg = acc_sum / K
pre_avg = pre_sum / K
rec_avg = rec_sum / K
cm_avg = cm_sum / K
# print(acc_sum / K)
# print(pre_sum / K)
# print(rec_sum / K)
# print(cm_sum / K)
print("##### Validation ##########################################################")
print("##### {}-Fold ##############################################################".format(K))
print("##### Random Forest #######################################################")
print("##### n_estimator = %s, max_depth = %s, min_samples_leaf = %s #############" %(n_estimator, max_depth, min_samples_leaf))
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg])
myTable.add_column("Precision", pre_avg)
myTable.add_column("Recall", rec_avg)
print(myTable)
print(pd.DataFrame(cm_avg))
print("##### Test ################################################################")
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc, acc, acc, acc, acc, acc, acc,])
myTable.add_column("Precision", precision_score(y_test, y_pred, average=None))
myTable.add_column("Recall", recall_score(y_test, y_pred, average=None))
print(myTable)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))

##### Validation ##########################################################
##### 5-Fold ##############################################################
##### Random Forest #######################################################
##### n_estimator = 5, max_depth = 50, min_samples_leaf = 1 #############
+----------+--------------------+--------------------+--------------------+
|  Class   |      Accuracy      |     Precision      |       Recall       |
+----------+--------------------+--------------------+--------------------+
| DERMASON | 0.9114623075453242 | 0.9151196641037966 | 0.9200813820282147 |
|   SIRA   | 0.9114623075453242 |        1.0         | 0.9976190476190476 |
|  SEKER   | 0.9114623075453242 | 0.927139223502398  | 0.9308428501258247 |
|  HOROZ   | 0.9114623075453242 | 0.8982344034709921 | 0.9090153639133691 |
|   CALI   | 0.9114623075453242 | 0.9467700145737032 | 0.9337022983232153 |
| BARBUNYA | 0.9114623075453242 | 0.9391082122342287 | 0.9423461688228583 |
|  BOMBAY  | 0

In [91]:
K = 5
n_estimator = 5
max_depth = 50
min_samples_leaf = 5

model = RandomForestClassifier(n_estimators=n_estimator, max_depth=max_depth, min_samples_leaf=min_samples_leaf)
acc_sum = 0
pre_sum = 0
rec_sum = 0
cm_sum = 0


kf = KFold(n_splits=K, shuffle=True)
for train_id, val_id in kf.split(df_train_val):
    X_train, X_val = X_train_val.iloc[train_id], X_train_val.iloc[val_id]
    y_train, y_val = y_train_val.iloc[train_id], y_train_val.iloc[val_id]
    ### PCA ###
    pca.fit(X_train)
    X_train = pca.transform(X_train)
    X_val = pca.transform(X_val)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    acc_sum += accuracy_score(y_val, y_pred)
    pre_sum += precision_score(y_val, y_pred, average = None)
    rec_sum += recall_score(y_val, y_pred, average = None)
    cm_sum += confusion_matrix(y_val, y_pred)

acc_avg = acc_sum / K
pre_avg = pre_sum / K
rec_avg = rec_sum / K
cm_avg = cm_sum / K
# print(acc_sum / K)
# print(pre_sum / K)
# print(rec_sum / K)
# print(cm_sum / K)
print("##### With PCA ############################################################")
print("##### Validation ##########################################################")
print("##### {}-Fold ##############################################################".format(K))
print("##### Random Forest #######################################################")
print("##### n_estimator = %s, max_depth = %s, min_samples_leaf = %s #############" %(n_estimator, max_depth, min_samples_leaf))
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg])
myTable.add_column("Precision", pre_avg)
myTable.add_column("Recall", rec_avg)
print(myTable)
print(pd.DataFrame(cm_avg))
print("##### Test ################################################################")
### PCA ###
X_test_pca = pca.transform(X_test)

y_pred = model.predict(X_test_pca)
acc = accuracy_score(y_test, y_pred)
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc, acc, acc, acc, acc, acc, acc,])
myTable.add_column("Precision", precision_score(y_test, y_pred, average=None))
myTable.add_column("Recall", recall_score(y_test, y_pred, average=None))
print(myTable)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))

##### With PCA ############################################################
##### Validation ##########################################################
##### 5-Fold ##############################################################
##### Random Forest #######################################################
##### n_estimator = 5, max_depth = 50, min_samples_leaf = 5 #############
+----------+--------------------+--------------------+---------------------+
|  Class   |      Accuracy      |     Precision      |        Recall       |
+----------+--------------------+--------------------+---------------------+
| DERMASON | 0.6788208219076386 | 0.5279928160378344 | 0.49807329237788567 |
|   SIRA   | 0.6788208219076386 | 0.997752808988764  |  0.9831078286362839 |
|  SEKER   | 0.6788208219076386 | 0.666714244770184  |  0.6557320461969571 |
|  HOROZ   | 0.6788208219076386 | 0.7945675093860683 |  0.8418952864097294 |
|   CALI   | 0.6788208219076386 | 0.6175404652131924 |  0.6004324202086083 |
| BARB

---

In [92]:
from sklearn.neighbors import KNeighborsClassifier

In [93]:
K = 5
n_neighbors = 10

model = KNeighborsClassifier(n_neighbors=n_neighbors)
acc_sum = 0
pre_sum = 0
rec_sum = 0
cm_sum = 0


kf = KFold(n_splits=K, shuffle=True)
for train_id, val_id in kf.split(df_train_val):
    X_train, X_val = X_train_val.iloc[train_id], X_train_val.iloc[val_id]
    y_train, y_val = y_train_val.iloc[train_id], y_train_val.iloc[val_id]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    acc_sum += accuracy_score(y_val, y_pred)
    pre_sum += precision_score(y_val, y_pred, average = None)
    rec_sum += recall_score(y_val, y_pred, average = None)
    cm_sum += confusion_matrix(y_val, y_pred)

acc_avg = acc_sum / K
pre_avg = pre_sum / K
rec_avg = rec_sum / K
cm_avg = cm_sum / K
# print(acc_sum / K)
# print(pre_sum / K)
# print(rec_sum / K)
# print(cm_sum / K)
print("##### Validation ##########################################################")
print("##### {}-Fold ##############################################################".format(K))
print("##### KNN #################################################################")
print("##### n_neighbors = %s ####################################################" %(n_neighbors))
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg])
myTable.add_column("Precision", pre_avg)
myTable.add_column("Recall", rec_avg)
print(myTable)
print(pd.DataFrame(cm_avg))
print("##### Test ################################################################")
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc, acc, acc, acc, acc, acc, acc,])
myTable.add_column("Precision", precision_score(y_test, y_pred, average=None))
myTable.add_column("Recall", recall_score(y_test, y_pred, average=None))
print(myTable)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))

##### Validation ##########################################################
##### 5-Fold ##############################################################
##### KNN #################################################################
##### n_neighbors = 10 ####################################################
+----------+-------------------+--------------------+--------------------+
|  Class   |      Accuracy     |     Precision      |       Recall       |
+----------+-------------------+--------------------+--------------------+
| DERMASON | 0.699301740839303 | 0.5077754868812182 | 0.5124196168801095 |
|   SIRA   | 0.699301740839303 |        1.0         | 0.9951461988304093 |
|  SEKER   | 0.699301740839303 | 0.6582800302948262 | 0.6074759068350069 |
|  HOROZ   | 0.699301740839303 | 0.7762330719292002 | 0.8925205042483771 |
|   CALI   | 0.699301740839303 | 0.6719816431051339 | 0.6531715490093185 |
| BARBUNYA | 0.699301740839303 | 0.7411912617379782 | 0.5123427372257862 |
|  BOMBAY  | 0.699301

In [94]:
K = 5
n_neighbors = 5

model = KNeighborsClassifier(n_neighbors=n_neighbors)
acc_sum = 0
pre_sum = 0
rec_sum = 0
cm_sum = 0


kf = KFold(n_splits=K, shuffle=True)
for train_id, val_id in kf.split(df_train_val):
    X_train, X_val = X_train_val.iloc[train_id], X_train_val.iloc[val_id]
    y_train, y_val = y_train_val.iloc[train_id], y_train_val.iloc[val_id]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    acc_sum += accuracy_score(y_val, y_pred)
    pre_sum += precision_score(y_val, y_pred, average = None)
    rec_sum += recall_score(y_val, y_pred, average = None)
    cm_sum += confusion_matrix(y_val, y_pred)

acc_avg = acc_sum / K
pre_avg = pre_sum / K
rec_avg = rec_sum / K
cm_avg = cm_sum / K
# print(acc_sum / K)
# print(pre_sum / K)
# print(rec_sum / K)
# print(cm_sum / K)
print("##### Validation ##########################################################")
print("##### {}-Fold ##############################################################".format(K))
print("##### KNN #################################################################")
print("##### n_neighbors = %s ####################################################" %(n_neighbors))
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg, acc_avg])
myTable.add_column("Precision", pre_avg)
myTable.add_column("Recall", rec_avg)
print(myTable)
print(pd.DataFrame(cm_avg))
print("##### Test ################################################################")
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
myTable = PrettyTable()
myTable.add_column("Class", labels)
myTable.add_column("Accuracy", [acc, acc, acc, acc, acc, acc, acc,])
myTable.add_column("Precision", precision_score(y_test, y_pred, average=None))
myTable.add_column("Recall", recall_score(y_test, y_pred, average=None))
print(myTable)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))

##### Validation ##########################################################
##### 5-Fold ##############################################################
##### KNN #################################################################
##### n_neighbors = 5 ####################################################
+----------+--------------------+--------------------+---------------------+
|  Class   |      Accuracy      |     Precision      |        Recall       |
+----------+--------------------+--------------------+---------------------+
| DERMASON | 0.7080250030264646 | 0.4804265896901253 | 0.48120346769793015 |
|   SIRA   | 0.7080250030264646 | 0.9974999999999999 |  0.9952094090648307 |
|  SEKER   | 0.7080250030264646 | 0.6335463570940697 |  0.6256657418303482 |
|  HOROZ   | 0.7080250030264646 | 0.7896708955592098 |  0.887679266048567  |
|   CALI   | 0.7080250030264646 | 0.7025064240178274 |  0.6660646763579334 |
| BARBUNYA | 0.7080250030264646 | 0.7630251579398297 |  0.5971482524721444 |
|  B