# **A Quick Tutorial on AI Techniques**
> ## *Author* : [Rathachai CHAWUTHAI](https://rathachai.creatier.pro/) , Ph.D
> ### *Affiliation* : Computer Engineering, King Mongkut's Institute of Technology Ladkrabang (KMITL)
> #### *Updated Date* : 2022-04-15
---

> <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/">Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License</a>.



---

## **Answer Function**

In [372]:
def print_answer(model):
    import pandas as pd
    
    ANSWER_CSV_PATH = "./Ex1-Class-unseen.csv"
    answer_df = pd.read_csv(ANSWER_CSV_PATH)
    print(model.predict(answer_df))
    print(model.predict_proba(answer_df))
    print(model.classes_)
    
    

## **Decision Tree**

### Import Libraries

In [373]:
import numpy as np
import pandas as pd

from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

### Dataset

In [374]:
CSV_PATH = "./Ex1-Class-train.csv"
df = pd.read_csv(CSV_PATH)

In [375]:
df

Unnamed: 0,x1,x2,x3,x4,x5,y
0,-17.20,30.38,7.48,26.15,21.32,B
1,5.23,-11.30,-4.49,-8.77,-6.76,A
2,2.31,6.24,9.27,13.17,-3.58,B
3,1.24,6.91,8.41,14.41,-2.43,B
4,10.53,-23.79,-9.82,-23.38,-12.86,A
...,...,...,...,...,...,...
185,24.23,-34.07,-4.41,15.78,-37.05,B
186,5.88,-25.08,-18.39,-14.31,-9.34,A
187,0.53,1.10,2.35,-5.73,0.57,A
188,13.01,-36.62,-21.88,-1.76,-21.85,A


### Train-Test Split

In [376]:
x_columns = ["x1","x2","x3","x4","x5"]
y_column = 'y';
X = df[x_columns]
y = df[y_column]

In [377]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [378]:
model = DecisionTreeClassifier()

In [379]:
model.fit(X_train.values, y_train)

In [380]:
print(tree.export_text(model))

|--- feature_2 <= 6.36
|   |--- feature_4 <= 8.21
|   |   |--- feature_3 <= 7.57
|   |   |   |--- feature_0 <= 0.40
|   |   |   |   |--- class: C
|   |   |   |--- feature_0 >  0.40
|   |   |   |   |--- feature_3 <= 2.91
|   |   |   |   |   |--- class: A
|   |   |   |   |--- feature_3 >  2.91
|   |   |   |   |   |--- feature_3 <= 3.95
|   |   |   |   |   |   |--- class: C
|   |   |   |   |   |--- feature_3 >  3.95
|   |   |   |   |   |   |--- class: A
|   |   |--- feature_3 >  7.57
|   |   |   |--- feature_4 <= -14.99
|   |   |   |   |--- class: B
|   |   |   |--- feature_4 >  -14.99
|   |   |   |   |--- feature_2 <= -5.16
|   |   |   |   |   |--- class: A
|   |   |   |   |--- feature_2 >  -5.16
|   |   |   |   |   |--- feature_0 <= -3.02
|   |   |   |   |   |   |--- class: A
|   |   |   |   |   |--- feature_0 >  -3.02
|   |   |   |   |   |   |--- feature_0 <= 3.88
|   |   |   |   |   |   |   |--- class: B
|   |   |   |   |   |   |--- feature_0 >  3.88
|   |   |   |   |   |   |   |--- c

### Prediction

In [381]:
# model.predict([[5,3,1,1]]) # NOTE : EDIT VALUE HERE

In [382]:
# model.predict_proba([[5,3,1,1]]) # NOTE : EDIT VALUE HERE

In [383]:
model.classes_

array(['A', 'B', 'C'], dtype=object)

### Evaluation

In [384]:
y_pred = model.predict(X_test)



In [385]:
y_pred

array(['A', 'B', 'C', 'B', 'A', 'C', 'A', 'C', 'C', 'B', 'B', 'B', 'A',
       'C', 'B', 'C', 'B', 'C', 'C', 'C', 'A', 'C', 'C', 'C', 'C', 'C',
       'C', 'A', 'B', 'C', 'C', 'C', 'C', 'A', 'A', 'C', 'B', 'A', 'B',
       'C', 'B', 'A', 'A', 'C', 'A', 'B', 'A', 'B', 'C', 'B', 'A', 'B',
       'B', 'B', 'C', 'B', 'A', 'A', 'B', 'B', 'C', 'A', 'C', 'A', 'A',
       'A', 'A', 'B', 'B', 'C', 'A', 'A', 'C', 'B', 'A', 'C'],
      dtype=object)

In [386]:
# Classification Metrics

y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



In [387]:
print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.8552631578947368
Precision : 0.8552631578947368
Recall : 0.8552631578947368
F1 : 0.8552631578947367
Confustion Matrix :
 [[18  1  2]
 [ 4 20  0]
 [ 2  2 27]]
Classification Report :
               precision    recall  f1-score   support

           A       0.75      0.86      0.80        21
           B       0.87      0.83      0.85        24
           C       0.93      0.87      0.90        31

    accuracy                           0.86        76
   macro avg       0.85      0.85      0.85        76
weighted avg       0.86      0.86      0.86        76

Roc score : 0.8917027118640023
Log Loss : 5.216844569477483


In [388]:
print_answer(model)

['A' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']
[[1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]]
['A' 'B' 'C']






---



## **Logistic Regression**

### Import Libraries

In [389]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

### Dataset and Train-Test Split

In [390]:
CSV_PATH = "./Ex1-Class-train.csv"
df = pd.read_csv(CSV_PATH)

x_columns = ["x1","x2","x3","x4","x5"]
y_column = 'y';
X = df[x_columns]
y = df[y_column]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [391]:
model = LogisticRegression()

In [392]:
model.fit(X_train.values, y_train)

In [393]:
print("coef :", model.coef_)
print("intercept : ", model.intercept_)

coef : [[-0.02121881 -0.08363452 -0.11635047 -0.02681045  0.0104399 ]
 [ 0.13354705  0.0968751   0.27356717  0.23126967 -0.16707984]
 [-0.11232825 -0.01324058 -0.1572167  -0.20445922  0.15663994]]
intercept :  [ 0.9835766  -0.97965251 -0.00392409]


### Evaluation

In [394]:
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.868421052631579
Precision : 0.868421052631579
Recall : 0.868421052631579
F1 : 0.868421052631579
Confustion Matrix :
 [[16  5  2]
 [ 1 26  2]
 [ 0  0 24]]
Classification Report :
               precision    recall  f1-score   support

           A       0.94      0.70      0.80        23
           B       0.84      0.90      0.87        29
           C       0.86      1.00      0.92        24

    accuracy                           0.87        76
   macro avg       0.88      0.86      0.86        76
weighted avg       0.88      0.87      0.86        76

Roc score : 0.9224575326208372
Log Loss : 0.7759710321610744




### Prediction

In [395]:
# print(" - predicted class:", model.predict([(5,3)])) # NOTE : EDIT VALUE HERE
# print(" - classes :", model.classes_)
# print(" - probabilities :", model.predict_proba([(5,3)]))
print_answer(model)

['B' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']
[[2.46888344e-01 7.52764975e-01 3.46681253e-04]
 [1.43700517e-02 1.00325846e-01 8.85304102e-01]
 [9.85428555e-01 1.24080331e-02 2.16341228e-03]
 [8.95071779e-02 5.27285730e-02 8.57764249e-01]
 [9.23954673e-01 5.34976464e-02 2.25476805e-02]
 [9.91104044e-01 5.20949866e-03 3.68645699e-03]
 [3.37455459e-02 2.00816941e-02 9.46172760e-01]
 [2.49541135e-05 9.99969019e-01 6.02723542e-06]
 [9.81718124e-02 5.02678723e-02 8.51560315e-01]
 [6.00582894e-02 8.74295152e-04 9.39067415e-01]]
['A' 'B' 'C']






---



## **Naïve Bayes**

### Import Libraries

In [396]:
import numpy as np
import pandas as pd

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

### Dataset and Train-Test Split

In [397]:
CSV_PATH = "./Ex1-Class-train.csv"
df = pd.read_csv(CSV_PATH)

x_columns = ["x1","x2","x3","x4","x5"]
y_column = 'y';
X = df[x_columns]
y = df[y_column]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [398]:
model = GaussianNB()

In [399]:
model.fit(X_train, y_train)

### Evaluation

In [400]:
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.8026315789473685
Precision : 0.8026315789473685
Recall : 0.8026315789473685
F1 : 0.8026315789473685
Confustion Matrix :
 [[25  2  1]
 [ 4 18  3]
 [ 1  4 18]]
Classification Report :
               precision    recall  f1-score   support

           A       0.83      0.89      0.86        28
           B       0.75      0.72      0.73        25
           C       0.82      0.78      0.80        23

    accuracy                           0.80        76
   macro avg       0.80      0.80      0.80        76
weighted avg       0.80      0.80      0.80        76

Roc score : 0.9250102361152281
Log Loss : 0.6100523513974528


### Prediction

In [401]:
# print(" - predicted class:", model.predict([(5,3)])) # NOTE : EDIT VALUE HERE
# print(" - classes :", model.classes_)
# print(" - probabilities :", model.predict_proba([(5,3)]))
print_answer(model)

['A' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']
[[8.59089859e-01 1.40909220e-01 9.21358444e-07]
 [9.97808415e-05 6.85227290e-02 9.31377490e-01]
 [9.90630598e-01 9.36774286e-03 1.65959936e-06]
 [9.27092761e-03 5.30273651e-02 9.37701707e-01]
 [6.14405142e-01 3.39612251e-01 4.59826066e-02]
 [9.87671593e-01 1.22750811e-02 5.33257320e-05]
 [4.58396944e-04 2.70357250e-02 9.72505878e-01]
 [2.26533429e-03 9.37030828e-01 6.07038379e-02]
 [1.13232497e-01 1.02774799e-01 7.83992704e-01]
 [4.95612626e-04 1.48110092e-01 8.51394295e-01]]
['A' 'B' 'C']


## K-Nearest Neighbors

In [402]:
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

In [403]:
CSV_PATH = "./Ex1-Class-train.csv"
df = pd.read_csv(CSV_PATH)

x_columns = ["x1","x2","x3","x4","x5"]
y_column = 'y';
X = df[x_columns]
y = df[y_column]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

In [404]:
model = KNeighborsClassifier(n_neighbors=7)
model.fit(X_train.values, y_train)

In [405]:
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.8157894736842105
Precision : 0.8157894736842105
Recall : 0.8157894736842105
F1 : 0.8157894736842104
Confustion Matrix :
 [[24  1  1]
 [ 8 19  2]
 [ 2  0 19]]
Classification Report :
               precision    recall  f1-score   support

           A       0.71      0.92      0.80        26
           B       0.95      0.66      0.78        29
           C       0.86      0.90      0.88        21

    accuracy                           0.82        76
   macro avg       0.84      0.83      0.82        76
weighted avg       0.84      0.82      0.81        76

Roc score : 0.9655002008670387
Log Loss : 0.8194753081944663




In [406]:

print_answer(model)

['A' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']
[[0.71428571 0.28571429 0.        ]
 [0.         0.14285714 0.85714286]
 [0.85714286 0.         0.14285714]
 [0.         0.         1.        ]
 [0.85714286 0.14285714 0.        ]
 [0.85714286 0.         0.14285714]
 [0.         0.         1.        ]
 [0.         1.         0.        ]
 [0.14285714 0.         0.85714286]
 [0.         0.         1.        ]]
['A' 'B' 'C']




## Artificial Neural Network

In [407]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

In [408]:
CSV_PATH = "./Ex1-Class-train.csv"
df = pd.read_csv(CSV_PATH)

x_columns = ["x1","x2","x3","x4","x5"]
y_column = 'y';
X = df[x_columns]
y = df[y_column]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

In [409]:
model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=1000, activation="tanh")
model.fit(X_train.values, y_train)

In [410]:
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)


Accuracy : 0.881578947368421
Precision : 0.881578947368421
Recall : 0.881578947368421
F1 : 0.881578947368421
Confustion Matrix :
 [[18  4  1]
 [ 1 22  0]
 [ 2  1 27]]
Classification Report :
               precision    recall  f1-score   support

           A       0.86      0.78      0.82        23
           B       0.81      0.96      0.88        23
           C       0.96      0.90      0.93        30

    accuracy                           0.88        76
   macro avg       0.88      0.88      0.88        76
weighted avg       0.89      0.88      0.88        76

Roc score : 0.9511439248928996
Log Loss : 0.43356906309654575




In [411]:

print_answer(model)

['A' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']
[[5.01125587e-01 4.94648191e-01 4.22622168e-03]
 [1.35043240e-02 3.77331660e-01 6.09164016e-01]
 [9.75162105e-01 2.08613487e-02 3.97654597e-03]
 [1.11166141e-02 3.94446452e-03 9.84938921e-01]
 [7.12153808e-01 2.83432576e-01 4.41361598e-03]
 [9.89177058e-01 5.66458352e-03 5.15835895e-03]
 [4.70672648e-03 1.00468739e-02 9.85246400e-01]
 [6.55755793e-03 9.82343217e-01 1.10992249e-02]
 [2.98297010e-02 7.43231426e-04 9.69427068e-01]
 [1.94248187e-02 1.98547303e-01 7.82027878e-01]]
['A' 'B' 'C']




## K-Fold Cross-Validation

In [412]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold

from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

In [413]:
CSV_PATH = "./Ex1-Class-train.csv"
df = pd.read_csv(CSV_PATH)

x_columns = ["x1","x2","x3","x4","x5"]
y_column = 'y';
X = df[x_columns]
y = df[y_column]

In [414]:
kf = KFold(n_splits=4)

In [415]:
acc_list = []
prec_list = []
recall_list = []
f1_list = []


for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  y_pred = model.predict(X_test)
  y_pred_proba = model.predict_proba(X_test)

  acc = accuracy_score(y_test, y_pred)
  prec = precision_score(y_test, y_pred, average='micro')
  recall = recall_score(y_test, y_pred, average='micro')
  f1 = f1_score(y_test, y_pred, average='micro')
  con = confusion_matrix(y_test, y_pred)
  report = classification_report(y_test, y_pred)

  print("Accuracy (k) :", acc)
  print("Precision (k) :", prec)
  print("Recall (k) :", recall)
  print("F1 (k) :", f1)
  print("Confustion Matrix (k) :\n", con)
  print("Classification Report (k) :\n", report)
  acc_list.append(acc)
  prec_list.append(prec)
  recall_list.append(recall)
  f1_list.append(f1)

print("------------------------------------")
print("Mean Accuracy :", np.mean(acc_list))
print("Mean Precision :", np.mean(prec_list))
print("Mean Recall :", np.mean(recall_list))
print("Mean F1 :", np.mean(f1_list))

Accuracy (k) : 0.875
Precision (k) : 0.875
Recall (k) : 0.875
F1 (k) : 0.875
Confustion Matrix (k) :
 [[11  2  0]
 [ 2 17  1]
 [ 1  0 14]]
Classification Report (k) :
               precision    recall  f1-score   support

           A       0.79      0.85      0.81        13
           B       0.89      0.85      0.87        20
           C       0.93      0.93      0.93        15

    accuracy                           0.88        48
   macro avg       0.87      0.88      0.87        48
weighted avg       0.88      0.88      0.88        48

Accuracy (k) : 0.875
Precision (k) : 0.875
Recall (k) : 0.875
F1 (k) : 0.875
Confustion Matrix (k) :
 [[12  3  1]
 [ 1 15  1]
 [ 0  0 15]]
Classification Report (k) :
               precision    recall  f1-score   support

           A       0.92      0.75      0.83        16
           B       0.83      0.88      0.86        17
           C       0.88      1.00      0.94        15

    accuracy                           0.88        48
   macro av

In [416]:

print_answer(model)

['B' 'B' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']
[[3.56392162e-01 6.43602033e-01 5.80517498e-06]
 [3.80410478e-03 5.23873103e-01 4.72322792e-01]
 [8.95338899e-01 1.04628404e-01 3.26977117e-05]
 [2.31246470e-02 1.41071956e-03 9.75464633e-01]
 [9.01133042e-01 9.70034864e-02 1.86347187e-03]
 [9.07908466e-01 9.15599731e-02 5.31560387e-04]
 [8.55348477e-03 7.14312629e-03 9.84303389e-01]
 [1.23726712e-03 9.98762473e-01 2.60244666e-07]
 [5.67271688e-02 6.46292392e-03 9.36809907e-01]
 [1.36442582e-03 1.00766333e-03 9.97627911e-01]]
['A' 'B' 'C']


In [417]:
acc_list = []
prec_list = []
recall_list = []
f1_list = []


for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  y_pred = model.predict(X_test)
  y_pred_proba = model.predict_proba(X_test)

  acc = accuracy_score(y_test, y_pred)
  prec = precision_score(y_test, y_pred, average='micro')
  recall = recall_score(y_test, y_pred, average='micro')
  f1 = f1_score(y_test, y_pred, average='micro')
  con = confusion_matrix(y_test, y_pred)
  report = classification_report(y_test, y_pred)

  print("Accuracy (k) :", acc)
  print("Precision (k) :", prec)
  print("Recall (k) :", recall)
  print("F1 (k) :", f1)
  print("Confustion Matrix (k) :\n", con)
  print("Classification Report (k) :\n", report)
  acc_list.append(acc)
  prec_list.append(prec)
  recall_list.append(recall)
  f1_list.append(f1)

print("------------------------------------")
print("Mean Accuracy :", np.mean(acc_list))
print("Mean Precision :", np.mean(prec_list))
print("Mean Recall :", np.mean(recall_list))
print("Mean F1 :", np.mean(f1_list))

Accuracy (k) : 0.8333333333333334
Precision (k) : 0.8333333333333334
Recall (k) : 0.8333333333333334
F1 (k) : 0.8333333333333334
Confustion Matrix (k) :
 [[11  2  0]
 [ 3 15  2]
 [ 1  0 14]]
Classification Report (k) :
               precision    recall  f1-score   support

           A       0.73      0.85      0.79        13
           B       0.88      0.75      0.81        20
           C       0.88      0.93      0.90        15

    accuracy                           0.83        48
   macro avg       0.83      0.84      0.83        48
weighted avg       0.84      0.83      0.83        48

Accuracy (k) : 0.8541666666666666
Precision (k) : 0.8541666666666666
Recall (k) : 0.8541666666666666
F1 (k) : 0.8541666666666666
Confustion Matrix (k) :
 [[12  3  1]
 [ 2 14  1]
 [ 0  0 15]]
Classification Report (k) :
               precision    recall  f1-score   support

           A       0.86      0.75      0.80        16
           B       0.82      0.82      0.82        17
           C    

In [418]:

print_answer(model)

['B' 'B' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']
[[4.02916388e-01 5.97083500e-01 1.12055746e-07]
 [5.71990960e-04 9.41334021e-01 5.80939879e-02]
 [8.88839238e-01 1.11160142e-01 6.20606331e-07]
 [1.95115919e-03 3.08287148e-03 9.94965969e-01]
 [9.65899302e-01 3.24936501e-02 1.60704761e-03]
 [9.28098321e-01 7.18894872e-02 1.21916848e-05]
 [6.76771965e-04 2.61736770e-02 9.73149551e-01]
 [5.84165696e-04 9.99382315e-01 3.35193115e-05]
 [1.44449813e-01 7.73463827e-04 8.54776723e-01]
 [3.30570104e-05 2.33503203e-05 9.99943593e-01]]
['A' 'B' 'C']


## Comparisions

In [419]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold


from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score

In [420]:
# Decision Tree

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = DecisionTreeClassifier()
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Decision Tree")
print(" Mean Accuracy :", np.mean(acc_list))

------------------------------------
Decision Tree
 Mean Accuracy : 0.8211436170212766


In [421]:
# Logistic Regression

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = LogisticRegression()
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Logistic Regression")
print(" Mean Accuracy :", np.mean(acc_list))

------------------------------------
Logistic Regression
 Mean Accuracy : 0.8950576241134751


In [422]:
# Neural Network

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Neural Network")
print("  Mean Accuracy :", np.mean(acc_list))

------------------------------------
Neural Network
  Mean Accuracy : 0.9003767730496454


## K-Fold Cross Validation 
### Computing cross-validated metrics

In [423]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

In [424]:
model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
scores = cross_val_score(model, X, y, cv=4, scoring="accuracy")

print("Mean Accuracy :", np.mean(scores))

Mean Accuracy : 0.9001551418439717


### ANSWER 
Tree: 0.76<br />
['A' 'B' 'A' 'C' 'B' 'A' 'C' 'B' 'C' 'C']  (Not Accurate)

LogisticRegression: 0.87<br />
['B' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C'] (Accurate)

Naive Bayes: 0.81<br />
['A' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']

K-NN: 0.82<br />
['B' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']

Aritificial Neural Network: 0.88<br />
['B' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C'] (Accurate)

K-Fold (1): 0.88<br />
['B' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C'] (Accurate)

K-Fold (2): 0.95<br />
['B' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C']  (Very Accurate)

Summarise (by Pop)<br />
['B' 'C' 'A' 'C' 'A' 'A' 'C' 'B' 'C' 'C'] (Aritificial Neural Network, K-fold, LogisticRegression)

Real answer (score 7/10) <br />  
['A' 'B' 'A' 'C' 'B' 'A' 'C' 'B' 'C' 'C']



---
https://rathachai.creatier.pro/

## つづく