# **A Quick Tutorial on AI Techniques**
> ## *Author* : [Rathachai CHAWUTHAI](https://rathachai.creatier.pro/) , Ph.D
> ### *Affiliation* : Computer Engineering, King Mongkut's Institute of Technology Ladkrabang (KMITL)
> #### *Updated Date* : 2022-04-15
---

> <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/">Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License</a>.



---

## **Linear Regression**

### Import Libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### Dataset

In [2]:
CSV_PATH = "https://rathachai.github.io/DA101/data/online-learning-grade.csv"

In [3]:
df = pd.read_csv(CSV_PATH)

In [4]:
df

Unnamed: 0,sid,gpa,midterm,learning_hours,final
0,s1,3.22,9,6,23
1,s2,2.67,17,9,45
2,s3,3.67,8,9,36
3,s4,3.43,17,9,46
4,s5,3.31,14,10,46
5,s6,2.56,10,7,34
6,s7,3.78,19,10,44
7,s8,3.73,18,7,43
8,s9,3.74,19,2,29
9,s10,2.72,15,5,37


### Train-Test Data

In [5]:
X = df[["midterm", "learning_hours"]]
y = df["final"]

In [6]:
X

Unnamed: 0,midterm,learning_hours
0,9,6
1,17,9
2,8,9
3,17,9
4,14,10
5,10,7
6,19,10
7,18,7
8,19,2
9,15,5


In [7]:
y

0     23
1     45
2     36
3     46
4     46
5     34
6     44
7     43
8     29
9     37
10    25
11    43
12    23
13    31
14    37
15    44
16    36
17    41
18    34
19    31
Name: final, dtype: int64

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [9]:
X_train

Unnamed: 0,midterm,learning_hours
12,10,5
2,8,9
8,19,2
3,17,9
15,17,7
18,18,5
19,9,8
7,18,7
0,9,6
9,15,5


In [10]:
X_test

Unnamed: 0,midterm,learning_hours
17,18,8
13,11,6
11,16,10
10,11,5
1,17,9
16,12,9


In [11]:
y_train

12    23
2     36
8     29
3     46
15    44
18    34
19    31
7     43
0     23
9     37
4     46
14    37
5     34
6     44
Name: final, dtype: int64

In [12]:
y_test

17    41
13    31
11    43
10    25
1     45
16    36
Name: final, dtype: int64

### Model

In [13]:
model = LinearRegression()

In [14]:
model.fit(X_train, y_train)

In [15]:
print("coef :", model.coef_)
print("intercept : ", model.intercept_)

coef : [1.28194596 2.53537146]
intercept :  0.6964657163031376


### Evaluation

In [16]:
y_pred = model.predict(X_test)

In [17]:
y_pred

array([44.05446461, 30.0101    , 46.56131562, 27.47472854, 45.30789012,
       38.89816034])

In [18]:
y_test

17    41
13    31
11    43
10    25
1     45
16    36
Name: final, dtype: int64

In [19]:
rmse = mean_squared_error(y_test, y_pred, squared=False)

In [20]:
print("RMSE : ", rmse)

RMSE :  2.503698465752977




---



## **Decision Tree**

### Import Libraries

In [326]:
import numpy as np
import pandas as pd

from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

### Dataset

In [327]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"

In [328]:
df = pd.read_csv(CSV_PATH)

In [329]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


### Train-Test Split

In [330]:
X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]] # feature 0,1,2,3
y = df["species"]

In [331]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [332]:
model = DecisionTreeClassifier()

In [333]:
model.fit(X_train.values, y_train)

In [334]:
print(tree.export_text(model))

|--- feature_3 <= 0.80
|   |--- class: setosa
|--- feature_3 >  0.80
|   |--- feature_2 <= 4.85
|   |   |--- class: versicolor
|   |--- feature_2 >  4.85
|   |   |--- feature_3 <= 1.70
|   |   |   |--- feature_2 <= 4.95
|   |   |   |   |--- class: versicolor
|   |   |   |--- feature_2 >  4.95
|   |   |   |   |--- feature_3 <= 1.55
|   |   |   |   |   |--- class: virginica
|   |   |   |   |--- feature_3 >  1.55
|   |   |   |   |   |--- feature_0 <= 6.60
|   |   |   |   |   |   |--- class: versicolor
|   |   |   |   |   |--- feature_0 >  6.60
|   |   |   |   |   |   |--- class: virginica
|   |   |--- feature_3 >  1.70
|   |   |   |--- class: virginica



### Prediction

In [335]:
model.predict([[5,3,1,1]])

array(['versicolor'], dtype=object)

In [336]:
model.predict_proba([[5,3,1,1]])

array([[0., 1., 0.]])

In [337]:
model.classes_

array(['setosa', 'versicolor', 'virginica'], dtype=object)

### Evaluation

In [338]:
y_pred = model.predict(X_test)



In [339]:
y_pred

array(['setosa', 'versicolor', 'virginica', 'setosa', 'versicolor',
       'virginica', 'versicolor', 'virginica', 'versicolor', 'virginica',
       'versicolor', 'setosa', 'setosa', 'versicolor', 'setosa',
       'versicolor', 'versicolor', 'setosa', 'setosa', 'versicolor',
       'versicolor', 'virginica', 'setosa', 'versicolor', 'setosa',
       'setosa', 'virginica', 'setosa', 'virginica', 'setosa',
       'versicolor', 'versicolor', 'virginica', 'versicolor', 'setosa',
       'virginica', 'virginica', 'versicolor', 'setosa', 'setosa',
       'virginica', 'versicolor', 'versicolor', 'setosa', 'versicolor',
       'versicolor', 'versicolor', 'versicolor', 'versicolor', 'setosa',
       'setosa', 'versicolor', 'setosa', 'setosa', 'virginica',
       'virginica', 'virginica', 'setosa', 'versicolor', 'versicolor'],
      dtype=object)

In [340]:
# Classification Metrics

y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



In [341]:
print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.9333333333333333
Precision : 0.9333333333333333
Recall : 0.9333333333333333
F1 : 0.9333333333333333
Confustion Matrix :
 [[21  0  0]
 [ 0 22  1]
 [ 0  3 13]]
Classification Report :
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        21
  versicolor       0.88      0.96      0.92        23
   virginica       0.93      0.81      0.87        16

    accuracy                           0.93        60
   macro avg       0.94      0.92      0.93        60
weighted avg       0.93      0.93      0.93        60

Roc score : 0.9442022308870136
Log Loss : 2.4029102259411435




---



## **Logistic Regression**

### Import Libraries

In [491]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

### Dataset and Train-Test Split

In [492]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [493]:
model = LogisticRegression()

In [494]:
model.fit(X_train.values, y_train)

In [495]:
print("coef :", model.coef_)
print("intercept : ", model.intercept_)

coef : [[-2.35536634  2.01628295]
 [ 0.36117633 -1.25295836]
 [ 1.99419001 -0.76332459]]
intercept :  [ 7.1228112   2.29680075 -9.41961194]


### Evaluation

In [496]:
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.7833333333333333
Precision : 0.7833333333333333
Recall : 0.7833333333333333
F1 : 0.7833333333333333
Confustion Matrix :
 [[17  0  0]
 [ 0 16  5]
 [ 0  8 14]]
Classification Report :
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        17
  versicolor       0.67      0.76      0.71        21
   virginica       0.74      0.64      0.68        22

    accuracy                           0.78        60
   macro avg       0.80      0.80      0.80        60
weighted avg       0.79      0.78      0.78        60

Roc score : 0.9009124403861245
Log Loss : 0.5416499414384932


### Prediction

In [497]:
print(" - predicted class:", model.predict([(5,3)]))
print(" - classes :", model.classes_)
print(" - probabilities :", model.predict_proba([(5,3)]))

 - predicted class: ['setosa']
 - classes : ['setosa' 'versicolor' 'virginica']
 - probabilities : [[0.71779922 0.2509287  0.03127209]]






---



## **Naïve Bayes**

### Import Libraries

In [548]:
import numpy as np
import pandas as pd

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

### Dataset and Train-Test Split

In [549]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [550]:
model = GaussianNB()

In [551]:
model.fit(X_train, y_train)

### Evaluation

In [552]:
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.85
Precision : 0.85
Recall : 0.85
F1 : 0.85
Confustion Matrix :
 [[24  0  0]
 [ 0 16  3]
 [ 0  6 11]]
Classification Report :
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        24
  versicolor       0.73      0.84      0.78        19
   virginica       0.79      0.65      0.71        17

    accuracy                           0.85        60
   macro avg       0.84      0.83      0.83        60
weighted avg       0.85      0.85      0.85        60

Roc score : 0.940147991011194
Log Loss : 0.366985776078057


### Prediction

In [553]:
print(" - predicted class:", model.predict([(5,3)]))
print(" - classes :", model.classes_)
print(" - probabilities :", model.predict_proba([(5,3)]))

 - predicted class: ['setosa']
 - classes : ['setosa' 'versicolor' 'virginica']
 - probabilities : [[0.76193639 0.19086059 0.04720302]]




## K-Nearest Neighbors

In [554]:
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

In [555]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

In [556]:
model = KNeighborsClassifier(n_neighbors=7)
model.fit(X_train.values, y_train)

In [557]:
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.8
Precision : 0.8
Recall : 0.8
F1 : 0.8000000000000002
Confustion Matrix :
 [[25  0  0]
 [ 0 12  4]
 [ 0  8 11]]
Classification Report :
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        25
  versicolor       0.60      0.75      0.67        16
   virginica       0.73      0.58      0.65        19

    accuracy                           0.80        60
   macro avg       0.78      0.78      0.77        60
weighted avg       0.81      0.80      0.80        60

Roc score : 0.9277981191893258
Log Loss : 0.3657046977809678




## Artificial Neural Network

In [558]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

In [559]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

In [560]:
model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=1000, activation="tanh")
model.fit(X_train.values, y_train)

In [561]:
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
con = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
roc = roc_auc_score(y_test, y_pred_proba,  multi_class='ovr')
loss = log_loss(y_test, y_pred_proba)



print("Accuracy :", acc)
print("Precision :", prec)
print("Recall :", recall)
print("F1 :", f1)
print("Confustion Matrix :\n", con)
print("Classification Report :\n", report)
print("Roc score :", roc)
print("Log Loss :", loss)

Accuracy : 0.8666666666666667


## K-Fold Cross-Validation

In [584]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold

from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss

In [585]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

In [586]:
kf = KFold(n_splits=4)

In [587]:
acc_list = []
prec_list = []
recall_list = []
f1_list = []


for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  y_pred = model.predict(X_test)
  y_pred_proba = model.predict_proba(X_test)

  acc = accuracy_score(y_test, y_pred)
  prec = precision_score(y_test, y_pred, average='micro')
  recall = recall_score(y_test, y_pred, average='micro')
  f1 = f1_score(y_test, y_pred, average='micro')
  con = confusion_matrix(y_test, y_pred)
  report = classification_report(y_test, y_pred)

  print("Accuracy (k) :", acc)
  print("Precision (k) :", prec)
  print("Recall (k) :", recall)
  print("F1 (k) :", f1)
  print("Confustion Matrix (k) :\n", con)
  print("Classification Report (k) :\n", report)
  acc_list.append(acc)
  prec_list.append(prec)
  recall_list.append(recall)
  f1_list.append(f1)

print("------------------------------------")
print("Mean Accuracy :", np.mean(acc_list))
print("Mean Precision :", np.mean(prec_list))
print("Mean Recall :", np.mean(recall_list))
print("Mean F1 :", np.mean(f1_list))

Accuracy (k) : 1.0
Precision (k) : 1.0
Recall (k) : 1.0
F1 (k) : 1.0
Confustion Matrix (k) :
 [[38]]
Classification Report (k) :
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        38

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38

Accuracy (k) : 0.0
Precision (k) : 0.0
Recall (k) : 0.0
F1 (k) : 0.0
Confustion Matrix (k) :
 [[ 0  0 12]
 [ 0  0 26]
 [ 0  0  0]]
Classification Report (k) :
               precision    recall  f1-score   support

      setosa       0.00      0.00      0.00      12.0
  versicolor       0.00      0.00      0.00      26.0
   virginica       0.00      0.00      0.00       0.0

    accuracy                           0.00      38.0
   macro avg       0.00      0.00      0.00      38.0
weighted avg       0.00      0.00      0.00      38.0



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (k) : 0.35135135135135137
Precision (k) : 0.35135135135135137
Recall (k) : 0.35135135135135137
F1 (k) : 0.35135135135135137
Confustion Matrix (k) :
 [[ 0  0  0]
 [ 2  1 21]
 [ 0  1 12]]
Classification Report (k) :
               precision    recall  f1-score   support

      setosa       0.00      0.00      0.00         0
  versicolor       0.50      0.04      0.08        24
   virginica       0.36      0.92      0.52        13

    accuracy                           0.35        37
   macro avg       0.29      0.32      0.20        37
weighted avg       0.45      0.35      0.23        37

Accuracy (k) : 0.1891891891891892
Precision (k) : 0.1891891891891892
Recall (k) : 0.1891891891891892
F1 (k) : 0.18918918918918917
Confustion Matrix (k) :
 [[ 0  0]
 [30  7]]
Classification Report (k) :
               precision    recall  f1-score   support

  versicolor       0.00      0.00      0.00         0
   virginica       1.00      0.19      0.32        37

    accuracy                

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [588]:
acc_list = []
prec_list = []
recall_list = []
f1_list = []


for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  y_pred = model.predict(X_test)
  y_pred_proba = model.predict_proba(X_test)

  acc = accuracy_score(y_test, y_pred)
  prec = precision_score(y_test, y_pred, average='micro')
  recall = recall_score(y_test, y_pred, average='micro')
  f1 = f1_score(y_test, y_pred, average='micro')
  con = confusion_matrix(y_test, y_pred)
  report = classification_report(y_test, y_pred)

  print("Accuracy (k) :", acc)
  print("Precision (k) :", prec)
  print("Recall (k) :", recall)
  print("F1 (k) :", f1)
  print("Confustion Matrix (k) :\n", con)
  print("Classification Report (k) :\n", report)
  acc_list.append(acc)
  prec_list.append(prec)
  recall_list.append(recall)
  f1_list.append(f1)

print("------------------------------------")
print("Mean Accuracy :", np.mean(acc_list))
print("Mean Precision :", np.mean(prec_list))
print("Mean Recall :", np.mean(recall_list))
print("Mean F1 :", np.mean(f1_list))

Accuracy (k) : 1.0
Precision (k) : 1.0
Recall (k) : 1.0
F1 (k) : 1.0
Confustion Matrix (k) :
 [[38]]
Classification Report (k) :
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        38

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (k) : 0.5263157894736842
Precision (k) : 0.5263157894736842
Recall (k) : 0.5263157894736842
F1 (k) : 0.5263157894736842
Confustion Matrix (k) :
 [[11  1  0]
 [ 0  9 17]
 [ 0  0  0]]
Classification Report (k) :
               precision    recall  f1-score   support

      setosa       1.00      0.92      0.96        12
  versicolor       0.90      0.35      0.50        26
   virginica       0.00      0.00      0.00         0

    accuracy                           0.53        38
   macro avg       0.63      0.42      0.49        38
weighted avg       0.93      0.53      0.64        38



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (k) : 0.7027027027027027
Precision (k) : 0.7027027027027027
Recall (k) : 0.7027027027027027
F1 (k) : 0.7027027027027027
Confustion Matrix (k) :
 [[ 0  0  0]
 [ 2 15  7]
 [ 0  2 11]]
Classification Report (k) :
               precision    recall  f1-score   support

      setosa       0.00      0.00      0.00         0
  versicolor       0.88      0.62      0.73        24
   virginica       0.61      0.85      0.71        13

    accuracy                           0.70        37
   macro avg       0.50      0.49      0.48        37
weighted avg       0.79      0.70      0.72        37

Accuracy (k) : 0.10810810810810811
Precision (k) : 0.10810810810810811
Recall (k) : 0.10810810810810811
F1 (k) : 0.10810810810810811
Confustion Matrix (k) :
 [[ 0  0]
 [33  4]]
Classification Report (k) :
               precision    recall  f1-score   support

  versicolor       0.00      0.00      0.00         0
   virginica       1.00      0.11      0.20        37

    accuracy                 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Comparisions

In [589]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold


from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score

In [590]:
# Decision Tree

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = DecisionTreeClassifier()
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Decision Tree")
print(" Mean Accuracy :", np.mean(acc_list))

------------------------------------
Decision Tree
 Mean Accuracy : 0.552275960170697


In [591]:
# Logistic Regression

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = LogisticRegression()
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Logistic Regression")
print(" Mean Accuracy :", np.mean(acc_list))

------------------------------------
Logistic Regression
 Mean Accuracy : 0.591394025604552


In [592]:
# Neural Network

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Neural Network")
print("  Mean Accuracy :", np.mean(acc_list))

------------------------------------
Neural Network
  Mean Accuracy : 0.5177809388335705


## K-Fold Cross Validation 
### Computing cross-validated metrics

In [593]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

In [594]:
model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
scores = cross_val_score(model, X, y, cv=4, scoring="accuracy")

print("Mean Accuracy :", np.mean(scores))

Mean Accuracy : 0.8198790896159317




---
https://rathachai.creatier.pro/

## つづく