# **A Quick Tutorial on AI Techniques**
> ## *Author* : [Rathachai CHAWUTHAI](https://rathachai.creatier.pro/) , Ph.D
> ### *Affiliation* : Computer Engineering, King Mongkut's Institute of Technology Ladkrabang (KMITL)
> #### *Updated Date* : 2022-04-15
---

> <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/">Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License</a>.



---

## **Linear Regression**

### Import Libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### Dataset

In [2]:
CSV_PATH = "https://rathachai.github.io/DA101/data/online-learning-grade.csv"

In [3]:
df = pd.read_csv(CSV_PATH)

In [4]:
df

Unnamed: 0,sid,gpa,midterm,learning_hours,final
0,s1,3.22,9,6,23
1,s2,2.67,17,9,45
2,s3,3.67,8,9,36
3,s4,3.43,17,9,46
4,s5,3.31,14,10,46
5,s6,2.56,10,7,34
6,s7,3.78,19,10,44
7,s8,3.73,18,7,43
8,s9,3.74,19,2,29
9,s10,2.72,15,5,37


### Train-Test Data

In [5]:
X = df[["midterm", "learning_hours"]]
y = df["final"]

In [6]:
X

Unnamed: 0,midterm,learning_hours
0,9,6
1,17,9
2,8,9
3,17,9
4,14,10
5,10,7
6,19,10
7,18,7
8,19,2
9,15,5


In [7]:
y

0     23
1     45
2     36
3     46
4     46
5     34
6     44
7     43
8     29
9     37
10    25
11    43
12    23
13    31
14    37
15    44
16    36
17    41
18    34
19    31
Name: final, dtype: int64

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [9]:
X_train

Unnamed: 0,midterm,learning_hours
19,9,8
16,12,9
3,17,9
0,9,6
5,10,7
13,11,6
9,15,5
7,18,7
1,17,9
10,11,5


In [10]:
X_test

Unnamed: 0,midterm,learning_hours
8,19,2
2,8,9
12,10,5
15,17,7
17,18,8
18,18,5


In [11]:
y_train

19    31
16    36
3     46
0     23
5     34
13    31
9     37
7     43
1     45
10    25
11    43
4     46
14    37
6     44
Name: final, dtype: int64

In [12]:
y_test

8     29
2     36
12    23
15    44
17    41
18    34
Name: final, dtype: int64

### Model

In [13]:
model = LinearRegression()

In [14]:
model.fit(X_train, y_train)

In [15]:
print("coef :", model.coef_)
print("intercept : ", model.intercept_)

coef : [1.45865809 1.81458719]
intercept :  3.158244978530284


### Evaluation

In [16]:
y_pred = model.predict(X_test)

In [17]:
y_pred

array([34.50192312, 31.15879444, 26.81776186, 40.65754289, 43.93078817,
       38.4870266 ])

In [18]:
y_test

8     29
2     36
12    23
15    44
17    41
18    34
Name: final, dtype: int64

In [19]:
rmse = mean_squared_error(y_test, y_pred, squared=False)

In [20]:
print("RMSE : ", rmse)

RMSE :  4.246148870931273




---



## **Decision Tree**

### Import Libraries

In [21]:
import numpy as np
import pandas as pd

from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Dataset

In [22]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"

In [23]:
df = pd.read_csv(CSV_PATH)

In [24]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


### Train-Test Split

In [25]:
X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [27]:
model = DecisionTreeClassifier()

In [28]:
model.fit(X_train, y_train)

In [29]:
print(tree.export_text(model))

|--- feature_3 <= 0.80
|   |--- class: setosa
|--- feature_3 >  0.80
|   |--- feature_2 <= 4.75
|   |   |--- class: versicolor
|   |--- feature_2 >  4.75
|   |   |--- feature_3 <= 1.75
|   |   |   |--- feature_3 <= 1.55
|   |   |   |   |--- class: virginica
|   |   |   |--- feature_3 >  1.55
|   |   |   |   |--- feature_2 <= 5.45
|   |   |   |   |   |--- class: versicolor
|   |   |   |   |--- feature_2 >  5.45
|   |   |   |   |   |--- class: virginica
|   |   |--- feature_3 >  1.75
|   |   |   |--- class: virginica



### Prediction

In [30]:
model.predict([[5,3,1,1]])



array(['versicolor'], dtype=object)

In [31]:
model.predict_proba([[5,3,1,1]])



array([[0., 1., 0.]])

In [32]:
model.classes_

array(['setosa', 'versicolor', 'virginica'], dtype=object)

### Evaluation

In [33]:
y_pred = model.predict(X_test)

In [34]:
y_pred

array(['setosa', 'versicolor', 'virginica', 'versicolor', 'versicolor',
       'setosa', 'setosa', 'versicolor', 'virginica', 'setosa',
       'virginica', 'versicolor', 'setosa', 'virginica', 'versicolor',
       'virginica', 'virginica', 'versicolor', 'versicolor', 'setosa',
       'versicolor', 'virginica', 'virginica', 'virginica', 'virginica',
       'setosa', 'setosa', 'virginica', 'versicolor', 'setosa', 'setosa',
       'setosa', 'versicolor', 'versicolor', 'virginica', 'setosa',
       'virginica', 'virginica', 'versicolor', 'setosa', 'versicolor',
       'virginica', 'virginica', 'versicolor', 'virginica', 'virginica',
       'setosa', 'setosa', 'versicolor', 'versicolor', 'virginica',
       'virginica', 'versicolor', 'virginica', 'virginica', 'setosa',
       'setosa', 'virginica', 'virginica', 'setosa'], dtype=object)

In [35]:
acc = accuracy_score(y_test, y_pred)

In [36]:
print("Accuracy :", acc)

Accuracy : 0.9166666666666666




---



## **Logistic Regression**

### Import Libraries

In [37]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Dataset and Train-Test Split

In [38]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [39]:
model = LogisticRegression()

In [40]:
model.fit(X_train, y_train)

In [41]:
print("coef :", model.coef_)
print("intercept : ", model.intercept_)

coef : [[-2.24974742  2.06873753]
 [ 0.51436862 -1.44529442]
 [ 1.73537879 -0.62344311]]
intercept :  [ 6.08050382  1.86481977 -7.9453236 ]


### Evaluation

In [42]:
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)

print("Accuracy :", acc)

Accuracy : 0.8333333333333334


### Prediction

In [43]:
print(" - predicted class:", model.predict([(5,3)]))
print(" - classes :", model.classes_)
print(" - probabilities :", model.predict_proba([(5,3)]))

 - predicted class: ['setosa']
 - classes : ['setosa' 'versicolor' 'virginica']
 - probabilities : [[0.66439263 0.26025804 0.07534933]]






---



## **Naïve Bayes**

### Import Libraries

In [44]:
import numpy as np
import pandas as pd

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Dataset and Train-Test Split

In [45]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### Model

In [46]:
model = GaussianNB()

In [47]:
model.fit(X_train, y_train)

### Evaluation

In [48]:
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)

print("Accuracy :", acc)

Accuracy : 0.8333333333333334


### Prediction

In [49]:
print(" - predicted class:", model.predict([(5,3)]))
print(" - classes :", model.classes_)
print(" - probabilities :", model.predict_proba([(5,3)]))

 - predicted class: ['setosa']
 - classes : ['setosa' 'versicolor' 'virginica']
 - probabilities : [[0.77694718 0.17070682 0.052346  ]]




## K-Nearest Neighbors

In [50]:
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [51]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

In [52]:
model = KNeighborsClassifier(n_neighbors=7)
model.fit(X_train, y_train)

In [53]:
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)

print("Accuracy :", acc)

Accuracy : 0.75


## Artificial Neural Network

In [54]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [55]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

In [56]:
model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=1000, activation="tanh")
model.fit(X_train, y_train)

In [57]:
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)

print("Accuracy :", acc)

Accuracy : 0.7166666666666667


## K-Fold Cross-Validation

In [58]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold

from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score

In [59]:
CSV_PATH = "https://rathachai.github.io/DA101/data/iris.csv"
df = pd.read_csv(CSV_PATH)

X = df[["sepal_length",	"sepal_width"]]
#X = df[["sepal_length",	"sepal_width",	"petal_length",	"petal_width"]]
y = df["species"]

In [60]:
kf = KFold(n_splits=4)

In [61]:
acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
  model.fit(X_train, y_train)
sav
  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  print("Accuracy (k) :", acc)
  acc_list.append(acc)

print("------------------------------------")
print("Mean Accuracy :", np.mean(acc_list))

IndentationError: unexpected indent (680209949.py, line 10)

## Comparisions

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold


from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score

In [None]:
# Decision Tree

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = DecisionTreeClassifier()
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Decision Tree")
print(" Mean Accuracy :", np.mean(acc_list))

------------------------------------
Decision Tree
 Mean Accuracy : 0.5455192034139402


In [None]:
# Logistic Regression

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = LogisticRegression()
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Logistic Regression")
print(" Mean Accuracy :", np.mean(acc_list))

------------------------------------
Logistic Regression
 Mean Accuracy : 0.591394025604552


In [None]:
# Neural Network

acc_list = []

for train_index, test_index in kf.split(X):
  X_train, X_test = X.loc[train_index], X.loc[test_index]
  y_train, y_test = y.loc[train_index], y.loc[test_index]

  model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)
  acc = accuracy_score(y_test, y_pred)
  acc_list.append(acc)

print("------------------------------------")
print("Neural Network")
print("  Mean Accuracy :", np.mean(acc_list))

------------------------------------
Neural Network
  Mean Accuracy : 0.24715504978662872


## K-Fold Cross Validation 
### Computing cross-validated metrics

In [None]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

In [None]:
model = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=5000)
scores = cross_val_score(model, X, y, cv=4, scoring="accuracy")

print("Mean Accuracy :", np.mean(scores))

Mean Accuracy : 0.773470839260313




---
https://rathachai.creatier.pro/

## つづく