# Logistic Regression  
IRIS dataset description:  鳶尾花資料庫
https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html    
https://scikit-learn.org/stable/datasets/index.html#iris-dataset  
logistic regression model:   
http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html 

In [1]:
import numpy as np
from sklearn.linear_model  import LogisticRegression
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix

# import some data to play with
iris = datasets.load_iris()

#X = iris.data[:, :2]  # we only take the first two features.
X = iris.data
y = iris.target
#print(y) #0,1,2

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=1) #random_state 種子值

#資料前處理，標準化
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

#n_jobs,定義在模型訓練中允許使用的 CPU 核心數。n_job=-1,使用所有可用的 CPU 核心
model=LogisticRegression(n_jobs=-1)
model.fit(X_train,y_train)

X_test_nor = scaler.transform(X_test)
y_pred = model.predict(X_test_nor)
accuracy = accuracy_score(y_test, y_pred)

print(model.coef_)
#print prediction result
print(y_pred)
#print accuracy 
print(accuracy)

#dump 是 joblib 模組中的函數，用來將模型或其他 Python 對象序列化並存儲到檔案中，方便後續載入使用或共享。
#將 model 保存為一個名為 logistic_model.joblib 的檔案。
from joblib import dump, load
dump(model, 'logistic_model.joblib') 
print("模型已成功保存！")

[[-1.02143337  0.93456851 -1.70683051 -1.5811216 ]
 [ 0.64560105 -0.4830725  -0.27263008 -0.70651302]
 [ 0.37583231 -0.45149601  1.97946059  2.28763461]]
[0 1 1 0 2 1 2 0 0 2 1 0 2 1 1 0 1 1 0 0 1 1 2 0 2 1 0 0 1 2 1 2 1 2 2 0 1
 0 1 2 2 0 1 2 1]
0.9555555555555556
模型已成功保存！


In [2]:
#載入模型
logistic_model = load('logistic_model.joblib')
print("模型已成功載入！") 

y_pred = logistic_model.predict(X_test_nor)
accuracy = logistic_model.score(X_test_nor, y_test)

print(model.coef_)
#print prediction result
print(y_pred)
#print accuracy 
print(accuracy)

模型已成功載入！
[[-1.02143337  0.93456851 -1.70683051 -1.5811216 ]
 [ 0.64560105 -0.4830725  -0.27263008 -0.70651302]
 [ 0.37583231 -0.45149601  1.97946059  2.28763461]]
[0 1 1 0 2 1 2 0 0 2 1 0 2 1 1 0 1 1 0 0 1 1 2 0 2 1 0 0 1 2 1 2 1 2 2 0 1
 0 1 2 2 0 1 2 1]
0.9555555555555556


# K-Nearest Neighbor  
IRIS dataset description:  
https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html    
https://scikit-learn.org/stable/datasets/index.html#iris-dataset  
KNN model:   
http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html

In [3]:
import numpy as np
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix

# import some data to play with
iris = datasets.load_iris()

X = iris.data[:, :2] # we only take the first two features. 
#X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

#資料前處理，標準化
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

model = neighbors.KNeighborsClassifier(n_neighbors=3)
model.fit(X_train, y_train)


X_test = scaler.transform(X_test)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
num_correct_samples = accuracy_score(y_test, y_pred, normalize=False)
con_matrix = confusion_matrix(y_test, y_pred)

print('number of correct sample: {}'.format(num_correct_samples))
print('accuracy: {}'.format(accuracy))
print('confusion matrix: {}'.format(con_matrix))


number of correct sample: 21.0
accuracy: 0.7
confusion matrix: [[12  1  0]
 [ 0  5  4]
 [ 0  4  4]]
