In [1]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
%matplotlib inline

# Load the data set from the CSV file into a DataFrame
DATA_FILE = 'BloodData2.csv'
df = pd.read_csv(DATA_FILE)


In [2]:
#載入模型
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

## 建立模型

In [3]:
from sklearn.ensemble import VotingClassifier

# Disabled alternative for the first estimator, kept for reference:
#   model1 = SVC('linear',probability=True)

# Base estimators that take part in the vote.
model1 = LogisticRegression(random_state=1)
model2 = tree.DecisionTreeClassifier(random_state=1)
model3 = KNeighborsClassifier(n_neighbors=1)

# Combine the three estimators with hard voting.
model = VotingClassifier(
    estimators=[
        ('lr', model1),
        ('dt', model2),
        ('knn', model3),
    ],
    voting='hard',
)



In [4]:
# Check the column labels first, then preview the first five rows
print(df.columns)
df.head(n=5)

Index(['Recency ', 'Times', 'Monetary ', 'Time', 'Target'], dtype='object')


Unnamed: 0,Recency,Times,Monetary,Time,Target
0,2,50,12500,98,1
1,0,13,3250,28,1
2,1,16,4000,35,1
3,2,20,5000,45,1
4,1,24,6000,77,0


## 處理資料


In [5]:
# Load the standardization scaler (StandardScaler)
from sklearn.preprocessing import StandardScaler

# Select the feature columns once, by name. The scaled frame below reuses this
# frame's own column labels and index, so the labels cannot drift out of sync
# with the data (relabelling with df.columns[:-1] is only right while 'Target'
# happens to be the last column).
feature_frame = df.drop('Target', axis=1)

# NOTE(review): the scaler is fitted on every row, before the train/test split
# done later in the notebook, so test rows influence the mean/std used for
# scaling. Consider fitting on the training split only.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_frame)

# Wrap the scaled array back into a DataFrame aligned with the original rows.
df_feat = pd.DataFrame(
    scaled_features,
    columns=feature_frame.columns,
    index=feature_frame.index,
)
df_feat.head()

Unnamed: 0,Recency,Times,Monetary,Time
0,-0.927899,7.623346,7.623346,2.615633
1,-1.175118,1.282738,1.282738,-0.257881
2,-1.051508,1.796842,1.796842,0.029471
3,-0.927899,2.482313,2.482313,0.439973
4,-1.051508,3.167784,3.167784,1.753579


In [6]:
from sklearn.model_selection import train_test_split

# Features are the standardized columns; the label is the 'Target' column.
X, y = df_feat, df['Target']

# Hold out 30% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=101,
)

## 訓練模型

In [7]:
# Fit the voting ensemble on the training split
model.fit(X=X_train, y=y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=1, solver='lbfgs',
                                                 tol=0.0001, verbose=0,
                                                 warm_start=False)),
                             ('dt',
                              DecisionTreeClassifier(ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                              

In [9]:
# Score the fitted ensemble on the held-out split
model.score(X=X_test, y=y_test)

0.7288888888888889

In [92]:
# Evaluate the voting ensemble on the held-out split
# (confusion_matrix is imported here as well because later cells use it)
from sklearn.metrics import classification_report,confusion_matrix

# Compute the ensemble's predictions in this cell. No earlier cell defines
# `pred`, so without this line the report fails with a NameError on a fresh
# kernel (Restart & Run All).
pred = model.predict(X_test)

print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.78      0.89      0.83       168
           1       0.44      0.25      0.31        57

    accuracy                           0.73       225
   macro avg       0.61      0.57      0.57       225
weighted avg       0.69      0.73      0.70       225



## KNN

In [93]:
# K equals 9
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

# Header, confusion matrix and per-class report, separated by blank lines.
# Each '\n' item plus the '\n' separator reproduces the original spacing.
print(
    'WITH K=9',
    '\n',
    confusion_matrix(y_test, pred),
    '\n',
    classification_report(y_test, pred),
    sep='\n',
)

WITH K=9


[[162   6]
 [ 38  19]]


              precision    recall  f1-score   support

           0       0.81      0.96      0.88       168
           1       0.76      0.33      0.46        57

    accuracy                           0.80       225
   macro avg       0.79      0.65      0.67       225
weighted avg       0.80      0.80      0.77       225



## 決策樹

In [94]:
# Stand-alone decision tree, seeded for repeatable results
DT = tree.DecisionTreeClassifier(random_state=1)
DT.fit(X_train, y_train)
pred = DT.predict(X_test)

# Header, confusion matrix and per-class report, separated by blank lines.
# Each '\n' item plus the '\n' separator reproduces the original spacing.
print(
    'WITH DT',
    '\n',
    confusion_matrix(y_test, pred),
    '\n',
    classification_report(y_test, pred),
    sep='\n',
)

WITH DT


[[143  25]
 [ 37  20]]


              precision    recall  f1-score   support

           0       0.79      0.85      0.82       168
           1       0.44      0.35      0.39        57

    accuracy                           0.72       225
   macro avg       0.62      0.60      0.61       225
weighted avg       0.71      0.72      0.71       225



## 邏輯回歸

In [95]:
# Stand-alone logistic regression, seeded for repeatable results
LR = LogisticRegression(random_state=1)
LR.fit(X_train, y_train)
pred = LR.predict(X_test)

# Header, confusion matrix and per-class report, separated by blank lines.
# Each '\n' item plus the '\n' separator reproduces the original spacing.
print(
    'WITH LR',
    '\n',
    confusion_matrix(y_test, pred),
    '\n',
    classification_report(y_test, pred),
    sep='\n',
)

WITH LR


[[165   3]
 [ 54   3]]


              precision    recall  f1-score   support

           0       0.75      0.98      0.85       168
           1       0.50      0.05      0.10        57

    accuracy                           0.75       225
   macro avg       0.63      0.52      0.47       225
weighted avg       0.69      0.75      0.66       225

