In [16]:
# 导入依赖包
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import  classification_report
from sklearn.model_selection import GridSearchCV
import joblib
import time


In [8]:
# 载入数据集
cancer = load_breast_cancer()
X = cancer.data
Y = cancer.target
print(f'The shape of cancer dataset is {X.shape}')
print(f'The shape of label is {Y.shape}')


Thes shape of cancer dataset is (569, 30)
Thes shape of label is (569,)


In [10]:
# 划分训练集合测试集，比例为 8:2
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
print(f'The shape of X_train is {X_train.shape}')
print(f'The shape of y_train is {y_train.shape}')
print(f'The shape of X_test is {X_test.shape}')
print(f'The shape of y_test is {y_test.shape}')

The shape of X_train is (455, 30)
The shape of y_train is (455,)
The shape of X_test is (114, 30)
The shape of y_test is (114,)


In [13]:
# 建模
clf_svc = SVC()
clf_svc.fit(X_train, y_train)


['svc_breast_cancer.m']

In [18]:
# 模型评估
accuracy = clf_svc.score(X_test, y_test)
print(f'The accuracy of the SVC model is {accuracy*100}%')
y_pred = clf_svc.predict(X_test)
print(f'The report of the SVC model is \n {classification_report(y_test, y_pred)}')

The accuracy of the SVC model is 86.8421052631579%
The report of the SVC model is 
               precision    recall  f1-score   support

           0       0.97      0.67      0.79        43
           1       0.83      0.99      0.90        71

    accuracy                           0.87       114
   macro avg       0.90      0.83      0.85       114
weighted avg       0.88      0.87      0.86       114



In [25]:
# 参数调优
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
svc = SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(X_train, y_train)
print(f'The best parameters are {clf.best_params_}')
accuracy = clf.score(X_test, y_test)
print(f'The accuracy of the SVC model is {accuracy*100}%')
y_pred = clf.predict(X_test)
print(f'The report of the SVC model is \n {classification_report(y_test, y_pred)}')
# 保存模型
joblib.dump(clf, "svc_breast_cancer.m")

The best parameters are {'C': 10, 'kernel': 'linear'}
The accuracy of the SVC model is 98.24561403508771%
The report of the SVC model is 
               precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



['svc_breast_cancer.m']

In [29]:
# 模拟线上部署
clf_online = joblib.load("svc_breast_cancer.m")
for i, e in enumerate(X_test):
    result_SVM = clf_online.predict([e])
    print(f"--------------The {i+1}th data result is: ---------------------- ")
    print(f"The Result of Model is {result_SVM[0]}")
    print(f"The True value is {y_test[i]} \n")
    time.sleep(1)

--------------The 1th data result is: ---------------------- 
The Result of Model is 1
The True value is 1 

--------------The 2th data result is: ---------------------- 
The Result of Model is 0
The True value is 0 

--------------The 3th data result is: ---------------------- 
The Result of Model is 0
The True value is 0 

--------------The 4th data result is: ---------------------- 
The Result of Model is 1
The True value is 1 

--------------The 5th data result is: ---------------------- 
The Result of Model is 1
The True value is 1 

--------------The 6th data result is: ---------------------- 
The Result of Model is 1
The True value is 1 

--------------The 7th data result is: ---------------------- 
The Result of Model is 1
The True value is 1 

--------------The 8th data result is: ---------------------- 
The Result of Model is 0
The True value is 0 

--------------The 9th data result is: ---------------------- 
The Result of Model is 1
The True value is 1 



KeyboardInterrupt: 