In [23]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.svm import SVC

from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV

# Load and clean the Breast Cancer Wisconsin (Original) data set.
#
# The .data file has no header row, so header=None is required: without it
# pandas consumes the first sample as column labels and that row is lost.
# Missing 'Bare Nuclei' values are written as '?' in the file; na_values
# converts them to NaN so one dropna() removes every incomplete row.
DATA_PATH = r'C:\Users\rosti\Downloads\breast+cancer+wisconsin+original\breast-cancer-wisconsin.data'
COLUMN_NAMES = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class',
]

dataset = pd.read_csv(DATA_PATH, header=None, names=COLUMN_NAMES, na_values='?')

clean_dataset = (
    dataset
    .dropna()                                # rows with a missing ('?') value
    .astype({'Bare Nuclei': 'int64'})        # NaN forced float; restore integers
    .drop(columns=['Sample code number'])    # identifier column, not used as a feature
)

# Separate the feature columns from the 'Class' target column.
X = clean_dataset.drop(columns=['Class'])
y = clean_dataset['Class']

# Hold out 20% of the rows for testing.
# random_state pins the shuffle so the reported metrics are reproducible on
# re-run; stratify=y keeps the class proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)

# Fit the scaler on the training part only, then apply the same
# transformation to the test part.
scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

# Baseline: support vector classification with an RBF kernel and fixed C / gamma.
model = SVC(kernel='rbf', C=1.0, gamma=1.0)
model.fit(scaled_X_train, y_train)

# Predict on the scaled test features and show the confusion matrix.
y_pred = model.predict(scaled_X_test)
baseline_confusion = confusion_matrix(y_test, y_pred)
print(baseline_confusion)

[[82  7]
 [ 0 48]]


In [24]:
# Per-class precision / recall / F1 of the baseline predictions.
baseline_report = classification_report(y_test, y_pred)
print(baseline_report)

              precision    recall  f1-score   support

           2       1.00      0.92      0.96        89
           4       0.87      1.00      0.93        48

    accuracy                           0.95       137
   macro avg       0.94      0.96      0.95       137
weighted avg       0.96      0.95      0.95       137



In [25]:
# Search for the best 'C' and 'gamma' values of an RBF-kernel SVC using GridSearchCV.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

# refit=True refits an estimator with the best found parameters after the search.
grid = GridSearchCV(SVC(), param_grid, refit=True)
grid.fit(scaled_X_train, y_train)

best_params = grid.best_params_
print(best_params)

{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}


In [26]:
# Take the estimator refitted with the best parameters and evaluate it on the
# scaled test features.
best_svc = grid.best_estimator_
grid_predictions = best_svc.predict(scaled_X_test)

grid_confusion = confusion_matrix(y_test, grid_predictions)
print(grid_confusion)

[[88  1]
 [ 2 46]]


In [27]:
# Per-class precision / recall / F1 of the grid-searched model's predictions.
grid_report = classification_report(y_test, grid_predictions)
print(grid_report)

              precision    recall  f1-score   support

           2       0.98      0.99      0.98        89
           4       0.98      0.96      0.97        48

    accuracy                           0.98       137
   macro avg       0.98      0.97      0.98       137
weighted avg       0.98      0.98      0.98       137

