In [173]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle

In [174]:
from sklearn import datasets
data_breast_cancer = datasets.load_breast_cancer(as_frame=True)
print(data_breast_cancer['data'].shape)
# print(data_breast_cancer['DESCR'])
print(data_breast_cancer['data'].columns)

(569, 30)
Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension'],
      dtype='object')


In [175]:
data_iris = datasets.load_iris(as_frame=True)
print(data_iris['data'].shape)
#print(data_iris['DESCR'])


(150, 4)


## Klasyfikacja
### Breast cancer

In [176]:
# load data
X_train, X_test, y_train, y_test = train_test_split(data_breast_cancer['data'][['mean smoothness', 'mean area']], data_breast_cancer['target'], test_size=0.2)

In [177]:
# create SVM model
svm_clf = Pipeline([("linear_svc", LinearSVC(C=1,
                                            loss="hinge")),
                    ])
svm_clf.fit(X_train, y_train)

# predict
y_predict = svm_clf.predict(X_test)
print("Accuracy of breast cancer data: ", accuracy_score(y_test, y_predict))
y_predict_train = svm_clf.predict(X_train)

Accuracy of breast cancer data:  0.6666666666666666




In [178]:
# create SVM models with scaling

svm_clf_scaled = Pipeline([
                        ("scaler", StandardScaler()),
                        ("linear_svc", LinearSVC(C=1,
                                                loss="hinge",
                                                random_state=42)),
                        ])
svm_clf_scaled = svm_clf_scaled.fit(X_train, y_train)

# predict
y_predict_scaled = svm_clf_scaled.predict(X_test)
print("Accuracy of breast cancer data with scaling: ", accuracy_score(y_test, y_predict_scaled))
y_predict_train_scaled = svm_clf_scaled.predict(X_train)

Accuracy of breast cancer data with scaling:  0.9122807017543859


In [179]:
accuracy_array = [
                  accuracy_score(y_train, y_predict_train), # train accuracy
                  accuracy_score(y_test, y_predict), # test accuracy
                  accuracy_score(y_train, y_predict_train_scaled), # train accuracy with scaling
                  accuracy_score(y_test, y_predict_scaled)] # test accuracy with scaling
# save to pickle file
with open('bc_acc.pkl', 'wb') as f:
    pickle.dump(accuracy_array, f)
print(accuracy_array)
                  

[0.6175824175824176, 0.6666666666666666, 0.9076923076923077, 0.9122807017543859]


### Iris

In [180]:
X_train, X_test, y_train, y_test = train_test_split(data_iris['data'], data_iris['target'], test_size=0.2)

In [181]:
# create svm model
svm_clf = Pipeline([
                        ("linear_svc", LinearSVC(C=1,
                                                loss="hinge",
                                                random_state=42)),
                        ])
svm_clf.fit(X_train, y_train)
# predict
y_predict = svm_clf.predict(X_test)
print("Accuracy of iris data: ", accuracy_score(y_test, y_predict))
y_predict_train = svm_clf.predict(X_train)

Accuracy of iris data:  0.9




In [182]:
# create SVM models with scaling
svm_clf_scaled = Pipeline([
                        ("scaler", StandardScaler()),
                        ("linear_svc", LinearSVC(C=1,
                                                loss="hinge",
                                                random_state=42)),
                        ])
svm_clf_scaled = svm_clf_scaled.fit(X_train, y_train)

# predict
y_predict_scaled = svm_clf_scaled.predict(X_test)
print("Accuracy of iris data with scaling: ", accuracy_score(y_test, y_predict_scaled))
y_predict_train_scaled = svm_clf_scaled.predict(X_train)

Accuracy of iris data with scaling:  0.9




In [183]:
accuracy_array = [
                  accuracy_score(y_train, y_predict_train), # train accuracy
                  accuracy_score(y_test, y_predict), # test accuracy
                  accuracy_score(y_train, y_predict_train_scaled), # train accuracy with scaling
                  accuracy_score(y_test, y_predict_scaled)] # test accuracy with scaling
# save to pickle file
with open('iris_acc.pkl', 'wb') as f:
    pickle.dump(accuracy_array, f)
print(accuracy_array)
                  

[0.95, 0.9, 0.925, 0.9]
