# SVM

## Przygotowanie danych

In [1]:
from sklearn import datasets
import numpy as np
import pandas as pd

In [2]:
# Load the breast cancer dataset shipped with scikit-learn's `datasets` module.
data_breast_cancer = datasets.load_breast_cancer()
# Uncomment to print the dataset description stored under the 'DESCR' key:
#print(data_breast_cancer['DESCR'])

In [3]:
# Load the iris dataset shipped with scikit-learn's `datasets` module.
data_iris = datasets.load_iris()
# Uncomment to print the dataset description stored under the 'DESCR' key:
#print(data_iris['DESCR'])

## Klasyfikacja nowotworów

In [4]:
# Use only feature columns 3 and 4 (area and smoothness, per the original note).
X = data_breast_cancer.data[:, [3, 4]]
# Class labels for every sample.
y = data_breast_cancer.target

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### LinearSVM z Pipeline

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Two-step pipeline: standardise the features, then fit a linear SVM
# with hinge loss (scaling happens automatically inside the pipeline).
svm_clf = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("linear_svc", LinearSVC(C=1, loss="hinge", random_state=42)),
    ]
)

In [7]:
# Fit the scaler + LinearSVC pipeline on the training split; the returned estimator is the cell output.
svm_clf.fit(X_train, y_train)

Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge', random_state=42))])

In [8]:
# Predict labels for both splits with the fitted pipeline.
y_train_pred, y_test_pred = svm_clf.predict(X_train), svm_clf.predict(X_test)

In [9]:
from sklearn.metrics import accuracy_score

# Training-set accuracy of the pipeline (with feature scaling).
accuracy_train_pipeline = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print(accuracy_train_pipeline)

0.8923076923076924


In [10]:
# Test-set accuracy of the pipeline (with feature scaling).
accuracy_test_pipeline = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(accuracy_test_pipeline)

0.9298245614035088


### LinearSVM bez Pipeline

In [11]:
# Same linear SVM configuration, but without the automatic scaling step.
svm_clf_nopip = LinearSVC(
    C=1,
    loss="hinge",
    random_state=42,
)

In [12]:
# Fit the bare LinearSVC directly on the unscaled training features.
# NOTE(review): unscaled inputs may trigger a ConvergenceWarning here — confirm when re-running.
svm_clf_nopip.fit(X_train, y_train)



LinearSVC(C=1, loss='hinge', random_state=42)

In [13]:
# Predict labels for both splits with the classifier trained on unscaled data.
y_train_pred, y_test_pred = (
    svm_clf_nopip.predict(X_train),
    svm_clf_nopip.predict(X_test),
)

In [14]:
# Training-set accuracy of the classifier fitted without scaling.
accuracy_train = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print(accuracy_train)

0.6285714285714286


In [15]:
# Test-set accuracy of the classifier fitted without scaling.
accuracy_test = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(accuracy_test)

0.6228070175438597


#### zapisanie pliku:

In [16]:
import pickle

# Order: [train without scaling, test without scaling, train with pipeline, test with pipeline].
bc_acc = [
    accuracy_train,
    accuracy_test,
    accuracy_train_pipeline,
    accuracy_test_pipeline,
]
print(bc_acc)

# Serialise the accuracy list to 'bc_acc.pkl' in the working directory.
with open('bc_acc.pkl', 'wb') as f:
    pickle.dump(bc_acc, f)

[0.6285714285714286, 0.6228070175438597, 0.8923076923076924, 0.9298245614035088]


* Brak skalowania zmniejszył skuteczność klasyfikatora (dokładność ok. 0,62–0,63 zamiast 0,89–0,93).

## Klasyfikacja zbioru irysów

In [17]:
# Use only feature columns 2 and 3 (petal length and width, per the original note).
X = data_iris.data[:, [2, 3]]
# Binary target: 1 where the class label equals 2, otherwise 0.
y = (data_iris.target == 2).astype(np.int8)

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### LinearSVM z Pipeline

In [19]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Two-step pipeline: standardise the features, then fit a linear SVM
# with hinge loss (scaling happens automatically inside the pipeline).
svm_clf = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("linear_svc", LinearSVC(C=1, loss="hinge", random_state=42)),
    ]
)

In [20]:
# Fit the scaler + LinearSVC pipeline on the iris training split; the returned estimator is the cell output.
svm_clf.fit(X_train, y_train)

Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge', random_state=42))])

In [21]:
# Predict labels for both splits with the fitted pipeline.
y_train_pred, y_test_pred = svm_clf.predict(X_train), svm_clf.predict(X_test)

In [22]:
from sklearn.metrics import accuracy_score

# Training-set accuracy of the pipeline (with feature scaling).
accuracy_train_pipeline = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print(accuracy_train_pipeline)

0.9416666666666667


In [23]:
# Test-set accuracy of the pipeline (with feature scaling).
accuracy_test_pipeline = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(accuracy_test_pipeline)

1.0


### LinearSVM bez Pipeline

In [24]:
# Same linear SVM configuration, but without the automatic scaling step.
svm_clf_nopip = LinearSVC(
    C=1,
    loss="hinge",
    random_state=42,
)

In [25]:
# Fit the bare LinearSVC directly on the unscaled iris training features.
svm_clf_nopip.fit(X_train, y_train)



LinearSVC(C=1, loss='hinge', random_state=42)

In [26]:
# Predict labels for both splits with the classifier trained on unscaled data.
y_train_pred, y_test_pred = (
    svm_clf_nopip.predict(X_train),
    svm_clf_nopip.predict(X_test),
)

In [27]:
# Training-set accuracy of the classifier fitted without scaling.
accuracy_train = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print(accuracy_train)

0.9416666666666667


In [28]:
# Test-set accuracy of the classifier fitted without scaling.
accuracy_test = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(accuracy_test)

1.0


#### zapisanie pliku:

In [29]:
import pickle

# Order: [train without scaling, test without scaling, train with pipeline, test with pipeline].
iris_acc = [
    accuracy_train,
    accuracy_test,
    accuracy_train_pipeline,
    accuracy_test_pipeline,
]
print(iris_acc)

# Serialise the accuracy list to 'iris_acc.pkl' in the working directory.
with open('iris_acc.pkl', 'wb') as f:
    pickle.dump(iris_acc, f)

[0.9416666666666667, 1.0, 0.9416666666666667, 1.0]


* Dlaczego dokładność na zbiorze testowym wynosi 1.0 (jeden)?

In [30]:
### 