In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
import pickle as pkl

In [2]:
#### Example with breast cancer
# Load the bundled dataset once; the full object is kept because later cells
# look up feature names on it.
data_breast_cancer = datasets.load_breast_cancer()
X_b, y_b = data_breast_cancer["data"], data_breast_cancer["target"]

In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical across runs.
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(
    X_b,
    y_b,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Resolve the column positions of the two features by name instead of
# hard-coding them, then print both (one per line).
feature_names_b = data_breast_cancer.feature_names.tolist()
mean_area_idx = feature_names_b.index('mean area')
mean_smoothness_idx = feature_names_b.index('mean smoothness')
print(mean_area_idx, mean_smoothness_idx, sep="\n")

# Restrict both splits to just those two columns (area first, smoothness second).
area_smoothness_cols = [mean_area_idx, mean_smoothness_idx]
X_train_area_smoothness = X_train_b[:, area_smoothness_cols]
X_test_area_smoothness = X_test_b[:, area_smoothness_cols]

3
4


In [5]:
#### Without scaling
# Baseline: a hinge-loss linear SVM fitted directly on the raw two-feature
# training data. The fit call stays last so the cell displays the estimator.
svm_clf_1 = LinearSVC(
    loss='hinge',
    random_state=42,
)
svm_clf_1.fit(X_train_area_smoothness, y_train_b)



In [6]:
#### With scaling
# The scaler is fitted on the training split only; the test split is
# transformed with those same fitted statistics.
scaler = StandardScaler()
X_train_area_smoothness_scaled = scaler.fit_transform(X_train_area_smoothness)
X_test_area_smoothness_scaled = scaler.transform(X_test_area_smoothness)

# Same classifier configuration as the unscaled baseline, trained on the
# standardized features. The fit call stays last so the cell displays it.
svm_clf_2 = LinearSVC(
    loss='hinge',
    random_state=42,
)
svm_clf_2.fit(X_train_area_smoothness_scaled, y_train_b)

In [8]:
# Accuracies in a fixed order:
#   [unscaled train, unscaled test, scaled train, scaled test]
# Each classifier is evaluated on the feature representation it was fitted on.
score_list = [
    accuracy_score(y_true, clf.predict(features))
    for clf, features, y_true in (
        (svm_clf_1, X_train_area_smoothness, y_train_b),
        (svm_clf_1, X_test_area_smoothness, y_test_b),
        (svm_clf_2, X_train_area_smoothness_scaled, y_train_b),
        (svm_clf_2, X_test_area_smoothness_scaled, y_test_b),
    )
]
print(score_list)

[0.6285714285714286, 0.6228070175438597, 0.8923076923076924, 0.9298245614035088]


In [9]:
# Persist the four breast cancer accuracies as a pickled list.
with open("bc_acc.pkl", mode='wb') as bc_acc_file:
    pkl.dump(score_list, bc_acc_file)

In [11]:
### Example with iris
data_iris = datasets.load_iris()

# Keep only columns 2 and 3 of the feature matrix.
X_i = data_iris["data"][:, [2, 3]]

# Binary target: 1 where the original class label is 2, otherwise 0.
is_class_2 = data_iris["target"] == 2
y_i = is_class_2.astype(np.int8)

In [12]:
# Hold out 20% of the iris samples for testing, with a fixed seed for a
# repeatable split.
X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(
    X_i,
    y_i,
    test_size=0.2,
    random_state=42,
)

In [13]:
#### Without scaling
# Baseline: a hinge-loss linear SVM fitted on the raw iris features. The fit
# call stays last so the cell displays the fitted estimator.
svm_clf_1_iris = LinearSVC(
    loss='hinge',
    random_state=42,
)
svm_clf_1_iris.fit(X_train_i, y_train_i)



In [14]:
#### With scaling
# Rebinds `scaler` to a fresh StandardScaler fitted on the iris training split;
# the test split is transformed with those same fitted statistics.
scaler = StandardScaler()
X_train_i_scaled = scaler.fit_transform(X_train_i)
X_test_i_scaled = scaler.transform(X_test_i)

# Same classifier configuration as the unscaled iris baseline, trained on the
# standardized features.
svm_clf_2_iris = LinearSVC(
    loss='hinge',
    random_state=42,
)
svm_clf_2_iris.fit(X_train_i_scaled, y_train_i)

In [15]:
# Accuracies in a fixed order:
#   [unscaled train, unscaled test, scaled train, scaled test]
# Each classifier is evaluated on the feature representation it was fitted on.
score_list_iris = [
    accuracy_score(y_true, clf.predict(features))
    for clf, features, y_true in (
        (svm_clf_1_iris, X_train_i, y_train_i),
        (svm_clf_1_iris, X_test_i, y_test_i),
        (svm_clf_2_iris, X_train_i_scaled, y_train_i),
        (svm_clf_2_iris, X_test_i_scaled, y_test_i),
    )
]
print(score_list_iris)

[0.9416666666666667, 1.0, 0.9416666666666667, 1.0]


In [16]:
# svm_clf_2_iris was fitted on standardized features (the output of
# scaler.fit_transform on X_train_i), so new samples given in raw feature
# units must go through that same fitted scaler before predict(). Feeding raw
# values straight in would evaluate the model in the wrong feature space.
# `scaler` here is the StandardScaler fitted on the iris training split in the
# "With Scaling" iris cell.
# NOTE: any previously recorded output of this cell was produced from
# unscaled inputs; re-run the cell to refresh it.
new_samples = [[5.5, 1.7], [4.5, 1.7]]
print(svm_clf_2_iris.predict(scaler.transform(new_samples)))

[1 1]


In [17]:
# Persist the four iris accuracies as a pickled list.
with open("iris_acc.pkl", mode='wb') as iris_acc_file:
    pkl.dump(score_list_iris, iris_acc_file)