In [1]:
from sklearn import datasets
import numpy as np

In [2]:
#
data_breast_cancer = datasets.load_breast_cancer(as_frame = True)
# print(data_breast_cancer['DESCR'])
X_cancer = data_breast_cancer["data"][["mean area", "mean smoothness"]]
y_cancer = data_breast_cancer["target"]

X_cancer
y_cancer

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: target, Length: 569, dtype: int64

In [3]:
#
data_iris = datasets.load_iris(as_frame=True)
X_iris = data_iris["data"][["petal length (cm)", "petal width (cm)"]]
y_iris = (data_iris["target"] == 2).astype(np.int8)
X_iris

Unnamed: 0,petal length (cm),petal width (cm)
0,1.4,0.2
1,1.4,0.2
2,1.3,0.2
3,1.5,0.2
4,1.4,0.2
...,...,...
145,5.2,2.3
146,5.0,1.9
147,5.2,2.0
148,5.4,2.3


In [4]:
from sklearn.model_selection import train_test_split
X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test = train_test_split(X_cancer, y_cancer, test_size=0.2)

In [5]:
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(X_iris, y_iris, test_size=0.2)

In [6]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [7]:
#cancer without scaling
svm_clf_cancer = Pipeline([("linear_svc", LinearSVC(C=1, loss="hinge",))])
svm_clf_cancer.fit(X_cancer, y_cancer)



Pipeline(steps=[('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [8]:
from sklearn.metrics import accuracy_score
y_cancer_train_pred = svm_clf_cancer.predict(X_cancer_train)
accuracy_cancer_train = accuracy_score(y_cancer_train, y_cancer_train_pred)

y_cancer_test_pred = svm_clf_cancer.predict(X_cancer_test)
accuracy_cancer_test = accuracy_score(y_cancer_test, y_cancer_test_pred)

print(accuracy_cancer_train, accuracy_cancer_test)

0.6263736263736264 0.631578947368421


In [9]:
#cancer with scaling
svm_clf_cancer_scal = Pipeline([
        ("scaler", StandardScaler()), ("linear_svc", LinearSVC(C=1, loss="hinge",))])

svm_clf_cancer_scal.fit(X_cancer, y_cancer)

Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [10]:
y_cancer_train_pred_scal = svm_clf_cancer_scal.predict(X_cancer_train)
accuracy_cancer_train_scal = accuracy_score(y_cancer_train, y_cancer_train_pred_scal)
print(accuracy_cancer_train_scal)

y_cancer_test_pred_scal = svm_clf_cancer_scal.predict(X_cancer_test)
accuracy_cancer_test_scal = accuracy_score(y_cancer_test, y_cancer_test_pred_scal)
print(accuracy_cancer_test_scal)


0.9076923076923077
0.868421052631579


In [11]:
bc_acc = [accuracy_cancer_train, accuracy_cancer_test, accuracy_cancer_train_scal, accuracy_cancer_test_scal]
bc_acc

[0.6263736263736264, 0.631578947368421, 0.9076923076923077, 0.868421052631579]

In [12]:
import pickle
with open('bc_acc.pkl', 'wb') as fp:
    pickle.dump(bc_acc, fp)

In [13]:
#iris without scaling
svm_clf_iris = Pipeline([("linear_svc", LinearSVC(C=1, loss="hinge",))])
svm_clf_iris.fit(X_iris, y_iris)



Pipeline(steps=[('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [14]:
from sklearn.metrics import accuracy_score

y_iris_train_pred = svm_clf_iris.predict(X_iris_train)
accuracy_iris_train = accuracy_score(y_iris_train, y_iris_train_pred)
print(accuracy_iris_train)

y_iris_test_pred = svm_clf_iris.predict(X_iris_test)
accuracy_iris_test = accuracy_score(y_iris_test, y_iris_test_pred)
print(accuracy_iris_test)

0.95
1.0


In [15]:
#iris with scaling
svm_clf_iris_scal = Pipeline([("scaler", StandardScaler()), ("linear_svc", LinearSVC(C=1, loss="hinge"))])
svm_clf_iris_scal.fit(X_iris, y_iris)

Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [16]:
y_iris_train_pred_scal = svm_clf_iris_scal.predict(X_iris_train)
accuracy_iris_train_scal = accuracy_score(y_iris_train, y_iris_train_pred_scal)
print(accuracy_iris_train_scal)

y_iris_test_pred_scal = svm_clf_iris_scal.predict(X_iris_test)
accuracy_iris_test_scal = accuracy_score(y_iris_test, y_iris_test_pred_scal)
print(accuracy_iris_test_scal)

0.9583333333333334
0.9666666666666667


In [17]:
iris_acc = [accuracy_iris_train, accuracy_iris_test, accuracy_iris_train_scal, accuracy_iris_test_scal]
iris_acc

[0.95, 1.0, 0.9583333333333334, 0.9666666666666667]

In [18]:
import pickle
with open('iris_acc.pkl', 'wb') as fp:
    pickle.dump(iris_acc, fp)