In [1]:
from utils import Dataloader
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score
import time

In [2]:
def array_to_matrix(arr, n_cols):
    n_rows = len(arr) // n_cols
    return np.array(arr).reshape(n_rows, n_cols)

## Iris Dataset

In [3]:
C = 1.0
clf = SVC(kernel = 'linear', C = C)

#### Iris-virginica & Iris-versicolor

In [4]:
loader = Dataloader(None, None, 'iris', 'Iris-virginica', 'Iris-versicolor')
print(f"Exporting iris to python... ")
data = loader.export_to_python()
X_shape = loader.X_train_shape
X_train = data[0]
y_train = data[1]
y = array_to_matrix(y_train,2)
X = array_to_matrix(X_train,X_shape[1])
y = np.array([np.argmax(np.append(i,0)) for i in y])

start_time = time.time()
clf.fit(X, y) 
training_time = time.time() - start_time
print("Training time:", training_time, "seconds")

fetching iris dataset
(150, 4) (150,)
Dataset:  iris
X_train:  [[ 2.02877297  0.38660992  2.06223168  1.00321947]
 [-0.39726347  0.38660992 -0.12904165  0.29339437]]
y_train:  [[-1.  1.]
 [-1.  1.]]
X_test:  [[ 0.05761837 -0.21746808  0.23617057 -0.41643072]
 [ 0.05761837  0.08457092  0.84485761  0.29339437]]
y_test:  [[-1.  1.]
 [-1.  1.]]
(80, 4) (80, 2) (20, 4) (20, 2)
Exporting iris to python... 
Training time: 0.000997304916381836 seconds


In [5]:
X_test = array_to_matrix(data[2],X_shape[1])
y_test = array_to_matrix(data[3],2)
y_test = np.array([np.argmax(np.append(i,0)) for i in y_test])

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


[1 1 1 0 0 0 1 1 0 0 1 0 1 1 1 0 1 1 0 0]
Accuracy: 0.85
Precision: 0.7272727272727273
Recall: 1.0
F1-score: 0.8421052631578948


#### Iris-setosa & Iris-versicolor

In [6]:
loader = Dataloader(None, None, 'iris', 'Iris-setosa', 'Iris-versicolor')
print(f"Exporting iris to python... ")
data = loader.export_to_python()
X_shape = loader.X_train_shape
X_train = data[0]
y_train = data[1]
y = array_to_matrix(y_train,2)
X = array_to_matrix(X_train,X_shape[1])
y = np.array([np.argmax(np.append(i,0)) for i in y])

start_time = time.time()
clf.fit(X, y) 
training_time = time.time() - start_time
print("Training time:", training_time, "seconds")

fetching iris dataset
(150, 4) (150,)
Dataset:  iris
X_train:  [[ 0.35866332 -0.62068428  1.1364712   0.91401319]
 [ 0.20204178 -0.19845007  0.85894465  0.91401319]]
y_train:  [[-1.  1.]
 [-1.  1.]]
X_test:  [[ 0.82852793 -0.83180138  1.55276101  1.44644806]
 [ 0.04542025 -1.67626978  0.78956302  0.91401319]]
y_test:  [[-1.  1.]
 [-1.  1.]]
(80, 4) (80, 2) (20, 4) (20, 2)
Exporting iris to python... 
Training time: 0.0 seconds


In [7]:
X_test = array_to_matrix(data[2],X_shape[1])
y_test = array_to_matrix(data[3],2)
y_test = np.array([np.argmax(np.append(i,0)) for i in y_test])

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


[1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-score: 1.0


#### Iris-setosa & Iris-virginica

In [8]:
loader = Dataloader(None, None, 'iris', 'Iris-setosa', 'Iris-virginica')
print(f"Exporting iris to python... ")
data = loader.export_to_python()
X_shape = loader.X_train_shape
X_train = data[0]
y_train = data[1]
y = array_to_matrix(y_train,2)
X = array_to_matrix(X_train,X_shape[1])
y = np.array([np.argmax(np.append(i,0)) for i in y])
clf.fit(X, y) 

fetching iris dataset
(150, 4) (150,)
Dataset:  iris
X_train:  [[ 1.91690205 -0.47344653  1.48387657  1.05511751]
 [ 0.21582425 -0.47344653  0.62004157  0.7271017 ]]
y_train:  [[-1.  1.]
 [-1.  1.]]
X_test:  [[ 0.53477633 -0.95655523  0.76401407  0.3990859 ]
 [ 0.53477633 -0.71500088  1.00396823  0.7271017 ]]
y_test:  [[-1.  1.]
 [-1.  1.]]
(80, 4) (80, 2) (20, 4) (20, 2)
Exporting iris to python... 


#### Result

In [9]:
w_sklearn = clf.coef_.reshape(-1, 1)
b_sklearn = clf.intercept_[0]
print('W =',w_sklearn.T)
print('b =',b_sklearn)

W = [[ 0.09770021 -0.13776356  0.66345363  0.65728682]]
b = 0.1394514488987482


#### Test

In [10]:
X_test = array_to_matrix(data[2],X_shape[1])
y_test = array_to_matrix(data[3],2)
y_test = np.array([np.argmax(np.append(i,0)) for i in y_test])

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


[1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-score: 1.0


## Mushroom Dataset

In [11]:
mushroom = Dataloader(None, None, 'mushroom', None, None)
print(f"Exporting mushroom to python... ")
data = mushroom.export_to_python()

fetching mushroom dataset
Dataset:  mushroom
X_train:  [[-0.21699152  0.14012794 -0.98389939 -0.84322964  1.3573133   0.16289645
  -0.43886364  1.49468272 -1.35889624  0.87351064  1.35578135  0.68377765
  -0.89305291  0.0965768   0.63199138  0.          0.14203663 -0.25613174
  -1.27221574  1.42842641  0.28432981 -0.8771691 ]
 [ 1.02971224  0.14012794 -0.19824983 -0.84322964 -1.01956488  0.16289645
  -0.43886364  1.49468272 -1.35889624  0.87351064  1.35578135 -0.9254372
   0.58638466  0.62244139  0.63199138  0.          0.14203663 -0.25613174
  -1.27221574  1.42842641  0.28432981  1.44858865]]
y_train:  [[-1.  1.]
 [-1.  1.]]
X_test:  [[-0.8403434  -1.48615695 -0.19824983 -0.84322964  0.40656203  0.16289645
   2.27861212 -0.66903831 -0.51147238  0.87351064  0.20869036  0.68377765
  -2.37249048  0.62244139  0.63199138  0.          0.14203663 -0.25613174
  -1.27221574 -0.2504706  -0.5143892  -0.29572966]
 [-0.8403434   0.14012794 -0.98389939 -0.84322964  1.83268894  0.16289645
  -0.43886

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.loc[:, col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.loc[:, col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.loc[:, col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[

In [12]:
C = 1.0
clf_mushroom = SVC(kernel = 'linear', C = C)
X_shape = mushroom.X_train_shape
X_train = data[0]
y_train = data[1]
y = array_to_matrix(y_train,2)
X = array_to_matrix(X_train,X_shape[1])
y = np.array([np.argmax(np.append(i,0)) for i in y])

start_time = time.time()
clf_mushroom.fit(X, y) 
training_time = time.time() - start_time
print("Training time:", training_time, "seconds")


Training time: 0.7565584182739258 seconds


In [13]:
w_sklearn = clf_mushroom.coef_.reshape(-1, 1)
b_sklearn = clf_mushroom.intercept_[0]
print('W =',w_sklearn.T)
print('b =',b_sklearn)

W = [[ 2.65812327e-06  2.87688240e-04 -3.81343132e-05 -1.24107312e+00
  -3.99120878e-01 -2.44862883e-02 -3.54136609e+00  4.01116900e+00
  -7.06083927e-04  9.77820723e-01 -3.93618812e+00 -1.44457578e+00
  -4.40555827e-05 -2.38037885e-05  1.93875009e-04  0.00000000e+00
   4.02360367e-01  1.07740314e+00 -1.82613065e-01 -2.09304631e-03
  -2.05477783e+00 -5.02723343e-05]]
b = -0.817683691364495


In [14]:
X_test = array_to_matrix(data[2],X_shape[1])
y_test = array_to_matrix(data[3],2)
y_test = np.array([np.argmax(np.append(i,0)) for i in y_test])

y_pred = clf_mushroom.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(y_pred)
print("Accuracy:", accuracy)

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

[0 1 1 ... 1 1 1]
Accuracy: 0.9655384615384616
Precision: 0.9571788413098237
Recall: 0.9718670076726342
F1-score: 0.9644670050761421


## MNIST dataset

In [18]:
from utils import Dataloader
mnist = Dataloader(None, None, 'mnist', None, None)
print(f"Exporting mnist to python... ")
data = mnist.export_to_python()

[1 0 1 0 0 1 0 0 1 1]
Dataset:  mnist
X_train:  [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
y_train:  [[ 1. -1.]
 [-1.  1.]]
X_test:  [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
y_test:  [[-1.  1.]
 [ 1. -1.]]
(12665, 784) (12665, 2) (2115, 784) (2115, 2)
Exporting mnist to python... 


In [19]:
C = 1.0
clf_mnist = SVC(kernel = 'linear', C = C)
X_shape = mnist.X_train_shape
X_train = data[0]
y_train = data[1]
y = array_to_matrix(y_train,2)
X = array_to_matrix(X_train,X_shape[1])
y = np.array([np.argmax(np.append(i,0)) for i in y])

start_time = time.time()
clf_mnist.fit(X, y) 
training_time = time.time() - start_time
print("Training time:", training_time, "seconds")

Training time: 2.0065367221832275 seconds


In [20]:
X_test = array_to_matrix(data[2],X_shape[1])
y_test = array_to_matrix(data[3],2)
y_test = np.array([np.argmax(np.append(i,0)) for i in y_test])

y_pred = clf_mnist.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(y_pred)
print("Accuracy:", accuracy)

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

[1 0 1 ... 1 0 1]
Accuracy: 0.9995271867612293
Precision: 0.9991197183098591
Recall: 1.0
F1-score: 0.9995596653456628
