# This lab is to deal with **SVM** to classification tasks and compare its performance with other competitive algorithms. In general, **SVM** is one of the most popular and widely used supervised machine learning algorithms.

*   **Deadline: 23:59, 17/03/2023**



# Import libraries

In [97]:
# code
from sklearn import datasets
import sklearn.svm as svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree as treeModel
from prettytable import PrettyTable
from google.colab import drive
import pandas as pd
import warnings
warnings.filterwarnings('ignore')


#Task 1. 
For breast cancer dataset (https://tinyurl.com/3vme8hr3) which could be loaded from datasets in sklearn as follows:

```
#Import scikit-learn dataset library
from sklearn import datasets

#Load dataset
cancer = datasets.load_breast_cancer()
```

*   1.1.	Apply SVM algorithm to above dataset using linear kernel.
*   1.2.	Compare the obtained results with other competitive algorithms (Logistic Regression, Decision Tree, kNN) based on metrics: accuracy, precision, recall, f1 measures.



In [52]:
cancer = datasets.load_breast_cancer()
X_task1 = cancer['data']
Y_task1 = cancer['target']
X_train, X_test, y_train, y_test = train_test_split(X_task1,Y_task1, test_size=0.3, random_state=42)
SVM = svm.SVC(kernel='linear')
SVM.fit(X_train,y_train)

In [53]:
y_pred = SVM.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracySVM = metrics.accuracy_score(y_test, y_pred)
precisionSVM =metrics.precision_score(y_test, y_pred,average='weighted')
recallSVM = metrics.recall_score(y_test, y_pred,average='weighted')
f1SVM = metrics.f1_score(y_test, y_pred,average='weighted')


              precision    recall  f1-score   support

           0       0.97      0.94      0.95        63
           1       0.96      0.98      0.97       108

    accuracy                           0.96       171
   macro avg       0.97      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171



Logistic

In [54]:
logistic_task1 = LogisticRegression()
logistic_task1.fit(X_train,y_train)

In [55]:
y_pred = logistic_task1.predict(X_test)
print(metrics.classification_report(y_test, y_pred))

accuracyLogistic = metrics.accuracy_score(y_test, y_pred)
precisionLogistic =metrics.precision_score(y_test, y_pred,average='weighted')
recallLogistic = metrics.recall_score(y_test, y_pred,average='weighted')
f1Logistic = metrics.f1_score(y_test, y_pred,average='weighted')


              precision    recall  f1-score   support

           0       0.98      0.94      0.96        63
           1       0.96      0.99      0.98       108

    accuracy                           0.97       171
   macro avg       0.97      0.96      0.97       171
weighted avg       0.97      0.97      0.97       171



KNN

In [56]:
kNN_task1 = KNeighborsClassifier(n_neighbors=10)
kNN_task1.fit(X_train,y_train)

In [57]:
y_pred = kNN_task1.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_kNN = metrics.accuracy_score(y_test, y_pred)
precision_kNN =metrics.precision_score(y_test, y_pred,average='weighted')
recall_kNN = metrics.recall_score(y_test, y_pred,average='weighted')
f1_kNN = metrics.f1_score(y_test, y_pred,average='weighted')


              precision    recall  f1-score   support

           0       0.98      0.97      0.98        63
           1       0.98      0.99      0.99       108

    accuracy                           0.98       171
   macro avg       0.98      0.98      0.98       171
weighted avg       0.98      0.98      0.98       171



Decision Tree


In [58]:
tree_task1 = tree.DecisionTreeClassifier()
tree_task1.fit(X_train,y_train)

In [59]:
y_pred = tree_task1.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_tree = metrics.accuracy_score(y_test, y_pred)
precision_tree =metrics.precision_score(y_test, y_pred,average='weighted')
recall_tree = metrics.recall_score(y_test, y_pred,average='weighted')
f1_tree = metrics.f1_score(y_test, y_pred,average='weighted')
print(accuracy_tree, precision_tree, recall_tree, f1_tree)

              precision    recall  f1-score   support

           0       0.87      0.92      0.89        63
           1       0.95      0.92      0.93       108

    accuracy                           0.92       171
   macro avg       0.91      0.92      0.91       171
weighted avg       0.92      0.92      0.92       171

0.9181286549707602 0.9201462324007492 0.9181286549707602 0.918615842945535


Compare

In [60]:
t = PrettyTable(["Metrics", "SVM", "Logistic", "kNN", "Decision Tree"])
t.add_row(["Accuracy", round(accuracySVM, 4), round(accuracyLogistic, 4), round(accuracy_kNN, 4),round(accuracy_tree, 4) ])
t.add_row(["Precision",round(precisionSVM, 4) ,round(precisionLogistic, 4) ,round(precision_kNN, 4) ,round(precision_tree, 4) ])
t.add_row(["Recall",round(recallSVM, 4) ,round(recallLogistic, 4) ,round(recall_kNN, 4) ,round(recall_tree, 4) ])
t.add_row(["F1",round(f1SVM, 4) ,round(f1Logistic, 4) ,round(f1_kNN, 4) ,round(f1_tree, 4) ])
print(t)

+-----------+--------+----------+--------+---------------+
|  Metrics  |  SVM   | Logistic |  kNN   | Decision Tree |
+-----------+--------+----------+--------+---------------+
|  Accuracy | 0.9649 |  0.9708  | 0.9825 |     0.9181    |
| Precision | 0.965  |  0.9711  | 0.9825 |     0.9201    |
|   Recall  | 0.9649 |  0.9708  | 0.9825 |     0.9181    |
|     F1    | 0.9648 |  0.9706  | 0.9824 |     0.9186    |
+-----------+--------+----------+--------+---------------+


#Task 2. 

*   1.1.	Perform SVM algorithm to **Iris dataset** using **linear kernel**.
*   1.2.	Compare the obtained results in 1.1 with SVM using other kernels (**Polynomial Kernel, Gaussian Kernel, Sigmoid Kernel, Radial Basis Function Kernel**). Some metrics could be used: accuracy, precision, recall, f1 measures





In [61]:
# code
data_task2 = datasets.load_iris()
X_task2 = data_task2["data"]
Y_task2 = data_task2["target"]
X_train, X_test, y_train, y_test = train_test_split(X_task2,Y_task2, test_size=0.3, random_state=42)


Linear Kernel


In [62]:
SVM = svm.SVC(kernel='linear')
SVM.fit(X_train,y_train)

In [63]:
y_pred = SVM.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_linear = metrics.accuracy_score(y_test, y_pred)
precision_linear =metrics.precision_score(y_test, y_pred,average='weighted')
recall_linear = metrics.recall_score(y_test, y_pred,average='weighted')
f1_linear = metrics.f1_score(y_test, y_pred,average='weighted')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



Polynomial Kernel

In [64]:
SVM = svm.SVC(kernel='poly', degree=4)
SVM.fit(X_train,y_train)

In [65]:
y_pred = SVM.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_poly = metrics.accuracy_score(y_test, y_pred)
precision_poly =metrics.precision_score(y_test, y_pred,average='weighted')
recall_poly = metrics.recall_score(y_test, y_pred,average='weighted')
f1_poly = metrics.f1_score(y_test, y_pred,average='weighted')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



Gaussian Kernel


In [66]:
SVM = svm.SVC(kernel='rbf')
SVM.fit(X_train,y_train)

In [67]:
y_pred = SVM.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_rbf = metrics.accuracy_score(y_test, y_pred)
precision_rbf =metrics.precision_score(y_test, y_pred,average='weighted')
recall_rbf = metrics.recall_score(y_test, y_pred,average='weighted')
f1_rbf = metrics.f1_score(y_test, y_pred,average='weighted')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



Sigmoid

In [68]:
SVM = svm.SVC(kernel='sigmoid')
X_train, X_test, y_train, y_test = train_test_split(X_task2,Y_task2, test_size=0.2, random_state=42)

SVM.fit(X_train,y_train)

In [69]:
y_pred = SVM.predict(X_test)

accuracy_sigmoid = metrics.accuracy_score(y_test, y_pred)
precision_sigmoid =metrics.precision_score(y_test, y_pred,average='weighted')
recall_sigmoid = metrics.recall_score(y_test, y_pred,average='weighted')
f1_sigmoid = metrics.f1_score(y_test, y_pred,average='weighted')
print(accuracy_sigmoid)

0.3


Compare

In [70]:
t = PrettyTable(["Metrics", "Linear ", "Poly", "Gaussian Kernel", "Sigmoid Kernel"])
t.add_row(["Accuracy", round(accuracy_linear, 4), round(accuracy_poly, 4), round(accuracy_rbf, 4),round(accuracy_sigmoid, 4) ])
t.add_row(["Precision",round(precision_linear, 4) ,round(precision_poly, 4) ,round(precision_rbf, 4) ,round(precision_sigmoid, 4) ])
t.add_row(["Recall",round(recall_linear, 4) ,round(recall_poly, 4) ,round(recall_rbf, 4) ,round(recall_sigmoid, 4) ])
t.add_row(["F1",round(f1_linear, 4) ,round(f1_poly, 4) ,round(f1_rbf, 4) ,round(f1_sigmoid, 4) ])
print(t)

+-----------+---------+------+-----------------+----------------+
|  Metrics  | Linear  | Poly | Gaussian Kernel | Sigmoid Kernel |
+-----------+---------+------+-----------------+----------------+
|  Accuracy |   1.0   | 1.0  |       1.0       |      0.3       |
| Precision |   1.0   | 1.0  |       1.0       |      0.09      |
|   Recall  |   1.0   | 1.0  |       1.0       |      0.3       |
|     F1    |   1.0   | 1.0  |       1.0       |     0.1385     |
+-----------+---------+------+-----------------+----------------+


#Task 3. 
Compare the performance of selected classification algorithms (Decision Tree, kNN, Logistic Regression) and SVM (using different kernels) with mnist dataset based on accuracy, precision, recall, f1 measures.


In [71]:
mnist = datasets.load_digits()
X = mnist["data"]
y = mnist["target"]
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=42)


In [72]:
logistic_task3 = LogisticRegression()
logistic_task3.fit(X_train,y_train)


In [74]:
y_pred = logistic_task3.predict(X_test)
print(metrics.classification_report(y_test, y_pred))

accuracyLogistic = metrics.accuracy_score(y_test, y_pred)
precisionLogistic =metrics.precision_score(y_test, y_pred,average='weighted')
recallLogistic = metrics.recall_score(y_test, y_pred,average='weighted')
f1Logistic = metrics.f1_score(y_test, y_pred,average='weighted')


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        53
           1       0.94      0.94      0.94        50
           2       0.96      0.98      0.97        47
           3       0.98      0.96      0.97        54
           4       1.00      0.97      0.98        60
           5       0.94      0.94      0.94        66
           6       0.96      0.98      0.97        53
           7       0.98      0.96      0.97        55
           8       0.91      0.98      0.94        43
           9       0.97      0.95      0.96        59

    accuracy                           0.96       540
   macro avg       0.96      0.97      0.96       540
weighted avg       0.97      0.96      0.96       540



In [75]:
kNN = KNeighborsClassifier(n_neighbors=10)
kNN.fit(X_train,y_train)

In [76]:
y_pred = kNN.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_kNN = metrics.accuracy_score(y_test, y_pred)
precision_kNN =metrics.precision_score(y_test, y_pred,average='weighted')
recall_kNN = metrics.recall_score(y_test, y_pred,average='weighted')
f1_kNN = metrics.f1_score(y_test, y_pred,average='weighted')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        53
           1       0.96      1.00      0.98        50
           2       1.00      1.00      1.00        47
           3       0.98      1.00      0.99        54
           4       0.97      1.00      0.98        60
           5       0.98      0.94      0.96        66
           6       0.98      1.00      0.99        53
           7       0.98      1.00      0.99        55
           8       1.00      0.95      0.98        43
           9       0.96      0.93      0.95        59

    accuracy                           0.98       540
   macro avg       0.98      0.98      0.98       540
weighted avg       0.98      0.98      0.98       540



In [77]:
tree = tree.DecisionTreeClassifier()
tree.fit(X_train,y_train)

In [78]:
y_pred = tree.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_tree = metrics.accuracy_score(y_test, y_pred)
precision_tree =metrics.precision_score(y_test, y_pred,average='weighted')
recall_tree = metrics.recall_score(y_test, y_pred,average='weighted')
f1_tree = metrics.f1_score(y_test, y_pred,average='weighted')
print(accuracy_tree, precision_tree, recall_tree, f1_tree)

              precision    recall  f1-score   support

           0       1.00      0.91      0.95        53
           1       0.73      0.70      0.71        50
           2       0.78      0.74      0.76        47
           3       0.81      0.87      0.84        54
           4       0.82      0.83      0.83        60
           5       0.94      0.88      0.91        66
           6       0.88      0.96      0.92        53
           7       0.84      0.84      0.84        55
           8       0.78      0.84      0.81        43
           9       0.83      0.83      0.83        59

    accuracy                           0.84       540
   macro avg       0.84      0.84      0.84       540
weighted avg       0.84      0.84      0.84       540

0.8425925925925926 0.844352583555876 0.8425925925925926 0.8427061442422843


In [79]:
def initSVM(tpye,degree= 3 ):
  SVM = svm.SVC(kernel=tpye, degree = degree)
  SVM.fit(X_train,y_train)
  y_pred = SVM.predict(X_test)
  accuracy = metrics.accuracy_score(y_test, y_pred)
  precision =metrics.precision_score(y_test, y_pred,average='weighted')
  recall = metrics.recall_score(y_test, y_pred,average='weighted')
  f1 = metrics.f1_score(y_test, y_pred,average='weighted')
  return [accuracy, precision, recall, f1]

In [80]:
linear = initSVM("linear")
poly = initSVM("poly")
rbf = initSVM("rbf")
sigmoid = initSVM("sigmoid")

t = PrettyTable(["Metrics", "Linear ", "Poly", "Gaussian Kernel", "Sigmoid Kernel", "Logistic", "kNN", "Decision Tree"])
t.add_row(["Accuracy", round(linear[0], 4), round(poly[0], 4), round(rbf[0], 4),round(sigmoid[0], 4), round(accuracyLogistic, 4) , round(accuracy_kNN, 4) , round(accuracy_tree, 4) ])
t.add_row(["Precision",round(linear[1], 4) ,round(poly[1], 4) ,round(rbf[1], 4) ,round(sigmoid[1], 4), round(precisionLogistic, 4) , round(precision_kNN, 4) , round(precision_tree, 4) ])
t.add_row(["Recall",round(linear[2], 4) ,round(poly[2], 4) ,round(rbf[2], 4) ,round(sigmoid[2], 4), round(recallLogistic, 4) , round(recall_kNN, 4) , round(recall_tree, 4) ])
t.add_row(["F1",round(linear[3], 4) ,round(poly[3], 4) ,round(rbf[3], 4) ,round(sigmoid[3], 4) , round(f1Logistic, 4) , round(f1_kNN, 4) , round(f1_tree, 4)])
print(t)

+-----------+---------+--------+-----------------+----------------+----------+--------+---------------+
|  Metrics  | Linear  |  Poly  | Gaussian Kernel | Sigmoid Kernel | Logistic |  kNN   | Decision Tree |
+-----------+---------+--------+-----------------+----------------+----------+--------+---------------+
|  Accuracy |  0.9796 | 0.9889 |      0.987      |     0.9074     |  0.9648  | 0.9815 |     0.8426    |
| Precision |  0.9798 | 0.989  |      0.9871     |     0.9086     |  0.9653  | 0.9816 |     0.8444    |
|   Recall  |  0.9796 | 0.9889 |      0.987      |     0.9074     |  0.9648  | 0.9815 |     0.8426    |
|     F1    |  0.9796 | 0.9889 |      0.987      |     0.9072     |  0.9649  | 0.9813 |     0.8427    |
+-----------+---------+--------+-----------------+----------------+----------+--------+---------------+


#Task 4. 
Compare the performance of selected classification algorithms (Decision Tree, kNN, Logistic Regression) and SVM (using different kernels) with **credit card dataset** based on accuracy, precision, recall, f1 measures.

*   Give some comments on the obtained results
*   Identify issues with dataset, and propose the solutions to these issues



In [91]:
drive.mount('/content/gdrive')
%cd '/content/gdrive/MyDrive/Data/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/MyDrive/Data


In [92]:
data = pd.read_csv("creditcard.csv")
X = data.iloc[:,:-1].values
y = data.iloc[:,-1].values
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.05, train_size=0.2, random_state=42)


In [83]:
logistic_task4 = LogisticRegression()
logistic_task4.fit(X_train,y_train)

In [84]:
y_pred = logistic_task4.predict(X_test)
print(metrics.classification_report(y_test, y_pred))

accuracyLogistic = metrics.accuracy_score(y_test, y_pred)
precisionLogistic =metrics.precision_score(y_test, y_pred,average='weighted')
recallLogistic = metrics.recall_score(y_test, y_pred,average='weighted')
f1Logistic = metrics.f1_score(y_test, y_pred,average='weighted')


              precision    recall  f1-score   support

           0       1.00      1.00      1.00     14220
           1       0.53      0.48      0.50        21

    accuracy                           1.00     14241
   macro avg       0.76      0.74      0.75     14241
weighted avg       1.00      1.00      1.00     14241



In [85]:
kNN = KNeighborsClassifier(n_neighbors=10)
kNN.fit(X_train,y_train)

In [86]:
y_pred = kNN.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_kNN = metrics.accuracy_score(y_test, y_pred)
precision_kNN =metrics.precision_score(y_test, y_pred,average='weighted')
recall_kNN = metrics.recall_score(y_test, y_pred,average='weighted')
f1_kNN = metrics.f1_score(y_test, y_pred,average='weighted')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     14220
           1       0.00      0.00      0.00        21

    accuracy                           1.00     14241
   macro avg       0.50      0.50      0.50     14241
weighted avg       1.00      1.00      1.00     14241



In [98]:
tree = treeModel.DecisionTreeClassifier()
tree.fit(X_train,y_train)

In [99]:
y_pred = tree.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
accuracy_tree = metrics.accuracy_score(y_test, y_pred)
precision_tree =metrics.precision_score(y_test, y_pred,average='weighted')
recall_tree = metrics.recall_score(y_test, y_pred,average='weighted')
f1_tree = metrics.f1_score(y_test, y_pred,average='weighted')
print(accuracy_tree, precision_tree, recall_tree, f1_tree)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     14220
           1       0.61      0.52      0.56        21

    accuracy                           1.00     14241
   macro avg       0.81      0.76      0.78     14241
weighted avg       1.00      1.00      1.00     14241

0.998806263605084 0.9987244886309236 0.998806263605084 0.9987604136209577


In [100]:
def initSVM(tpye,degree= 3 ):
  SVM = svm.SVC(kernel=tpye, degree = degree)
  SVM.fit(X_train,y_train)
  y_pred = SVM.predict(X_test)
  accuracy = metrics.accuracy_score(y_test, y_pred)
  precision =metrics.precision_score(y_test, y_pred,average='weighted')
  recall = metrics.recall_score(y_test, y_pred,average='weighted')
  f1 = metrics.f1_score(y_test, y_pred,average='weighted')
  return [accuracy, precision, recall, f1]

In [101]:
linear = initSVM("linear")
poly = initSVM("poly")
rbf = initSVM("rbf")
sigmoid = initSVM("sigmoid")

t = PrettyTable(["Metrics", "Linear ", "Poly", "Gaussian Kernel", "Sigmoid Kernel", "Logistic", "kNN", "Decision Tree"])
t.add_row(["Accuracy", round(linear[0], 4), round(poly[0], 4), round(rbf[0], 4),round(sigmoid[0], 4), round(accuracyLogistic, 4) , round(accuracy_kNN, 4) , round(accuracy_tree, 4) ])
t.add_row(["Precision",round(linear[1], 4) ,round(poly[1], 4) ,round(rbf[1], 4) ,round(sigmoid[1], 4), round(precisionLogistic, 4) , round(precision_kNN, 4) , round(precision_tree, 4) ])
t.add_row(["Recall",round(linear[2], 4) ,round(poly[2], 4) ,round(rbf[2], 4) ,round(sigmoid[2], 4), round(recallLogistic, 4) , round(recall_kNN, 4) , round(recall_tree, 4) ])
t.add_row(["F1",round(linear[3], 4) ,round(poly[3], 4) ,round(rbf[3], 4) ,round(sigmoid[3], 4) , round(f1Logistic, 4) , round(f1_kNN, 4) , round(f1_tree, 4)])
print(t)

+-----------+---------+--------+-----------------+----------------+----------+--------+---------------+
|  Metrics  | Linear  |  Poly  | Gaussian Kernel | Sigmoid Kernel | Logistic |  kNN   | Decision Tree |
+-----------+---------+--------+-----------------+----------------+----------+--------+---------------+
|  Accuracy |  0.9987 | 0.9985 |      0.9985     |     0.9985     |  0.9986  | 0.9985 |     0.9988    |
| Precision |  0.9983 | 0.9971 |      0.9971     |     0.9971     |  0.9985  | 0.9971 |     0.9987    |
|   Recall  |  0.9987 | 0.9985 |      0.9985     |     0.9985     |  0.9986  | 0.9985 |     0.9988    |
|     F1    |  0.9984 | 0.9978 |      0.9978     |     0.9978     |  0.9986  | 0.9978 |     0.9988    |
+-----------+---------+--------+-----------------+----------------+----------+--------+---------------+


#Finally,
Save a copy in your Github. Remember renaming the notebook.