# Support Vector Machine (SVM)
First, we import the required packages and download the dataset that we will use.

In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import plotly.graph_objs as go
import urllib.request

# Fetch the workshop dataset and store it next to the notebook.
url = (
    'https://raw.githubusercontent.com/mohammadrashedi/ML_workshop/'
    'master/data_svms_and_kernels.csv'
)

print('Beginning file download with url...')
urllib.request.urlretrieve(url, filename='data_svms_and_kernels.csv')
print('File download done!')

We load the data from the CSV file and use the pandas library to display its first few rows.

In [None]:
# Read the downloaded CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='data_svms_and_kernels.csv')
df.head(n=5)

We convert the data into NumPy arrays and use `LabelEncoder` to map the labels to $\{-1,+1\}$.

In [None]:
# Feature matrix: every column except the class label.
X = df.drop(columns='Label').to_numpy()

# Raw class labels exactly as stored in the CSV.
y_text = df['Label'].to_numpy()

# Map each integer code c from LabelEncoder to 2*c - 1
# (0/1 becomes -1/+1 when the column holds two classes).
y = 2 * LabelEncoder().fit_transform(y_text) - 1

Now we plot the data to see what the classes look like.

In [None]:
# Two-stop colour scale for the sample markers: the smallest value passed as
# `color` maps to the first stop (red) and the largest to the last stop (blue).
points_colorscale = [
    [0.0, 'rgb(239, 85, 59)'],
    [1.0, 'rgb(99, 110, 250)'],
]

# One marker per sample, coloured by its encoded class label `y`.
points = go.Scatter(
    x=df['Feature 1'],
    y=df['Feature 2'],
    mode='markers',
    marker=dict(color=y, colorscale=points_colorscale),
)

# Layout shared with the later figures: fixed axis ranges plus axis titles,
# so each plot can be read on its own.
layout = go.Layout(
    xaxis=dict(range=[-1.05, 1.05], title=dict(text='Feature 1')),
    yaxis=dict(range=[-1.05, 1.05], title=dict(text='Feature 2')),
)

fig = go.Figure(data=[points], layout=layout)
fig.show()

Let's prepare the data by splitting it into training, validation, and test sets: 60% of the data will be used for training, 20% for validation, and 20% for testing.

In [None]:
# First keep 60% for training; the remaining 40% is then halved into
# validation and test sets (20% of the full data each).
X_train, X_vt, y_train, y_vt = train_test_split(
    X, y, test_size=0.4, random_state=0
)
X_validation, X_test, y_validation, y_test = train_test_split(
    X_vt, y_vt, test_size=0.5, random_state=0
)

## Building SVM with no kernel
As a first step, we build an SVM classifier with no kernel (a linear SVM). To do so, we set the `kernel` argument of `SVC` to `'linear'`.

In [None]:
# Linear-kernel SVC fitted on the training split; the fitted estimator is
# returned by `fit`, so it is also the cell's displayed output.
svm = SVC(kernel='linear')
svm.fit(X=X_train, y=y_train)

Let's visualize the decision boundary together with the margins and the support vectors.

In [None]:
# Colour scale for the contour lines: red at the low end, black in the middle,
# blue at the high end.
decision_colorscale = [
    [0.0, 'rgb(239,  85,  59)'],
    [0.5, 'rgb(  0,   0,   0)'],
    [1.0, 'rgb( 99, 110, 250)']
]

# Number of grid points per axis at which the decision function is sampled.
detail_steps = 100

# Fixed plotting window (same bounds as the axis ranges in `layout`).
(x_vis_0_min, x_vis_1_min) = (-1.05, -1.05)
(x_vis_0_max, x_vis_1_max) = (1.05, 1.05)

x_vis_0_range = np.linspace(x_vis_0_min, x_vis_0_max, detail_steps)
x_vis_1_range = np.linspace(x_vis_1_min, x_vis_1_max, detail_steps)

# Build the grid from BOTH axis ranges. The previous code passed
# `x_vis_0_range` twice, which only worked because the two ranges happen to be
# identical; with different bounds per axis the surface would be misplaced.
(XX_vis_0, XX_vis_1) = np.meshgrid(x_vis_0_range, x_vis_1_range)

# Flatten the grid into an (n_points, 2) array of (feature 1, feature 2) pairs.
X_vis = np.c_[XX_vis_0.reshape(-1), XX_vis_1.reshape(-1)]

# Evaluate the decision function on the grid and restore the grid shape so
# that rows follow the y axis and columns the x axis, as `go.Contour` expects.
YY_vis = svm.decision_function(X_vis).reshape(XX_vis_0.shape)

# All samples, coloured by their encoded class label.
points = go.Scatter(
    x=df['Feature 1'],
    y=df['Feature 2'],
    mode='markers',
    marker=dict(
        color=y,
        colorscale=points_colorscale),
    showlegend=False
)

# Support vectors, highlighted with large translucent black markers.
SVs = svm.support_vectors_
support_vectors = go.Scatter(
    x=SVs[:, 0],
    y=SVs[:, 1],
    mode='markers',
    marker=dict(
        size=15,
        color='black',
        opacity=0.1,
        colorscale=points_colorscale),
    line=dict(dash='solid'),
    showlegend=False
)

# Decision boundary: the single contour level 0 of the decision function.
decision_surface = go.Contour(
    x=x_vis_0_range,
    y=x_vis_1_range,
    z=YY_vis,
    contours_coloring='lines',
    line_width=2,
    contours=dict(
        start=0,
        end=0,
        size=1),
    colorscale=decision_colorscale,
    showscale=False
)

# Margins: the contour levels -1 and +1 (start=-1, end=1, step 2), dashed.
margins = go.Contour(
    x=x_vis_0_range,
    y=x_vis_1_range,
    z=YY_vis,
    contours_coloring='lines',
    line_width=2,
    contours=dict(
        start=-1,
        end=1,
        size=2),
    line=dict(dash='dash'),
    colorscale=decision_colorscale,
    showscale=False
)

fig2 = go.Figure(data=[margins, decision_surface, support_vectors, points], layout=layout)
fig2.show()

## SVM with kernels
Now, we use different kernels and see how they affect the results. Let's use a polynomial kernel. Define `svm_p3` to be an instance of class `SVC` but this time with arguments `kernel='poly'` and `degree=3` to define a degree-3 polynomial kernel:

In [None]:
# SVC with a degree-3 polynomial kernel, fitted on the training split.
svm_p3 = SVC(degree=3, kernel='poly')
svm_p3.fit(X=X_train, y=y_train)

Let's visualize the results.

In [None]:
# Sample the polynomial-kernel model's decision function on the existing
# visualisation grid and restore the grid shape.
YY_vis_p3 = svm_p3.decision_function(X_vis)
YY_vis_p3 = YY_vis_p3.reshape(XX_vis_0.shape)

# Support vectors of the polynomial model as large translucent black markers.
SVs_p3 = svm_p3.support_vectors_
support_vectors_p3 = go.Scatter(
    x=SVs_p3[:, 0],
    y=SVs_p3[:, 1],
    mode='markers',
    marker={
        'size': 15,
        'color': 'black',
        'opacity': 0.1,
        'colorscale': points_colorscale,
    },
    line={'dash': 'solid'},
    showlegend=False,
)

# Decision boundary: only the contour level 0 is drawn.
decision_surface_p3 = go.Contour(
    x=x_vis_0_range,
    y=x_vis_1_range,
    z=YY_vis_p3,
    colorscale=decision_colorscale,
    showscale=False,
    line={'width': 2},
    contours={'coloring': 'lines', 'start': 0, 'end': 0, 'size': 1},
)

# Margins: contour levels -1 and +1, drawn with dashed lines.
margins_p3 = go.Contour(
    x=x_vis_0_range,
    y=x_vis_1_range,
    z=YY_vis_p3,
    colorscale=decision_colorscale,
    showscale=False,
    line={'width': 2, 'dash': 'dash'},
    contours={'coloring': 'lines', 'start': -1, 'end': 1, 'size': 2},
)

fig4 = go.Figure(
    data=[margins_p3, decision_surface_p3, support_vectors_p3, points],
    layout=layout,
)
fig4.show()

Finally, we try the RBF (Radial Basis Function) kernel, which is the default kernel of `SVC`.

### Exercise
As an exercise, try to fit an SVM classifier using the `rbf` kernel.

In [None]:
# Exercise: build an SVC that uses the RBF kernel and fit it on the training
# split. A reference solution is filled in so the notebook runs from top to
# bottom; the bare placeholder (`svm_r=` followed only by a comment) is a
# SyntaxError and leaves `svm_r` undefined for every later cell.
svm_r = SVC(kernel='rbf')
svm_r.fit(X_train, y_train)

In [None]:
# Sample the RBF-kernel model's decision function on the existing
# visualisation grid and restore the grid shape.
YY_vis_r = svm_r.decision_function(X_vis)
YY_vis_r = YY_vis_r.reshape(XX_vis_0.shape)

# Support vectors of the RBF model as large translucent black markers.
SVs_r = svm_r.support_vectors_
support_vectors_r = go.Scatter(
    x=SVs_r[:, 0],
    y=SVs_r[:, 1],
    mode='markers',
    marker={
        'size': 15,
        'color': 'black',
        'opacity': 0.1,
        'colorscale': points_colorscale,
    },
    line={'dash': 'solid'},
    showlegend=False,
)

# Decision boundary: only the contour level 0 is drawn.
decision_surface_r = go.Contour(
    x=x_vis_0_range,
    y=x_vis_1_range,
    z=YY_vis_r,
    colorscale=decision_colorscale,
    showscale=False,
    line={'width': 2},
    contours={'coloring': 'lines', 'start': 0, 'end': 0, 'size': 1},
)

# Margins: contour levels -1 and +1, drawn with dashed lines.
margins_r = go.Contour(
    x=x_vis_0_range,
    y=x_vis_1_range,
    z=YY_vis_r,
    colorscale=decision_colorscale,
    showscale=False,
    line={'width': 2, 'dash': 'dash'},
    contours={'coloring': 'lines', 'start': -1, 'end': 1, 'size': 2},
)

fig5 = go.Figure(
    data=[margins_r, decision_surface_r, support_vectors_r, points],
    layout=layout,
)
fig5.show()

## Model selection
In order to pick the best model, we compare the accuracy of the developed models. We start with the linear SVM.

In [None]:
# Score the linear SVM on the training and validation splits, then report both.
linear_train_accuracy = svm.score(X_train, y_train)
linear_validation_accuracy = svm.score(X_validation, y_validation)

print('The accuracy of the linear SVM for the training data is: {}'.format(linear_train_accuracy))
print('The accuracy of the linear SVM for the validation data is: {}'.format(linear_validation_accuracy))

Next, we show the accuracy of the SVM with 3rd order polynomial kernel.

In [None]:
# Score the degree-3 polynomial SVM on the training and validation splits.
p3_train_accuracy = svm_p3.score(X_train, y_train)
p3_validation_accuracy = svm_p3.score(X_validation, y_validation)

print('The accuracy of the SVM with 3rd order kernel for the training data is: {}'.format(p3_train_accuracy))
print('The accuracy of the SVM with 3rd order kernel for the validation data is: {}'.format(p3_validation_accuracy))

And finally, we show the accuracy of the SVM with RBF kernel.

In [None]:
# Score the RBF-kernel SVM on the training and validation splits.
rbf_train_accuracy = svm_r.score(X_train, y_train)
rbf_validation_accuracy = svm_r.score(X_validation, y_validation)

print('The accuracy of the SVM with RBF kernel for the training data is: {}'.format(rbf_train_accuracy))
print('The accuracy of the SVM with RBF kernel for the validation data is: {}'.format(rbf_validation_accuracy))

## Final assessment
In the last step, we evaluate the selected model (the RBF-kernel SVM in the cell below) on the held-out test data.

In [None]:
# Final check: score the RBF-kernel SVM on the held-out test split.
rbf_test_accuracy = svm_r.score(X_test, y_test)
print('The accuracy of the SVM with RBF kernel for the test data is: {}'.format(rbf_test_accuracy))