# Support Vector Machine

#### Implementation of SVM from scratch

##### References
* https://machinelearningmastery.com/machine-learning-in-python-step-by-step/
* http://machinelearningmastery.com/support-vector-machines-for-machine-learning/
*  http://scikit-learn.org/stable/modules/svm.html
* https://towardsdatascience.com/support-vector-machine-introduction-to-machine-learning-algorithms-934a444fca47

In [99]:
#!pip install seaborn
#!pip install matplotlib
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import random
from sklearn.svm import SVC


def load_data(path):
  """
  Function to read csv data from path/url
  """
  names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
  dataset = pd.read_csv(path, names= names)
  
  # Select only two classes for binary classification
  dataset1 = dataset[dataset["class"] == "Iris-setosa"]
  dataset2 = dataset[dataset["class"] == "Iris-versicolor"]
  dataset = pd.concat([dataset1,dataset2])  
  dataset["class"]= LabelEncoder().fit_transform(dataset["class"])
  return dataset

def preprocess(dataset):
  """
  Function to scale data between 0 & 1
  """  
  dataset.iloc[:,:-1] = MinMaxScaler().fit_transform(dataset.iloc[:,:-1])
  return dataset

def analyse(dataset):
  """
  Function to data distributions and plot the graphs
  """
  print(dataset.describe())
  plot_var = dataset
  sns.pairplot(plot_var,hue="class",markers="*")
  plt.show()
  
def predict(weights, feature):
    """
    """
    y_predict = []
    for i in range(len(feature)):
      y_predict.append(np.dot(weights.T,feature.iloc[i,:]))
    return y_predict
  
def train(dataset):
  """
  """
  weights = np.array([random.randint(1,2)]*(len(dataset.columns)-1),dtype=float)
  feature = dataset.iloc[:,:-1].copy()
  print(weights)
  y = dataset.iloc[:,-1]
  epochs = 1
  lr = 0.0001
  
  while epochs< 100:
    
    alpha = 1/epochs
    predict_y = predict(weights, feature)
    #print("predicted", predict_y)
    product = np.dot(y,predict_y)
    #print("product", product)
    for i in range(len(weights)):
      if(product>=1):
        cost = 0
        wts=   2 * alpha * weights[i]
        weights[i]-=wts*lr

      else:
        cost = 1-product
        wts = ( 2 * alpha * weights[i])- product 
        weights[i]-=wts*lr
      epochs+=1
  print("weights", weights)
  model = SVC(kernel="linear")
  model.fit(feature,y)
  print(model.coef_)
     

if __name__=="__main__":
  dataset = load_data("https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv")
  dataset = preprocess(dataset)
  #analyse(dataset)
  train_input,train_output,test_input,test_output = train_test_split(dataset.iloc[:,:-1],dataset.iloc[:,-1], test_size =0.1)
  train(dataset)
  
  

[1. 1. 1. 1.]
weights [0.99962823 0.99962823 0.99962823 0.99962823]
[[ 0.36369934 -1.12477515  1.82897479  1.51865788]]
