# Support Vector Machine

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
# Load the mobile-phone dataset from "mobile.csv" (path is relative to the working directory).
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks like a row index
# written out when the CSV was saved — confirm it should not be treated as a feature.
df = pd.read_csv("mobile.csv")

In [3]:
# Preview the first rows to sanity-check the loaded columns.
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,0
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,1
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,1
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,1
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,0


In [5]:
# Goal: predict whether the phone price is more than 50 thousand (1) or less than 50 thousand (0), based on the available information,
# i.e. whether it is a premium phone (price is more than 50 thousand) or a normal phone (price is less than 50 thousand).

In [6]:
# Features: every column except the last one (the target).
# The leftover CSV row index ("Unnamed: 0") is a row counter, not a property of
# the phone, so it is dropped instead of being fed to the models.
# errors="ignore" keeps this cell working if the CSV is later re-saved without it.
x = df.iloc[:, :-1].drop(columns=["Unnamed: 0"], errors="ignore")
# Target: the last column (price_range in the preview above).
y = df.iloc[:, -1]

In [7]:
# Hold out 30% of the rows as a test set; random_state pins the shuffle so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3,random_state=1)

In [8]:
def create_model(model):
    """Train ``model`` and print how it performs on the held-out test split.

    Fitting uses the notebook-level ``x_train``/``y_train``; evaluation uses
    ``x_test``/``y_test``.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    The same ``model`` object, after fitting.
    """
    model.fit(x_train, y_train)
    # Score the test split and print the per-class precision/recall/F1 table.
    report = classification_report(y_test, model.predict(x_test))
    print(report)
    return model

## Baseline Model

In [9]:
# 1) Logistic Regression
from sklearn.linear_model import LogisticRegression

In [10]:
# Baseline: logistic regression with all hyperparameters left at their defaults.
log =  LogisticRegression()

In [11]:
# Fit the baseline and print its test-set report; the returned model is echoed as the cell output.
create_model(log)

              precision    recall  f1-score   support

           0       0.94      0.95      0.95       284
           1       0.96      0.95      0.95       316

    accuracy                           0.95       600
   macro avg       0.95      0.95      0.95       600
weighted avg       0.95      0.95      0.95       600



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

## SVM

In [12]:
# Linear Kernel

In [13]:
from sklearn.svm import LinearSVC

In [15]:
# Linear SVM with default hyperparameters; random_state is fixed so repeated runs match.
# NOTE(review): the features are not scaled and warnings are silenced in the first cell, so a
# convergence warning from this solver would go unnoticed — confirm the fit actually converges.
svc = LinearSVC(random_state = 1)

In [18]:
create_model(svc)   # default C=1.0 (see repr below): this is still a soft-margin SVM, not a hard margin

              precision    recall  f1-score   support

           0       0.54      1.00      0.70       284
           1       0.99      0.24      0.39       316

    accuracy                           0.60       600
   macro avg       0.76      0.62      0.54       600
weighted avg       0.78      0.60      0.54       600



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=1, tol=0.0001,
          verbose=0)

In [19]:
# softer margin: a smaller C tolerates more margin violations

In [20]:
# Same linear SVM with C lowered from the default 1.0 to 0.9 (slightly stronger regularization).
# Rebinds `svc`, so the previously fitted model is no longer reachable under this name.
svc = LinearSVC(random_state = 1,C=0.9)

In [21]:
# Re-run the same fit/evaluate step with the C=0.9 model.
create_model(svc) 

              precision    recall  f1-score   support

           0       0.54      1.00      0.70       284
           1       0.99      0.24      0.39       316

    accuracy                           0.60       600
   macro avg       0.76      0.62      0.54       600
weighted avg       0.78      0.60      0.54       600



LinearSVC(C=0.9, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=1, tol=0.0001,
          verbose=0)

In [23]:
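# Even after allowing some errors (a smaller C) we can't see any difference.
# This may indicate that the two classes are not linearly separable.
# However, logistic regression (also a linear model) reached 0.95 accuracy above, so the
# unscaled features or a non-converged LinearSVC fit (warnings are suppressed) are also plausible causes.
# A polynomial kernel or a radial (RBF) kernel might be required.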

## Polynomial Kernel

In [24]:
from sklearn.svm import SVC

In [26]:
# SVM with a polynomial kernel; C, degree and gamma are left at their defaults.
poly_svc = SVC(random_state = 1,kernel = "poly")

In [27]:
# Fit the polynomial-kernel model and print its test-set report.
create_model(poly_svc)

              precision    recall  f1-score   support

           0       0.99      0.97      0.98       284
           1       0.97      0.99      0.98       316

    accuracy                           0.98       600
   macro avg       0.98      0.98      0.98       600
weighted avg       0.98      0.98      0.98       600



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='poly', max_iter=-1, probability=False, random_state=1,
    shrinking=True, tol=0.001, verbose=False)

## Radial Kernel

In [30]:
# SVM with an RBF (radial basis function) kernel; C and gamma are left at their defaults.
# NOTE(review): the features are not scaled before fitting, and RBF kernels are sensitive to
# feature scale — worth re-testing with standardized features before judging this kernel.
radial_svc = SVC(random_state = 1,kernel = "rbf")

In [31]:
# Fit and evaluate the RBF model. In the recorded report every test sample is predicted as
# class 0 (recall 1.00 for class 0, 0.00 for class 1), so the model has not learned a useful boundary.
create_model(radial_svc)

              precision    recall  f1-score   support

           0       0.47      1.00      0.64       284
           1       0.00      0.00      0.00       316

    accuracy                           0.47       600
   macro avg       0.24      0.50      0.32       600
weighted avg       0.22      0.47      0.30       600



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=1,
    shrinking=True, tol=0.001, verbose=False)