# Support Vector Machine

* SVR - Support Vector Regressor
* SVC - Support Vector Classifier

**Compare two model**

In [26]:
# Load Libraries
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, SVR
from sklearn.metrics import accuracy_score,precision_score,f1_score,recall_score,roc_auc_score,confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

### Let's compare two models, and check which one is the best

In [3]:
# Load dataset
df = sns.load_dataset("titanic")
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


## Label Encoding

In [6]:
le = LabelEncoder()
df['sex'] = le.fit_transform(df['sex'])
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,1,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,0,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,0,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,0,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,1,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    int32   
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int32(1), int64(4), object(4)
memory usage: 77.2+ KB


In [8]:
# impute null values in age
df['age'] = df['age'].fillna(df['age'].mean())

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    int32   
 3   age          891 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int32(1), int64(4), object(4)
memory usage: 77.2+ KB


## Spliting Data into Training and Testing

In [10]:
# spliting data into train and test
X = df[['age','sex']]
y = df['survived']

In [12]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2 , random_state=42)

## Logistic Regression

In [19]:
lr = LogisticRegression().fit(X_train,y_train)
y_predict = lr.predict(X_test)

## Evaluation Metrix

In [33]:
accuracy = accuracy_score(y_test,y_predict)
precision = precision_score(y_test,y_predict)
recall = recall_score(y_test,y_predict)
f1 = f1_score(y_test,y_predict)
roc_auc = roc_auc_score(y_test,y_predict)
cm = confusion_matrix(y_test,y_predict)

print("Evaluation Matrix:")
print("__________________")
print("Accuracy: ", accuracy)
print(f"Precision: {precision:.2f}")
print("Recall:", recall)
print(f"F1 Score: {f1:.2f}")
print(f"ROC AUC: {roc_auc:.2f}")
print("Confusion Metrix:\n", cm)

Evaluation Matrix:
__________________
Accuracy:  0.6089385474860335
Precision: 0.83
Recall: 0.06756756756756757
F1 Score: 0.12
ROC AUC: 0.53
Confusion Metrix:
 [[104   1]
 [ 69   5]]


## SVM

In [21]:
svc = SVC().fit(X_train,y_train)
y_predict = svc.predict(X_test)

In [32]:
accuracy = accuracy_score(y_test,y_predict)
precision = precision_score(y_test,y_predict)
recall = recall_score(y_test,y_predict)
f1 = f1_score(y_test,y_predict)
roc_auc = roc_auc_score(y_test,y_predict)
cm = confusion_matrix(y_test,y_predict)

print("Evaluation Matrix:")
print("__________________")
print("Accuracy: ", accuracy)
print(f"Precision: {precision:.2f}")
print("Recall:", recall)
print(f"F1 Score: {f1:.2f}")
print(f"ROC AUC: {roc_auc:.2f}")
print("Confusion Metrix:\n", cm)

Evaluation Matrix:
__________________
Accuracy:  0.6089385474860335
Precision: 0.83
Recall: 0.06756756756756757
F1 Score: 0.12
ROC AUC: 0.53
Confusion Metrix:
 [[104   1]
 [ 69   5]]


**Let's compare**

  **Evaluation Matrix: (Logistic Regression)**
  __________________
- Accuracy:  0.7821229050279329
- Precision: 0.75
- Recall: 0.7027027027027027
- F1 Score: 0.73
- ROC AUC: 0.77
- Confusion Metrix:
    * [[104   1]
    * [ 69   5]]


**Evaluation Matrix: (SVC)**
  __________________
- Accuracy:  0.6089385474860335
- Precision: 0.83
- Recall: 0.06756756756756757
- F1 Score: 0.12
- ROC AUC: 0.53
- Confusion Metrix:
    * [[104   1]
    * [ 69   5]]
  

**Comparision shows Logistic Regression is best, because of High Accuracy.**