<a href="https://colab.research.google.com/github/tarupathak30/machine_learning_algorithms/blob/main/Support_Vector_Machine/SupportVectorRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Support Vector Regression Implementation

In [117]:
# Fetch seaborn's "tips" example dataset as a pandas DataFrame.
import seaborn as sns

df = sns.load_dataset(name='tips')

In [118]:
# Preview the first five rows of the raw data.
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [119]:
# Summarise each column's dtype and non-null count, plus overall memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [120]:
# Frequency of each level in the `sex` column.
df.loc[:, 'sex'].value_counts()

Unnamed: 0_level_0,count
sex,Unnamed: 1_level_1
Male,157
Female,87


In [121]:
# Frequency of each level in the `smoker` column.
df.loc[:, 'smoker'].value_counts()

Unnamed: 0_level_0,count
smoker,Unnamed: 1_level_1
No,151
Yes,93


In [122]:
# Frequency of each level in the `day` column.
df.loc[:, 'day'].value_counts()

Unnamed: 0_level_0,count
day,Unnamed: 1_level_1
Sat,87
Sun,76
Thur,62
Fri,19


Feature Encoding (Label Encoding and One-Hot Encoding)

In [123]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [124]:
# Split the dependent feature (target: total_bill) from the independent features

In [125]:
# Target is the bill amount; every remaining column is a predictor.
y = df['total_bill']
x = df.drop(columns='total_bill')

In [126]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [127]:
# Three independent encoders; the following cells fit them to `sex`, `smoker`
# and `time` respectively.
# NOTE(review): scikit-learn documents LabelEncoder as a target (y) encoder;
# OrdinalEncoder is the feature-side equivalent — consider switching.
le1, le2, le3 = (LabelEncoder() for _ in range(3))

In [128]:
# Fit each encoder on the training split only and replace the categorical
# columns with their integer codes.
x_train = x_train.assign(
    sex=le1.fit_transform(x_train['sex']),
    smoker=le2.fit_transform(x_train['smoker']),
    time=le3.fit_transform(x_train['time']),
)

In [129]:
# Apply the encoders already fitted on the training split to the test split
# (transform only, no refitting).
x_test = x_test.assign(
    sex=le1.transform(x_test['sex']),
    smoker=le2.transform(x_test['smoker']),
    time=le3.transform(x_test['time']),
)

In [130]:
# One-hot encoder used for the multi-level `day` column.
# sparse_output=False returns a dense ndarray, which can be wrapped in a
# DataFrame directly.
# handle_unknown='ignore' makes transform() emit an all-zero row for a category
# that was absent when fitting, instead of raising ValueError (the default,
# handle_unknown='error'). This guards against a rare level missing from the
# training split.
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

In [131]:
# fit and transform
# Learn the `day` categories from the training rows only, then encode both
# splits with that same fitted encoder.
ohe.fit(x_train[['day']])
day_encoded_train = ohe.transform(x_train[['day']])
day_encoded_test = ohe.transform(x_test[['day']])

In [132]:
# create column names
# Column labels for the one-hot output, prefixed with the source column name.
ohe_columns = ohe.get_feature_names_out(input_features=['day'])

In [133]:
import pandas as pd

In [134]:
# convert to DataFrame
# Wrap the encoded arrays in DataFrames, reusing each split's row index so the
# rows line up with x_train / x_test when concatenated.
day_encoded_train_df = pd.DataFrame(
    data=day_encoded_train,
    index=x_train.index,
    columns=ohe_columns,
)
day_encoded_test_df = pd.DataFrame(
    data=day_encoded_test,
    index=x_test.index,
    columns=ohe_columns,
)

In [135]:
# Swap the raw `day` column for its one-hot columns in both splits.
# Note: this cell rebinds x_train / x_test, so it can only be run once per
# kernel session (a second run finds no `day` column to drop).
x_train = pd.concat(
    [x_train.drop(columns='day'), day_encoded_train_df],
    axis='columns',
)
x_test = pd.concat(
    [x_test.drop(columns='day'), day_encoded_test_df],
    axis='columns',
)

In [136]:
# Inspect the fully encoded training features.
x_train.head(n=5)

Unnamed: 0,tip,sex,smoker,time,size,day_Fri,day_Sat,day_Sun,day_Thur
228,2.72,1,0,0,2,0.0,1.0,0.0,0.0
208,2.03,1,1,0,2,0.0,1.0,0.0,0.0
96,4.0,1,1,0,2,1.0,0.0,0.0,0.0
167,4.5,1,0,0,4,0.0,0.0,1.0,0.0
84,2.03,1,0,1,2,0.0,0.0,0.0,1.0


In [137]:
# Baseline model: support vector regressor with scikit-learn's default
# hyperparameters (the RBF kernel is the default; spelled out for readability).
# NOTE(review): the features are not standardised before fitting; SVR is
# generally sensitive to feature scale — consider adding a scaler.
from sklearn.svm import SVR

svr = SVR(kernel='rbf')

In [138]:
# Fit the baseline regressor on the encoded training split.
svr.fit(X=x_train, y=y_train)

In [139]:
# Baseline predictions for the held-out rows.
y_pred = svr.predict(X=x_test)

In [140]:
# Score the baseline model on the test split.
from sklearn.metrics import mean_absolute_error, r2_score

baseline_r2 = r2_score(y_test, y_pred)
baseline_mae = mean_absolute_error(y_test, y_pred)
print("R2 Score : ", baseline_r2)
print("Mean Absolute Error : ", baseline_mae)

R2 Score :  0.5513989822828735
Mean Absolute Error :  4.400967658119678


Hyperparameter Tuning with GridSearchCV

In [141]:
from sklearn.model_selection import GridSearchCV

In [142]:
# Search space for SVR, split per kernel so that only hyperparameters a kernel
# actually uses are varied:
#   * `degree` is used by the 'poly' kernel only,
#   * `gamma` is used by 'rbf', 'poly' and 'sigmoid' but not by 'linear'.
# A single crossed dict would evaluate 576 candidates, many of them duplicate
# models that differ only in an ignored parameter. The per-kernel sub-grids
# below cover the same distinct models with 264 candidates. GridSearchCV
# accepts a list of dicts and explores the union of the grids.
_C_VALUES = [0.01, 0.1, 1, 10, 100, 1000]
_EPSILON_VALUES = [0.01, 0.1, 1, 10]
_GAMMA_VALUES = ['scale', 'auto']

param_grid = [
    # linear: neither gamma nor degree applies
    {
        'kernel': ['linear'],
        'C': _C_VALUES,
        'epsilon': _EPSILON_VALUES,
    },
    # rbf / sigmoid: gamma applies, degree does not
    {
        'kernel': ['sigmoid', 'rbf'],
        'gamma': _GAMMA_VALUES,
        'C': _C_VALUES,
        'epsilon': _EPSILON_VALUES,
    },
    # poly: both gamma and degree apply
    {
        'kernel': ['poly'],
        'degree': [2, 3, 4],
        'gamma': _GAMMA_VALUES,
        'C': _C_VALUES,
        'epsilon': _EPSILON_VALUES,
    },
]

In [143]:
# Exhaustive search over param_grid with 5-fold cross-validation on the
# training split, starting from the baseline SVR estimator.
gridCv = GridSearchCV(
    estimator=svr,
    param_grid=param_grid,
    cv=5,
)

In [145]:
# Run the grid search; every candidate is cross-validated on the training data.
gridCv.fit(X=x_train, y=y_train)

In [146]:
# Hyperparameter combination with the best mean cross-validation score.
gridCv.best_params_

{'C': 100, 'degree': 2, 'epsilon': 1, 'gamma': 'auto', 'kernel': 'poly'}

In [147]:
# Predict the held-out rows through the fitted grid-search object.
grid_predict = gridCv.predict(X=x_test)

In [148]:
# Score the tuned model on the same test split used for the baseline.
from sklearn.metrics import mean_absolute_error, r2_score

tuned_r2 = r2_score(y_test, grid_predict)
tuned_mae = mean_absolute_error(y_test, grid_predict)
print("R2 Score : ", tuned_r2)
print("Mean Absolute Error : ", tuned_mae)

R2 Score :  0.5639463100534114
Mean Absolute Error :  4.4184226288583694
