## SVR (Support Vector Regression)

In [1]:
import seaborn as sns
import pandas as pd 

In [136]:
# Load seaborn's "tips" example dataset into a DataFrame and preview the first five rows.
df = sns.load_dataset(name='tips')
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [137]:
# List the column labels of the loaded frame to choose features and target from.
df.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [138]:
# Target (dependent variable) is the bill amount; every remaining column is a
# predictor (independent variable).
y = df['total_bill']
x = df.drop(columns='total_bill')

In [139]:
# Split into training and test sets: 25% of the rows are held out for evaluation,
# and the fixed random_state keeps the split reproducible across runs.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [140]:
# Preview the training features as returned by the split.
x_train.head()

Unnamed: 0,tip,sex,smoker,day,time,size
115,3.5,Female,No,Sun,Dinner,2
181,5.65,Male,Yes,Sun,Dinner,2
225,2.5,Female,Yes,Fri,Lunch,2
68,2.01,Male,No,Sat,Dinner,2
104,4.08,Female,No,Sat,Dinner,2


In [141]:
# Build the encoders used on the categorical feature columns:
# one independent LabelEncoder each for 'sex', 'smoker' and 'time'.
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
sex_ecoder, smoker_ecoder, time_encoder = (LabelEncoder() for _ in range(3))
# NOTE(review): day_encoder is not used by any cell visible here; 'day' is one-hot
# encoded through a ColumnTransformer instead — confirm before removing.
day_encoder = OneHotEncoder()

In [142]:
# Fit each label encoder on its training column and swap the string categories for
# integer codes. `assign` builds a new frame (column order is preserved) instead of
# writing into the split result column by column, so no chained-assignment warning
# can be triggered and the encoders are only ever fitted on training data.
x_train = x_train.assign(
    sex=sex_ecoder.fit_transform(x_train['sex']),
    smoker=smoker_ecoder.fit_transform(x_train['smoker']),
    time=time_encoder.fit_transform(x_train['time']),
)


In [143]:
# Check the training features after 'sex', 'smoker' and 'time' were label-encoded.
x_train.head()

Unnamed: 0,tip,sex,smoker,day,time,size
115,3.5,0,0,Sun,0,2
181,5.65,1,1,Sun,0,2
225,2.5,0,1,Fri,1,2
68,2.01,1,0,Sat,0,2
104,4.08,0,0,Sat,0,2


In [144]:
# Encode the test features with the encoders already fitted on the training data
# (transform only — never refit on the test set, to avoid leakage).
# `assign` returns a new frame with the same column order rather than mutating the
# split result in place.
# NOTE(review): LabelEncoder.transform raises on labels it did not see during fit.
x_test = x_test.assign(
    sex=sex_ecoder.transform(x_test['sex']),
    smoker=smoker_ecoder.transform(x_test['smoker']),
    time=time_encoder.transform(x_test['time']),
)


In [145]:
# Check the test features after 'sex', 'smoker' and 'time' were label-encoded.
x_test.head()

Unnamed: 0,tip,sex,smoker,day,time,size
24,3.18,1,0,Sat,0,2
6,2.0,1,0,Sun,0,2
153,2.0,1,0,Sun,0,4
211,5.16,1,1,Sat,0,4
198,2.0,0,1,Thur,1,2


In [146]:
# One-hot encode the 'day' column and pass every other column through unchanged.
# The column is selected by name rather than by position: the transformer keeps
# targeting 'day' even if the feature order changes, and it raises (instead of
# silently encoding whatever sits at index 3) if it is handed data without column names.
# drop='first' omits one dummy level per category to avoid a redundant column.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(drop='first'), ['day'])],
    remainder='passthrough',
)


In [147]:
# Fit the ColumnTransformer on the training features and replace x_train with its output.
# NOTE(review): x_train is rebound here, so re-running this cell alone would feed the
# already-transformed data back into ct; restart and run all cells instead.
x_train = ct.fit_transform(x_train)

In [148]:
# Apply the transformer fitted on the training features to the test features
# (transform only, no refit).
x_test = ct.transform(x_test)

In [150]:
# Wrap the transformed training features in a DataFrame for display; no column names
# are passed, so the columns get default integer labels.
pd.DataFrame(x_train)

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.0,1.0,0.0,3.50,0.0,0.0,0.0,2.0
1,0.0,1.0,0.0,5.65,1.0,1.0,0.0,2.0
2,0.0,0.0,0.0,2.50,0.0,1.0,1.0,2.0
3,1.0,0.0,0.0,2.01,1.0,0.0,0.0,2.0
4,1.0,0.0,0.0,4.08,0.0,0.0,0.0,2.0
...,...,...,...,...,...,...,...,...
178,1.0,0.0,0.0,4.06,1.0,1.0,0.0,2.0
179,0.0,1.0,0.0,3.02,0.0,0.0,0.0,2.0
180,0.0,0.0,0.0,1.00,0.0,1.0,0.0,2.0
181,0.0,1.0,0.0,3.55,1.0,1.0,0.0,2.0


In [151]:
# Linear-kernel SVR: fit on the training split, predict the held-out split,
# then report R^2 and mean absolute error.
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_absolute_error
svr = SVR(kernel='linear').fit(x_train, y_train)
y_pred = svr.predict(x_test)
score, MAE = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)
print(score, MAE, sep='\n')  # R^2 on the first line, MAE on the second

0.6036949191843275
4.114837460698695


In [None]:
# Sigmoid-kernel SVR: fit on the training split, predict the held-out split,
# then report R^2 and mean absolute error.
# NOTE(review): the features are not standardized before fitting; kernel SVR is
# scale-sensitive, so consider adding a scaler before comparing kernels.
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_absolute_error
svr = SVR(kernel='sigmoid').fit(x_train, y_train)
y_pred = svr.predict(x_test)
score, MAE = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)
print(score, MAE, sep='\n')  # R^2 on the first line, MAE on the second

-0.6630925094910909
8.235972415308003


In [153]:
# Polynomial-kernel SVR: fit on the training split, predict the held-out split,
# then report R^2 and mean absolute error.
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_absolute_error
svr = SVR(kernel='poly').fit(x_train, y_train)
y_pred = svr.predict(x_test)
score, MAE = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)
print(score, MAE, sep='\n')  # R^2 on the first line, MAE on the second

0.5522389527484615
4.6048969245311016


In [154]:
# RBF-kernel SVR: fit on the training split, predict the held-out split,
# then report R^2 and mean absolute error.
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_absolute_error
svr = SVR(kernel='rbf').fit(x_train, y_train)
y_pred = svr.predict(x_test)
score, MAE = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)
print(score, MAE, sep='\n')  # R^2 on the first line, MAE on the second

0.49798620106004743
4.463296539661221
