# Import modules and dataset.

In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, ShuffleSplit
from sklearn.linear_model import LinearRegression, Lasso, ElasticNet, Ridge
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

import warnings
warnings.filterwarnings(action = 'ignore')

In [2]:
# Read the cleaned dataset from disk into a DataFrame (path is relative to the notebook).
df = pd.read_csv(filepath_or_buffer = '../Data/Cleaned.csv')

In [3]:
# Show the first five rows as a quick sanity check of the loaded columns.
df.head(n = 5)

Unnamed: 0.1,Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,0,16.99,1.01,Female,No,Sun,Dinner,2.0
1,1,10.34,1.66,Male,No,Sun,Dinner,3.0
2,2,21.01,3.5,Male,No,Sun,Dinner,3.0
3,3,23.68,3.31,Male,No,Sun,Dinner,2.0
4,4,24.59,3.61,Female,No,Sun,Dinner,4.0


# Split dataset.

In [16]:
# Target: the tip column, kept as a one-column DataFrame.
y = df.loc[:, ['tip']]
# Features: every remaining column except 'Unnamed: 0'
# (presumably a row index left over from an earlier CSV export — verify).
x = df.drop(['tip', 'Unnamed: 0'], axis = 1)

In [17]:
# One-hot encode the categorical feature columns; numeric columns pass through unchanged.
x = pd.get_dummies(data = x)

In [18]:
# Report the encoded feature matrix's (rows, columns), then preview its first rows.
print(f'{x.shape}')
x.head(n = 5)

(243, 12)


Unnamed: 0,total_bill,size,sex_Female,sex_Male,smoker_No,smoker_Yes,day_Fri,day_Sat,day_Sun,day_Thur,time_Dinner,time_Lunch
0,16.99,2.0,1,0,1,0,0,0,1,0,1,0
1,10.34,3.0,0,1,1,0,0,0,1,0,1,0
2,21.01,3.0,0,1,1,0,0,0,1,0,1,0
3,23.68,2.0,0,1,1,0,0,0,1,0,1,0
4,24.59,4.0,1,0,1,0,0,0,1,0,1,0


In [19]:
# Report the target frame's (rows, columns), then preview its first rows.
print(f'{y.shape}')
y.head(n = 5)

(243, 1)


Unnamed: 0,tip
0,1.01
1,1.66
2,3.5
3,3.31
4,3.61


In [20]:
# Hold out 25% of the rows as a test set; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size = 0.25,
    random_state = 5,
)
# Display the shapes of the two feature partitions.
tuple(part.shape for part in (x_train, x_test))

((182, 12), (61, 12))

# Scaling the dataset

In [31]:
# Learn the scaling statistics from the training features only, so that no
# information from the test split enters the scaler.
sc = StandardScaler()
sc.fit(X = x_train)

In [33]:
# Replace the raw feature frames with their standardized versions, using the
# scaler's already-fitted statistics for both splits. Passing the original index
# keeps each scaled row labelled like its row in y_train / y_test; without it the
# frames get a fresh 0..n-1 index and no longer align with the targets by label.
# NOTE: this overwrites x_train / x_test, so running the cell a second time would
# scale already-scaled data — restart the kernel and run all cells instead.
x_train = pd.DataFrame(data = sc.transform(x_train), columns = x_train.columns, index = x_train.index)
x_test = pd.DataFrame(data = sc.transform(x_test), columns = x_test.columns, index = x_test.index)

# Model building

In [35]:
def eveluating_model(y_true, y_pred):
    """Score predictions against the true targets.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, in the same order as ``y_true``.

    Returns
    -------
    tuple
        ``(r2, mae, mse)``: the R2 score, mean absolute error and mean
        squared error, in that order.
    """
    # Apply each metric in the order callers unpack the result.
    scorers = (r2_score, mean_absolute_error, mean_squared_error)
    return tuple(scorer(y_true, y_pred) for scorer in scorers)

In [43]:
# Candidate regressors, each constructed with its default hyperparameters.
# NOTE(review): the recorded output shows Lasso with an R2 of about 0, which
# suggests its default regularization strength is too strong for this target;
# consider tuning alpha (GridSearchCV is already imported) — confirm.
models = {
    'LinearRegression' : LinearRegression(),
    'Lasso' : Lasso(),
    'Ridge' : Ridge(),
    'ElasticNet' : ElasticNet()
}

model_list = []  # model names, in evaluation order
r2_list = []     # test-set R2 (as a fraction, not a percentage), parallel to model_list

# Iterate over (name, estimator) pairs directly rather than rebuilding
# list(models.keys()) / list(models.values()) and indexing them on every pass.
for name, model in models.items():
    model.fit(x_train, y_train)

    y_pred = model.predict(x_test)

    # mse is computed by the helper but is not part of the printed report.
    r2, mae, mse = eveluating_model(y_test, y_pred)

    print(name)
    model_list.append(name)

    print("Model testing performance ")
    print("MAE = ", mae)
    print("R2 score = ", r2 * 100)  # shown as a percentage

    r2_list.append(r2)

    print("=" * 50)
    print("\n")

LinearRegression
Model testing performance 
MAE =  0.7918108154501078
R2 score =  21.551995104055266


Lasso
Model testing performance 
MAE =  0.924087101423167
R2 score =  -0.01114979814953454


Ridge
Model testing performance 
MAE =  0.791614161875823
R2 score =  21.586555088128247


ElasticNet
Model testing performance 
MAE =  0.8496231319651438
R2 score =  18.67251535956114


