# Evaluation Techniques for Regression Models

### Load the Dataset

In [3]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [8]:
# Load seaborn's bundled "tips" example dataset as a DataFrame.
# NOTE(review): seaborn fetches example datasets from its online repository
# on first use, so a fresh environment likely needs network access.
tips_df = sns.load_dataset('tips')
# Preview the first five rows (5 is also the default for head()).
tips_df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


### Prepare the Data

#### Define the feature and target variable

In [9]:
# Feature matrix: selecting with a list keeps X two-dimensional (a DataFrame),
# which is the shape scikit-learn estimators expect for their inputs.
X = tips_df.loc[:, ['total_bill']]
# Target vector: a single column label yields a one-dimensional Series.
y = tips_df.loc[:, 'tip']

In [10]:
# Display the feature frame; a bare last expression is rendered as the cell output.
X

Unnamed: 0,total_bill
0,16.99
1,10.34
2,21.01
3,23.68
4,24.59
...,...
239,29.03
240,27.18
241,22.67
242,17.82


In [11]:
# Display the target Series (the tip amounts) as the cell output.
y

0      1.01
1      1.66
2      3.50
3      3.31
4      3.61
       ... 
239    5.92
240    2.00
241    2.00
242    1.75
243    3.00
Name: tip, Length: 244, dtype: float64

#### Split the data into training and testing sets

In [12]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore every metric computed below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Display the training features; the index shows which original rows were sampled.
X_train

Unnamed: 0,total_bill
228,13.28
208,24.27
96,27.28
167,31.71
84,15.98
...,...
106,20.49
14,14.83
92,5.75
179,34.63


In [16]:
# Display the held-out features that the model is evaluated on.
X_test

Unnamed: 0,total_bill
24,19.82
6,8.77
153,24.55
211,25.89
198,13.0
176,17.89
192,28.44
124,12.48
9,14.78
101,15.38


### Train a Linear Regression Model

#### Create and train the model

In [15]:
# Ordinary least-squares regression of tip on total_bill.
model = LinearRegression()
# Fit on the training split only, so the test split stays unseen until evaluation.
model.fit(X=X_train, y=y_train)

#### Make predictions on the test set

In [17]:
# Predicted tips for the held-out rows; every metric below compares these to y_test.
y_pred = model.predict(X=X_test)

In [18]:
# Display the predictions as the cell output.
y_pred

array([3.04525623, 1.86330727, 3.55119456, 3.69452593, 2.31576375,
       2.83881627, 3.96728338, 2.26014262, 2.50615915, 2.57033737,
       2.88160176, 2.07723468, 2.06439904, 2.47407003, 2.00236009,
       2.91903905, 2.92652651, 3.23351235, 2.68478854, 5.33107064,
       3.13831465, 3.13403611, 2.4558862 , 1.94673896, 3.16077703,
       2.17564129, 2.02375283, 3.62927807, 2.68906708, 6.07767732,
       4.99734388, 1.75313465, 2.83025918, 3.09552917, 2.74040966,
       3.50092162, 2.21200895, 5.53644096, 2.33287794, 3.35010279,
       2.04942412, 2.47834858, 3.48701634, 2.03017065, 2.03124029,
       1.25361414, 2.05798121, 2.92438724, 1.73388118])

### Calculate Evaluation Metrics

#### Mean Absolute Error (MAE):

In [19]:
from sklearn.metrics import mean_absolute_error

In [20]:
# MAE: average magnitude of the errors, in the same units as the target.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mae

0.6208580000398983

#### Mean Squared Error (MSE):

In [21]:
from sklearn.metrics import mean_squared_error

In [22]:
# MSE: average squared error; squaring penalises large misses more heavily
# and leaves the result in squared target units.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

0.5688142529229538

#### Root Mean Squared Error (RMSE):

In [23]:
# RMSE: square root of the MSE, which brings the error back to target units.
# The root is taken explicitly rather than via `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, where passing it
# raises a TypeError. This form works on every scikit-learn version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
rmse

0.7541977545199626

#### R-squared (R²):

In [24]:
# R²: for regressors, score() returns the coefficient of determination,
# evaluated here on the held-out split.
r_squared = model.score(X=X_test, y=y_test)
r_squared

0.5449381659234664

#### Adjusted R-squared:

In [26]:
# n = number of evaluated observations, p = number of predictors in the model.
n, p = len(y_test), X_train.shape[1]

# Adjusted R² = 1 - (1 - R²) * (n - 1) / (n - p - 1): discounts R² for the
# number of predictors relative to the sample size.
adjusted_r_squared = 1 - ((1 - r_squared) * (n - 1)) / (n - p - 1)
adjusted_r_squared

0.535255999240987

#### Mean Absolute Percentage Error (MAPE):

In [28]:
import numpy as np

# MAPE: mean of |error / actual|, expressed as a percentage.
# The division is by the actual values, so a zero in y_test would yield an
# infinite term; this metric is only meaningful for non-zero targets.
relative_errors = np.abs((y_test - y_pred) / y_test)
mape = relative_errors.mean() * 100
mape

26.395165529907565

#### Median Absolute Error:

In [29]:
from sklearn.metrics import median_absolute_error

In [30]:
# Median absolute error: the middle absolute error, which is far less
# sensitive to a few large misses than the mean-based metrics above.
median_ae = median_absolute_error(y_true=y_test, y_pred=y_pred)
median_ae

0.4967389590002127