---

# `Evaluation Metrics for Regression`

---

In [107]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import numpy as np

In [108]:
data = pd.read_excel("Folds5x2_pp.xlsx")

In [109]:
X = data[["AT","AP"]]
y = data["PE"]

In [110]:
print(len(X))
print(len(y))

9568
9568


In [111]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X,y)

LinearRegression()

In [112]:
theta1,theta2 = model.coef_   # theta1 and theta2
print(f'theta1 = {theta1} and theta2 = {theta2}')

theta1 = -2.1130659428774066 and theta2 = 0.14402918985400698


In [113]:
theta0 = model.intercept_   # theta0
print(f'theta0 = {theta0}')

theta0 = 349.95047262174114


In [114]:
y_pred = model.predict(X) # prediction for all values

In [115]:
y_pred

array([465.83497857, 443.659007  , 484.93329046, ..., 429.65929428,
       444.24805277, 450.81906105])

### SST

In [116]:
sum1 = 0
for i in y:
    sum1 = sum1 + (i - y.mean()) ** 2
SST = sum1
SST

2786697.9393991544

### SSE

In [117]:
sum2 = 0
for i in range(len(y)):
    sum2 = sum2 + (y[i] - y_pred[i] ) ** 2
SSE = sum2
SSE

276406.08282738156

### SSR

In [118]:
sum3 = 0
for i in y_pred:
    sum3 = sum3 + (i - y.mean() ) ** 2
SSR = sum3
SSR

2510291.856571764

## `R2 ( Coefficient of determination )   SSR / SST or 1 - SSE/SST` ***
> - How much accurate regression line is

In [119]:
R2 = SSR / SST
R2

0.900812327407474

## `Mean Squared Error ( MSE )` ***

In [120]:
MSE = SSE / len(X)
MSE

28.88859561322968

## `Root Mean Squared Error ( RMSE )` ***

In [121]:
RMSE = np.sqrt(MSE)
RMSE

5.374811216520045

## `Mean Absolute Error ( MAE )`

In [122]:
sum4 = 0
for i in range(len(y)):
    sum4 = sum4 + np.abs((y[i] - y_pred[i] )) # in finding absolute error we didn't square
SSe = sum4
SSe

40933.70021963503

In [123]:
MAE = SSe / len(X)
MAE

4.278187731985266

## `Mean Absolute Percentage Error ( MAPE )`

In [124]:
sum5 = 0
for i in range(len(y)):
    sum5 = sum5 + np.abs((y[i] - y_pred[i] ) / y[i]) # in finding absolute error we didn't square
MAPE = sum5 * 100 / len(y)
MAPE  # gives in percent

0.945464698829643

#### .94 % or .094 of the data are error

## `Mean Percentage Error ( MPE )`

In [125]:
sum6 = 0
for i in range(len(y)):
    sum6 = sum6 + (y[i] - y_pred[i] ) / y[i] 
MPE = sum6 * 100 / len(y)
MPE  # gives in percent

-0.013775875885930136

## `Adjusted R2` ***

In [126]:
k = len(X.columns)   # 2    predictor variables
n = len(X)           # 9568   number of observation
adjusted_R2 = R2 - ((k - 1)/(n - k) * (1 - R2))
adjusted_R2

0.9008019586355115

### value of R2

In [132]:
R2

0.900812327407474

#### Calculated by statsmodel

In [127]:
import statsmodels.api as sm

X = sm.add_constant(X)
model = sm.OLS(y, X).fit()

#display adjusted R-squared
print(model.rsquared_adj)

0.9007915876954871


---

### `Calculating with the help of library`

---

In [130]:
import math
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error,mean_absolute_percentage_error
print(f"R2_scores {r2_score(y,y_pred)}")
print(f"mean_squared_error {mean_squared_error(y,y_pred)}")
print(f"root_mean_squared_error {math.sqrt(mean_squared_error(y,y_pred))}")
print(f"mean_absolute_error {mean_absolute_error(y,y_pred)}")
print(f"mean_absolute_percentage_error {mean_absolute_percentage_error(y,y_pred)}")

R2_scores 0.9008123274074772
mean_squared_error 28.888595613229647
root_mean_squared_error 5.374811216520041
mean_absolute_error 4.2781877319852555
mean_absolute_percentage_error 0.009454646988296442


# `END ---------------------------`