In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# Toy dataset: four paired observations reused by every section of the notebook.
x = np.asarray([1, 2, 3, 4])  # predictor values
y = np.asarray([2, 3, 5, 4])  # observed responses

# Create DataFrame

In [3]:
# Place the two arrays side by side as columns 'x' and 'y'.
df = pd.DataFrame(dict(x=x, y=y))
df  # last expression of the cell, so the frame is rendered as its output

Unnamed: 0,x,y
0,1,2
1,2,3
2,3,5
3,4,4


In [4]:
# Features are selected with a list (['x']) so X stays a one-column DataFrame,
# while the target is selected with a plain label so y is a Series.
# Note: this rebinds `y` from the NumPy array defined earlier to a pandas Series.
X = df.loc[:, ['x']]
y = df.loc[:, 'y']

# Using sklearn to perform linear regression

In [5]:
# Train the model on the feature table X and the target y.
model = LinearRegression()
model = model.fit(X, y)  # fit() hands back the fitted estimator

In [6]:
# Fitted slope. With the single feature used here, coef_ is a one-element
# array rather than a plain number (see the output below).
slope_sklearn = model.coef_
slope_sklearn

array([0.8])

In [7]:
# Fitted constant term of the line; unlike coef_, it is shown as a scalar float.
intercept_sklearn = model.intercept_
intercept_sklearn

1.5000000000000004

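# Manual calculations using Formula 1

Formula 1 (raw sums): $m = \dfrac{N\sum xy - \sum x \sum y}{N\sum x^2 - \left(\sum x\right)^2}$ and $b = \dfrac{\sum y - m\sum x}{N}$, where $m$ is the slope and $b$ is the intercept.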

In [8]:
# Closed-form least-squares line built from raw sums of the data.
N = x.size                   # number of observations
sum_x = x.sum()
sum_y = y.sum()
sum_xy = (x * y).sum()       # sum of element-wise products
sum_x2 = np.square(x).sum()  # sum of squared x values

# slope = (N*Sxy - Sx*Sy) / (N*Sxx - Sx^2);  intercept = (Sy - slope*Sx) / N
slope_formula1 = (
    (N * sum_xy - sum_x * sum_y)
    / (N * sum_x2 - sum_x**2)
)
intercept_formula1 = (sum_y - slope_formula1 * sum_x) / N

In [9]:
# Show the slope obtained from Formula 1.
slope_formula1

0.8

In [10]:
# Show the intercept obtained from Formula 1.
intercept_formula1

1.5

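# Manual calculations using Formula 2

Formula 2 (deviations from the means): $m = \dfrac{\sum (x - \bar{x})(y - \bar{y})}{\sum (x - \bar{x})^2}$ and $b = \bar{y} - m\,\bar{x}$.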

In [11]:
# Least-squares line expressed through deviations from the means.
mean_x = x.mean()
mean_y = y.mean()
# slope = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2)
slope_formula2 = ((x - mean_x) * (y - mean_y)).sum() / np.square(x - mean_x).sum()
# The intercept is chosen so the line passes through (mean_x, mean_y).
intercept_formula2 = mean_y - slope_formula2 * mean_x

In [12]:
# Show the slope obtained from Formula 2.
slope_formula2

0.8

In [13]:
# Show the intercept obtained from Formula 2.
intercept_formula2

1.5

# Let's Compare the Results

In [14]:
# scikit-learn estimates as a (slope, intercept) pair; the slope is the coef_ array.
(slope_sklearn, intercept_sklearn)

(array([0.8]), 1.5000000000000004)

In [15]:
# Formula 1 (raw sums) estimates as a (slope, intercept) pair.
(slope_formula1, intercept_formula1)

(0.8, 1.5)

In [16]:
# Formula 2 (deviations from the means) estimates as a (slope, intercept) pair.
(slope_formula2, intercept_formula2)

(0.8, 1.5)

# Residual Calculation

In [17]:
# Feature table that was passed to the model: a one-column DataFrame holding x.
X

Unnamed: 0,x
0,1
1,2
2,3
3,4


In [18]:
# Predicted y for each row of X. To keep the predictions alongside the data
# instead of only displaying them, assign: df['y_hat'] = model.predict(X)
model.predict(X)

array([2.3, 3.1, 3.9, 4.7])

# Create the example dataset

In [19]:
# Worked-example table: inputs, observed targets, and the predictions,
# typed in by hand to match the model.predict output shown earlier.
data = dict(
    x=[1, 2, 3, 4],
    y=[2, 3, 5, 4],
    y_hat=[2.3, 3.1, 3.9, 4.7],  # predicted value of y for each x
)

# Create a DataFrame

In [20]:
# Turn the column dictionary into a table.
# Note: this replaces the two-column `df` built earlier in the notebook.
df = pd.DataFrame(data=data)
df

Unnamed: 0,x,y,y_hat
0,1,2,2.3
1,2,3,3.1
2,3,5,3.9
3,4,4,4.7


# Calculate residuals

In [21]:
#adding new column in df named Residuals
df['Residuals'] = df['y'] - df['y_hat']
df

Unnamed: 0,x,y,y_hat,Residuals
0,1,2,2.3,-0.3
1,2,3,3.1,-0.1
2,3,5,3.9,1.1
3,4,4,4.7,-0.7


# Calculate L1 Loss

The L1 loss is the sum of the absolute residuals: $L_1 = \sum_i \lvert y_i - \hat{y}_i \rvert$.

In [22]:
# Sum of absolute residuals.
L1_loss = np.abs(df['Residuals']).sum()
L1_loss

2.2

# Calculate L2 Loss

The L2 loss is the sum of the squared residuals: $L_2 = \sum_i (y_i - \hat{y}_i)^2$.

In [23]:
# Sum of squared residuals.
L2_loss = df['Residuals'].pow(2).sum()
L2_loss

1.8000000000000003

# Display the DataFrame with the residuals and the calculated losses

In [24]:
# Final table: inputs, observed values, predictions, and residuals.
df

Unnamed: 0,x,y,y_hat,Residuals
0,1,2,2.3,-0.3
1,2,3,3.1,-0.1
2,3,5,3.9,1.1
3,4,4,4.7,-0.7


In [25]:
# Print both losses at full float precision (floating-point noise is visible here).
print(f"L1 Loss = {L1_loss}, L2 Loss = {L2_loss}")

L1 Loss = 2.2, L2 Loss = 1.8000000000000003


In [26]:
# Same report, with the :.2f format spec showing two digits after the decimal point.
print(f"L1 Loss = {L1_loss:.2f}, L2 Loss = {L2_loss:.2f}")

L1 Loss = 2.20, L2 Loss = 1.80
