In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
def calculate_rmse(actual, predictions):
    """Return the root-mean-squared error between ``actual`` and ``predictions``.

    The squared differences are summed, divided by ``len(actual)``, and the
    square root of that quotient is returned.
    """
    residuals = actual - predictions
    squared_total = np.sum(np.square(residuals))
    return np.sqrt(squared_total / len(actual))

def calculate_variance(x):
    """Return the population variance of ``x``.

    Computed as the sum of squared deviations from ``np.mean(x)`` divided by
    ``len(x)`` (no Bessel correction).
    """
    deviations = x - np.mean(x)
    return np.sum(np.square(deviations)) / len(x)

def calculate_error_variance(y, y_pred):
    """Return the mean of the squared differences between ``y`` and ``y_pred``.

    ``y_pred`` may be an array matching ``y`` or a scalar (for example the mean
    of ``y``); the sum of squares is divided by ``len(y)``.
    """
    residuals = y - y_pred
    return np.sum(np.square(residuals)) / len(y)

def calculate_r2(y, y_pred):
    """Return R^2: the share of the mean-baseline error removed by the fit.

    The baseline error is ``calculate_error_variance`` of ``y`` against its own
    mean; the fit error is the same quantity against ``y_pred``. The result is
    ``(baseline - fit) / baseline``.
    """
    baseline_error = calculate_error_variance(y, np.mean(y))
    fit_error = calculate_error_variance(y, y_pred)
    explained = baseline_error - fit_error
    return explained / baseline_error
    
def get_model_f_stat(y, y_pred, parameters_num):
    """Return the F statistic of the fit relative to the mean-only baseline.

    The numerator is the error removed by the fit, divided by
    ``parameters_num - 1``; the denominator is the remaining fit error divided
    by ``len(y) - parameters_num``.
    """
    baseline_error = calculate_error_variance(y, np.mean(y))
    fit_error = calculate_error_variance(y, y_pred)

    numerator_dof = parameters_num - 1
    denominator_dof = len(y) - parameters_num

    explained_per_dof = (baseline_error - fit_error) / numerator_dof
    unexplained_per_dof = fit_error / denominator_dof
    return explained_per_dof / unexplained_per_dof

In [3]:
def calclate_r_correlation(v1, v2):
    """Return the correlation coefficient r between ``v1`` and ``v2``.

    Both inputs are mean-centred; r is the dot product of the centred vectors
    divided by the product of their Euclidean norms.
    """
    centered_1 = v1 - np.mean(v1)
    centered_2 = v2 - np.mean(v2)

    numerator = np.dot(centered_1, centered_2)
    norm_1 = np.sqrt(np.sum(centered_1**2))
    norm_2 = np.sqrt(np.sum(centered_2**2))
    return numerator / (norm_1 * norm_2)

# Equations

## Linear Equation

$\hat{y} = wx + b$.

## Cost Function

$
\text{MSE} = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2
$

# Derivatives

For a single sample $i$, with $f = (y_i - \hat{y}_i)^2$:

$\frac{\partial f}{\partial w} = -2 x_i \cdot \big(y_i - (w x_i + b)\big) = -2 x_i \cdot \big(y_i - \hat{y}_i\big)$

$\frac{\partial f}{\partial b} = -2 \cdot \big(y_i - (w x_i + b)\big) = -2 \cdot \big(y_i - \hat{y}_i\big)$

## Update Rules

$a$: learning rate

${w} = w - a \cdot dw$.

$b = b - a \cdot db$

# Model

In [5]:
class CustomLinearRegression():
    """Linear regression fitted with per-sample (stochastic) gradient descent.

    The model is ``y_hat = x . weights + bias``. ``fit`` visits the training
    rows one at a time and moves the parameters against the gradient of that
    row's squared error.

    Parameters
    ----------
    lr : float
        Learning rate (step size) applied to every gradient update.
    epochs : int
        Number of full passes over the training data.
    random_state : int or None
        Seed for the initial weights and bias. ``None`` (the default) draws
        them from NumPy's global random state, so ``np.random.seed`` keeps
        controlling the initialisation.

    Attributes set by ``fit``
    -------------------------
    weights : ndarray of shape (n_features,)
    bias : ndarray of shape (1,)
    losses : list of float
        Training mean squared error measured at the end of each epoch.
    """

    def __init__(self, lr=0.001, epochs=100, random_state=None):
        self.lr = lr
        self.epochs = epochs
        self.random_state = random_state

    def loss_function(self, actual, prediction):
        """Return the mean squared error between ``actual`` and ``prediction``."""
        n = len(actual)
        loss = np.sum((actual - prediction)**2) / n
        return loss

    def calculate_weight_derivative(self, x, loss):
        """Return ``(dw, db)``, the gradient of one sample's squared error.

        Despite its name, ``loss`` is the signed residual ``y - y_hat`` for the
        sample ``x``, not a squared loss value.
        """
        dw = -2 * x * loss
        db = -2 * loss
        return dw, db

    def update_weights(self, w, dw):
        """Return ``w`` moved one learning-rate step against the gradient ``dw``."""
        return w - self.lr * dw

    def fit(self, x, y):
        """Fit weights and bias to ``x`` (n_samples, n_features) and ``y`` (n_samples,).

        Raises
        ------
        ValueError
            If ``x`` is not 2-D, is empty, or its length differs from ``y``.
        """
        x = np.asarray(x, dtype=float)
        # Flatten y so a column vector cannot broadcast against the 1-D
        # predictions when the epoch loss is computed.
        y = np.asarray(y, dtype=float).ravel()
        if x.ndim != 2:
            raise ValueError("x must be a 2-D array of shape (n_samples, n_features)")
        if len(x) == 0:
            raise ValueError("x must contain at least one sample")
        if len(x) != len(y):
            raise ValueError("x and y must contain the same number of samples")

        # Use the global NumPy RNG unless a seed was requested explicitly.
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)
        self.weights = rng.randn(x.shape[1])
        self.bias = rng.randn(1)
        self.losses = []
        for _ in range(self.epochs):
            for j in range(len(x)):
                y_hat = np.dot(x[j], self.weights) + self.bias
                residual = y[j] - y_hat
                dw, db = self.calculate_weight_derivative(x[j], residual)
                self.weights = self.update_weights(self.weights, dw)
                self.bias = self.update_weights(self.bias, db)
            # Record the actual training MSE for the epoch. Averaging signed
            # residuals lets positive and negative errors cancel out.
            self.losses.append(float(self.loss_function(y, self.predict(x))))

    def predict(self, x):
        """Return the model's predictions ``x . weights + bias`` for ``x``."""
        return np.dot(x, self.weights) + self.bias

    def display_loss(self):
        """Plot the recorded per-epoch training loss against the epoch number."""
        # Size the x-axis from the recorded losses so it always matches them,
        # even if ``self.epochs`` was changed after fitting.
        epochs_list = np.arange(1, len(self.losses) + 1)
        plt.plot(epochs_list, self.losses)
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the dataset: the first two columns are the features, the last column is the target.
df = pd.read_csv('multiple_linear_regression_dataset.csv')
X = df.iloc[:, 0:2].values  # raw (unscaled) features
y = df.iloc[:, -1].values

# Split BEFORE scaling so the test rows never influence the scaler's min/max;
# fitting the scaler on the full dataset leaks test information into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)  # learn the feature ranges from the training rows only
X_test = scaler.transform(X_test)        # reuse the training ranges on the held-out rows

In [None]:
# Train model
# Seed NumPy's global RNG so the random initial weights and bias drawn inside
# fit() are identical on every Restart & Run All.
np.random.seed(42)
model = CustomLinearRegression(lr=0.01, epochs=300)
model.fit(X_train, y_train)
model.display_loss()

In [None]:
# Evaluate on the training split.
y_pred = model.predict(X_train)

# The fitted model has one weight per feature plus a bias term, so the
# F statistic must be given n_features + 1 parameters (not a hard-coded 2).
n_parameters = X_train.shape[1] + 1

rmse = calculate_rmse(y_train, y_pred)
r2 = calculate_r2(y_train, y_pred)
model_f_stat = get_model_f_stat(y_train, y_pred, n_parameters)

print(rmse, r2, model_f_stat)

# Cross-check the hand-written metrics against scikit-learn.
print(np.sqrt(mean_squared_error(y_train, y_pred)), r2_score(y_train, y_pred))

In [None]:
# Evaluate on the held-out test split.
y_pred = model.predict(X_test)

# The fitted model has one weight per feature plus a bias term, so the
# F statistic must be given n_features + 1 parameters (not a hard-coded 2).
n_parameters = X_test.shape[1] + 1

rmse = calculate_rmse(y_test, y_pred)
r2 = calculate_r2(y_test, y_pred)
model_f_stat = get_model_f_stat(y_test, y_pred, n_parameters)

print(rmse, r2, model_f_stat)

# Cross-check the hand-written metrics against scikit-learn.
print(np.sqrt(mean_squared_error(y_test, y_pred)), r2_score(y_test, y_pred))