## Linear Regression from Scratch

### Importing libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

### Data pre-processing

In [2]:
housing = fetch_california_housing()
X = housing.data
y = housing.target

data = pd.DataFrame(X)

scaler = StandardScaler()
X = scaler.fit_transform(data)



In [3]:
len(X) == len(y) , int(len(X)*.8)

(True, 16512)

In [4]:
X_train = np.array( X[:int(len(X) * .8 )] ) 
X_test = np.array(X[:int(len(X) * .8): ])

y_train = np.array( y[:int(len(X) * .8 )] ) 
y_test = np.array(y[:int(len(X) * .8): ])

In [5]:
assert X_train.shape[0] == y_train.shape[0]
assert X_test.shape[0] == y_test.shape[0]

### Modelling 

In [6]:
def mse(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

In [7]:
class Linear_Regression():
    def __init__(self, learning_rate = 0.001, epochs = 100):
        self.lr = learning_rate
        self.epochs = epochs
    
    def fit(self, X, y):
        
        n_samples , n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for step in range(self.epochs):
            predicted = np.dot(X, self.weights) + self.bias

            # Gradient Descent

            dw = (1/n_samples) * np.dot(X.T,  (predicted - y))
            db = (1/n_samples) * np.sum( (predicted - y))

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            if step % 1000 == 0:
                print(f'Epochs {step} / {self.epochs}, Loss: {mse(y, predicted)}')
    
    def predict(self, X):
        return np.dot(X, self.weights) + self.bias
    

    def evaluate(self, X, y):
        predicted = self.predict(X)
        return mse(y, predicted)

        

In [8]:
model = Linear_Regression(0.01, 3000)

In [9]:
model.fit(X_train, y_train)

Epochs 0 / 3000, Loss: 5.3717905222912234
Epochs 1000 / 3000, Loss: 0.570237619119985
Epochs 2000 / 3000, Loss: 0.5457152305093357


In [10]:
model.evaluate(X_test, y_test)

0.5389816610329551