In this notebook I build linear regression from scratch.
It was really fun to do this.

In [83]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

In [85]:
#simple linear regression from scratch
# loading the dataset
def load_data(n_samples=1000, noise=10, random_state=42):
    """Generate a synthetic single-feature regression dataset.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of samples requested from ``make_regression``.
    noise : float, default 10
        Forwarded to ``make_regression`` as its ``noise`` argument.
    random_state : int, default 42
        Forwarded to ``make_regression`` so the dataset is reproducible.

    Returns
    -------
    X : flattened (1-D) feature array
    y : target array
    """
    X, y = make_regression(
        n_samples=n_samples,
        n_features=1,
        noise=noise,
        random_state=random_state,
    )
    # The simple model uses a scalar weight, so collapse the single
    # feature column into a 1-D array.
    X = X.flatten()

    return X, y

In [87]:
# initialize the parameters

def initialize_parameters():
    """Return the starting (weight, bias) pair for simple linear regression: both 0.0."""
    return 0.0, 0.0

In [89]:
#predict function

def predict(X, W, b):
    """Evaluate the line ``W * X + b`` for the given input(s)."""
    scaled = W * X
    return scaled + b

In [91]:
#compute the loss
def compute_loss(X_train, y_train, W, b):
    """Mean squared error of the line ``W * x + b`` over the training samples.

    The error is accumulated one sample at a time (no vectorisation) and
    then divided by the number of samples, ``X_train.shape[0]``.
    """
    n_samples = X_train.shape[0]
    total_sq_error = 0
    for idx in range(n_samples):
        residual = (W * X_train[idx] + b) - y_train[idx]
        total_sq_error += residual ** 2

    return total_sq_error / n_samples
        

In [93]:
#compute gradients

def compute_gradients(X_train, y_train, W, b):
    """Gradients of the mean squared error with respect to ``W`` and ``b``.

    Each sample contributes ``(2/n) * residual * x`` to the weight gradient
    and ``(2/n) * residual`` to the bias gradient, where
    ``residual = (W * x + b) - y`` and ``n = X_train.shape[0]``.

    Returns the pair ``(dW, db)``.
    """
    n = X_train.shape[0]
    grad_W, grad_b = 0, 0
    for idx in range(n):
        x_i = X_train[idx]
        residual = (W * x_i + b) - y_train[idx]
        grad_W += (2 / n) * residual * x_i
        grad_b += (2 / n) * residual

    return grad_W, grad_b

In [95]:
#update parameters
def update_parameters(dW, db, W, b, learning_rate):
    """Take one gradient-descent step and return the new ``(W, b)``.

    Each parameter moves against its gradient, scaled by ``learning_rate``.
    """
    new_W = W - learning_rate * dW
    new_b = b - learning_rate * db
    return new_W, new_b

In [97]:
#training loop
def train_model(X_train, y_train, learning_rate=0.01, epochs=50, return_losses=False):
    """Fit simple linear regression with batch gradient descent.

    Parameters
    ----------
    X_train, y_train
        Training inputs and targets, passed unchanged to ``compute_loss``
        and ``compute_gradients``.
    learning_rate : float, default 0.01
        Step size handed to ``update_parameters``.
    epochs : int, default 50
        Number of gradient-descent iterations.
    return_losses : bool, default False
        When True, also return the per-epoch MSE history so it can be
        inspected or plotted. Previously the history was collected and
        then discarded.

    Returns
    -------
    (W, b), or (W, b, losses) when ``return_losses`` is True.

    Notes
    -----
    This calls ``initialize_parameters()`` with no arguments, i.e. the
    scalar version. The notebook later redefines helpers with the same
    names for the multi-feature case, so this cell must be run before
    those definitions.
    """
    W, b = initialize_parameters()
    losses = []
    for i in range(epochs):
        # The loss is measured before this epoch's parameter update.
        MSE = compute_loss(X_train, y_train, W, b)
        losses.append(MSE)
        if i % 5 == 0:
            print('epoch:', i,'have MSE:', MSE)
        dW, db = compute_gradients(X_train, y_train, W, b)
        W, b = update_parameters(dW, db, W, b, learning_rate)

    if return_losses:
        return W, b, losses
    return W, b
    

In [99]:
#evaluation
def evaluate(X_test, y_test, W, b):
    """Mean squared error of the fitted line ``W * x + b`` on held-out data."""
    residuals = (W * X_test + b) - y_test
    return np.mean(residuals ** 2)

In [105]:
# Build the single-feature dataset and hold out 20% of it as a test set.
X,y=load_data()

# random_state is fixed so the split is the same on every run.
X_train,X_test,y_train, y_test =train_test_split(X ,y, test_size=0.2,random_state=42)


In [107]:
# Fit the from-scratch model; epochs is raised to 500 here (the function default is 50).
W,b=train_model(X_train ,y_train , learning_rate=0.01, epochs=500)

epoch: 0 have MSE: 357.71084460733186
epoch: 5 have MSE: 312.47415432075024
epoch: 10 have MSE: 275.0459182582754
epoch: 15 have MSE: 244.0779542874574
epoch: 20 have MSE: 218.45491498529591
epoch: 25 have MSE: 197.25406512069333
epoch: 30 have MSE: 179.71200970305247
epoch: 35 have MSE: 165.19717114102667
epoch: 40 have MSE: 153.18702180492252
epoch: 45 have MSE: 143.24925010020013
epoch: 50 have MSE: 135.02618025614566
epoch: 55 have MSE: 128.22188355496905
epoch: 60 have MSE: 122.59151592415581
epoch: 65 have MSE: 117.93249720479008
epoch: 70 have MSE: 114.07721389822935
epoch: 75 have MSE: 110.88698218730059
epoch: 80 have MSE: 108.24705351436485
epoch: 85 have MSE: 106.06248262146879
epoch: 90 have MSE: 104.25470907717104
epoch: 95 have MSE: 102.7587290550643
epoch: 100 have MSE: 101.52075542056636
epoch: 105 have MSE: 100.49628179440346
epoch: 110 have MSE: 99.6484808294927
epoch: 115 have MSE: 98.94687898880615
epoch: 120 have MSE: 98.36626008038809
epoch: 125 have MSE: 97.88575

In [109]:
# Test-set MSE of the from-scratch model.
mse=evaluate(X_test,y_test, W,b)
print(mse)

107.89870503294682


In [115]:
# Baseline: fit scikit-learn's LinearRegression on the same split for comparison.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

lr=LinearRegression()
# X was flattened to 1-D in load_data, so reshape it into a single column for the sklearn calls.
lr.fit(X_train.reshape(-1, 1), y_train)

y_pred=lr.predict(X_test.reshape(-1, 1))
mse_sklearn=mean_squared_error(y_test,y_pred)

print(mse_sklearn)

107.89991869915976


We can see that our simple linear regression model performs very well: its test MSE (107.8987) is essentially identical to scikit-learn's (107.8999).

Now let's build the same thing for multiple linear regression.

In [120]:
# initialising the parameters
def initialize_parameters(n):
    """Return zero-initialised parameters: a weight vector from ``np.zeros(n)`` and a bias of 0.0."""
    return np.zeros(n), 0.0

In [122]:
#predict function

def predict(X, W, b):
    """Predict targets with a linear model: ``X · W + b``.

    Parameters
    ----------
    X : array
        Feature matrix with one row per sample. A 1-D array combined with
        a scalar ``W`` (the simple-regression case) is also accepted.
    W : array or scalar
        Weight vector with one entry per feature, or a scalar weight.
    b : float
        Bias added to every prediction.

    Returns
    -------
    One prediction per sample.
    """
    # Use a dot product rather than ``W * X``: with a 2-D X and a weight
    # vector, ``W * X`` broadcasts element-wise and yields an (m, n) array
    # instead of m predictions. np.dot reduces to plain multiplication when
    # W is a scalar, so the simple case still works.
    return np.dot(X, W) + b

In [124]:
#compute the loss
def compute_loss(X, y, W, b):
    """Vectorised mean squared error of the linear model ``X.dot(W) + b`` against ``y``."""
    residuals = (X.dot(W) + b) - y
    return np.mean(residuals ** 2)
        

In [128]:
#compute gradients
# m-> number of samples
# n-> number of features
def compute_gradients(X, y, W, b):
    """Vectorised MSE gradients for the linear model ``X.dot(W) + b``.

    Returns ``(dW, db)``: ``(2/m) * X.T.dot(residuals)`` and
    ``(2/m) * sum(residuals)``, with ``residuals = prediction - y`` and
    ``m = X.shape[0]``.
    """
    n_rows = X.shape[0]
    residuals = (X.dot(W) + b) - y
    grad_W = (2 / n_rows) * X.T.dot(residuals)
    grad_b = (2 / n_rows) * np.sum(residuals)
    return grad_W, grad_b

In [None]:
#update parameters
def update_parameters(dW, db, W, b, learning_rate):
    """Apply a single gradient-descent update.

    Returns ``(W - learning_rate * dW, b - learning_rate * db)``.
    """
    step_W = learning_rate * dW
    step_b = learning_rate * db
    return W - step_W, b - step_b

In [130]:
#training loop
def train_model(X, y, learning_rate=0.01, epochs=500, return_losses=False):
    """Fit multiple linear regression with batch gradient descent.

    Parameters
    ----------
    X
        Feature matrix; ``X.shape[1]`` is used as the number of weights.
    y
        Targets, passed unchanged to ``compute_loss`` and
        ``compute_gradients``.
    learning_rate : float, default 0.01
        Step size handed to ``update_parameters``.
    epochs : int, default 500
        Number of gradient-descent iterations.
    return_losses : bool, default False
        When True, also return the per-epoch MSE history so it can be
        inspected or plotted. Previously the history was collected and
        then discarded.

    Returns
    -------
    (W, b), or (W, b, losses) when ``return_losses`` is True.
    """
    n = X.shape[1]
    W, b = initialize_parameters(n)
    losses = []
    for i in range(epochs):
        # The loss is measured before this epoch's parameter update.
        MSE = compute_loss(X, y, W, b)
        losses.append(MSE)
        if i % 10 == 0:
            print(f'Epoch {i}, MSE: {MSE}')
        dW, db = compute_gradients(X, y, W, b)
        W, b = update_parameters(dW, db, W, b, learning_rate)

    if return_losses:
        return W, b, losses
    return W, b

In [132]:
#evaluation
def evaluate(X_test, y_test, W, b):
    """Mean squared error of the linear model ``X_test.dot(W) + b`` on held-out data."""
    errors = (X_test.dot(W) + b) - y_test
    return np.mean(errors ** 2)

In [134]:
def load_data2(n_samples=1000, n_features=5, noise=5, random_state=42):
    """Generate a synthetic multi-feature regression dataset.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of samples requested from ``make_regression``.
    n_features : int, default 5
        Number of feature columns requested from ``make_regression``.
    noise : float, default 5
        Forwarded to ``make_regression`` as its ``noise`` argument.
    random_state : int, default 42
        Forwarded to ``make_regression`` so the dataset is reproducible.

    Returns
    -------
    X, y exactly as returned by ``make_regression``.
    """
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        noise=noise,
        random_state=random_state,
    )

    return X, y

In [136]:
# Build the five-feature dataset; this rebinds X, y and the split variables from the simple-regression section.
X,y=load_data2()

# Same 80/20 split and seed as before.
X_train,X_test,y_train, y_test =train_test_split(X ,y, test_size=0.2,random_state=42)

In [138]:
# Fit the from-scratch multiple-regression model; W is now a vector with one weight per feature.
W,b=train_model(X_train ,y_train , learning_rate=0.01, epochs=500)

Epoch 0, MSE: 4303.563861915203
Epoch 10, MSE: 2843.4385907955525
Epoch 20, MSE: 1884.1560962920987
Epoch 30, MSE: 1253.1749057665106
Epoch 40, MSE: 837.6275993155177
Epoch 50, MSE: 563.6096592714857
Epoch 60, MSE: 382.6791686245319
Epoch 70, MSE: 263.04979356240796
Epoch 80, MSE: 183.84063221058838
Epoch 90, MSE: 131.3185837138708
Epoch 100, MSE: 96.44051001030697
Epoch 110, MSE: 73.24400974799552
Epoch 120, MSE: 57.79272543813875
Epoch 130, MSE: 47.484351766798966
Epoch 140, MSE: 40.59610559787054
Epoch 150, MSE: 35.9858219044451
Epoch 160, MSE: 32.89515184759171
Epoch 170, MSE: 30.819822337012127
Epoch 180, MSE: 29.423993455631834
Epoch 190, MSE: 28.483648670063786
Epoch 200, MSE: 27.849123497529032
Epoch 210, MSE: 27.420266749708038
Epoch 220, MSE: 27.129951325253018
Epoch 230, MSE: 26.9331113524171
Epoch 240, MSE: 26.799442319649177
Epoch 250, MSE: 26.708532684951706
Epoch 260, MSE: 26.646611920172308
Epoch 270, MSE: 26.604374802681065
Epoch 280, MSE: 26.57552340856912
Epoch 290, 

In [140]:
# Test-set MSE of the from-scratch multiple-regression model.
mse=evaluate(X_test,y_test, W,b)
print(mse)

27.730356498074926


In [142]:
# Baseline: scikit-learn's LinearRegression on the same multi-feature split.
# NOTE(review): these two imports repeat the ones in the earlier baseline cell.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

lr=LinearRegression()
# X is already 2-D here, so no reshape is applied.
lr.fit(X_train, y_train)

y_pred=lr.predict(X_test)
mse_sklearn=mean_squared_error(y_test,y_pred)

print(mse_sklearn)

27.726524208661722
