In [166]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import seaborn as sns
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [165]:
X, Y = make_regression(n_samples=10000, n_features=6, noise=0.1, random_state=42)
df = pd.DataFrame(X, columns=[f'feature_{i+1}' for i in range(X.shape[1])])
df['target'] = y
df.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,target
0,0.376678,0.427697,-0.806369,0.597987,-1.336421,0.849943,326.0
1,2.644343,-0.46531,0.25374,-0.105948,-1.50297,0.467693,326.0
2,-0.160513,-0.011081,0.467378,1.002952,0.790533,-0.878339,327.0
3,1.548639,-1.336569,1.221295,1.336657,0.320217,0.133958,334.0
4,-1.428169,-1.370431,-0.646894,-0.301938,0.789952,-0.103304,335.0


In [268]:
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size = 0.15, random_state = 42)
xtrain.shape, ytrain.shape, xtest.shape, ytest.shape

((8500, 6), (8500,), (1500, 6), (1500,))

In [269]:
# initializing the bias term to be 0
b = torch.ones(xtrain.shape[1]).T.view(-1, 1)
e = torch.ones(1)
b.shape, e.shape

(torch.Size([6, 1]), torch.Size([1]))

In [270]:
xtr = torch.tensor(xtrain, dtype=torch.float32)
ytr = torch.tensor(ytrain, dtype=torch.float32).view(-1, 1)
b = torch.tensor(b, dtype=torch.float32)
e = torch.tensor(e, dtype = torch.float32)
xtr[:5]

  b = torch.tensor(b, dtype=torch.float32)
  e = torch.tensor(e, dtype = torch.float32)


tensor([[ 0.8531,  0.0107, -0.4057,  0.3544,  1.1279,  0.4994],
        [-1.3588, -0.9873, -2.2094, -0.8967, -2.1294,  2.4611],
        [-0.5045, -0.7787, -1.0753, -0.4908, -0.2880, -0.8139],
        [-0.4945, -0.7234, -0.1361, -3.1993,  0.0376,  1.9802],
        [ 0.1926, -0.2210, -2.0994,  0.0694,  2.3921,  0.6832]])

In [271]:
params = [b, e]
m = xtr.shape[0]
for i in params:
    i.requires_grad = True
    i.grad = None

In [272]:
# forward pass
for i in range(30):
    pred = xtr @ b + e
    # cost function 1/m * sum(ypred - ytrue)**2
    diff = pred - ytr
    diff_square = diff**2
    diff_sum = diff_square.sum(0, keepdim = True)
    cost = (1/m) *diff_sum # m is not a variable, its just a fixed value
    print(cost)
    # backward pass
    dcost = 1.0
    ddiff_square_sum = (1/m) * dcost
    ddiff_square = torch.ones_like(diff) * ddiff_square_sum
    ddiff = 2 * diff * ddiff_square
    dpred = ddiff
    dytr = ddiff
    de = dpred.sum(0)
    dxtr = dpred @ b.T
    db = xtr.T @ dpred
    # update
    if i<10:
    b = b - 0.3*db
    e = e - 0.3*de

tensor([[25113.1328]], grad_fn=<MulBackward0>)
tensor([[3868.5186]], grad_fn=<MulBackward0>)
tensor([[597.6393]], grad_fn=<MulBackward0>)
tensor([[92.6036]], grad_fn=<MulBackward0>)
tensor([[14.3981]], grad_fn=<MulBackward0>)
tensor([[2.2523]], grad_fn=<MulBackward0>)
tensor([[0.3605]], grad_fn=<MulBackward0>)
tensor([[0.0649]], grad_fn=<MulBackward0>)
tensor([[0.0186]], grad_fn=<MulBackward0>)
tensor([[0.0113]], grad_fn=<MulBackward0>)
tensor([[0.0102]], grad_fn=<MulBackward0>)
tensor([[0.0100]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)
tensor([[0.0099]], grad_fn=<MulBackward0>)


In [241]:
print(diff_sum.shape, cost.shape)
print(diff.shape, diff_sum.shape)
print(ytr.shape, pred.shape, diff.shape)
print(xtr.shape, b.shape, e.shape, pred.shape)

torch.Size([1, 1]) torch.Size([1, 1])
torch.Size([8500, 1]) torch.Size([1, 1])
torch.Size([8500, 1]) torch.Size([8500, 1]) torch.Size([8500, 1])
torch.Size([8500, 6]) torch.Size([6, 1]) torch.Size([1]) torch.Size([8500, 1])


In [236]:
lr = LinearRegression()
lr.fit(xtrain, ytrain)
lr.intercept_

np.float64(0.0009743049440947837)

In [237]:
lr.coef_

array([95.67525234, 20.18151872, 18.80571996, 94.80655363, 56.39541028,
       58.32739273])

In [238]:
mean_squared_error(ytest,lr.predict(xtest))

np.float64(0.009352978631749737)