## Install and import libraries

In [None]:
!pip install numpy pandas matplotlib scikit-learn

In [85]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

## Multivariate Linear Regression (using Gradient Descent) by Vectorization Of Numpy

In [None]:
# Load the diabetes dataset
X_diabetes, y_diabetes = load_diabetes(return_X_y = True)
"""
X_diabetes (n_samples, n_features): infos like age, blood sugar levels,... of patients
y_diabetes (n_samples,): index measuring the progression of diabetes in each patient
"""

X_scaled = StandardScaler().fit_transform(X_diabetes)
y_scaled = StandardScaler().fit_transform(y_diabetes.reshape(-1, 1))


array([[-1.47194752e-02],
       [-1.00165882e+00],
       [-1.44579915e-01],
       [ 6.99512942e-01],
       [-2.22496178e-01],
       [-7.15965848e-01],
       [-1.83538046e-01],
       [-1.15749134e+00],
       [-5.47147277e-01],
       [ 2.05006151e+00],
       [-6.64021672e-01],
       [-1.07957508e+00],
       [ 3.48889755e-01],
       [ 4.26806019e-01],
       [-4.43258925e-01],
       [ 2.45001404e-01],
       [ 1.80071184e-01],
       [-1.05621783e-01],
       [-7.15965848e-01],
       [ 2.06043272e-01],
       [-1.09256112e+00],
       [-1.33929596e+00],
       [-1.09256112e+00],
       [ 1.20596866e+00],
       [ 4.13819975e-01],
       [ 6.47568766e-01],
       [-1.96524090e-01],
       [-8.71798376e-01],
       [-2.74440354e-01],
       [ 1.69943833e+00],
       [-3.00412442e-01],
       [-1.20943552e+00],
       [ 2.45262887e+00],
       [-8.45826288e-01],
       [-1.13151925e+00],
       [-6.51035629e-01],
       [ 1.46568953e+00],
       [ 1.60853602e+00],
       [ 1.2

In [None]:
class LinearRegression:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.w = np.zeros(X.shape[1])
        self.b = 0
        self.losses = []

    def predict(self, X):
        pred = np.dot(X, self.w) + self.b
        return pred
    
    def compute_loss(self, X):
        loss = np.mean((self.predict(X) - self.y)**2)/2
        return loss
    
    def compute_gradient(self, X):
        n_samples = X.shape[0]
        error = self.y - self.predict(X)
        dw = - (np.dot(error, X))/n_samples
        db = - np.mean(error)
        return dw, db
    
    def train(self, learning_rate, n_iterations):
        for i in range(n_iterations + 1):
            dw, db = self.compute_gradient(self.X)
            self.w -= learning_rate * dw
            self.b -= learning_rate + db

            loss = self.compute_loss(self.X)
            self.losses.append(loss)

            if i % 10 == 0:
                print(f"Iteration {i}: w = {self.w}, b = {self.b}, losses = {loss}")
                pass
        return self

In [None]:
model = LinearRegression(X_scaled, y_scaled, 0.01, 1000)
model.train()

ValueError: non-broadcastable output operand with shape (10,) doesn't match the broadcast shape (442,10)

In [122]:
y_true = y_diabetes
y_pred = model.predict(X_diabetes)
r2_score(y_true, y_pred)

0.006554570247549019