In [1]:
# Importing necessary Library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.datasets import load_diabetes
import random

In [2]:
diabetes=load_diabetes()
x=diabetes.data
y=diabetes.target

In [3]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)

In [4]:
x_train

array([[ 0.03081083,  0.05068012,  0.03259528, ...,  0.03430886,
         0.06301517,  0.00306441],
       [ 0.07440129, -0.04464164,  0.08540807, ..., -0.00259226,
         0.00620674,  0.08590655],
       [-0.05637009,  0.05068012, -0.0105172 , ...,  0.03430886,
         0.02060939,  0.0569118 ],
       ...,
       [ 0.03081083, -0.04464164, -0.02021751, ..., -0.03949338,
        -0.01090325, -0.0010777 ],
       [-0.01277963, -0.04464164, -0.02345095, ..., -0.00259226,
        -0.03845972, -0.03835666],
       [-0.09269548, -0.04464164,  0.02828403, ..., -0.03949338,
        -0.00514219, -0.0010777 ]])

In [5]:
# Define a custom class for MiniBatch Gradient Descent (MBGD) optimization
class MiniBatch():

    # Constructor method to initialize the learning rate (lr) and number of epochs and batch_size
    def __init__(self, lr=0.1, epochs=100,batch_size=35):
        self.coef_ = None  # Coefficients for the linear model (weights)
        self.intercept_ = None  # Intercept (bias) term for the linear model
        self.lr = lr  # Learning rate for gradient descent
        self.epochs = epochs  # Number of iterations for training
        self.batch_size=batch_size # batch_size
    # Method to train the model on the training data using MBGD
    def fitmodel(self, X_train, Y_train):
        self.intercept_ = 0  # Initialize intercept as 0
        self.coef_ = np.ones(X_train.shape[1])  # Initialize coefficients as ones

        # Loop through the number of epochs
        for i in range(self.epochs):
            # Loop through each training sample
            for j in range(X_train.shape[0]//self.batch_size):
                # Randomly select an batch for MBGD calculation
                id =random.sample(range(X_train.shape[0]),self.batch_size)

                # Calculate the predicted value for the randomly selected sample
                y_predicted = np.dot(X_train[id], self.coef_) + self.intercept_

                # Compute the gradient (derivative) for the intercept term
                intercept_der = -2 * np.mean(Y_train[id] - y_predicted)
                # Update the intercept using the learning rate and gradient
                self.intercept_ = self.intercept_ - (self.lr * intercept_der)

                # Compute the gradient (derivative) for the coefficients
                coef_der = -2 * np.dot((Y_train[id] - y_predicted), X_train[id])
                # Update the coefficients using the learning rate and gradient
                self.coef_ = self.coef_ - (self.lr * coef_der)

        # Print the final intercept and coefficients after training
        print(self.intercept_, self.coef_)

    # Method to make predictions on test data using the trained model
    def predict(self, X_test):
        # Return the predicted values by applying the linear model (dot product)
        return np.dot(X_test, self.coef_) + self.intercept_


In [6]:
mbg=MiniBatch()

In [7]:
mbg.fitmodel(x_train,y_train)

150.18759874891302 [  66.10033764 -263.18895334  526.46101974  404.40519973 -170.55680403
  -59.09987523 -211.55063508  185.77850119  361.64705672   54.41926992]


In [8]:
y_MBGD=mbg.predict(x_test)

In [9]:
r2_score(y_test,y_MBGD)

0.4686739837793912

In [10]:
df=pd.read_csv('CarPricesPrediction.csv')

In [11]:
data=df.iloc[:,3:5]
target=df.iloc[:,6:]

In [12]:
target

Unnamed: 0,Price
0,19094.75
1,27321.10
2,23697.30
3,18251.05
4,19821.85
...,...
995,24548.50
996,26969.70
997,20507.55
998,31414.90


In [13]:
from sklearn.preprocessing import StandardScaler
scler=StandardScaler()

In [15]:
scaled_data=scler.fit_transform(data)

In [16]:
X_train,X_test,Y_train,Y_test=train_test_split(scaled_data,target,test_size=0.3,random_state=42)

In [17]:
Lr=LinearRegression()
Lr.fit(X_train,Y_train)

In [18]:
Y_pred=Lr.predict(X_test)

In [19]:
r2_score(Y_test,Y_pred)

0.9999999997555501

In [21]:
X_train.shape

(700, 2)

In [84]:
class MGradient():

    def __init__(self,lr=0.1,epochs=10,batch_size=70):
        self.coef_ = None
        self.intercept_ =None
        self.epochs=epochs
        self.batch_size=batch_size
        self.lr=lr

    def Fitmodel(self,x,y):
        self.coef_ = np.ones(x.shape[1])
        self.intercept_=0

        for i in range(self.epochs):
            for j in range(x.shape[0]//self.batch_size):
                idx=random.sample(range(x.shape[0]),self.batch_size)
                y_predicted=np.dot(x[idx],self.coef_)+self.intercept_
                y_predicted = np.array(y_predicted).flatten()
                y_batch = np.array(Y_train.iloc[idx]).flatten()
                intercept_der=-2*np.mean(y_batch-y_predicted)
                self.intercept_=self.intercept_-(self.lr*intercept_der)

                coef_der=-2*np.dot((y_batch-y_predicted),x[idx])
                self.coef_=self.coef_-(self.lr*coef_der)
        print(self.intercept_,self.coef_)

    def predict(self,test_data):
        return np.dot(test_data,self.coef_)+self.intercept_

In [93]:
mg=MGradient(0.1,100,35)

In [94]:
mg.Fitmodel(x_train,y_train)

22035.334124969377 [ 2206.75778049 -5224.2056098   2114.63478884  2946.8473146
  1565.76222708   174.14874738 -1407.49766726  6758.61197499
 -4276.61363945 -8357.1161596 ]


In [95]:
mgpred=mg.predict(x_test)

In [96]:
r2_score(y_test,mgpred)

-88974.48048353085

In [34]:
idx=random.sample(range(X_train.shape[0]),70)

In [36]:
X_train[idx]

array([[ 0.30021859, -0.18341283],
       [-1.02233027, -0.76504625],
       [-0.49331073,  1.03271378],
       [-0.22880095, -0.54366294],
       [-1.55134981, -0.37267848],
       [ 1.62276745, -1.3794251 ],
       [-1.55134981, -0.435608  ],
       [ 0.82923814,  0.68117738],
       [-1.55134981, -0.67803477],
       [ 1.09374791, -1.23151815],
       [-0.22880095,  1.37518492],
       [-0.49331073,  1.43068142],
       [ 0.82923814,  1.08645247],
       [ 0.30021859,  1.17783335],
       [ 0.82923814,  0.25084097],
       [ 0.56472836, -1.37696417],
       [ 1.35825768,  0.13984798],
       [ 0.30021859, -1.30338737],
       [ 0.03570882, -0.212944  ],
       [ 1.62276745, -0.90002076],
       [-0.7578205 , -0.62525032],
       [ 1.62276745,  1.17040034],
       [ 0.56472836, -1.48966475],
       [ 1.09374791,  1.69904841],
       [-0.22880095,  1.10726994],
       [ 0.03570882, -0.80062929],
       [-0.7578205 ,  0.47179739],
       [-0.49331073, -0.50775344],
       [ 1.35825768,

In [39]:
coef_=np.ones(X_train.shape[1])
intercept_=0

In [55]:
y_predicted=np.dot(X_train[idx],coef_)+intercept_
                

In [66]:
Y_train.iloc[idx]

Unnamed: 0,Price
224,21425.45
807,27583.55
114,22004.00
246,24142.75
176,28802.30
...,...
234,14551.30
804,23915.75
150,21606.40
156,29791.30


In [61]:
y_sample=y_predicted.reshape(-1,1)

In [65]:
y_sample.shape

(70, 1)

In [62]:
y_predicted = np.array(y_predicted).flatten()
y_batch = np.array(Y_train.iloc[idx]).flatten()

In [67]:
y_batch

array([21425.45, 27583.55, 22004.  , 24142.75, 28802.3 , 18806.85,
       28927.6 , 17703.95, 29410.3 , 20512.25, 20322.1 , 21211.6 ,
       16896.8 , 18715.05, 18560.8 , 22801.95, 16781.8 , 23655.45,
       22484.25, 17852.3 , 26305.1 , 13729.85, 23026.35, 14677.25,
       20855.55, 23654.4 , 24120.85, 25071.25, 18994.  , 28849.8 ,
       20981.75, 23045.75, 29861.7 , 25764.85, 22143.95, 22636.95,
       22713.15, 19841.65, 21449.6 , 16713.65, 25795.5 , 19253.5 ,
       26321.2 , 25099.8 , 29126.2 , 25969.4 , 20010.  , 21896.35,
       21492.45, 26177.75, 23595.45, 20377.6 , 21023.55, 17942.6 ,
       23894.1 , 22586.05, 23900.95, 19938.2 , 18657.35, 23832.55,
       27592.95, 18240.3 , 22749.75, 30136.75, 16912.3 , 14551.3 ,
       23915.75, 21606.4 , 29791.3 , 29060.05])

In [69]:
intercept_der=-2*np.mean(Y_train.iloc[idx]-y_sample)

In [70]:
intercept_der

-44985.52494336996