In [1]:
# needed libraries
import pandas as pd

In [2]:
# Simple data
df = pd.DataFrame({
    "Area": [10, 20, 30, 40, 50],
    "price": [500, 700, 800, 1000, 1500]
})

# preprocessing 
Theta = [1, 1]  
Alpha = 0.01  
df["Y^"] = [0]*df.Area.count()
df["MSE"] = [0]*df.Area.count()
df["Y^-Y"] = [0]*df.Area.count()

# Scaling the area 0:1 values
df["Normalized_Area"] = (df["Area"] - df["Area"].min()) / (df["Area"].max() - df["Area"].min())
df

Unnamed: 0,Area,price,Y^,MSE,Y^-Y,Normalized_Area
0,10,500,0,0,0,0.0
1,20,700,0,0,0,0.25
2,30,800,0,0,0,0.5
3,40,1000,0,0,0,0.75
4,50,1500,0,0,0,1.0


In [3]:
# updating the predicton every gradient descent value
def update(Theta):
    df["Y^"] = Theta[0] + Theta[1] * df.Area
    df["MSE"] = (df.price - df["Y^"])**2
    df["Y^-Y"] = df["Y^"] - df["price"]

In [4]:
# Training
def GD(Theta, Alpha, num_iterations=1000):
    n = len(df)

    for i in range(num_iterations):
        
        # GD Equation
        gradient_Theta0 = df["Y^-Y"].sum()
        gradient_Theta1 = (df["Y^-Y"]*df["Normalized_Area"]).sum()

        # New Thetas
        Theta[0] -= (Alpha / n) * gradient_Theta0
        Theta[1] -= (Alpha / n) * gradient_Theta1

        # Update predictions
        update(Theta)

        # Debuging
        if i % 100 == 0:
            print("I: ", i,"m: ", Theta[0], "W1: ", Theta[1],"MSE: ", df.MSE.mean())

    return Theta

# Initial Theta values  
Theta = [1, 1]
# Learning rate 
Alpha = 0.01   

GD(Theta, Alpha)

I:  0 m:  1.0 W1:  1.0 MSE:  862161.0
I:  100 m:  77.13316109410043 W1:  26.362161095955425 MSE:  13484.953836798537
I:  200 m:  105.60318671278803 W1:  25.64173444096336 MSE:  12228.012135495246
I:  300 m:  127.9728010801798 W1:  25.075677117519692 MSE:  11452.02161971967
I:  400 m:  145.54916907165827 W1:  24.63091165765462 MSE:  10972.953035541232
I:  500 m:  159.3593629667391 W1:  24.281448262171022 MSE:  10677.193353327386
I:  600 m:  170.21037925817103 W1:  24.00686609290818 MSE:  10494.601982254168
I:  700 m:  178.73629536416107 W1:  23.791119984298586 MSE:  10381.876648832005
I:  800 m:  185.4353218418728 W1:  23.62160284665944 MSE:  10312.284089663113
I:  900 m:  190.6989152424169 W1:  23.488408972904026 MSE:  10269.320151170796


[194.79803771026334, 23.384681735835542]

In [5]:
class linear_regression:
    def __init__(self, df):
        self.df = df
        self.df["Y^"] = [0] * self.df.Area.count()
        self.df["MSE"] = [0] * self.df.Area.count()
        self.df["Y^-Y"] = [0] * self.df.Area.count()
        

    # updating the predicton every gradient descent value
    def update(self, Theta):
        self.df["Y^"] = Theta[0] + Theta[1] * self.df.Area
        self.df["MSE"] = (self.df.price - self.df["Y^"])**2
        self.df["Y^-Y"] = self.df["Y^"] - self.df["price"]

    # Training
    def GD(self, df, Theta, Alpha, num_iterations=1000):
        n = len(df)
        for i in range(num_iterations):
            # GD Equation
            gradient_Theta0 = df["Y^-Y"].sum()
            gradient_Theta1 = (df["Y^-Y"] * df["Normalized_Area"]).sum()

            # New Thetas
            Theta[0] -= (Alpha / n) * gradient_Theta0
            Theta[1] -= (Alpha / n) * gradient_Theta1

            # Update predictions
            self.update(Theta)

            # Debugging
            if i % 100 == 0:
                print("I:", i, "m:", Theta[0], "W1:", Theta[1], "MSE:", df.MSE.mean())

        return Theta
    
    # Fitting
    def fit(self, Theta=[1,1], Alpha=0.001, num_iterations=1000):
        self.Theta = self.GD(self.df, Theta, Alpha, num_iterations)
        return self.Theta
    
    # Prediction
    def predict(self, x):
        y = self.Theta[0] + self.Theta[1] * x
        print("x: ", x, "y: ", y)
        return y
    
    # Cost Function
    def error(self):
        print("MSE: ", self.df.MSE.mean())
        return self.df.MSE.mean()


In [6]:
lr = linear_regression(df)

In [7]:
lr.fit([1,1], 0.01)

I: 0 m: 1.0 W1: 1.0 MSE: 862161.0
I: 100 m: 77.13316109410043 W1: 26.362161095955425 MSE: 13484.953836798537
I: 200 m: 105.60318671278803 W1: 25.64173444096336 MSE: 12228.012135495246
I: 300 m: 127.9728010801798 W1: 25.075677117519692 MSE: 11452.02161971967
I: 400 m: 145.54916907165827 W1: 24.63091165765462 MSE: 10972.953035541232
I: 500 m: 159.3593629667391 W1: 24.281448262171022 MSE: 10677.193353327386
I: 600 m: 170.21037925817103 W1: 24.00686609290818 MSE: 10494.601982254168
I: 700 m: 178.73629536416107 W1: 23.791119984298586 MSE: 10381.876648832005
I: 800 m: 185.4353218418728 W1: 23.62160284665944 MSE: 10312.284089663113
I: 900 m: 190.6989152424169 W1: 23.488408972904026 MSE: 10269.320151170796


[194.79803771026334, 23.384681735835542]

In [8]:
lr.predict(50)

x:  50 y:  1364.0321245020405


1364.0321245020405

In [9]:
lr.error()

MSE:  10243.002664629228


10243.002664629228