In [1]:
import math
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

In [2]:
train =  pd.read_csv('/kaggle/input/random-linear-regression/train.csv') # Read training data
test = pd.read_csv('/kaggle/input/random-linear-regression/test.csv')  # Read test data

#Drop null values
train = train.dropna() 
test = test.dropna()

In [3]:
train.head()

Unnamed: 0,x,y
0,24.0,21.549452
1,50.0,47.464463
2,15.0,17.218656
3,38.0,36.586398
4,87.0,87.288984


In [4]:
px.scatter(x=train['x'], y=train['y'],template='seaborn')

In [5]:
#Set training data and target
x_train = train['x'].values
y_train = train['y'].values
#Set testing data and target
x_test = test['x'].values
y_test = test['y'].values
# X_train = np.expand_dims(X_train, axis=-1)
# X_test = np.expand_dims(X_test, axis=-1)

# Z-score normalization

In [6]:
mean=np.mean(x_train,axis=0)
std=np.std(x_train,axis=0)

In [7]:
mean

50.01430615164521

In [8]:
std

28.933841385275375

In [9]:
x_train=(x_train-mean)/std
x_test=(x_test-mean)/std

In [10]:
x_train[:5]

array([-8.99096176e-01, -4.94443564e-04, -1.21015062e+00, -4.15233705e-01,
        1.27828495e+00])

# Min-max normalization

In [11]:
min_value=np.min(x_train)
max_value=np.max(x_train)

In [12]:
print(min_value)
print(max_value)

-1.72857469859145
1.7275858114641094


In [13]:
min_x_train=(x_train-min_value)/(max_value-min_value)
min_x_test=(x_test-min_value)/(max_value-min_value)

In [14]:
min_x_train[:5]

array([0.24, 0.5 , 0.15, 0.38, 0.87])

# L1 normalization

In [15]:
row_sum_xtrain=np.sum(np.abs(x_train),axis=0)

In [16]:
row_sum_xtrain[np.newaxis]

array([596.27016117])

In [17]:
l1_x_train=x_train/row_sum_xtrain[np.newaxis]

In [20]:
l1_x_train[:10]

array([-1.50786713e-03, -8.29227414e-07, -2.02953410e-03, -6.96385182e-04,
        2.14380163e-03, -8.12311175e-04, -2.20342308e-03,  1.79602366e-03,
       -1.44990413e-03, -2.60916406e-03])

# Model Implementation

In [29]:
class LinearRegression:
    def __init__(self,learning_rate):
        self.learning_rate=learning_rate
    
    def initialize_parameters(self):
        self.w=0
        self.b=0
        self.dw=np.zeros(self.w)
        self.db=np.zeros(self.b)
        
    def forward(self,x):
        z=np.dot(x,self.w)+self.b
        return z
    
    def compute_cost(self,predictions,y):
        m=len(self.x)
        j=np.sum(np.square(np.subtract(predictions,y)))
        j=j*1/2*m
        return j
    
    def backward(self,predictions):
        m=len(self.x)
        self.dw=1/m*np.sum(np.subtract(predictions,self.y))
        self.db=1/m*np.sum(np.multiply(np.subtract(predictions,self.y),self.x))
        
    def fit(self,x,y,iterations,plot_cost=True):
        costs=[]
        self.x=x
        self.y=y
        self.initialize_parameters()
        
        for i in range(iterations):
            z=self.forward(self.x)
            cost=self.compute_cost(z,self.y)
            costs.append(cost)
            self.backward(z)
            
            self.w=self.w-self.learning_rate*self.dw
            self.b=self.b-self.learning_rate*self.db
            if i%1000==0:
                print(f'Iteration: {i} Cost : {cost}')
            
        if plot_cost:
            fig = px.line(y=costs,title="Cost vs Iteration",template="plotly_dark")
            fig.update_layout(
                title_font_color="#41BEE9", 
                xaxis=dict(color="#41BEE9",title="Iterations"), 
                yaxis=dict(color="#41BEE9",title="cost")
            ) 

            fig.show()
            
    def predict(self,x):
        preds=self.forward(x)
        return preds

In [30]:
lr = LinearRegression(0.01)
lr.fit(x_train, y_train,10000)

Iteration: 0 Cost : 815994165.0978628
Iteration: 1000 Cost : 2.3634376307896868e+16
Iteration: 2000 Cost : 1.03822554761776e+25
Iteration: 3000 Cost : 4.560781607990631e+33
Iteration: 4000 Cost : 2.003488444636464e+42
Iteration: 5000 Cost : 8.801048357060738e+50
Iteration: 6000 Cost : 3.8661791332355916e+59
Iteration: 7000 Cost : 1.6983591594829354e+68
Iteration: 8000 Cost : 7.460657499813293e+76
Iteration: 9000 Cost : 3.277363920271523e+85


In [31]:
preds = lr.predict(x_test)
lr.compute_cost(preds, y_test)

5.884090213263632e+93