# Uni-variate Linear Regression

### Import the needed libraries

In [415]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### read_data

In [416]:
# Load the univariate dataset (the file has no header row). The first column
# is used as the feature and the last column as the target.
data1 = pd.read_csv("./data/univariateData.dat", header=None)
x1 = np.asarray(data1.iloc[:, :1])
y1 = np.asarray(data1.iloc[:, -1:])

# Random train/test split using train_test_split's default settings.
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1)
m1_train = x1_train.shape[0]

# Prepend a column of ones to the training features so that theta1[0] acts
# as the intercept term.
x1_train = np.concatenate((np.ones((m1_train, 1)), x1_train), axis=1)

# Gradient-descent settings and a zero-initialised parameter column vector
# with one entry per column of x1_train.
iteration1 = 1500
alpha1 = 0.01
theta1 = np.zeros((x1_train.shape[1], 1))


In [417]:
# Report the shapes of the four splits. The third entry is y1_train, so it is
# labelled y_train (it was previously mislabelled as x_test).
print(f"x_train : {x1_train.shape}\nx_test : {x1_test.shape} \ny_train : {y1_train.shape} \ny_test: {y1_test.shape}")

x_train : (72, 2)
x_test : (25, 1) 
x_test : (72, 1) 
y_test: (25, 1)


### Compute the cost function

In [418]:
def compute_cost(x, y, theta):
    """Return the halved mean squared error of the linear model ``x . theta``.

    Args:
        x: Design matrix, one row per sample.
        y: Target values; ``len(y)`` is used as the sample count.
        theta: Parameter vector multiplied with ``x`` via ``np.dot``.

    Returns:
        ``sum((x . theta - y) ** 2) / (2 * len(y))`` as a scalar.
    """
    residuals = np.dot(x, theta) - y
    squared_error_sum = np.sum(np.power(residuals, 2))
    return (1 / (2 * len(y))) * squared_error_sum

### compute gradient descent

In [419]:
def gradient_descent(x, y, theta, n_iter, l_rate):
    """Run batch gradient descent for a linear model.

    Args:
        x: Design matrix, one row per sample (must support ``.transpose()``).
        y: Target values; ``len(y)`` is used as the sample count.
        theta: Initial parameter vector. It is rebound to a new array on each
            step rather than updated in place.
        n_iter: Number of update steps to perform.
        l_rate: Learning rate applied to the averaged gradient.

    Returns:
        A tuple ``(theta, J_history)``: the parameters after ``n_iter``
        updates and a list holding the cost computed after each update.
    """
    m = len(y)
    J_history = []
    # The prediction is computed inside the loop only; a pre-loop computation
    # would be discarded by the first iteration.
    for _ in range(n_iter):
        residual = np.dot(x, theta) - y
        gradient = np.dot(x.transpose(), residual)
        theta = theta - (l_rate / m) * gradient
        # Record the cost of the freshly updated parameters.
        J_history.append(compute_cost(x, y, theta))

    return theta, J_history
        

### train model

In [420]:
def fit(x, y, theta, n_iter, l_rate):
    """Train the linear model by delegating to ``gradient_descent``.

    Returns:
        A tuple of the fitted parameters and the per-iteration cost list, as
        produced by ``gradient_descent``.
    """
    fitted_theta, cost_trace = gradient_descent(x, y, theta, n_iter, l_rate)
    return fitted_theta, cost_trace
    

In [421]:
# Fit the univariate model; theta1 is replaced by the trained parameters and
# J_history1 receives the cost recorded after each iteration.
theta1, J_history1 = fit(x1_train,y1_train,theta1,iteration1,alpha1)

In [422]:
# Display the shape of the trained parameter vector.
theta1.shape

(2, 1)

### prediction

In [423]:
def predict(x, theta):
    """Return the linear-model output ``np.dot(x, theta)``.

    Args:
        x: A single sample or a matrix of samples. Callers in this file
            include the leading bias value 1 in each sample.
        theta: Parameter vector.
    """
    return np.dot(x, theta)

In [424]:
# Predict for one sample: the leading 1 is the bias term, 5.5277 the feature value.
predict( [1,5.5277],theta1)

array([3.24909013])

### evaluation

In [425]:
def evaluate(x_test, y_test, theta):
    """Print and return the mean squared error of the model on a test set.

    Args:
        x_test: Test features without a bias column; a column of ones with
            ``len(y_test)`` rows is prepended here.
        y_test: True target values.
        theta: Parameter vector passed to ``predict``.

    Returns:
        The value returned by ``mean_squared_error(y_test, predictions)``.
    """
    bias_column = np.ones((len(y_test), 1))
    design_matrix = np.concatenate((bias_column, x_test), axis=1)
    MSE = mean_squared_error(y_test, predict(design_matrix, theta))
    print("MSE = ", MSE)
    return MSE

In [426]:
# Score the univariate model on the held-out split; evaluate() adds the bias
# column to x1_test itself.
MSE = evaluate(x1_test , y1_test , theta1)

MSE =  7.029417991480569


# Multi-variate Linear Regression

### read data 

In [427]:
# Load the multivariate dataset (the file has no header row). Every column
# except the last is a feature; the last column is the target.
data = pd.read_csv("./data/multivariateData.dat", header=None)
x2 = np.asarray(data.iloc[:, :-1])
y2 = np.asarray(data.iloc[:, -1:])

# Random train/test split using train_test_split's default settings.
x2_train, x2_test, y2_train, y2_test = train_test_split(x2, y2)
m2_train = y2_train.shape[0]

# Gradient-descent settings. theta2 has one extra row because a bias column
# is added to x2_train in a later cell.
iteration2 = 1500
alpha2 = 0.01
theta2 = np.zeros((x2_train.shape[1] + 1, 1))
print(f"{x2_train.shape}\n{x2_test.shape}\n{y2_train.shape}\n{y2_test.shape}")

(35, 2)
(12, 2)
(35, 1)
(12, 1)


### normalize

In [428]:
def normalize(x):
    """Scale ``x`` by a single norm computed over the whole array.

    ``np.linalg.norm`` is called with its default arguments, so every element
    is divided by the same scalar; columns are not scaled individually.
    """
    return x / np.linalg.norm(x)

In [429]:
# Rescale the training features in place of the raw values. Only x2_train is
# normalized in this cell; the divisor used is not stored.
x2_train = normalize(x2_train)

### add bias

In [430]:
# Prepend a column of ones so that theta2[0] acts as the intercept term.
bias_and_features = (np.ones((m2_train, 1)), x2_train)
x2_train = np.concatenate(bias_and_features, axis=1)
print(x2_train.shape)

(35, 3)


### train model 

In [431]:
# Fit the multivariate model; theta2 is replaced by the trained parameters and
# J_history receives the cost recorded after each iteration.
theta2 , J_history = fit(x2_train,y2_train,theta2,iteration2,alpha2)

In [432]:
# Display the cost recorded after each gradient-descent iteration.
J_history

[66859214948.61883,
 65651259636.07172,
 64467935650.34648,
 63308740583.25591,
 62173182274.30454,
 61060778601.66521,
 59971057277.41869,
 58903555646.97024,
 57857820492.55719,
 56833407840.764565,
 55829882773.96683,
 54846819245.61558,
 53883799899.29496,
 52940415891.46778,
 52016266717.83708,
 51110960043.24947,
 50224111535.06797,
 49355344699.94344,
 48504290723.91563,
 47670588315.77544,
 46853883553.62243,
 46053829734.55188,
 45270087227.40789,
 44502323328.539986,
 43750212120.50172,
 43013434333.6315,
 42291677210.45686,
 41584634372.86429,
 40892005691.97858,
 40213497160.6962,
 39548820768.8187,
 38897694380.73289,
 38259841615.58613,
 37634991729.90567,
 37022879502.61217,
 36423245122.37864,
 35835834077.287,
 35260397046.735214,
 34696689795.54931,
 34144473070.25516,
 33603512497.466057,
 33073578484.342827,
 32554446121.084354,
 32045895085.406948,
 31547709548.972134,
 31059678085.722973,
 30581593582.09013,
 30113253149.029385,
 29654458035.85345,
 29205013545.82

### prediction

In [433]:
# Predict for one sample (leading 1 is the bias term) with the fitted
# multivariate parameters. The name `theta` is not defined anywhere in this
# file; `theta2` is the result of the multivariate fit.
predict([1,1,2],theta2)

array([446306.74234999])

In [434]:
# Display the shape of the held-out features (no bias column added yet).
x2_test.shape

(12, 2)

In [435]:
# NOTE(review): x2_train was passed through normalize() before fitting, but
# x2_test is handed to evaluate() unscaled, so theta2 is applied to features
# on a different scale than it was trained on. Confirm whether the test
# features should be divided by the same norm that was used for training.
MSE = evaluate(x2_test,y2_test,theta2)

MSE =  7.906950942575093e+16
