# Uni-variate Linear Regression

### Import the required libraries

In [3]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### Read the data

In [4]:
# Load the univariate dataset: the first column is the feature, the last column the target.
data1 = pd.read_csv("./data/univariateData.dat", header=None)
x1 = data1.iloc[:, 0:1].to_numpy()
y1 = data1.iloc[:, -1:].to_numpy()
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1)

# Prepend a column of ones to the training features so theta1[0] acts as the intercept.
m1_train = x1_train.shape[0]
x1_train = np.hstack((np.ones((m1_train, 1)), x1_train))

# Gradient-descent settings and zero-initialised parameters (one per column of x1_train).
iteration1 = 1500
alpha1 = 0.01
theta1 = np.zeros((x1_train.shape[1], 1))


In [5]:
# Report the shapes of the four splits (x1_train already carries the bias column).
# The third label previously read "x_test" although it prints y1_train's shape.
print(f"x_train : {x1_train.shape}\nx_test : {x1_test.shape} \ny_train : {y1_train.shape} \ny_test: {y1_test.shape}")

x_train : (72, 2)
x_test : (25, 1) 
x_test : (72, 1) 
y_test: (25, 1)


### Compute the cost function

In [6]:
def compute_cost(x, y, theta):
    """Return the halved mean squared error of the linear model.

    The prediction is ``np.dot(x, theta)``; the cost is the sum of squared
    differences between prediction and ``y`` divided by ``2 * len(y)``.
    """
    n_samples = len(y)
    residuals = np.dot(x, theta) - y
    return (1 / (2 * n_samples)) * np.sum(np.power(residuals, 2))

### compute gradient descent

In [7]:
def gradient_descent(x, y, theta, n_iter, l_rate):
    """Fit ``theta`` by batch gradient descent on the squared-error cost.

    Args:
        x: Design matrix, used as-is (no bias column is added here).
        y: Targets; ``len(y)`` is used as the sample count.
        theta: Initial parameters. The caller's array is not modified;
            each step builds a new array.
        n_iter: Number of update steps to perform.
        l_rate: Learning rate applied to every step.

    Returns:
        A ``(theta, J_history)`` tuple: the final parameters and a list
        holding the cost computed after each update.
    """
    m = len(y)
    J_history = []
    for _ in range(n_iter):
        residual = np.dot(x, theta) - y
        gradient = np.dot(x.transpose(), residual)
        theta = theta - (l_rate / m) * gradient
        # Record the cost of the freshly updated parameters.
        J_history.append(compute_cost(x, y, theta))

    return theta, J_history
        

### train model

In [8]:
def fit(x, y, theta, n_iter, l_rate):
    """Train the linear model by delegating to ``gradient_descent``.

    Returns the ``(theta, J_history)`` pair produced by the optimiser.
    """
    return gradient_descent(x, y, theta, n_iter, l_rate)
    

In [9]:
# Train the univariate model; J_history1 holds the cost after each of the iteration1 updates.
theta1, J_history1 = fit(x1_train,y1_train,theta1,iteration1,alpha1)

In [10]:
# Check the shape of the learned parameters (one row per column of x1_train).
theta1.shape

(2, 1)

### prediction

In [11]:
def predict(x, theta):
    """Return the linear-model output ``np.dot(x, theta)`` for the given inputs."""
    return np.dot(x, theta)

In [12]:
# Predict for one sample: the leading 1 pairs with the bias weight, 5.5277 is the feature value.
predict( [1,5.5277],theta1)

array([3.10442561])

### evaluation

In [13]:
def evaluate(x_test, y_test, theta):
    """Print and return the mean squared error of the model on a test set.

    A column of ones (one row per entry of ``y_test``) is prepended to
    ``x_test`` before predicting, so ``x_test`` must be passed without a
    bias column.
    """
    bias_column = np.ones((len(y_test), 1))
    design = np.append(bias_column, x_test, axis=1)
    MSE = mean_squared_error(y_test, predict(design, theta))
    print("MSE = ", MSE)
    return MSE

In [14]:
# Score the univariate model on the held-out split (evaluate adds the bias column itself).
MSE = evaluate(x1_test , y1_test , theta1)

MSE =  8.753704781325643


# Multi-variate Linear Regression

### read data 

In [15]:
# Load the multivariate dataset: every column but the last is a feature, the last is the target.
data = pd.read_csv("./data/multivariateData.dat", header=None)
x2 = data.iloc[:, :-1].to_numpy()
y2 = data.iloc[:, -1:].to_numpy()
x2_train, x2_test, y2_train, y2_test = train_test_split(x2, y2)
m2_train = y2_train.shape[0]

# Gradient-descent settings; theta2 has one extra row for the bias column added later.
iteration2 = 1500
alpha2 = 0.01
theta2 = np.zeros((x2_train.shape[1] + 1, 1))
print(f"{x2_train.shape}\n{x2_test.shape}\n{y2_train.shape}\n{y2_test.shape}")

(35, 2)
(12, 2)
(35, 1)
(12, 1)


### normalize

In [16]:
def normalize(x):
    """Scale ``x`` so that its overall norm is 1.

    ``np.linalg.norm`` is called without an axis, so a single scalar norm
    is computed over the whole input and every entry is divided by that one
    factor. This is NOT a per-column (per-feature) scaling.

    An input whose norm is zero (all zeros, or empty) is returned as an
    array of zeros of the same shape instead of dividing by zero and
    producing NaNs.
    """
    x = np.asarray(x)
    norm = np.linalg.norm(x)
    if norm == 0:
        # Nothing to scale; avoid 0/0 -> NaN.
        return np.zeros(x.shape)
    return x / norm

In [17]:
# Scale the training features by their overall norm.
# NOTE(review): the scale factor is not kept, and x2_test is never passed through
# normalize in the visible cells — confirm the test features end up on the same scale.
x2_train = normalize(x2_train)

### add bias

In [18]:
# Prepend a column of ones so the first entry of theta2 acts as the intercept.
bias_column = np.ones((m2_train, 1))
x2_train = np.hstack((bias_column, x2_train))
print(x2_train.shape)

(35, 3)


### train model 

In [19]:
# Train the multivariate model on the scaled, bias-augmented features;
# J_history holds the cost after each of the iteration2 updates.
theta2 , J_history = fit(x2_train,y2_train,theta2,iteration2,alpha2)

In [20]:
# Display the per-iteration cost recorded during training.
J_history

[68125876522.92311,
 66901100355.480286,
 65701278149.33954,
 64525901281.666435,
 63374471496.62246,
 62246500694.05981,
 61141510722.52331,
 60059033176.47128,
 58998609197.62967,
 57959789280.39504,
 56942133081.203896,
 55945209231.787605,
 54968595156.23358,
 54011876891.77522,
 53074648913.23472,
 52156513961.04391,
 51257082872.77068,
 50375974418.079315,
 49512815137.05462,
 48667239181.821396,
 47838888161.392296,
 47027410989.67784,
 46232463736.59454,
 45453709482.20768,
 44690818173.8471,
 43943466486.135254,
 43211337683.86841,
 42494121487.6924,
 41791513942.51647,
 41103217288.6091,
 40428939835.32112,
 39768395837.382835,
 39121305373.72239,
 38487394228.75417,
 37866393776.08681,
 37258040864.60158,
 36662077706.85269,
 36078251769.742386,
 35506315667.42421,
 34946027056.3893,
 34397148532.69102,
 33859447531.264412,
 33332696227.297787,
 32816671439.614586,
 32311154536.024586,
 31815931340.604218,
 31330792042.866882,
 30855531108.7844,
 30389947193.62225,
 29933843

### prediction

In [22]:
# Predict for one sample: leading 1 for the bias weight, then two feature values.
# NOTE(review): theta2 was fitted on features scaled by normalize(); the values
# 1 and 2 here are passed unscaled — confirm which scale is intended.
predict([1,1,2],theta2)

array([480706.44828375])

In [23]:
# Check the shape of the held-out features (no bias column yet; evaluate adds it).
x2_test.shape

(12, 2)

In [24]:
# Score the multivariate model on the held-out split.
# NOTE(review): x2_train was scaled with normalize() before training, but x2_test is
# passed here unscaled and evaluate() only adds the bias column. The train/test
# feature scales therefore differ, which likely inflates the reported MSE — confirm
# and apply the training scale factor to x2_test.
MSE = evaluate(x2_test,y2_test,theta2)

MSE =  9.65758111948867e+16


# Lasso Regression