# Regression Lineaire from scratch

- Normalize features ($\mu$, $\sigma$)
- Cost functions ()
- Gradient descent ()

m : number of observations in the training data set.  
n : features
iterations = 1000  
delta = 0.001  
alpha = 0.03

J(w) = $\frac{1}{2m}$ $\sum_{i=1}^{nb}$ ( h<sub>w</sub> (x<sup>(i)</sup>) - y<sup>(i)</sup> )<sup>2</sup>

Gradient descent:   
Repeat until convergence  
w<sub>k</sub> = w<sub>k</sub> - $\frac{lamda}{m}$ $\sum_{i=1}^{nb}$ ( h<sub>w</sub> (x<sup>(i)</sup>) - y<sup>(i)</sup> ) x<sub>k</sub><sup>(i)</sup>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
xls = pd.ExcelFile('./Folds5x2_pp.xlsx')
df1 = pd.read_excel(xls, 'Sheet1')
df2 = pd.read_excel(xls, 'Sheet2')
df3 = pd.read_excel(xls, 'Sheet3')
df4 = pd.read_excel(xls, 'Sheet4')
df5 = pd.read_excel(xls, 'Sheet5')

m = len(df1)
n_features = 4
n_iter = 1000
delta = 0.0000001
learning_rate = 0.01

w = np.ones((n_features + 1, 1))

X = np.array((np.ones((m)),  df1['AT'], df1['V'], df1['AP'], df1['RH'])).T
Y = df1['PE'].values

print("Total samples in our dataset is: {}".format(X.shape[0]))

Total samples in our dataset is: 9568


In [3]:
print(w.shape, w)
print(X.shape, X)

(5, 1) [[1.]
 [1.]
 [1.]
 [1.]
 [1.]]
(9568, 5) [[1.00000e+00 1.49600e+01 4.17600e+01 1.02407e+03 7.31700e+01]
 [1.00000e+00 2.51800e+01 6.29600e+01 1.02004e+03 5.90800e+01]
 [1.00000e+00 5.11000e+00 3.94000e+01 1.01216e+03 9.21400e+01]
 ...
 [1.00000e+00 3.13200e+01 7.43300e+01 1.01292e+03 3.64800e+01]
 [1.00000e+00 2.44800e+01 6.94500e+01 1.01386e+03 6.23900e+01]
 [1.00000e+00 2.16000e+01 6.25200e+01 1.01723e+03 6.78700e+01]]


In [4]:
def normalisation(X):
    return (X - X.mean())/X.std()
X = normalisation(X)
Y = normalisation(Y)

In [5]:
def h(X, omega):
    return np.dot(X,omega)

In [6]:
def compute_cost(X, y, params):
    n_samples = len(y)
    return (1/(2*n_samples))*np.sum((h(X,params)-y)**2)

print(w.shape)
print(X.shape)
compute_cost(X, Y, w)

(5, 1)
(9568, 5)


4793.409334689109

In [8]:
def gradient_descent(X, Y, omega, learning_rate,n_iter):
    n_samples = len(Y)
    cost_history = np.zeros(n_iter)
    #omega_history = np.zeros((n_iter))
    #print(omega_history[0].shape)
    
    for i in range(n_iter):
        prediction = h(X,omega)
        
        omega = omega - (1/n_samples) * learning_rate * (X.T.dot((prediction - Y)))
        cost_history[i] = compute_cost(X,Y,omega)
    
    return omega, cost_history

KeyboardInterrupt: 

In [None]:
params, cost_history  = gradient_descent(X,Y,w,learning_rate,n_iter=1000)
plt.title('Cost Function J')
plt.xlabel('No. of iterations')
plt.ylabel('Cost')
plt.plot(cost_history)
plt.show()

In [None]:
df = pd.read_csv('./house.csv')

m = len(df)
n_features = 3
n_iter = 1000
delta = 0.0000001
learning_rate = 0.03

w = np.ones((n_features + 1, 1))

X = np.array((np.ones((m)),  df['size'], df['nb_rooms'], df['garden'])).T
Y = df['price'].values

print("Total samples in our dataset is: {}".format(X.shape[0]))

In [None]:
J_history, params = gradient_descent(X,Y,w,learning_rate,n_iter, delta)
plt.title('Cost Function J')
plt.xlabel('No. of iterations')
plt.ylabel('Cost')
plt.plot(J_history)
plt.show()

In [None]:
gradient_descent(X,Y,w,learning_rate,n_iter, delta)