# Linear Regression Tutorial

Goal fit a $$Y = X* w + b $$ 
Where $Y \in R^N$ is the target, $N$ is the no of samples, <br/>
$X \in R^{N x d}$ is the features for the $N$ samples and $d$ is the feature size <br/>
$w \in R^{d x 1}$ and $b \in R $ are the parameters we are learning <br/>

In [4]:
import numpy as np
from scipy.linalg import norm
import pandas as pd

In [5]:
df = pd.read_csv('forestfires.csv')

Dataset Used is Forestfires from UCI datasets <br/>
Aim:  To predict the burned area of forest fires, in the northeast region of Portugal, by using meteorological and other data 

In [6]:
df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


Feature engineering: Onehot encoding for categorical features

In [7]:
df_month = pd.get_dummies(df['month'])
df_month.head()

Unnamed: 0,apr,aug,dec,feb,jan,jul,jun,mar,may,nov,oct,sep
0,0,0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,1,0,0,0,0


In [8]:
df.drop('month', axis=1, inplace=True)
df.head()

Unnamed: 0,X,Y,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [9]:
df_day = pd.get_dummies(df['day'])
df_day.head()

Unnamed: 0,fri,mon,sat,sun,thu,tue,wed
0,1,0,0,0,0,0,0
1,0,0,0,0,0,1,0
2,0,0,1,0,0,0,0
3,1,0,0,0,0,0,0
4,0,0,0,1,0,0,0


In [10]:
df.drop('day', axis=1, inplace=True)
df.head()

Unnamed: 0,X,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [11]:
df = df.join(df_month).join(df_day)

In [12]:
df.head()

Unnamed: 0,X,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,nov,oct,sep,fri,mon,sat,sun,thu,tue,wed
0,7,5,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,1,0,0,0,0,0,0
1,7,4,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,1,0,0,0,0,0,0,1,0
2,7,4,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,1,0,0,0,1,0,0,0,0
3,8,6,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,1,0,0,0,0,0,0
4,8,6,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,0,0,1,0,0,0


In [13]:
target =df.area.values  

In [14]:
df.drop('area', axis=1, inplace=True)

In [15]:
X = df.values

In [16]:
X.shape

(517, 29)

$Y = X*w + b$ <br/>
$Y = [X, 1] * w^{new}$ <br/>
Where $w^{new} \in R^{d+1}$

In [29]:
# Added an extra feature of all ones for learning intercept 
X = np.append(X, np.ones((517, 1)) , axis=1)

# Train data and test data Spliting

In [30]:
X_train = X[:-100]
Y_train = target[:-100]
X_test  =X[-100:]
Y_test = target[-100 :]
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

(417, 31) (417,) (100, 31) (100,)


In [31]:
Y_train = Y_train[: , np.newaxis]
Y_test = Y_test[:, np.newaxis]

# Loss function

$$obj = \frac{1}{2} \frac{||Y- X w||_2^2}{N}  + \frac{1}{2}\lambda ||w||_2^2$$ 

In [34]:
def cost_function(X, Y, w, lbda):
    N = X.shape[0]
    obj = (0.5/N) * (norm(Y- X.dot(w), 2) ** 2 ) + 0.5 * lbda * (norm(w, 2) ** 2)
    return obj

In [35]:
N_train = X_train.shape[0]

In [46]:
N_test = X_test.shape[0]

# Update Rule

$$ w(t+1) = w(t) -\gamma \Delta_{w} Obj$$ </br>
$$ w(t+1) = w(t) - \gamma \big[ - X^T \frac{\sum_i^N \big( Y - X w(t) \big) }{N} + \lambda w(t) \big]$$

# Without Regularization

In [107]:
n_epoch = 2000  # No of epochs
lr = 0.000001   # Learning rate
lbda = 0  # regularization 
np.random.seed(40)
w = np.random.rand(X_train.shape[1], 1 )
print('start: ', "Train Mean squared error= ", (norm(Y_train - X_train.dot(w), 2)**2)/N_train )
print('       ', "Test Mean squared error=", (norm(Y_test - X_test.dot(w), 2)**2)/N_test )
        
for i in range(n_epoch):
   
    w = w - lr * ( -1 * X_train.T.dot(Y_train-X_train.dot(w))/N_train  + lbda * w )
    if i % 200== 0 :
        print('Epoch ' + str(i+1) )
        print('       ', "Train Mean squared error= ", (norm(Y_train - X_train.dot(w), 2)**2)/N_train )
        print('       ', "Test Mean squared error=", (norm(Y_test - X_test.dot(w), 2)**2)/N_test )
  
        
print('end:   ', "Train Mean squared error= ", (norm(Y_train - X_train.dot(w), 2)**2)/N_train )
print('       ', "Test Mean squared error=", (norm(Y_test - X_test.dot(w), 2)**2)/N_test )
             
    

start:  Train Mean squared error=  163201.5081081133
        Test Mean squared error= 188927.48786254038
Epoch 1
        Train Mean squared error=  67217.46105968219
        Test Mean squared error= 78360.44623414379
Epoch 201
        Train Mean squared error=  5630.772508740833
        Test Mean squared error= 2017.6545944006327
Epoch 401
        Train Mean squared error=  5129.191945809389
        Test Mean squared error= 1621.6686722927275
Epoch 601
        Train Mean squared error=  4881.419940672655
        Test Mean squared error= 1458.4228572080256
Epoch 801
        Train Mean squared error=  4756.166239254546
        Test Mean squared error= 1402.717302676287
Epoch 1001
        Train Mean squared error=  4691.174980413377
        Test Mean squared error= 1395.936354283599
Epoch 1201
        Train Mean squared error=  4656.441094410883
        Test Mean squared error= 1410.153708813211
Epoch 1401
        Train Mean squared error=  4637.23707743946
        Test Mean squared error

# With Regularization 

In [106]:
n_epoch = 2000  # No of epochs
lr =  0.000001     # Learning rate
lbda =  400 # regularization 
np.random.seed(40)
w = np.random.rand(X_train.shape[1], 1 )
print('start: ', "Train Mean squared error= ", (norm(Y_train - X_train.dot(w), 2)**2)/N_train )
print('       ', "Test Mean squared error=", (norm(Y_test - X_test.dot(w), 2)**2)/N_test )
        
for i in range(n_epoch):
   
    w = w - lr * ( -1 * X_train.T.dot(Y_train-X_train.dot(w))/N_train  + lbda * w )
    if i % 200== 0 :
        print('Epoch ' + str(i+1) )
        print('       ', "Train Mean squared error= ", (norm(Y_train - X_train.dot(w), 2)**2)/N_train )
        print('       ', "Test Mean squared error=", (norm(Y_test - X_test.dot(w), 2)**2)/N_test )
  
        
print('end:   ', "Train Mean squared error= ", (norm(Y_train - X_train.dot(w), 2)**2)/N_train )
print('       ', "Test Mean squared error=", (norm(Y_test - X_test.dot(w), 2)**2)/N_test )
             
    

start:  Train Mean squared error=  163201.5081081133
        Test Mean squared error= 188927.48786254038
Epoch 1
        Train Mean squared error=  67135.23759962992
        Test Mean squared error= 78261.95693204843
Epoch 201
        Train Mean squared error=  5483.30418673061
        Test Mean squared error= 1895.2227564437637
Epoch 401
        Train Mean squared error=  4996.289114952517
        Test Mean squared error= 1523.01674861943
Epoch 601
        Train Mean squared error=  4790.817461745081
        Test Mean squared error= 1403.0952887968783
Epoch 801
        Train Mean squared error=  4701.524390855429
        Test Mean squared error= 1379.7135950230688
Epoch 1001
        Train Mean squared error=  4661.253351568113
        Test Mean squared error= 1390.979564506222
Epoch 1201
        Train Mean squared error=  4642.232312995396
        Test Mean squared error= 1412.2588846571712
Epoch 1401
        Train Mean squared error=  4632.729026392391
        Test Mean squared error