# Linear Regression

In [34]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston

### Load Data

In [35]:
data=load_boston()

In [36]:
train_data=data.data

### Normalize Data

In [38]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
train_data=scaler.fit_transform(train_data)

### Train-Test Split

In [39]:
X_train=train_data[:380]
X_train.shape

(380, 13)

In [40]:
X_test=train_data[380:]
X_test.shape

(126, 13)

In [41]:
y_train=data.target[:380]
y_train.shape

(380,)

In [42]:
y_test=data.target[380:]
y_test.shape

(126,)

## Linear Regression using sklearn

In [43]:
from sklearn.linear_model import LinearRegression

### Train Model

In [258]:
lm=LinearRegression(fit_intercept=False)

In [259]:
lm.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,
         normalize=False)

### Coefficeints/Weights

In [262]:
lm.coef_

array([-27.98288286,   4.63524379,   3.57670266,   0.74680803,
        -2.39562713,  35.16674705,   0.78113389,  -8.37565552,
        13.27519506,  -6.34668043,  -5.72842707,  13.16463217,
       -13.44290616])

### Testing Model

In [58]:
y_pred_train=lm.predict(X_train)
y_pred_test=lm.predict(X_test)

In [59]:
from sklearn import metrics

### RMSE

In [60]:
print("Train error",np.sqrt(metrics.mean_squared_error(y_train, y_pred_train)))
print("Test error",np.sqrt(metrics.mean_squared_error(y_test, y_pred_test)))

Train error 4.60417357218944
Test error 7.819688142087272


## Linear Regression from Scratch

## Steps:
- Define hypothesis
- Choose loss(here squared loss)
- Define Cost Function

In [89]:
from random import random

### Hypothesis

In [266]:
def hypothesis(x,w):
    return np.matmul(x,w.T)

### Loss

In [267]:
def loss(x,y,w):
    return (hypothesis(x,w)-y)

### Cost Function

In [268]:
def cost_function(x,y,w):
    cost=np.sum(loss(x,y,w)**2)
    cost=cost/(2.0*len(y_train))
    return cost

### Derivative of Cost Function

In [269]:
def derivative(x,y,w,i):
    cost=np.sum(loss(x,y,w)*x[:,i])
    cost=(float)(cost/len(y_train))
    return cost

### Gradient Descent

In [343]:
def gradient_descent(x,y,w,alpha=3):
    for i in range(10001):
        cost=cost_function(x,y,w)
        if i%2000==0:
            print('Epoch=',i,'error=',cost)
        for i in range(len(w)):
            w[i]=w[i]-alpha*derivative(x,y,w,i)
    return w

### Predicting Values

In [344]:
def predict(x,w):
    y=hypothesis(x,w)
    return y

### Initializing Weights

In [345]:
weights=np.zeros(X_train.shape[1])

### Train Model

In [347]:
weights=gradient_descent(X_train,y_train,weights)

Epoch= 0 error= 11.894059948742381
Epoch= 2000 error= 11.894059944357993
Epoch= 4000 error= 11.894059944215048
Epoch= 6000 error= 11.894059944210387
Epoch= 8000 error= 11.894059944210236
Epoch= 10000 error= 11.89405994421023


### Coefficients

#### Coefficients(Linear Regression from Scratch)

In [348]:
print(weights)

[-27.98288178   4.63524379   3.57670265   0.74680804  -2.39562712
  35.16674702   0.7811339   -8.3756555   13.27519495  -6.34668045
  -5.72842706  13.16463219 -13.44290621]


In [1]:
#### #### Coefficients(Linear Regression using sklearn)

In [349]:
print(lm.coef_)

[-27.98288286   4.63524379   3.57670266   0.74680803  -2.39562713
  35.16674705   0.78113389  -8.37565552  13.27519506  -6.34668043
  -5.72842707  13.16463217 -13.44290616]


### Testing Model

In [350]:
y_pred_train=predict(X_train,weights)

In [351]:
y_pred_test=predict(X_test,weights)

In [352]:
print("Train error",np.sqrt(metrics.mean_squared_error(y_train, y_pred_train)))
print("Test error",np.sqrt(metrics.mean_squared_error(y_test, y_pred_test)))

Train error 4.877306622350133
Test error 7.976082494729241


# Inference

- Weights calculated from both the models are almost same
- RMSE of both the models are almost similar