# Linear Regression 

![title](asset/linearformula.png)

--------
![title](asset/costfunction.png)

# Gradient descent: a first-order iterative optimization algorithm for finding a minimum of a function

![title](asset/gd.png)


![title](asset/gd.gif)

-------

![title](asset/derivatives.png)

![title](asset/update_param.png)

### Importing Packages

In [None]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from mpl_toolkits.mplot3d import axes3d

### Reading Data 

In [None]:
# The CSV has no header row, so name its two columns explicitly.
df = pd.read_csv(
    'data/ex1.csv',
    header=None,
    names=['population', 'profit'],
    delimiter=',',
)
print(df.head())

# Let's add a constant bias column of ones: the model is built from scratch,
# so the intercept has to be learned as an ordinary weight.
df['bias'] = 1
df.head()

##### Feature Exploration

In [None]:
### Visualize the data: profit against city population
fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(df['population'], df['profit'], c='r', marker='x')
ax.set_xlim(4, 24)
ax.set_xlabel('Population of City in 10,000s')
ax.set_ylabel('Profit in $10,000s');

# Objective: Predict the Profit Based on Population

In [None]:
### Converting the DataFrame into NumPy arrays
# Design matrix: the bias column first, then the population feature.
X = df[['bias', 'population']].values
# Target reshaped to a column vector so it lines up with X.dot(theta).
y = df['profit'].values.reshape(-1, 1)

print(X.shape)
print(y.shape)

In [None]:
## Initialize the parameters to zeros:
## one row per column of X, one column per column of y
w = np.zeros(shape=(X.shape[1], y.shape[1]))
w.shape

In [None]:
### Compute Cost function
def compute_cost(X, y, theta):
    """Return the squared-error cost of the linear model ``X.dot(theta)``.

    The cost is ``J = 1/(2*m) * sum((y - h)**2)`` with ``h = X.dot(theta)``
    and ``m = y.size``.

    Parameters
    ----------
    X : array-like
        Design matrix; one row per sample.
    y : array-like
        Target values, either 1-D or as a column vector.
    theta : array-like
        Model parameters; nested lists are accepted.

    Returns
    -------
    float
        The cost ``J``.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty (``m == 0``).
    """
    y = np.asarray(y)
    m = y.size

    h = np.asarray(np.dot(X, theta))

    # Guard against silent broadcasting: a 1-D y of shape (m,) minus a column
    # h of shape (m, 1) would expand to an (m, m) matrix and inflate the cost.
    # When both hold the same number of elements, align h to y's shape.
    if h.shape != y.shape and h.size == y.size:
        h = h.reshape(y.shape)

    return 1/(2*m)*np.sum(np.square(y-h))

In [None]:
## Let's look at the cost for the initial weights `w` before any training
## (`w` was created with np.zeros, so these are all-zero weights, not random ones)
print("Initial Loss: ", compute_cost(X, y,w))

In [None]:
# Gradient descent function
def gradientDescent(X, y, theta, alpha=0.01, num_iters=1500):
    """Run batch gradient descent for a fixed number of iterations.

    Each iteration applies
    ``theta <- theta - alpha * (1/m) * X.T.dot(X.dot(theta) - y)``
    with ``m = y.size`` and records the cost of the updated parameters.

    Parameters
    ----------
    X : ndarray
        Design matrix.
    y : ndarray
        Target values.
    theta : ndarray
        Starting parameters; the caller's array is not modified because each
        update builds a new array.
    alpha : float, optional
        Learning rate.
    num_iters : int, optional
        Number of update steps to perform.

    Returns
    -------
    tuple
        ``(theta, J_history)``: the final parameters and an array of length
        ``num_iters`` holding the cost after each update.
    """
    n_samples = y.size
    cost_trace = np.zeros(num_iters)  # cost recorded after every update

    for step in range(num_iters):
        residual = X.dot(theta) - y
        gradient = X.T.dot(residual)
        theta = theta - alpha*(1/n_samples)*gradient
        cost_trace[step] = compute_cost(X, y, theta)

    return theta, cost_trace

In [None]:
# Fit theta by gradient descent, starting from the initial weights w
theta, Cost_J = gradientDescent(X, y, w)
print('theta: ',theta.ravel())

# Cost recorded after each iteration
fig, ax = plt.subplots()
ax.plot(Cost_J)
ax.set_ylabel('Cost J')
ax.set_xlabel('Iterations');

In [None]:
# Compare with Scikit-learn Linear regression
# Only the population column is passed; the bias column of X is left out
# and the estimator reports its own intercept.
regr = LinearRegression().fit(X[:, 1:2], y.ravel())
print("Intercept from Sklearn : ", regr.intercept_)
print("Weights from Sklearn : ", regr.coef_)

In [None]:
### Visual Proof
# Evaluate the gradient-descent line over the plotted population range
xx = np.arange(5,23)
yy = theta[0]+theta[1]*xx

fig, ax = plt.subplots()

# Training data and the gradient-descent fit
ax.scatter(X[:,1], y, s=30, c='r', marker='x', linewidths=1)
ax.plot(xx, yy, label='Linear regression (Gradient descent)')

# Scikit-learn fit on the same axes for comparison
ax.plot(xx, regr.intercept_+regr.coef_*xx, label='Linear regression (Scikit-learn GLM)')

ax.set_xlim(4, 24)
ax.set_xlabel('Population of City in 10,000s')
ax.set_ylabel('Profit in $10,000s')
ax.legend(loc=4);

In [None]:
# Predict profit for a city with population of 35000 and 70000
# (population is expressed in units of 10,000; the leading 1 multiplies the bias weight)
for population in (3.5, 7):
    print(theta.T.dot([1, population])*10000)

In [None]:
#### Evaluate the Model
from sklearn.metrics import mean_squared_error

In [None]:
# RMSE of the gradient-descent model on the data it was fitted to
y_pred = X.dot(theta)
rmse = np.sqrt(mean_squared_error(y, y_pred))
print("Root Mean Square Error:", np.round(rmse, 2))

In [None]:
# RMSE of the scikit-learn model on the data it was fitted to
y_pred = regr.predict(X[:, 1:2])
rmse = np.sqrt(mean_squared_error(y, y_pred))
print("Root Mean Square Error:", np.round(rmse, 2))

In [None]:
# Create grid coordinates for plotting
B0 = np.linspace(-10, 10, 50)
B1 = np.linspace(-1, 4, 50)
xx, yy = np.meshgrid(B0, B1, indexing='xy')

# With indexing='xy' the grids have shape (B1.size, B0.size). Allocate Z from
# the grid itself so the cell keeps working if B0 and B1 get different lengths.
Z = np.zeros_like(xx)

# Calculate Z-values (Cost) for every (theta_0, theta_1) pair on the grid
for i, j in np.ndindex(Z.shape):
    Z[i,j] = compute_cost(X, y, theta=np.array([[xx[i,j]], [yy[i,j]]]))

fig = plt.figure(figsize=(15,6))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122, projection='3d')

# Left plot: cost contours, with the gradient-descent solution marked in red
CS = ax1.contour(xx, yy, Z, np.logspace(-2, 3, 20), cmap=plt.cm.jet)
ax1.scatter(theta[0],theta[1], c='r')

# Right plot: the same cost values as a 3-D surface
ax2.plot_surface(xx, yy, Z, rstride=1, cstride=1, alpha=0.6, cmap=plt.cm.jet)
ax2.set_zlabel('Cost')
ax2.set_zlim(Z.min(),Z.max())
ax2.view_init(elev=15, azim=230)

# settings common to both plots
for ax in fig.axes:
    ax.set_xlabel(r'$\theta_0$', fontsize=17)
    ax.set_ylabel(r'$\theta_1$', fontsize=17)