<a href="https://colab.research.google.com/github/luisdiaz1997/DataScienceCourse/blob/master/Week6_Solution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd

In [0]:
# Remote CSV read by the loading cell below (California housing training set, per the file name).
data_url = "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv"

In [0]:
# Read the CSV straight from the URL (comma is read_csv's default separator)
# and preview the first rows.
df = pd.read_csv(data_url)
df.head()

In [0]:
# Feature and target as column vectors of shape (n_samples, 1).
x = df.households.values.reshape(-1, 1)
y = df.total_rooms.values.reshape(-1, 1)

# Open a single figure: calling plt.figure(1) and then plt.figure(figsize=...)
# would leave an empty figure 1 behind and draw into a second one.
plt.figure(1, figsize=(8, 6))
plt.scatter(x[::10], y[::10])  # plot every 10th sample only
plt.xlabel('Number of Households')
plt.ylabel('Total Rooms');

>$h(x) = x w + b$

In [0]:
def linear(x, w, b):
  """Affine map h(x) = x.w + b.

  Args:
    x: inputs; multiplied with ``w`` via ``np.dot``.
    w: weights.
    b: bias, added to the product with NumPy broadcasting.

  Returns:
    ``np.dot(x, w) + b``.
  """
  return np.dot(x, w) + b

In [0]:
def start_params(features = 2, seed = None):
  """Draw random initial parameters for a linear model.

  Args:
    features: number of input features; ``W`` gets shape ``(features, 1)``.
    seed: optional seed for a dedicated generator, making the draw
      reproducible. When ``None`` (default) the global ``np.random`` state is
      used, exactly as before, so ``np.random.seed`` still controls it.

  Returns:
    dict with standard-normal arrays ``'W'`` of shape ``(features, 1)`` and
    ``'b'`` of shape ``(1, 1)``.
  """
  if seed is None:
    draw = np.random.randn
  else:
    rng = np.random.default_rng(seed)
    draw = lambda *shape: rng.standard_normal(shape)
  # 'W' is drawn before 'b', the same order as the original dict literal.
  params = {'W': draw(features, 1), 'b': draw(1, 1)}
  return params

>$J = \frac{1}{2M}\sum_{} (h(x) - y)^2 $

In [0]:
def loss(model, inputs, targets):
  """Half mean squared error, J = 1/(2M) * sum((h(x) - y)^2).

  Args:
    model: callable mapping ``inputs`` to predictions.
    inputs: array whose first dimension is the number of samples M.
    targets: array subtracted from the predictions (NumPy broadcasting applies).

  Returns:
    The scalar cost J.
  """
  error = model(inputs) - targets
  # The factor 1/2 makes this the J whose gradient is (1/M) * X^T error,
  # which is what grad() computes; dividing by M alone reports twice that J.
  return np.sum(error **2)/(2 * inputs.shape[0])

## We decrease this cost function by gradient descent
### To make things easier, define the error term $\delta$

>$ \delta = h(x) - y $

>$J = \frac{1}{2M}\sum_{} (\delta)^2 $

## With chain rule

>$\frac{\partial J}{\partial W} = \frac{1}{M} \sum_{} \delta \frac{\partial \delta}{\partial W}$

>$ \frac{\partial \delta}{\partial W} = x $

>$\frac{\partial J}{\partial W} = \frac{1}{M}\sum_{} \delta x $

## Vectorizing we get
>$dW=\frac{\partial J}{\partial W} = \frac{1}{M}X^T \delta $


## Similarly for b, since $\frac{\partial \delta}{\partial b} = 1$

> $db = \frac{\partial J}{\partial b} = \frac{1}{M}\sum_{} \delta $

## We update the model parameters this way, where $\alpha$ is our learning rate

> $W=W - \alpha dW $

> $b=b - \alpha db $


In [0]:
def grad(model, inputs, targets):
  """Gradients of the cost with respect to the weights and the bias.

  Args:
    model: callable mapping ``inputs`` to predictions.
    inputs: 2-D array with one sample per row.
    targets: array subtracted from the predictions.

  Returns:
    dict with ``'dW'`` = X^T . error / M and ``'db'`` = column-wise sum of the
    error / M (kept 2-D via ``keepdims``), where M = ``inputs.shape[0]``.
  """
  residual = model(inputs) - targets
  n_samples = inputs.shape[0]
  d_weights = np.dot(inputs.T, residual) / n_samples
  d_bias = np.sum(residual, axis=0, keepdims=True) / n_samples
  return {'dW': d_weights, 'db': d_bias}

In [0]:
class LinearRegression():
  """Linear model h(x) = xW + b trained by batch gradient descent.

  Attributes:
    params: dict with the weights ``'W'`` and bias ``'b'`` from start_params().
    costs: cost values recorded by fit(), one per epoch.
  """

  def __init__(self, features = 2):
    self.params = start_params(features = features)
    self.costs = []

  def __call__(self, inputs):
    """Return the model's predictions for ``inputs``."""
    weights, bias = self.params['W'], self.params['b']
    return linear(inputs, weights, bias)

  def fit(self, inputs, targets, learning_rate = 1e-3, epochs= 10):
    """Run ``epochs`` full-batch gradient-descent steps.

    The cost is appended to ``self.costs`` before each parameter update, and
    the parameter arrays are updated in place.
    """
    for _ in tqdm(range(epochs)):
      self.costs.append(loss(self, inputs, targets))
      step = grad(self, inputs, targets)

      # Gradient entries are keyed 'd' + parameter name ('dW', 'db').
      for name in self.params:
        self.params[name] -= learning_rate * step['d' + name]



In [0]:
# One input feature: x holds only the `households` column.
model = LinearRegression(features = 1)

In [0]:
# Inspect the randomly initialised parameters (fit() has not been called yet).
model.params

In [0]:
plt.figure(2, figsize = (10,8))

# Predictions from the randomly initialised model, drawn over the data.
y_predict = model(x)
plt.scatter(x, y, label = 'data')
plt.scatter(x[:, 0], y_predict, label = 'prediction')
plt.xlabel('Number of Households')
plt.ylabel('Total Rooms')
plt.legend();

In [0]:
# x is the raw, unscaled `households` column; the very small learning rate is
# presumably needed to keep gradient descent from diverging on it — TODO confirm.
model.fit(x, y, learning_rate=1e-8, epochs = 1000)

In [0]:
# Cost recorded by fit() before each gradient step.
plt.plot(model.costs)
plt.xlabel('Epoch')
plt.ylabel('Cost');

In [0]:
plt.figure(3, figsize = (10,8))

# Predictions from the fitted model, drawn over the data.
y_predict = model(x)
plt.scatter(x, y, label = 'data')
plt.scatter(x[:, 0], y_predict, label = 'prediction')
plt.xlabel('Number of Households')
plt.ylabel('Total Rooms')
plt.legend();

In [0]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
from sklearn.datasets import make_blobs
n_samples = 1500
random_state = 170  # fixed seed passed to make_blobs
# Two blobs centred at (-3, -3) and (1, 1).
# NOTE: this rebinds `y`, which held the housing targets until now, so the
# earlier regression cells cannot be re-run after this one without first
# re-running the cell that defines x and y.
X, y = make_blobs(n_samples=n_samples, random_state=random_state, centers = np.array([[-3, -3], [1, 1]]))

In [0]:
# Blob samples coloured by their label in y.
plt.figure(4, figsize = (10,8))
plt.scatter(X[:, 0], X[:, 1], c = y, cmap = 'bwr')
plt.xlabel('X[:, 0]')
plt.ylabel('X[:, 1]')
plt.title('Blob samples coloured by label')
plt.colorbar(label = 'label');

In [0]:
# Two input features: one weight per coordinate of the 2-D blob samples in X.
model2 = LinearRegression(features = 2)

In [0]:
# Inspect the randomly initialised parameters (fit() has not been called yet).
model2.params

In [0]:
plt.figure(5, figsize = (10,8))

# Output of the randomly initialised model; the colour scale is clipped to [0, 1].
y_predict = model2(X)
plt.scatter(X[:, 0], X[:, 1], c = y_predict.reshape(-1), cmap = 'bwr', vmin = 0, vmax = 1)
plt.xlabel('X[:, 0]')
plt.ylabel('X[:, 1]')
plt.title('model2 output before training')
plt.colorbar(label = 'model output');

In [0]:
# y[:, None] adds a trailing axis so the targets form a column like the model's
# (n_samples, 1) output; presumably y is 1-D here, in which case subtracting it
# directly would broadcast to an (n_samples, n_samples) error — TODO confirm.
model2.fit(X, y[:, None], learning_rate = 1e-3, epochs = 10000)

In [0]:
# Cost recorded by fit() before each gradient step.
plt.plot(model2.costs)
plt.xlabel('Epoch')
plt.ylabel('Cost');

In [0]:
plt.figure(6, figsize = (10,8))

# Output of the fitted model; the colour scale is clipped to [0, 1].
y_predict = model2(X)
plt.scatter(X[:, 0], X[:, 1], c = y_predict.reshape(-1), cmap = 'bwr', vmin = 0, vmax = 1)
plt.xlabel('X[:, 0]')
plt.ylabel('X[:, 1]')
plt.title('model2 output after training')
plt.colorbar(label = 'model output');

In [0]:
# Parameters after training, for comparison with the initial values shown earlier.
model2.params