# Theory

## Create dataset

In [3]:
import pandas as pd 
import wandb  

areas = [6.7, 4.6, 3.5, 5.5]
prices = [9.1, 5.9, 4.6, 6.7]

dataset = pd.DataFrame({
    'areas': areas, 
    'prices': prices
})

In [4]:
dataset

Unnamed: 0,areas,prices
0,6.7,9.1
1,4.6,5.9
2,3.5,4.6
3,5.5,6.7


## Set up model Linear Regression and follow training path

### Set up model

In [5]:
# forward 
def predict(x, w, b): 
    return x * w + b 

# compute gradient 
def gradient(y_hat, y, x): 
    dw = 2 * x * (y_hat - y)
    db = 2 * (y_hat - y)

    return (dw, db)

# update weights 
def update_weight(w, b, lr, dw, db): 
    w_new = w - lr * dw
    b_new = b - lr * db 

    return (w_new, b_new)

### Training and follow the process

In [6]:
# init weights 
b = 0.04 
w = -0.34 
lr = 0.01 
epochs = 10 

# init project wandb 
wandb.init( 
    # Set the project where this run will be logged 
    project = 'demo-linear-regression',
    config = {
        'learning_rate': lr, 
        'epochs': epochs,
    },
)

wandb.run.log({'Dataset' : wandb.Table(dataframe=dataset)})

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlenam1072004[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
X_train = dataset['areas']
Y_train = dataset['prices']

N = len(X_train)

# parameter 
losses = [] # for debug

for epoch in range(epochs): 
    # for an epoch 
    for i in range(N): 
        # get a sample 
        x = X_train[i]
        y = Y_train[i]

        # predict y_hat
        y_hat = predict(x, w, b)

        # compute loss 
        loss = (y_hat - y) * (y_hat - y) / 2.0

        # tracking loss with WandB
        wandb.log({'loss':loss})

        # compute gradient 
        (dw, db) = gradient(y_hat, y, x)

        # update weights 
        (w, b) = update_weight(w, b, lr, dw, db)

# Mark a run as finished, and finish uploading all data. 
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.9430379746835443, max=1.0…

0,1
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
loss,0.13448


# Exercises

In [8]:
advertising = pd.read_csv('advertising.csv')

In [9]:
advertising

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,14.0
197,177.0,9.3,6.4,14.8
198,283.6,42.0,66.2,25.5


In [10]:
import numpy as np 
import matplotlib.pyplot as plt 
import random

In [17]:
lr = 0.01 
epochs_max = 1000

# init project wandb 
wandb.init( 
    # Set the project where this run will be logged 
    project = 'demo-linear-regression-advertising',
    config = {
        'learning_rate': lr, 
        'epochs': epochs,
    },
)
wandb.run.log({'Dataset' : wandb.Table(dataframe=advertising)})

VBox(children=(Label(value='0.012 MB of 0.029 MB uploaded\r'), FloatProgress(value=0.40438725163355116, max=1.…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

## Normalization

In [18]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

In [19]:
X_data = advertising.drop(['Sales'], axis=1)
Y_data = advertising['Sales']

X_data = scaler.fit_transform(X_data)

In [20]:
def predict(x,w,b):
    return np.dot(x, w) +b

def gradient(y_hat, y, x):
    loss = y_hat -y
    dw = x.T.dot(loss)/len(y)
    db = np.sum(loss)/len(y)
    cost = np.sum(loss**2)/(2*len(y))

    return (dw, db, cost)

def update_weight(w,b,lr,dw,db):
    w_new = w - lr*dw
    b_new = b - lr*db
    return (w_new, b_new)

In [22]:
w = np.zeros(X_data.shape[1])
b = 1
for epoch in range(epochs_max):
  y_hat = predict(X_data, w, b)
  (dw, db, cost) = gradient(y_hat, Y_data, X_data)
  wandb.log({"cost": cost})
  if(epoch % 50 == 0):
    print(f'Epoch {epoch} : cost = {cost}')
  (w, b) = update_weight(w, b, lr, dw, db)

wandb.finish()

Epoch 0 : cost = 113.725475
Epoch 50 : cost = 42.27098698484837
Epoch 100 : cost = 16.301089047300188
Epoch 150 : cost = 6.828957870761153
Epoch 200 : cost = 3.3642986207993797
Epoch 250 : cost = 2.093828606767359
Epoch 300 : cost = 1.6267885527910022
Epoch 350 : cost = 1.4546176763799286
Epoch 400 : cost = 1.390929873012838
Epoch 450 : cost = 1.3672659568117587
Epoch 500 : cost = 1.3584210332209725
Epoch 550 : cost = 1.3550886004010863
Epoch 600 : cost = 1.3538196481248657
Epoch 650 : cost = 1.3533296560388843
Epoch 700 : cost = 1.3531370424223292
Epoch 750 : cost = 1.3530596370712467
Epoch 800 : cost = 1.3530277067249128
Epoch 850 : cost = 1.3530141424027013
Epoch 900 : cost = 1.3530081975956834
Epoch 950 : cost = 1.35300550978008


VBox(children=(Label(value='0.012 MB of 0.012 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
cost,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
cost,1.353
