<a href="https://colab.research.google.com/github/naot97/Implement_Machine_Learning/blob/master/Optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Importing**

In [197]:
import numpy as np
import random
import pandas as pd
import warnings
warnings.filterwarnings("ignore")


In [198]:
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection


In [199]:
# import  torch
# from torch import nn
# import torchvision
# from torchvision.transforms import Compose, ToTensor, Resize
# from torch.optim import Optimizer

## **Models**

In [200]:
class MySGD():
  def __init__(self,lr = 0.001, momentum=0.9):
    self.lr = lr 
    self.momentum = momentum

  def update(self, para, v, grad_func, para_lambda,n = 1):
    
    a = [*range(n)]
    index_update = random.sample(a, n)
    selected_para_lambda = ()
    for p in para_lambda:
      selected_para_lambda = selected_para_lambda + (p[index_update],)

    grad = grad_func(*selected_para_lambda)
    if v is None:
      new_v = - self.lr * grad
    else:
      new_v = self.momentum * v - self.lr * grad

    return para + new_v, new_v



In [201]:
class SimpleModel():
  def __init__(self):
    self.w = np.random.randn(13,1) 
    self.v_w = None
    self.b=np.random.randn(1,1)
    self.v_b = None
    self.optimizer = MySGD()
  def forward(self, x):
    out = np.dot(x,self.w) + self.b
    return out
  def backprop(self, x, y_pred, y):
    w_grad_func = lambda x, y_pred, y :  x.T.dot((y_pred - y))
    b_grad_func = lambda y_pred, y: np.mean(y_pred - y, axis = 0)
    self.w, self.v_w = self.optimizer.update(self.w, self.v_w, w_grad_func, (x,y_pred,y), 8)
    self.b, self.v_b = self.optimizer.update(self.b, self.v_b,  b_grad_func, (y_pred,y), 8)

## **Data**

In [202]:
from sklearn.datasets import load_boston
boston = load_boston()
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df['PRICE'] = boston.target
boston_df.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [203]:
scalar = StandardScaler()
boston_df[boston.feature_names] = scalar.fit_transform(boston_df[boston.feature_names])
boston_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,-0.419782,0.284830,-1.287909,-0.272599,-0.144217,0.413672,-0.120013,0.140214,-0.982843,-0.666608,-1.459000,0.441052,-1.075562,24.0
1,-0.417339,-0.487722,-0.593381,-0.272599,-0.740262,0.194274,0.367166,0.557160,-0.867883,-0.987329,-0.303094,0.441052,-0.492439,21.6
2,-0.417342,-0.487722,-0.593381,-0.272599,-0.740262,1.282714,-0.265812,0.557160,-0.867883,-0.987329,-0.303094,0.396427,-1.208727,34.7
3,-0.416750,-0.487722,-1.306878,-0.272599,-0.835284,1.016303,-0.809889,1.077737,-0.752922,-1.106115,0.113032,0.416163,-1.361517,33.4
4,-0.412482,-0.487722,-1.306878,-0.272599,-0.835284,1.228577,-0.511180,1.077737,-0.752922,-1.106115,0.113032,0.441052,-1.026501,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,-0.413229,-0.487722,0.115738,-0.272599,0.158124,0.439316,0.018673,-0.625796,-0.982843,-0.803212,1.176466,0.387217,-0.418147,22.4
502,-0.415249,-0.487722,0.115738,-0.272599,0.158124,-0.234548,0.288933,-0.716639,-0.982843,-0.803212,1.176466,0.441052,-0.500850,20.6
503,-0.413447,-0.487722,0.115738,-0.272599,0.158124,0.984960,0.797449,-0.773684,-0.982843,-0.803212,1.176466,0.441052,-0.983048,23.9
504,-0.407764,-0.487722,0.115738,-0.272599,0.158124,0.725672,0.736996,-0.668437,-0.982843,-0.803212,1.176466,0.403225,-0.865302,22.0


In [204]:
X = boston_df[boston.feature_names].values
y = boston_df['PRICE'].values.reshape(-1,1)
print(X.shape,y.shape)

(506, 13) (506, 1)


In [205]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.20, random_state = 5)
X_train.shape, y_train.shape

((404, 13), (404, 1))

## **Training**

In [206]:
batch_size = 32
epochs = 20
model = SimpleModel()
for epoch in range(epochs):
  start = 0
  sumloss = 0
  while start < len(X_train):
    end = min(len(X_train), start + batch_size)
    X_batch = X[start:end]
    y_batch = y[start:end]
    y_pred = model.forward(X_batch)
    sumloss += np.sum((y_pred - y_batch)**2)
    model.backprop(X_batch, y_pred, y_batch)
    start = end
  
  y_pred_valid = model.forward(X_test)
  loss_valid = np.sum((y_pred_valid - y_test)**2)
  print(f'Epoch {epoch}: Training Loss = {sumloss // 10}, Valid Loss = {loss_valid // 10}')

Epoch 0: Training Loss = 36535.0, Valid Loss = 11785.0
Epoch 1: Training Loss = 16235.0, Valid Loss = 5884.0
Epoch 2: Training Loss = 14648.0, Valid Loss = 6186.0
Epoch 3: Training Loss = 10645.0, Valid Loss = 5057.0
Epoch 4: Training Loss = 8511.0, Valid Loss = 4697.0
Epoch 5: Training Loss = 6815.0, Valid Loss = 4013.0
Epoch 6: Training Loss = 5656.0, Valid Loss = 3457.0
Epoch 7: Training Loss = 4744.0, Valid Loss = 2914.0
Epoch 8: Training Loss = 4070.0, Valid Loss = 2447.0
Epoch 9: Training Loss = 3551.0, Valid Loss = 2035.0
Epoch 10: Training Loss = 3142.0, Valid Loss = 1688.0
Epoch 11: Training Loss = 2821.0, Valid Loss = 1396.0
Epoch 12: Training Loss = 2567.0, Valid Loss = 1155.0
Epoch 13: Training Loss = 2366.0, Valid Loss = 957.0
Epoch 14: Training Loss = 2206.0, Valid Loss = 796.0
Epoch 15: Training Loss = 2080.0, Valid Loss = 666.0
Epoch 16: Training Loss = 1980.0, Valid Loss = 562.0
Epoch 17: Training Loss = 1902.0, Valid Loss = 479.0
Epoch 18: Training Loss = 1841.0, Vali