<a href="https://colab.research.google.com/github/naot97/Implement_Machine_Learning/blob/master/Optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Importing**

In [81]:
import numpy as np
import random
import pandas as pd
import warnings
warnings.filterwarnings("ignore")


In [82]:
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection


In [83]:
# import  torch
# from torch import nn
# import torchvision
# from torchvision.transforms import Compose, ToTensor, Resize
# from torch.optim import Optimizer

## **Models**

In [84]:
class MyOptim():
  def __init__(self,lr = 0.001, momentum=0.9, rho = 0.9, mode = 'sgd'):
    self.lr = lr 
    self.momentum = momentum
    self.rho = rho
    self.mode = mode
  def update(self, para, pre_v, pre_Eg, grad_func, para_lambda,n = 1):
    new_v = 0
    Eg = 0

    a = [*range(n)]
    index_update = random.sample(a, n)
    selected_para_lambda = ()
    for p in para_lambda:
      selected_para_lambda = selected_para_lambda + (p[index_update],)
    grad = grad_func(*selected_para_lambda)

    if self.mode == 'sgd': 
      if pre_v is None:
        new_v = - self.lr * grad
      else:
        new_v = self.momentum * pre_v - self.lr * grad
    elif self.mode =='rms':
      if pre_Eg is None:
        Eg = grad**2
      else:
        Eg = self.momentum * pre_Eg + (1 - self.momentum) * grad**2

      new_v = - self.lr * grad / (Eg + 1e-7)
    return  Eg, new_v



In [85]:
class SimpleModel():
  def __init__(self, optimizer):
    self.w = np.random.randn(13,1) 
    self.v_w = None
    self.Eg_w = None
    self.b = np.random.randn(1,1)
    self.v_b = None
    self.Eg_b = None
    self.optimizer = optimizer
  def forward(self, x):
    out = np.dot(x,self.w) + self.b
    return out
  def backprop(self, x, y_pred, y):
    w_grad_func = lambda x, y_pred, y :  x.T.dot((y_pred - y))
    b_grad_func = lambda y_pred, y: np.mean(y_pred - y, axis = 0)
    self.Eg_w, self.v_w = self.optimizer.update(self.w, self.v_w, self.Eg_w, w_grad_func, (x,y_pred,y), 8)
    self.w = self.w + self.v_w
    self.Eg_b, self.v_b = self.optimizer.update(self.b, self.v_b, self.Eg_b, b_grad_func, (y_pred,y), 8)
    self.b = self.b + self.v_b

## **Data**

In [86]:
from sklearn.datasets import load_boston
boston = load_boston()
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df['PRICE'] = boston.target
boston_df.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [87]:
scalar = StandardScaler()
boston_df[boston.feature_names] = scalar.fit_transform(boston_df[boston.feature_names])
boston_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,-0.419782,0.284830,-1.287909,-0.272599,-0.144217,0.413672,-0.120013,0.140214,-0.982843,-0.666608,-1.459000,0.441052,-1.075562,24.0
1,-0.417339,-0.487722,-0.593381,-0.272599,-0.740262,0.194274,0.367166,0.557160,-0.867883,-0.987329,-0.303094,0.441052,-0.492439,21.6
2,-0.417342,-0.487722,-0.593381,-0.272599,-0.740262,1.282714,-0.265812,0.557160,-0.867883,-0.987329,-0.303094,0.396427,-1.208727,34.7
3,-0.416750,-0.487722,-1.306878,-0.272599,-0.835284,1.016303,-0.809889,1.077737,-0.752922,-1.106115,0.113032,0.416163,-1.361517,33.4
4,-0.412482,-0.487722,-1.306878,-0.272599,-0.835284,1.228577,-0.511180,1.077737,-0.752922,-1.106115,0.113032,0.441052,-1.026501,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,-0.413229,-0.487722,0.115738,-0.272599,0.158124,0.439316,0.018673,-0.625796,-0.982843,-0.803212,1.176466,0.387217,-0.418147,22.4
502,-0.415249,-0.487722,0.115738,-0.272599,0.158124,-0.234548,0.288933,-0.716639,-0.982843,-0.803212,1.176466,0.441052,-0.500850,20.6
503,-0.413447,-0.487722,0.115738,-0.272599,0.158124,0.984960,0.797449,-0.773684,-0.982843,-0.803212,1.176466,0.441052,-0.983048,23.9
504,-0.407764,-0.487722,0.115738,-0.272599,0.158124,0.725672,0.736996,-0.668437,-0.982843,-0.803212,1.176466,0.403225,-0.865302,22.0


In [88]:
X = boston_df[boston.feature_names].values
y = boston_df['PRICE'].values.reshape(-1,1)
print(X.shape,y.shape)

(506, 13) (506, 1)


In [89]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.20, random_state = 5)
X_train.shape, y_train.shape

((404, 13), (404, 1))

## **Training**

### **SGD**

In [90]:
batch_size = 32
epochs = 20
optim = MyOptim()
model = SimpleModel(optim)
for epoch in range(epochs):
  start = 0
  sumloss = 0
  while start < len(X_train):
    end = min(len(X_train), start + batch_size)
    X_batch = X[start:end]
    y_batch = y[start:end]
    y_pred = model.forward(X_batch)
    sumloss += np.sum((y_pred - y_batch)**2)
    model.backprop(X_batch, y_pred, y_batch)
    start = end
  
  y_pred_valid = model.forward(X_test)
  loss_valid = np.sum((y_pred_valid - y_test)**2)
  print(f'Epoch {epoch}: Training Loss = {sumloss // 10}, Valid Loss = {loss_valid // 10}')

Epoch 0: Training Loss = 33900.0, Valid Loss = 11060.0
Epoch 1: Training Loss = 17858.0, Valid Loss = 6448.0
Epoch 2: Training Loss = 14871.0, Valid Loss = 6323.0
Epoch 3: Training Loss = 11302.0, Valid Loss = 5359.0
Epoch 4: Training Loss = 8812.0, Valid Loss = 4994.0
Epoch 5: Training Loss = 7089.0, Valid Loss = 4277.0
Epoch 6: Training Loss = 5818.0, Valid Loss = 3689.0
Epoch 7: Training Loss = 4900.0, Valid Loss = 3106.0
Epoch 8: Training Loss = 4188.0, Valid Loss = 2616.0
Epoch 9: Training Loss = 3654.0, Valid Loss = 2175.0
Epoch 10: Training Loss = 3221.0, Valid Loss = 1806.0
Epoch 11: Training Loss = 2886.0, Valid Loss = 1491.0
Epoch 12: Training Loss = 2618.0, Valid Loss = 1233.0
Epoch 13: Training Loss = 2407.0, Valid Loss = 1020.0
Epoch 14: Training Loss = 2238.0, Valid Loss = 847.0
Epoch 15: Training Loss = 2104.0, Valid Loss = 707.0
Epoch 16: Training Loss = 1998.0, Valid Loss = 595.0
Epoch 17: Training Loss = 1915.0, Valid Loss = 505.0
Epoch 18: Training Loss = 1850.0, Val

### **RMS**

In [97]:
batch_size = 32
epochs = 20
optim = MyOptim(lr = 5, rho  = 0.9, mode = 'rms')
model = SimpleModel(optim)
for epoch in range(epochs):
  start = 0
  sumloss = 0
  while start < len(X_train):
    end = min(len(X_train), start + batch_size)
    X_batch = X[start:end]
    y_batch = y[start:end]
    y_pred = model.forward(X_batch)
    sumloss += np.sum((y_pred - y_batch)**2)
    model.backprop(X_batch, y_pred, y_batch)
    start = end
  
  y_pred_valid = model.forward(X_test)
  loss_valid = np.sum((y_pred_valid - y_test)**2)
  print(f'Epoch {epoch}: Training Loss = {sumloss // 10}, Valid Loss = {loss_valid // 10}')

Epoch 0: Training Loss = 26674.0, Valid Loss = 5173.0
Epoch 1: Training Loss = 21518.0, Valid Loss = 4202.0
Epoch 2: Training Loss = 16634.0, Valid Loss = 3300.0
Epoch 3: Training Loss = 12136.0, Valid Loss = 2500.0
Epoch 4: Training Loss = 8204.0, Valid Loss = 1829.0
Epoch 5: Training Loss = 5047.0, Valid Loss = 1313.0
Epoch 6: Training Loss = 2903.0, Valid Loss = 939.0
Epoch 7: Training Loss = 1917.0, Valid Loss = 653.0
Epoch 8: Training Loss = 1795.0, Valid Loss = 455.0
Epoch 9: Training Loss = 1763.0, Valid Loss = 331.0
Epoch 10: Training Loss = 1561.0, Valid Loss = 279.0
Epoch 11: Training Loss = 1393.0, Valid Loss = 266.0
Epoch 12: Training Loss = 1283.0, Valid Loss = 284.0
Epoch 13: Training Loss = 1230.0, Valid Loss = 322.0
Epoch 14: Training Loss = 1224.0, Valid Loss = 369.0
Epoch 15: Training Loss = 1251.0, Valid Loss = 419.0
Epoch 16: Training Loss = 1301.0, Valid Loss = 469.0
Epoch 17: Training Loss = 1366.0, Valid Loss = 517.0
Epoch 18: Training Loss = 1439.0, Valid Loss =