# Optimizers

In [1]:
%cd ..

/home/karimgamaleldin/projects/KTorch


In [2]:
# Importing the required libraries
from nn import Sequential, Linear, Sigmoid, MSELoss, ReLU, BCELoss
from optim import SGD, Adam, RMSProp, Adagrad, Adadelta
import numpy as np
from autograd import Tensor

In [79]:
# Create a regression toy dataset
np.random.seed(0)
X_reg = np.random.randn(4096, 1)
y_reg = 2*X_reg[:,0] + 1
X_reg_tensor = Tensor(X_reg)
y_reg_tensor = Tensor(y_reg)
X_reg_test = np.random.randn(1024, 1)
y_reg_test = 2*X_reg_test[:,0] + 1
X_reg_test_tensor = Tensor(X_reg_test)
y_reg_test_tensor = Tensor(y_reg_test)
# Create a classification toy dataset
np.random.seed(0)
X_clf = np.random.randn(4096, 1)
y_clf = (2*X_clf[:,0]) > 0
X_clf_tensor = Tensor(X_clf)
y_clf_tensor = Tensor(y_clf)

X_clf_test = np.random.randn(1024, 1)
y_clf_test = (2*X_clf_test[:,0]) > 0
X_clf_test_tensor = Tensor(X_clf_test)
y_clf_test_tensor = Tensor(y_clf_test)

In [80]:
y_clf_tensor.data[y_clf_tensor.data == 0].shape, y_clf_tensor.data[y_clf_tensor.data == 1].shape

((2096,), (2000,))

### Stochastic Gradient Descent (SGD)

#### Regression Example

In [75]:
# Create a feed forward neural network for regression and classification
regression_feed = Sequential(Linear(1, 16), ReLU(), Linear(16, 32), ReLU(), Linear(32, 16), ReLU(), Linear(16, 1))


In [76]:
# Loss and Optimizer
criterion = MSELoss()
optimizer = SGD(regression_feed.parameters(), lr=0.01)

In [77]:
y_preds = regression_feed(X_reg_test_tensor)
loss = criterion(y_preds, y_reg_test_tensor)
loss

tensor: 7.630028247833252

In [78]:
# Training loop
EPOCHS = 100
for epoch in range(EPOCHS):
    optimizer.zero_grad()
    y_pred = regression_feed(X_reg_tensor)
    loss = criterion(y_pred, y_reg_tensor)
    loss.backward()
    weights = [param.data for param in regression_feed.parameters()]
    grads = [param.grad for param in regression_feed.parameters()]
    optimizer.step()
    updated_weights = [param.data for param in regression_feed.parameters()]
    assert np.equal(weights[0] - 0.01*grads[0], updated_weights[0]).all()
    y_preds_test = regression_feed(X_reg_test_tensor)
    loss_test = criterion(y_preds_test, y_reg_test_tensor)
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {loss.data}, Test_Loss: {loss_test.data}')

Epoch 1/100, Loss: 7.145706653594971, Test_Loss: 7.352579593658447
Epoch 2/100, Loss: 6.88231897354126, Test_Loss: 7.106645107269287
Epoch 3/100, Loss: 6.648864269256592, Test_Loss: 6.893398284912109
Epoch 4/100, Loss: 6.44681453704834, Test_Loss: 6.710356712341309
Epoch 5/100, Loss: 6.274351119995117, Test_Loss: 6.547443866729736
Epoch 6/100, Loss: 6.121325969696045, Test_Loss: 6.39837646484375
Epoch 7/100, Loss: 5.981585502624512, Test_Loss: 6.263370513916016
Epoch 8/100, Loss: 5.855305194854736, Test_Loss: 6.141838073730469
Epoch 9/100, Loss: 5.741837024688721, Test_Loss: 6.028887748718262
Epoch 10/100, Loss: 5.63657283782959, Test_Loss: 5.923155784606934
Epoch 11/100, Loss: 5.5382208824157715, Test_Loss: 5.823471546173096
Epoch 12/100, Loss: 5.445682525634766, Test_Loss: 5.72920036315918
Epoch 13/100, Loss: 5.358343124389648, Test_Loss: 5.6398539543151855
Epoch 14/100, Loss: 5.275679111480713, Test_Loss: 5.554920673370361
Epoch 15/100, Loss: 5.197242259979248, Test_Loss: 5.47407674

#### Classification Example

In [95]:
# Classification model
np.random.seed(0)
classification_feed = Sequential(Linear(1, 16), ReLU(), Linear(16, 32), ReLU(), Linear(32, 16), ReLU(), Linear(16, 1), Sigmoid())


In [96]:
# Loss and Optimizer
criterion = BCELoss()
optimizer = SGD(classification_feed.parameters(), lr=0.2, momentum=0.9)

In [97]:
def accuracy(y_true, y_pred):
    return np.mean((y_true.data == (y_pred.data > 0.5)).astype(int))

acc = accuracy(y_clf_test_tensor, classification_feed(X_clf_test_tensor))
acc

0.498046875

In [98]:
# Training loop
EPOCHS = 5
for epoch in range(EPOCHS):
    optimizer.zero_grad()
    y_pred = classification_feed(X_clf_tensor)
    loss = criterion(y_pred, y_clf_tensor)
    loss.backward()
    weights = [param.data for param in classification_feed.parameters()]
    grads = [param.grad for param in classification_feed.parameters()]
    prev_velocities = optimizer.prev_velocities[0]
    momentum = optimizer.momentum
    dampening = optimizer.dampening
    optimizer.step()
    updated_weights = [param.data for param in classification_feed.parameters()]
    assert np.equal(weights[0] - 0.2*(momentum * prev_velocities + (1 - dampening) * grads[0]), updated_weights[0]).all()
    y_preds_test = classification_feed(X_clf_test_tensor)
    acc = accuracy(y_clf_test_tensor, y_preds_test)
    y_preds_train = classification_feed(X_clf_tensor)
    acc_train = accuracy(y_clf_tensor, y_preds_train)
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {loss.data}, Test_Accuracy: {acc}', f'Train_Accuracy: {acc_train}')

Epoch 1/5, Loss: 0.6942612528800964, Test_Accuracy: 0.498046875 Train_Accuracy: 0.51171875
Epoch 2/5, Loss: 0.6934086084365845, Test_Accuracy: 0.498046875 Train_Accuracy: 0.5117073059082031
Epoch 3/5, Loss: 0.6928922533988953, Test_Accuracy: 0.5012779235839844 Train_Accuracy: 0.49240684509277344
Epoch 4/5, Loss: 0.6932503581047058, Test_Accuracy: 0.5018081665039062 Train_Accuracy: 0.48914527893066406
Epoch 5/5, Loss: 0.6937583684921265, Test_Accuracy: 0.5017814636230469 Train_Accuracy: 0.4893455505371094


### Adagrad

In [99]:
# Reg model
np.random.seed(0)
regression_feed = Sequential(Linear(1, 16), ReLU(), Linear(16, 32), ReLU(), Linear(32, 16), ReLU(), Linear(16, 1))

In [None]:
# Loss and Optimizer
