# 5 Exploration (optional)

Report by <br>
Jonas Ortner: 2265527 <br>
Marmee Pandya: 1963521

In [1]:
# Imports, autoreload setup, and reproducibility settings for the PyTorch experiments
import math
import numpy as np
import matplotlib.pyplot as plt
import sklearn

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data

%load_ext autoreload
%autoreload 2

from a02_helper import *
from a02_functions import optimize

# Fix both RNG seeds so that torch-side randomness (weight initialization via
# torch.randn, DataLoader shuffling) and any NumPy-side randomness are
# repeatable from one "Restart & Run All" to the next.
torch.manual_seed(0)
np.random.seed(0)
# Restrict PyTorch to a single intra-op thread.
torch.set_num_threads(1)  # reduce thread-related instability

In [2]:
# Prepare tensors (explicit dtype conversions).
# NOTE(review): Xz and y are not defined in any visible cell; presumably they
# are provided by `from a02_helper import *` — TODO confirm.
# Features are cast to float32 (the default dtype of the torch.randn-initialized
# weights) and labels to int64 (class indices, as NLLLoss expects for targets).
Xztorch = torch.from_numpy(Xz.astype(np.float32))
ytorch = torch.from_numpy(y.astype(np.int64))
# Pair features with labels so a DataLoader can yield (examples, labels) batches.
train = torch.utils.data.TensorDataset(Xztorch, ytorch)

In [3]:
# Softmax (multinomial) logistic regression model without a bias term.
class LogisticRegression(nn.Module):
    """Linear classifier that outputs per-class log-probabilities.

    The weight matrix is registered exactly once, under the name ``W``, so
    ``state_dict()`` contains a single entry (``"W"``). The previous
    implementation registered the same tensor twice (as ``weights`` and as
    ``W``), which duplicated it in the state dict. ``weights`` is kept as a
    read-only alias for code that still accesses ``model.weights``.

    Args:
        D: number of input features (rows of the weight matrix).
        C: number of classes (columns of the weight matrix).
    """

    def __init__(self, D, C):
        # Zero-argument super() does not look up the class by its global name,
        # which keeps construction working when the cell is re-run/reloaded.
        super().__init__()
        # Standard-normal entries scaled by 1/sqrt(D).
        self.W = nn.Parameter(torch.randn(D, C) / math.sqrt(D))

    @property
    def weights(self):
        """Backward-compatible alias for the weight parameter ``W``."""
        return self.W

    def forward(self, x):
        """Return log-softmax scores of ``x @ W``.

        For a 2-D batch ``x`` of shape (N, D) the result has shape (N, C) and
        ``dim=1`` normalizes over the C classes of each row.
        """
        return F.log_softmax(torch.matmul(x, self.W), dim=1)

# Factory producing the (objective, update) pair that optimize() drives.
def opt_pytorch(learning_rate=0.01, batch_size=100, optimizer_name="Adam"):
    """Build objective/update closures that train a 2-class logistic regression.

    Relies on the notebook globals ``LogisticRegression``, ``D``, ``train``,
    ``Xztorch`` and ``ytorch``.

    Args:
        learning_rate: step size passed to the torch optimizer.
        batch_size: mini-batch size used by the training DataLoader.
        optimizer_name: ``"sgd"`` (case-insensitive) selects ``torch.optim.SGD``
            and ``"adam"`` selects ``torch.optim.Adam``. Any other name still
            falls back to Adam (as before) but now emits a ``UserWarning``
            instead of doing so silently.

    Returns:
        A tuple ``(objective, update)``:
        ``objective(_)`` returns, as a Python float, the summed negative
        log-likelihood over the full dataset (``Xztorch``, ``ytorch``);
        ``update(_1, _2)`` runs one pass over the shuffled mini-batches and
        returns the tensor ``W[:, 1] - W[:, 0]``. Both closures ignore their
        arguments; all state lives in the captured model and optimizer.
    """
    import warnings

    model = LogisticRegression(D, 2)
    criterion = torch.nn.NLLLoss(reduction="sum")

    chosen = optimizer_name.lower()
    if chosen == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    else:
        if chosen != "adam":
            # Keep the historical fallback, but make a typo such as "sdg" visible.
            warnings.warn(
                f"Unknown optimizer_name {optimizer_name!r}; falling back to Adam.",
                stacklevel=2,
            )
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Loader is created once and re-iterated (with reshuffling) by every update call.
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)

    def objective(_):
        """Summed NLL on the full dataset, evaluated without tracking gradients."""
        model.eval()
        with torch.no_grad():
            outputs = model(Xztorch)
            loss = criterion(outputs, ytorch)
        # .item() already yields a plain Python float rather than a tensor.
        return loss.item()

    def update(_1, _2):
        """Run one pass of mini-batch steps and return the weight difference."""
        model.train()
        for examples, labels in train_loader:
            outputs = model(examples)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        W = model.state_dict()["W"]
        # For two classes, softmax over x @ W equals a sigmoid of
        # x @ (W[:, 1] - W[:, 0]), so this difference is the equivalent
        # binary logistic-regression weight vector.
        w = W[:, 1] - W[:, 0]
        return w

    return (objective, update)

In [4]:
# Build the (objective, update) pair for an Adam-trained model and run optimize().
learning_rate = 0.01
batch_size = 100
obj_up = opt_pytorch(learning_rate=learning_rate, batch_size=batch_size, optimizer_name="Adam")

# NOTE(review): the update() closure returned by opt_pytorch ignores both
# arguments that optimize passes to it, so the step size actually applied is
# the torch optimizer's `learning_rate`. The `eps` column in the log below is
# therefore presumably only optimize's own bookkeeping, seeded by eps0 —
# TODO confirm against a02_functions.optimize.
wz_t, vz_t, ez_t = optimize(obj_up, None, nepochs=100, eps0=0.01, verbose=True)
print("Finished optimize; wz_t shape:", np.shape(wz_t))

Epoch   0: f=  2970.833, eps=0.010000000
Epoch   1: f=   876.598, eps=0.010500000
Epoch   2: f=   777.385, eps=0.011025000
Epoch   3: f=   739.052, eps=0.011576250
Epoch   4: f=   719.818, eps=0.012155063
Epoch   5: f=   707.682, eps=0.012762816
Epoch   6: f=   701.013, eps=0.013400956
Epoch   7: f=   694.605, eps=0.014071004
Epoch   8: f=   689.833, eps=0.014774554
Epoch   9: f=   687.160, eps=0.015513282
Epoch  10: f=   684.529, eps=0.016288946
Epoch  11: f=   682.617, eps=0.017103394
Epoch  12: f=   681.271, eps=0.017958563
Epoch  13: f=   679.351, eps=0.018856491
Epoch  14: f=   677.842, eps=0.019799316
Epoch  15: f=   676.719, eps=0.020789282
Epoch  16: f=   676.463, eps=0.021828746
Epoch  17: f=   675.279, eps=0.022920183
Epoch  18: f=   674.493, eps=0.024066192
Epoch  19: f=   673.347, eps=0.025269502
Epoch  20: f=   672.591, eps=0.026532977
Epoch  21: f=   672.269, eps=0.027859626
Epoch  22: f=   671.803, eps=0.029252607
Epoch  23: f=   671.111, eps=0.030715238
Epoch  24: f=   