In [16]:
#Optimizing the NN structure with Pytorch and Optuna, mnist fashion example
!pip install optuna torchvision



In [17]:
#import the libraries
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

In [18]:
import torch as torch
import torch.nn as nn
import torch.optim as optim

In [19]:
import optuna

In [20]:
#Define the PyTorch model
class Net(nn.Module):
    def __init__(self, n_units):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, n_units)
        self.fc2 = nn.Linear(n_units, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [21]:
def objective(trial):
    # 1. Load MNIST dataset
    transform = transforms.Compose([transforms.ToTensor()])
    train_dataset = MNIST('.', train=True, download=True, transform=transform)
    test_dataset = MNIST('.', train=False, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

    # 2. Define model, optimizer, and criterion
    n_units = trial.suggest_int('n_units', 32, 512)
    model = Net(n_units)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # 3. Train model
    for epoch in range(10):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    # 4. Evaluate model
    model.eval()
    correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    accuracy = correct / len(test_loader.dataset)
    return accuracy


In [22]:
#Run Optuna on the model
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

print('Number of finished trials: ', len(study.trials))
print('Best trial:')
trial = study.best_trial
print('Value: ', trial.value)
print('Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')


[I 2023-08-29 16:52:59,289] A new study created in memory with name: no-name-a6f243ca-d38e-41b5-97fe-843466d4f41b
[I 2023-08-29 16:53:48,986] Trial 0 finished with value: 0.5744 and parameters: {'n_units': 215, 'optimizer': 'SGD', 'lr': 0.00016680011139916175}. Best is trial 0 with value: 0.5744.
[I 2023-08-29 16:54:47,464] Trial 1 finished with value: 0.9774 and parameters: {'n_units': 431, 'optimizer': 'Adam', 'lr': 0.0029859182598388767}. Best is trial 1 with value: 0.9774.
[I 2023-08-29 16:55:46,426] Trial 2 finished with value: 0.9729 and parameters: {'n_units': 500, 'optimizer': 'Adam', 'lr': 0.00019490223729422412}. Best is trial 1 with value: 0.9774.
[I 2023-08-29 16:56:40,313] Trial 3 finished with value: 0.977 and parameters: {'n_units': 156, 'optimizer': 'Adam', 'lr': 0.0022992921259711667}. Best is trial 1 with value: 0.9774.
[I 2023-08-29 16:57:42,702] Trial 4 finished with value: 0.9792 and parameters: {'n_units': 372, 'optimizer': 'Adam', 'lr': 0.000451910438456066}. Bes

Number of finished trials:  10
Best trial:
Value:  0.9792
Params: 
    n_units: 372
    optimizer: Adam
    lr: 0.000451910438456066


In [31]:
# Now that we have the right parameters, we just need to apply them
# defining the model:
class Net(nn.Module):
    def __init__(self, n_units):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, n_units)
        self.fc2 = nn.Linear(n_units, 10)
        
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
# using the parameters from Optuna:
n_units_optimal = 372
model = Net(n_units_optimal)

In [32]:
#loading the mnist dataset
from torchvision.datasets import FashionMNIST
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = FashionMNIST('.', train=True, download=True, transform=transform)
test_dataset = FashionMNIST('.', train=False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

In [33]:
#Define the optimizer and loss function using the optimal parameters:
lr_optimal = 0.000451910438456066
optimizer = optim.Adam(model.parameters(), lr=lr_optimal)
criterion = nn.CrossEntropyLoss()

In [34]:
#Train the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    # Evaluate on the test set
    model.eval()
    correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    accuracy = correct / len(test_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Accuracy: {accuracy*100:.2f}%")

Epoch 1/10, Accuracy: 81.65%
Epoch 2/10, Accuracy: 84.46%
Epoch 3/10, Accuracy: 85.86%
Epoch 4/10, Accuracy: 86.61%
Epoch 5/10, Accuracy: 87.13%
Epoch 6/10, Accuracy: 86.91%
Epoch 7/10, Accuracy: 87.07%
Epoch 8/10, Accuracy: 87.28%
Epoch 9/10, Accuracy: 87.58%
Epoch 10/10, Accuracy: 88.04%


In [35]:
#Exporting the model 
# First, let's convert the model to a script
scripted_model = torch.jit.script(model)

In [36]:
#Then, let's export it
torch.jit.save(scripted_model, "PTmodel.pt")

In [39]:
#The model can then be loaded and run with:
loaded_model = torch.jit.load("PTmodel.pt")
output = loaded_model(input_data) # where input_data is the inference source data