In [153]:
%pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [None]:
import torch.nn as nn
import torchvision
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from random import choices
import numpy as np
import random
from sklearn.model_selection import train_test_split
import optuna
import warnings

In [127]:
device = "cuda" if torch.cuda.is_available() else "cpu"
seed = 42
warnings.filterwarnings("ignore")

In [None]:
torch.random.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [128]:
training_dataset = torchvision.datasets.MNIST(root="./", download=True, train=True)
test_dataset = torchvision.datasets.MNIST(root="./", download=True, train=False)

In [129]:
transformation = torchvision.transforms.Compose([torchvision.transforms.ToPILImage(), torchvision.transforms.Pad(padding=2, fill=0), torchvision.transforms.ToTensor()])

In [130]:
training_images = []
test_images = []
for img in training_dataset.data:
  training_images.append(transformation(img).apply_(lambda el: -0.1 if el == 0 else 1.175))

for img in test_dataset.data:
  test_images.append(transformation(img).apply_(lambda el: -0.1 if el == 0 else 1.175))

X_train = torch.stack(training_images)
X_test = torch.stack(test_images)
train_labels = training_dataset.targets
test_labels = test_dataset.targets


In [131]:
y_train = torch.Tensor([[1 if i == el else 0 for i in range(10)] for el in train_labels])
y_test = torch.Tensor([[1 if i == el else 0 for i in range(10)] for el in test_labels])

In [None]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, 0.2)

In [132]:
class LeNet(nn.Module):

  def subsample1(self, batch):


    return nn.functional.sigmoid(nn.functional.conv2d(batch, torch.stack([torch.full(size=(1, 2, 2), fill_value = 1, device=device)*self.weight1[i] for i in range(6)]), stride=2, bias=self.bias1, groups=6))

  def subsample2(self, batch):

    return nn.functional.sigmoid(nn.functional.conv2d(batch, torch.stack([torch.full(size=(1, 2, 2), fill_value = 1, device=device)*self.weight2[i] for i in range(16)]), stride=2, bias=self.bias2, groups=16))

  def squashed_sigmoid(self, x, A = 1.7159, S = 2/3):
    return A * torch.nn.functional.tanh(S * x)


  def __init__(self):
    super().__init__()

    rbf_weights = choices([-1, 1], weights = [0.5, 0.5], k=840)
    self.rbf_weights = torch.Tensor(rbf_weights).reshape((10, 84)).to(device)

    self.weight1 = nn.Parameter(torch.ones(6)).to(device)
    self.bias1 = nn.Parameter(torch.ones(6)).to(device)
    self.weight2 = nn.Parameter(torch.ones(16)).to(device)
    self.bias2 = nn.Parameter(torch.ones(16)).to(device)

    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    self.c3conv0 = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5)
    self.c3conv1 = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5)
    self.c3conv2 = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5)
    self.c3conv3 = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5)
    self.c3conv4 = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5)
    self.c3conv5 = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5)
    self.c3conv6 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv7 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv8 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv9 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv10 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv11 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv12 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv13 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv14 = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=5)
    self.c3conv15 = nn.Conv2d(in_channels=6, out_channels=1, kernel_size=5)
    self.conv4 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
    self.fc1 = nn.Linear(in_features = 120, out_features = 84)
    self.fc2 = nn.Linear(in_features = 84, out_features = 10)
  def forward(self, x):
    x = self.conv1(x)
    x = self.subsample1(x)

    x0 = x[:, [0, 1, 2]]
    x1 = x[:, [1, 2, 3]]
    x2 = x[:, [2, 3, 4]]
    x3 = x[:, [3, 4, 5]]
    x4 = x[:, [0, 4, 5]]
    x5 = x[:, [0, 1, 5]]
    x6 = x[:, [0, 1, 2, 3]]
    x7 = x[:, [1, 2, 3, 4]]
    x8 = x[:, [2, 3, 4, 5]]
    x9 = x[:, [0, 3, 4, 5]]
    x10 = x[:, [0, 1, 4, 5]]
    x11 = x[:, [0, 1, 2, 5]]
    x12 = x[:, [0, 1, 3, 4]]
    x13 = x[:, [1, 2, 4, 5]]
    x14 = x[:, [0, 2, 3, 5]]
    x15 = x[:, :]
    x0 = self.c3conv0(x0.to(device))
    x1 = self.c3conv1(x1.to(device))
    x2 = self.c3conv2(x2.to(device))
    x3 = self.c3conv3(x3.to(device))
    x4 = self.c3conv4(x4.to(device))
    x5 = self.c3conv5(x5.to(device))
    x6 = self.c3conv6(x6.to(device))
    x7 = self.c3conv7(x7.to(device))
    x8 = self.c3conv8(x8.to(device))
    x9 = self.c3conv9(x9.to(device))
    x10 = self.c3conv10(x10.to(device))
    x11 = self.c3conv11(x11.to(device))
    x12 = self.c3conv12(x12.to(device))
    x13 = self.c3conv13(x13.to(device))
    x14 = self.c3conv14(x14.to(device))
    x15 = self.c3conv15(x15.to(device))
    x = torch.stack([x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15], dim=1).reshape((x.shape[0], 16, 10, 10))
    x = self.subsample2(x)
    x = self.conv4(x.to(device))
    x = torch.flatten(x, 1, -1)
    x = self.fc1(x)
    x = self.squashed_sigmoid(x)
    x = self.fc2(x)
    return nn.functional.softmax(x)

In [None]:
def objective(trial):
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)



    batch_size = trial.suggest_categorical("batch_size", [256, 512, 1024, 2048, 4096])
    learning_rate = trial.suggest_loguniform("learning_rate", 0.00001, 0.1)
    optim = trial.suggest_categorical("optimizer", ["Adam", "AdamW", "RAdam"])
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-10, 1e-3)

    model = LeNet().to(device)
    loss_fn = nn.CrossEntropyLoss()
    train_batches = DataLoader([*zip(X_train, y_train)], batch_size=batch_size, shuffle=True)


    if optim == "Adam":
      optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optim == "AdamW":
      optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
      optimizer = torch.optim.RAdam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    for i in range(10):
      model.train()
      for batch in train_batches:
        optimizer.zero_grad()
        features, target = batch[:-1], batch[-1]
        features = features[0].to(device)
        target = target.to(device)
        outputs = model(features)
        perte = loss_fn(outputs, target)
        perte.backward()
        optim.step()
    model.eval()
    return loss_fn(model(X_val.to(device)), y_val.to(device)).item()

In [148]:
sampler = optuna.samplers.TPESampler(seed=seed)

study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective, n_trials = 20)
trial = study.best_trial
print(trial.params)

97.22000122070312


  return nn.functional.softmax(x)
