In [2]:
pip install optuna

Collecting optuna
  Obtaining dependency information for optuna from https://files.pythonhosted.org/packages/15/da/68883911855d8b4d521f9a370e4e6aab8232b91c1d8d5a8348c4680c6642/optuna-3.6.1-py3-none-any.whl.metadata
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Obtaining dependency information for alembic>=1.5.0 from https://files.pythonhosted.org/packages/df/ed/c884465c33c25451e4a5cd4acad154c29e5341e3214e220e7f3478aa4b0d/alembic-1.13.2-py3-none-any.whl.metadata
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Obtaining dependency information for colorlog from https://files.pythonhosted.org/packages/f3/18/3e867ab37a24fdf073c1617b9c7830e06ec270b1ea4694a624038fc40a03/colorlog-6.8.2-py3-none-any.whl.metadata
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Obtaining dependency information for Mako from https://files.python

In [3]:
import optuna
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import numpy as np
import torch

class RNN(nn.Module):
    def __init__(self, vocab_size, dw, dh, output):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, dw, padding_idx=vocab_size-1)
        self.rnn = nn.RNN(dw, dh, batch_first=True, bidirectional=True)
        self.fc1 = nn.Linear(dh*2, output, bias=True)
        self.fc2 = nn.Softmax(dim=1)
    def forward(self, x):
        x = self.embed(x)
        _, x = self.rnn(x)
        rnn_out = torch.cat([x[-2,:,:], x[-1,:,:]], dim=1)
        x = self.fc1(rnn_out)
        x = self.fc2(x)
        return x

class LSTM(nn.Module):
    def __init__(self, vocab_size, dw, dh, output):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, dw, padding_idx=vocab_size-1)
        self.lstm = nn.LSTM(dw, dh, batch_first=True,bidirectional=True)
        self.fc1 = nn.Linear(dh*2, output, bias=True)
        self.fc2 = nn.Softmax(dim=1)
    def forward(self, x):
        x = self.embed(x)
        _, x = self.lstm(x)
        x = torch.cat([x[0][0],x[0][1]], dim=1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

class CNN(nn.Module):
  def __init__(self, vocab_size, dw, output, layer, unit, activation):
      super().__init__()
      self.layer = layer
      self.embed = nn.Embedding(vocab_size, dw, padding_idx = vocab_size-1)
      if unit == 6:
          units = [6, 4, 2]
      elif unit == 4:
          units = [4, 3, 2]
      elif unit == 2:
          units = [2, 2, 2]
      self.conv1 = nn.Conv2d(1, units[0], kernel_size=(units[0], 300))
      linearoutput = units[0]
      if layer > 1:
          self.conv2 = nn.Conv2d(units[0], units[1], kernel_size=(units[1],1))
          linearoutput = units[1]
      if layer > 2:
          self.conv3 = nn.Conv2d(units[1], units[2], kernel_size=(units[2],1))
          linearoutput = units[2]
      self.fc1 = nn.Linear(linearoutput, output, bias=True)
      self.fc2 = nn.Softmax(dim=1)

      if activation == "Tanh":
          self.active = nn.Tanh()
      elif activation == "ReLU":
          self.active = nn.ReLU()
      elif activation == "Sigmoid":
          self.active = nn.Sigmoid()
  def forward(self, x):
      x = self.embed(x)
      x = x.unsqueeze(1)
      x = self.conv1(x)
      x = self.active(x)
      if self.layer > 1:
          x = self.conv2(x)
          x = self.active(x)
      if self.layer > 2:
          x = self.conv3(x)
          x = self.active(x)
      x = F.max_pool2d(x, kernel_size=(x.size()[2], 1))
      x = x.view(x.size()[0], -1)
      x = self.fc1(x)
      x = self.fc2(x)
      return x

def calculate_loss_and_accuracy(model, dataset, device, criterion=None):
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
    loss = 0.0
    total = 0
    correct = 0
    model = model.to(device)
    with torch.no_grad():
        for X, Y in dataloader:
            X = X.to(device)
            Y = Y.to(device)
            Y_pred = model(X)
            if criterion != None:
                loss += criterion(Y_pred, Y).item()
            pred = torch.argmax(Y_pred, dim=-1)
            total += len(Y)
            correct += (pred == Y).sum().item()
    return loss / len(dataset), correct / total


def train_model(X_train, y_train, X_test, y_test, batch_size, model, lr, num_epochs, device, collate_fn=None, optimizer_select="SGD"):
    dataset_train = TensorDataset(X_train, y_train)
    dataset_test = TensorDataset(X_test, y_test)
    model = model.to(device)
    dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
    dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)
    for ep in range(num_epochs):
        if ep%20==0:
            lr = lr * 0.1
        if optimizer_select == "SGD":
            optimizer = torch.optim.SGD(model.parameters(), lr=lr)
        elif optimizer_select == "Adam":
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        elif optimizer_select == "RMSprop":
          optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
        model.train()
        for X, Y in dataloader_train:
            X = X.to(device)
            Y = Y.to(device)
            optimizer.zero_grad()
            Y_pred = model(X)
            loss = criterion(Y_pred, Y)
            loss.backward()
            optimizer.step()
    model.eval()
    _, acc_test = calculate_loss_and_accuracy(model, dataset_test, device, criterion=criterion)

    return acc_test


def CountVocab(name):
    f = open("{}_code.txt".format(name), "r")
    lines = f.readlines()
    f.close()
    max_num = []
    for line in lines:
        line_t = line.split("\t")[2].replace("\n", "").split(" ")
        max_num.extend(map(int, line_t))
    vocab_max = max(max_num)+1
    return vocab_max

def GetCodeLow(name):
    f = open("{}_code.txt".format(name), "r")
    lines = f.readlines()
    f.close()
    num_list = []
    code_list = []
    pad_list = []
    for line in lines:
        line_s = line.split("\t")
        code_list.append(int(line_s[0]))
        num = line_s[2].replace("\n", "").split(" ")
        num = list(map(int, num))
        num_list.append(num)
        num_tensor = torch.tensor(num)
        pad_list.append(num_tensor)
    max_vocab = CountVocab("train")
    mlen = max([len(x) for x in num_list])
    pad_list = list(map(lambda x:x + [max_vocab]*(mlen-len(x)), num_list))
    pad_list = torch.tensor(pad_list)
    code_list = torch.tensor(code_list)
    return pad_list, code_list

def objective_RNN(trial):
    X_train, Y_train = GetCodeLow("train")
    X_test, Y_test = GetCodeLow("test")
    BATCH_SIZE = 2
    NUM_EPOCHS = 10
    VOCAB_SIZE = CountVocab("train")+1
    EMB_SIZE = 300
    OUTPUT_SIZE = 4
    lr = 1e-2
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_name_display_only = trial.suggest_categorical("model_name_RNN", ["RNN"])
    HIDDEN_SIZE = trial.suggest_categorical("HIDDEN_SIZE", [10, 50, 100, 500, 1000])
    optimizer_select = trial.suggest_categorical("optimizer_select", ["SGD", "Adam", "RMSprop"])
    model = RNN(VOCAB_SIZE, EMB_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
    score = train_model(X_train, Y_train, X_test, Y_test, BATCH_SIZE, model, lr, NUM_EPOCHS, device, optimizer_select)
    return score

def objective_LSTM(trial):
    X_train, Y_train = GetCodeLow("train")
    X_test, Y_test = GetCodeLow("test")
    BATCH_SIZE = 2
    NUM_EPOCHS = 10
    VOCAB_SIZE = CountVocab("train")+1
    EMB_SIZE = 300
    OUTPUT_SIZE = 4
    lr = 1e-2
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_name_display_only = trial.suggest_categorical("model_name_LSTM", ["LSTM"])
    HIDDEN_SIZE = trial.suggest_categorical("HIDDEN_SIZE", [10, 50, 100, 500, 1000])
    optimizer_select = trial.suggest_categorical("optimizer_select", ["SGD", "Adam", "RMSprop"])
    model = LSTM(VOCAB_SIZE, EMB_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
    score = train_model(X_train, Y_train, X_test, Y_test, BATCH_SIZE, model, lr, NUM_EPOCHS, device, optimizer_select)
    return score

def objective_CNN(trial):
    X_train, Y_train = GetCodeLow("train")
    X_test, Y_test = GetCodeLow("test")
    BATCH_SIZE = 2
    NUM_EPOCHS = 10
    VOCAB_SIZE = CountVocab("train")+1
    EMB_SIZE = 300
    OUTPUT_SIZE = 4
    lr = 1e-2
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_name_display_only = trial.suggest_categorical("model_name_CNN", ["CNN"])
    layer = trial.suggest_categorical("layer", [1,2,3])
    unit = trial.suggest_categorical("unit", [2,4,6])
    activation = trial.suggest_categorical("activation", ["Tanh", "Sigmoid", "ReLU"])
    optimizer_select = trial.suggest_categorical("optimizer_select", ["SGD", "Adam", "RMSprop"])
    model = CNN(VOCAB_SIZE, EMB_SIZE, OUTPUT_SIZE, layer, unit, activation)
    score = train_model(X_train, Y_train, X_test, Y_test, BATCH_SIZE, model, lr, NUM_EPOCHS, device, optimizer_select)
    return score

study = optuna.create_study(direction='maximize')
study.optimize(objective_CNN, n_trials=81)
study.optimize(objective_RNN, n_trials=15)
study.optimize(objective_LSTM, n_trials=15)
print(study.best_params)
print(study.best_value)


[I 2024-07-01 13:51:52,287] A new study created in memory with name: no-name-2b508ec0-099a-40b8-81af-4faa0c8b700e
[I 2024-07-01 14:07:25,321] Trial 0 finished with value: 0.6694152923538231 and parameters: {'model_name_CNN': 'CNN', 'layer': 2, 'unit': 6, 'activation': 'ReLU', 'optimizer_select': 'SGD'}. Best is trial 0 with value: 0.6694152923538231.
[I 2024-07-01 14:13:09,656] Trial 1 finished with value: 0.7008995502248876 and parameters: {'model_name_CNN': 'CNN', 'layer': 1, 'unit': 4, 'activation': 'ReLU', 'optimizer_select': 'RMSprop'}. Best is trial 1 with value: 0.7008995502248876.
[I 2024-07-01 14:17:07,070] Trial 2 finished with value: 0.656671664167916 and parameters: {'model_name_CNN': 'CNN', 'layer': 2, 'unit': 4, 'activation': 'Tanh', 'optimizer_select': 'RMSprop'}. Best is trial 1 with value: 0.7008995502248876.
[I 2024-07-01 14:20:48,369] Trial 3 finished with value: 0.4160419790104948 and parameters: {'model_name_CNN': 'CNN', 'layer': 2, 'unit': 4, 'activation': 'Sigmoi

[I 2024-07-01 16:19:59,643] Trial 34 finished with value: 0.6904047976011994 and parameters: {'model_name_CNN': 'CNN', 'layer': 1, 'unit': 6, 'activation': 'ReLU', 'optimizer_select': 'RMSprop'}. Best is trial 14 with value: 0.7181409295352323.
[I 2024-07-01 16:23:32,837] Trial 35 finished with value: 0.704647676161919 and parameters: {'model_name_CNN': 'CNN', 'layer': 1, 'unit': 4, 'activation': 'ReLU', 'optimizer_select': 'Adam'}. Best is trial 14 with value: 0.7181409295352323.
[I 2024-07-01 16:27:54,477] Trial 36 finished with value: 0.6679160419790104 and parameters: {'model_name_CNN': 'CNN', 'layer': 2, 'unit': 6, 'activation': 'ReLU', 'optimizer_select': 'Adam'}. Best is trial 14 with value: 0.7181409295352323.
[I 2024-07-01 16:31:44,157] Trial 37 finished with value: 0.6776611694152923 and parameters: {'model_name_CNN': 'CNN', 'layer': 1, 'unit': 4, 'activation': 'Sigmoid', 'optimizer_select': 'SGD'}. Best is trial 14 with value: 0.7181409295352323.
[I 2024-07-01 16:35:57,232] 

KeyboardInterrupt: 