<a href="https://colab.research.google.com/github/nelly-hateva/rnn2fsa/blob/master/notebooks/State_Regularized_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prerequisites

## Mount Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab file system (remounting if it is already
# mounted) so the thesis data and model folders can be addressed as paths.
MOUNT_POINT = "/content/drive/"
drive.mount(MOUNT_POINT, force_remount=True)

# Both folders live under the same Drive mount point.
DATA_DIR, MODELS_DIR = (
  MOUNT_POINT + relative_path
  for relative_path in ("My Drive/Thesis/data", "My Drive/Thesis/models")
)

## Imports

In [None]:
import pickle
import sys

import matplotlib.pyplot as plt
# Bind the torch sub-packages that the training cells reference by bare name
# (`nn.CrossEntropyLoss`, `optim.Adam`, `utils.data.DataLoader`).
from torch import nn, optim, utils

# Make the project sources stored on Google Drive importable.
sys.path.append(MOUNT_POINT + "My Drive/Thesis/src")

from automata import Evaluation
from dataset import NLDataset, Preprocessing
from measures import Measures
from model import NLNN
from rnn2fsa import Algorithm1, Algorithm2
from training import Trainer, ModelSerializer
from utils import Reproducibility

## Set device and check runtime resources

In [None]:
from psutil import virtual_memory
import torch

ram_gb = virtual_memory().total / 1e+9
print('{:.2f} GB RAM available\n'.format(ram_gb))
if ram_gb < 20:
  print('To enable a high-RAM runtime, select "Runtime" -> "Change runtime type", ')
  print('and then select "High-RAM" in the "Runtime shape" dropdown. ')
  print('Then re-execute this cell.\n')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device: {}\n'.format(device))

gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select "Runtime" -> "Change runtime type" to enable a GPU accelerator, ')
  print('and then re-execute this cell.\n')
else:
  print(gpu_info)


# Training

## Train numeral model

In [None]:
# Train a model on the "numeral" dataset, plot the training curves with
# English and Bulgarian labels, and print the accuracy on every split.
train, dev, test, alphabet = NLDataset.load(DATA_DIR + "/numeral/")

# ---- Notes from earlier runs (kept for reference) ----

# numeral/modelgradviz.params
# train : TP : 471 TN : 448 FP : 23 FN : 8 Pr : 0.95 R : 0.98 F1: 0.97 ACC : 0.97 
# dev : TP : 52 TN : 57 FP : 8 FN : 2 Pr : 0.87 R : 0.96 F1: 0.91 ACC : 0.92 
# test : TP : 58 TN : 46 FP : 12 FN : 3 Pr : 0.83 R : 0.95 F1: 0.89 ACC : 0.87 
# {'num_embeddings': 23, 'mode': 'rnn', 'nonlinearity': 'relu', 'hidden_size': 20, 'bias': False, 'number_of_states': 3000, 'temperature': 0.1, 'batch_size': 10, 'lr': 0.001, 'weight_decay': 0, 'num_epochs': 200, 'patience': 500, 'save_after_each_epoch': False}

# numeral/stmodel.params
# clip grad 1.0
# params = {
#   'num_embeddings': len(alphabet),
#   #'embedding_dim': 3,
#   'mode': 'rnn',
#   'nonlinearity': 'relu',
#   'hidden_size': 20,
#   'bias': False,
#   'number_of_states': 2000,
#   'temperature': 0.1,#1e-7,

#   'batch_size': 10, # 100 1e-2 / 10 1e-3
#   'lr': 1e-3,
#   'weight_decay': 1e-4,
#   'num_epochs': 200,
#   'patience': 500,
#   'save_after_each_epoch': False,
# }
# Best epoch: 50	Best dev accuracy: 0.87	Time: 534.71s
# train : TP : 447 TN : 419 FP : 52 FN : 32 Pr : 0.90 R : 0.93 F1: 0.91 ACC : 0.91 
# dev : TP : 49 TN : 55 FP : 10 FN : 5 Pr : 0.83 R : 0.91 F1: 0.87 ACC : 0.87 
# test : TP : 54 TN : 42 FP : 16 FN : 7 Pr : 0.77 R : 0.89 F1: 0.82 ACC : 0.81 

# ---- Current configuration ----

# Arguments handed to the model class (NLNN) by the Trainer.
model_params = {
  'num_embeddings': len(alphabet),
  #'embedding_dim': 3,
  'mode': 'rnn',
  'nonlinearity': 'relu',
  'hidden_size': 20,
  'bias': False,
  'number_of_states': 3000,
  'temperature': 0.1#1e-7
}

# Arguments handed to the optimizer class (Adam).
optimizer_params = {
  'lr': 1e-3,
  'weight_decay': 0,
}

# Settings of the training loop itself.
params = {
  'batch_size': 10, # 100 1e-2 / 10 1e-3
  'num_epochs': 200,
  'max_norm': 1.0,
  'patience': 10
}

# Only the training split is shuffled.
batch_size = params["batch_size"]
train_dataloader = utils.data.DataLoader(
  NLDataset(train), batch_size=batch_size, shuffle=True
)
dev_dataloader, test_dataloader = (
  utils.data.DataLoader(NLDataset(split), batch_size=batch_size)
  for split in (dev, test)
)

# Checkpoints and the best model are written to the same directory.
numeral_checkpoint_dir = MODELS_DIR + "/numeral/check/"
trainer = Trainer(
  checkpoint_dir=numeral_checkpoint_dir,
  best_model_dir=numeral_checkpoint_dir,
  model_class=NLNN,
  model_params=model_params,
  loss=nn.CrossEntropyLoss(),
  optimizer_class=optim.Adam,
  optimizer_params=optimizer_params,
  device=device,
  params=params
)

model = trainer.fit(
  train_dataloader=train_dataloader, dev_dataloader=dev_dataloader
)

# The same training curves are rendered twice: once with English labels and
# once with Bulgarian labels.
loss_plots = (
  (
    {
      'avg_train_losses': 'Average Train Losses',
      'avg_dev_losses': 'Average Dev Losses',
      'dev_accuracies': 'Dev Accuracy',
      'xlabel': 'epochs',
      'min_avg_dev_loss': 'Minimum Average Dev Loss',
      'max_dev_accuracy': 'Maximum Dev Accuracy'
    },
    MODELS_DIR + "/numeral/trainer/losses.en.jpg",
  ),
  (
    {
      'avg_train_losses': 'Средни стойности на целевата функция върху TRAIN',
      'avg_dev_losses': 'Средни стойности на целевата функция върху DEV',
      'dev_accuracies': 'Точност върху DEV',
      'xlabel': 'епохи',
      'min_avg_dev_loss': 'Минимална средна стойност на целевата функция върху DEV',
      'max_dev_accuracy': 'Максимална точност върху DEV'
    },
    MODELS_DIR + "/numeral/trainer/losses.bg.jpg",
  ),
)
for plot_labels, plot_path in loss_plots:
  trainer.plot_training_losses(labels=plot_labels, path=plot_path)

# NOTE(review): `print_accuracy` is not defined or imported in the visible
# part of this notebook - confirm where it comes from.
for split_name, dataloader in (
  ("train", train_dataloader),
  ("dev", dev_dataloader),
  ("test", test_dataloader),
):
  print_accuracy(dataloader, model, split_name)


## Train words model

In [None]:
# Train a model on the "words" dataset.
train, dev, test, alphabet = NLDataset.load(DATA_DIR + "/words/")

# Arguments handed to the model class (NLNN) by the Trainer.
model_params = {
  'num_embeddings': len(alphabet),
  #'embedding_dim': 3,
  'mode': 'rnn',
  'nonlinearity': 'relu',
  'hidden_size': 100,
  'bias': False,
  'number_of_states': 60000,
  'temperature': 0.1,#1e-7,
}

# Arguments handed to the optimizer class (Adam).
optimizer_params = {
  'lr': 1e-4,
  'weight_decay': 1e-6
}

# Settings of the training loop itself.
params = {
  'batch_size': 20,
  'num_epochs': 50,
  'max_norm': 1.0,
  'patience': 10
}

# Only the training split is shuffled.
train_dataloader = utils.data.DataLoader(
  NLDataset(train), batch_size=params["batch_size"], shuffle=True
)
# The dev split is wrapped in NLDataset exactly like the train and test splits.
dev_dataloader = utils.data.DataLoader(
  NLDataset(dev), batch_size=params["batch_size"]
)
test_dataloader = utils.data.DataLoader(
  NLDataset(test), batch_size=params["batch_size"]
)

# Checkpoints and the best model are written to the same directory.
trainer = Trainer(
  checkpoint_dir=MODELS_DIR + "/words/model/",
  best_model_dir=MODELS_DIR + "/words/model/",
  model_class=NLNN,
  model_params=model_params,
  loss=nn.CrossEntropyLoss(),
  optimizer_class=optim.Adam,
  optimizer_params=optimizer_params,
  device=device,
  params=params
)

model = trainer.fit(
  train_dataloader=train_dataloader, dev_dataloader=dev_dataloader
)

## Resume training

In [None]:
# Continue training the "words" model from its saved checkpoint.
train, dev, test, alphabet = NLDataset.load(DATA_DIR + "/words/")
params = {'batch_size': 20}

# Only the training split is shuffled.
batch_size = params["batch_size"]
train_dataloader = utils.data.DataLoader(
  NLDataset(train), batch_size=batch_size, shuffle=True
)
dev_dataloader, test_dataloader = (
  utils.data.DataLoader(NLDataset(split), batch_size=batch_size)
  for split in (dev, test)
)

# No model / optimizer / training parameters are passed here; presumably the
# Trainer restores them from the checkpoint file - TODO confirm.
words_model_dir = MODELS_DIR + "/words/model/"
trainer = Trainer(
  checkpoint=MODELS_DIR + "/words/model/checkpoint.pt",
  checkpoint_dir=words_model_dir,
  best_model_dir=words_model_dir,
  device=device,
  model_class=NLNN,
  loss=nn.CrossEntropyLoss(),
  optimizer_class=optim.Adam
)

model = trainer.fit(
  train_dataloader=train_dataloader,
  dev_dataloader=dev_dataloader,
)


# Accuracy as a function of the number of states



In [None]:
# For several values of `number_of_states`, train the numeral model
# `n_experiments` times and collect the train / dev accuracy of every run.
train, dev, test, alphabet = NLDataset.load(DATA_DIR + "/numeral/")

# Model, optimizer and training-loop settings in one flat dictionary.
params = {
  'num_embeddings': len(alphabet),
  #'embedding_dim': 3,
  'mode': 'rnn',
  'nonlinearity': 'relu',
  'hidden_size': 20,
  'bias': False,
  'number_of_states': 3000,
  'temperature': 0.1,

  'batch_size': 10, # 100 1e-2 / 10 1e-3
  'lr': 1e-3,
  'weight_decay': 1e-5,
  'num_epochs': 200,
  'max_norm': 1.0,
  'patience': 10,
  'save_after_each_epoch': False,
}

# Only the training split is shuffled.
batch_size = params["batch_size"]
train_dataloader = utils.data.DataLoader(
  NLDataset(train), batch_size=batch_size, shuffle=True
)
dev_dataloader, test_dataloader = (
  utils.data.DataLoader(NLDataset(split), batch_size=batch_size)
  for split in (dev, test)
)

# Each entry is a pair (number of states, list of accuracies, one per run).
train_results, dev_results = [], []
n_experiments = 5

for n_clusters in [3000, 2000, 1000, 300, 100]:
  print("n_clusters ", n_clusters)

  params['number_of_states'] = n_clusters

  train_accuracy_values, dev_accuracy_values = [], []

  for _run in range(n_experiments):
    # NOTE(review): `NLNN(params)` and `Trainer().fit(model, ..., params=...)`
    # differ from how Trainer is constructed in the other training cells, and
    # `compute_accuracy` is not defined in the visible part of the notebook -
    # this cell looks like it targets an older API; confirm before running.
    model = NLNN(params)

    model = Trainer().fit(
      model,
      train_dataloader=train_dataloader,
      dev_dataloader=dev_dataloader,
      params=params
    )

    # Accuracy is the last of the eight values returned by compute_accuracy.
    for dataloader, accuracy_values in (
      (train_dataloader, train_accuracy_values),
      (dev_dataloader, dev_accuracy_values),
    ):
      _, _, _, _, _, _, _, acc = compute_accuracy(dataloader, model)
      accuracy_values.append(acc)

  train_results.append((n_clusters, train_accuracy_values))
  dev_results.append((n_clusters, dev_accuracy_values))

print(train_results)
print(dev_results)
# with open(MODELS_DIR + "/numeral/number-of-states-accuracy-tain.pkl", 'wb') as f:
#   pickle.dump(train_results, f)

# with open(MODELS_DIR + "/numeral/number-of-states-accuracy-dev.pkl", 'wb') as f:
#   pickle.dump(dev_results, f)

## Plot saved results

In [None]:
def plot(input_file, output_file, labels, ylim=(0.5, 1.0)):
  """Plot min / average / max accuracy against the number of states.

  Args:
    input_file: Path to a pickle file containing a sequence of
      ``(n_states, accuracies)`` pairs, where ``accuracies`` is a non-empty
      sequence of numbers.
    output_file: Path the rendered figure is saved to.
    labels: Mapping with the keys ``'avg'``, ``'min'`` and ``'max'`` (legend
      entries) and ``'xlabel'`` and ``'ylabel'`` (axis titles).
    ylim: ``(bottom, top)`` limits of the y axis. The default keeps the
      original fixed range of 0.5 to 1.0.
  """
  # Read the results and release the file handle before any plotting happens.
  # NOTE: unpickling can execute arbitrary code - only load trusted files.
  with open(input_file, 'rb') as f:
    results = pickle.load(f)

  # The stored order is reversed before plotting (the experiment loop in this
  # notebook records results from the largest to the smallest state count).
  results = list(reversed(results))

  x = [n_states for (n_states, _) in results]
  min_accuracy = [min(accuracy) for (_, accuracy) in results]
  max_accuracy = [max(accuracy) for (_, accuracy) in results]
  avg_accuracy = [sum(accuracy) / len(accuracy) for (_, accuracy) in results]

  plt.figure(num=None, figsize=(10, 10), dpi=100, facecolor='w', edgecolor='w')
  plt.ylim(*ylim)
  plt.xlim(min(x), max(x))
  plt.xticks(x)
  plt.grid()

  plt.plot(x, avg_accuracy, '-o', label=labels['avg'])
  plt.plot(x, min_accuracy, '-ro', label=labels['min'])
  plt.plot(x, max_accuracy, '-go', label=labels['max'])

  plt.legend(loc='upper left')
  plt.xlabel(labels['xlabel'])
  plt.ylabel(labels['ylabel'])
  # Save before show(): showing may release the figure.
  plt.savefig(output_file)
  plt.show()

# Render the saved train and dev accuracy results with English labels.
accuracy_labels_en = {
  'avg': 'average accuracy',
  'min': 'min accuracy',
  'max': 'max accuracy',
  'xlabel': 'number of states',
  'ylabel': 'accuracy of the model'
}

for results_path, figure_path in (
  (
    MODELS_DIR + "/numeral/number-of-states-accuracy-tain.pkl",
    MODELS_DIR + '/numeral/number-of-states-accuracy-tain-en.jpg',
  ),
  (
    MODELS_DIR + "/numeral/number-of-states-accuracy-dev.pkl",
    MODELS_DIR + '/numeral/number-of-states-accuracy-dev-en.jpg',
  ),
):
  # Each call receives its own copy of the labels.
  plot(results_path, figure_path, dict(accuracy_labels_en))

# plot(
#   MODELS_DIR + "/numeral/number-of-states-accuracy.model.pkl",
#   MODELS_DIR + '/numeral/number-of-states-accuracy-bg.model.jpg',
#   {
#     'avg': 'средна точност',
#     'min': 'минимална точност',
#     'max': 'максимална точност',
#     'xlabel': 'брой състояния',
#     'ylabel': 'точност на модела'
#   }
# )
