In [82]:
import gc
import os

import pytorch_lightning as pl
import torch
from nemo.collections import nlp as nemo_nlp
from nemo.utils.exp_manager import exp_manager
from omegaconf import OmegaConf

In [83]:
# Directory names (relative to the current working directory) for data and work files.
DATA_DIR = "DATA_DIR"
WORK_DIR = "WORK_DIR"

# Publish the data directory through the process environment as well.
os.environ['DATA_DIR'] = DATA_DIR

# Create both directories up front; an already existing directory is not an error.
for directory in (WORK_DIR, DATA_DIR):
    os.makedirs(directory, exist_ok=True)


In [84]:
# Load the text-classification configuration from its YAML file.
config_path = '/home/lucas/PycharmProjects/Enzyme_Classification/configs/text_classification_config.yaml'
config = OmegaConf.load(config_path)

In [85]:
# Project directory that contains the SST-2 train/dev files.
project_root = '/home/lucas/PycharmProjects/Enzyme_Classification'

# Configure the dataset section: two classes, plus the train and validation files.
model_cfg = config.model
model_cfg.dataset.num_classes = 2
model_cfg.train_ds.file_path = os.path.join(project_root, 'SST-2/train.tsv')
model_cfg.validation_ds.file_path = os.path.join(project_root, 'SST-2/dev.tsv')

# File names for the saved .nemo model and for the .onnx export.
config.save_to = 'trained-model.nemo'
config.export_to = 'trained-model.onnx'

# Print the train dataloader's settings as YAML. Other parameters, such as the batch size
# or the number of samples to use (-1 means all samples), can be changed the same way.
print("Train dataloader's config: \n")
print(OmegaConf.to_yaml(model_cfg.train_ds))

Train dataloader's config: 

file_path: /home/lucas/PycharmProjects/Enzyme_Classification/SST-2/train.tsv
batch_size: 64
shuffle: true
num_samples: 5
num_workers: ${model.dataset.num_workers}
drop_last: ${model.dataset.drop_last}
pin_memory: ${model.dataset.pin_memory}



In [86]:
# Print a heading, then the trainer section of the config rendered as YAML.
print("Trainer config - \n")
trainer_yaml = OmegaConf.to_yaml(config.trainer)
print(trainer_yaml)

Trainer config - 

gpus: 1
num_nodes: 1
max_epochs: 100
max_steps: null
accumulate_grad_batches: 1
gradient_clip_val: 0.0
amp_level: O0
precision: 32
accelerator: ddp
log_every_n_steps: 1
val_check_interval: 1.0
resume_from_checkpoint: null
num_sanity_val_steps: 0
checkpoint_callback: false
logger: false



In [87]:
# Adjust a few trainer settings before the trainer is built.

# Use one GPU when CUDA is available, otherwise none.
cuda_available = torch.cuda.is_available()
config.trainer.gpus = int(cuda_available)

# For mixed-precision training, uncomment the two lines below
# (precision must be 16 and amp_level must be the string 'O1'):
# config.trainer.precision = 16
# config.trainer.amp_level = 'O1'

# Turn off distributed training; this prevents errors in environments such as Colab.
config.trainer.accelerator = None

# Cap the number of epochs to keep this demonstration short.
# Training stops at max_steps or max_epochs, whichever is reached first.
config.trainer.max_epochs = 5

# Build a PyTorch Lightning Trainer from the trainer section of the config.
trainer = pl.Trainer(**config.trainer)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


In [88]:
 # The experiment manager of a trainer can be set only once, so a fresh trainer is built here.
# That keeps this cell from raising an error when it is executed more than once.
trainer = pl.Trainer(**config.trainer)

# exp_dir is the path where checkpoints and logs are stored; its default is "./nemo_experiments".
# To choose another location, uncomment the following line:
# config.exp_manager.exp_dir = 'LOG_CHECKPOINT_DIR'

# Print the experiment-manager section of the config as YAML.
exp_manager_cfg = config.exp_manager
print(OmegaConf.to_yaml(exp_manager_cfg))

# Set up the experiment manager on the trainer; the returned value is the path of the
# current experiment, printed for easy access.
exp_dir = exp_manager(trainer, exp_manager_cfg)
print(exp_dir)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[NeMo W 2021-01-15 13:46:41 exp_manager:562] trainer had a weights_save_path of cwd(). This was ignored.


exp_dir: /home/lucas/PycharmProjects/Enzyme_Classification/DATA_DIR
name: TextClassification
create_tensorboard_logger: true
create_checkpoint_callback: true

[NeMo I 2021-01-15 13:46:41 exp_manager:183] Experiments will be logged at /home/lucas/PycharmProjects/Enzyme_Classification/DATA_DIR/TextClassification/2021-01-15_13-32-33
[NeMo I 2021-01-15 13:46:41 exp_manager:519] TensorboardLogger has been set up
/home/lucas/PycharmProjects/Enzyme_Classification/DATA_DIR/TextClassification/2021-01-15_13-32-33


In [89]:
# Build a TextClassificationModel on the 'distilbert-base-cased' encoder, train it with the
# trainer created above, and save the result to the file named by config.save_to.
config.model.language_model.pretrained_model_name = 'distilbert-base-cased'

# This cell previously ended with "RuntimeError: CUDA out of memory" on a ~4 GiB GPU while the
# train dataloader was configured with batch_size 64. Cap the batch sizes so each step needs
# less GPU memory. Only values that are present and larger than the cap are changed.
# NOTE(review): 16 is a conservative guess, not a measured limit; tune it for your GPU.
MAX_BATCH_SIZE = 16
for split_cfg in (config.model.train_ds, config.model.validation_ds):
    configured_batch_size = split_cfg.get('batch_size')
    if configured_batch_size is not None and configured_batch_size > MAX_BATCH_SIZE:
        split_cfg.batch_size = MAX_BATCH_SIZE

# When this cell is re-run in the same kernel, the model from the previous run would stay
# referenced (and may keep its GPU memory) until the new one has been built. Drop that
# reference first, collect garbage, and return cached blocks to the CUDA driver.
# This is best-effort: other live references (e.g. a stored traceback) can still pin memory,
# in which case restarting the kernel is the reliable way to free the GPU.
model = None
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

model = nemo_nlp.models.TextClassificationModel(cfg=config.model, trainer=trainer)
trainer.fit(model)

# Save only after training has finished without raising.
model.save_to(config.save_to)

Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


[NeMo I 2021-01-15 13:46:43 text_classification_dataset:119] Read 1 examples from /home/lucas/PycharmProjects/Enzyme_Classification/SST-2/train.tsv.
[NeMo I 2021-01-15 13:46:43 text_classification_dataset:232] *** Example ***
[NeMo I 2021-01-15 13:46:43 text_classification_dataset:233] example 0: ['hide', 'new', 'secretions', 'from', 'the', 'parental', 'units']
[NeMo I 2021-01-15 13:46:43 text_classification_dataset:234] subtokens: [CLS] hide new secret ##ions from the parental units [SEP]
[NeMo I 2021-01-15 13:46:43 text_classification_dataset:235] input_ids: 101 4750 1207 3318 5266 1121 1103 22467 2338 102
[NeMo I 2021-01-15 13:46:43 text_classification_dataset:236] segment_ids: 0 0 0 0 0 0 0 0 0 0
[NeMo I 2021-01-15 13:46:43 text_classification_dataset:237] input_mask: 1 1 1 1 1 1 1 1 1 1
[NeMo I 2021-01-15 13:46:43 text_classification_dataset:238] label: 0
[NeMo I 2021-01-15 13:46:43 data_preprocessing:294] Some stats of the lengths of the sequences:
[NeMo I 2021-01-15 13:46:43 dat

RuntimeError: CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 3.95 GiB total capacity; 2.51 GiB already allocated; 54.25 MiB free; 2.61 GiB reserved in total by PyTorch)