# Load Libraries

In [None]:
# Jupyter shell escape: print the NVIDIA driver/GPU status report for this runtime.
!nvidia-smi

In [None]:
# For finetuning
import os
import torch

# For loading finetuned model
from modules.train.trainer import get_model_tokenizer

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}.')

# Model Inference

## Configure Model

In [None]:
# Available choices along each experiment axis
models = [
    't5-base',
    'bart-base',
    'gpt2',
]
datasets = [
    'squad',
    'wmt16_en_de',
    'imdb',
]
finetunes = [
    'full',
    'lora',
    'adapters',
]

# Positions into the lists above that select the active configuration.
# NOTE: `model` is rebound to the loaded model object further down the
# notebook, so re-run this cell before rebuilding `model_path`.
model = dataset = finetune = 0

In [None]:
# Task tag used in checkpoint directory names, keyed by dataset name.
task = dict(
    squad="qa",
    wmt16_en_de="translation",
    imdb="textsentiment",
)

# Checkpoint directory layout: models/ft-<model>-<finetune method>-<task>
model_path = 'models/ft-' + '-'.join(
    [models[model], finetunes[finetune], task[datasets[dataset]]]
)

## Testing Generation

In [None]:
# Load the finetuned model and its tokenizer from the checkpoint directory in `model_path`.
# NOTE: this rebinds `model` from the integer index chosen in the configuration
# cell to the loaded model object.
# NOTE(review): presumably get_model_tokenizer places the model on `device` —
# confirm against modules.train.trainer.
model, tokenizer = get_model_tokenizer(model_path, device)

In [None]:
def generate_output(input, max_length=512, output_max_length=64):
    """Generate text from the loaded model for a single input string.

    Relies on the notebook-level ``model`` and ``tokenizer`` globals.

    Args:
        input: Text to feed to the model. The name shadows the builtin but is
            kept so existing keyword callers continue to work.
        max_length: Maximum number of input tokens; longer inputs are
            truncated by the tokenizer.
        output_max_length: Value forwarded to ``model.generate`` as
            ``max_length``. Defaults to 64, the previously hard-coded value.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed.
    """
    input_ids = tokenizer.encode(
        input, return_tensors="pt", max_length=max_length, truncation=True
    )
    # The tokenizer returns CPU tensors; move them to wherever the model's
    # weights live so generation does not fail with a device mismatch on GPU.
    input_ids = input_ids.to(next(model.parameters()).device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_length=output_max_length,
            num_beams=4,
            early_stopping=True,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)