# Inference

## Libraries

In [None]:
# For loading finetuned model
from modules.train.trainer import get_model_tokenizer

# For testing performance
from modules.inference.inference import run_inference

# For loading test dataset
from modules.data.hfdatasets.squad import load_squad
from modules.data.hfdatasets.wmt import load_wmt
from modules.data.hfdatasets.imdb import load_imdb

# For saving
import json
import torch
import os

## Configuration

In [None]:
# Options available along each experiment axis (base model, dataset, fine-tuning method)
models = ['t5-base', 'bart-base', 'flan-t5-small']
datasets = ['squad', 'wmt16_en_de', 'imdb']
finetunes = ['full', 'lora', 'adapters']

# Position of the chosen option in each of the lists above
model_idx = 1
dataset_idx = 0
finetune_idx = 2

In [None]:
# Short task tag used when naming artefacts on disk, keyed by dataset name
task = {
    "squad": "qa",
    "wmt16_en_de": "translation",
    "imdb": "textsentiment"
}

# Location of the fine-tuned checkpoint: models/ft-<model>-<finetune method>-<task tag>
task_tag = task[datasets[dataset_idx]]
model_path = f'models/ft-{models[model_idx]}-{finetunes[finetune_idx]}-{task_tag}'

Select the compute device: the GPU when CUDA is available, otherwise the CPU.

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f'Using device: {device}.')

Using device: cuda.


# Loading Model & Tokenizer

In [None]:
# Long-form task identifier handed to the model loader, keyed by dataset name
data2task = {
    "squad": "question_answering",
    "imdb": "text_sentiment_analysis",
    "wmt16_en_de": "english_to_german_translation"
}

# Load the fine-tuned model and its tokenizer for the selected configuration
model, tokenizer = get_model_tokenizer(
    model_path,
    finetunes[finetune_idx],
    data2task[datasets[dataset_idx]],
    device,
)

# Put the model in evaluation mode; as the last expression of the cell,
# the returned module is also rendered as the cell output
model.eval()

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): BartForConditionalGeneration(
      (model): BartModel(
        (shared): BartScaledWordEmbedding(50265, 768, padding_idx=1)
        (encoder): BartEncoder(
          (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
          (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
          (layers): ModuleList(
            (0-5): 6 x BartEncoderLayer(
              (self_attn): BartSdpaAttention(
                (k_proj): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.05, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=768, out_features=16, bias=False)
                  )
                  (lora_B): ModuleDict(
                    (default): Linear(in_features=16, out_features=768, bias=

# Loading Dataset

Load the dataset configuration from `modules/data/config.json`.

In [6]:
# Read the dataset configuration (JSON) that is passed on to the dataset loaders
with open('modules/data/config.json', 'r', encoding='utf-8') as config_file:
    data_config = json.load(config_file)

In [None]:
# Build `test_dataset`: a list of {"input": ..., "target": ...} dicts holding the
# prompt text and the reference text for every example of the selected dataset.
test_dataset = None
test = False  # forwarded as-is to the loaders; its meaning is defined there — TODO confirm

# Dispatch on the dataset *name* (as every other cell does) rather than on the raw
# index, so a negative index or a reordered `datasets` list cannot silently pick
# the wrong loader.
dataset_name = datasets[dataset_idx]

if dataset_name == 'squad':
    # The loaders return a 3-tuple; only the middle element is used here
    _, raw_split, _ = load_squad(test=test, data_config=data_config)
    test_dataset = [
        {
            "input": "question: " + q + " context: " + c,
            # Only the first listed answer text is used as the reference
            "target": "answer: " + a["text"][0]
        }
        for q, c, a in zip(raw_split["question"], raw_split["context"], raw_split["answers"])
    ]

elif dataset_name == 'wmt16_en_de':
    _, raw_split, _ = load_wmt(test=test, data_config=data_config)
    test_dataset = [
        {
            "input": f'translate to german. english: {d["translation"]["en"]}',
            "target": f'german: {d["translation"]["de"]}'
        }
        for d in raw_split
    ]

elif dataset_name == 'imdb':
    _, raw_split, _ = load_imdb(test=test, data_config=data_config)
    test_dataset = [
        {
            "input": "sentiment analysis: " + d['text'],
            # Label 1 is rendered as "True"; any other label as "False"
            "target": "sentiment: " + ("True" if d['label'] == 1 else "False")
        }
        for d in raw_split
    ]

else:
    raise ValueError(f'Unsupported dataset: {dataset_name!r}')

# Run Evaluation

In [None]:
# Make sure the folder that receives the prediction files exists (no error if it already does)
os.makedirs(name='prediction', exist_ok=True)

In [None]:
# True only for the combination model index 2 / dataset index 0 / fine-tune index 0.
# NOTE(review): `extractive` is not referenced by any other cell visible here —
# confirm whether it should be passed on to the inference call.
extractive = (model_idx == 2 and dataset_idx == 0 and finetune_idx == 0)

In [None]:
# Despite its name, `output_dir` is the path of the JSON file for the predictions
output_dir = f'prediction/prediction-{models[model_idx]}-{finetunes[finetune_idx]}-{task[datasets[dataset_idx]]}.json'

# Run the model over the whole test set; the return value is used as a success flag
success = run_inference(
    model=model,
    tokenizer=tokenizer,
    test_dataset=test_dataset,
    device=device,
    output_dir=output_dir,
)

print(f'Predicted values stored in {output_dir}' if success else 'Error in predicting.')

Processing:   2%|▏         | 211/14016 [03:52<4:09:04,  1.08s/it]

In [None]:
# Reload the stored predictions and preview the first few entries.
# `json` is already imported in the Libraries cell at the top of the notebook.
with open(output_dir, 'r', encoding='utf-8') as file:
    model_output = json.load(file)

# Slice instead of indexing range(10): a run that produced fewer than ten
# predictions is previewed in full rather than raising IndexError.
for record in model_output[:10]:
    print(json.dumps(record, indent=2, ensure_ascii=False))