In [1]:
import json, re

from typing import Any, Dict, List, Tuple
from tqdm import tqdm

import pandas as pd
import evaluate

from transformers import BartTokenizer

import nlp2go

In [2]:
# Original (Russian) evaluation set.
with open("EgeEvalDataset.json", encoding="utf8") as inp:
    ege_rus = json.load(inp)

# Translated counterpart; this is the set the models below are run on.
with open("EgeEvalDataset_translated.json", encoding="utf8") as inp:
    ege_eng = json.load(inp)

In [4]:
# Load the three BDG checkpoints in one pass; order matches the target names.
bdg_model, bdg_pm_model, bdg_anpm_model = [
    nlp2go.Model(checkpoint_path)
    for checkpoint_path in (
        "./BDG_v2/BDG.pt",
        "./BDG_v2/BDG_PM.pt",
        "./BDG_v2/BDG_ANPM.pt",
    )
]

  torchpack = torch.load(model_path, map_location=device)


===model info===
model_config facebook/bart-base
tags ['seq2seq_0']
type ['seq2seq']
maxlen 1024
epoch 8
loading saved model




===ADD TOKEN===
We have added 0 tokens
Using device: cuda
finish loading
loaded model predict_parameter {}
===model info===
model_config facebook/bart-base
tags ['seq2seq_0']
type ['seq2seq']
maxlen 1024
epoch 10
loading saved model
===ADD TOKEN===
We have added 0 tokens
Using device: cuda
finish loading
loaded model predict_parameter {}
===model info===
model_config facebook/bart-base
tags ['seq2seq_0']
type ['seq2seq']
maxlen 1024
epoch 8
loading saved model
===ADD TOKEN===
We have added 0 tokens
Using device: cuda
finish loading
loaded model predict_parameter {}


In [3]:
def create_model_input(example: Dict[str, Any], model, max_length: int=1024) -> str:
    """Build the ``passage </s> question </s> answer`` prompt for one example.

    The dataset question is first mapped onto the wording used in the prompt.
    When the encoded passage, question and answer together exceed
    ``max_length`` tokens, the passage is shortened from the end so that the
    question and the answer are kept intact.

    Args:
        example: Record providing the keys ``"question"``, ``"reading_text"``
            and ``"right_answer"``.
        model: Object exposing ``model.tokenizer`` with ``encode`` and
            ``decode`` methods.
        max_length: Token budget for the three encoded parts combined.

    Returns:
        The prompt string, with the three parts joined by ``" </s> "``.

    Raises:
        ValueError: If ``example["question"]`` is not one of the two
            supported question wordings.
    """
    question_rewrites = {
        "What statement does not correspond to the text?":
            "Which of the following statements is NOT TRUE according to the passage?",
        "What statement is appropriate to the text?":
            "Which of the following statements is TRUE according to the passage?",
    }
    raw_question = example["question"]
    if raw_question not in question_rewrites:
        # Put the offending text into the error itself rather than printing it
        # and raising a message-less Exception.
        raise ValueError(f"Unsupported question: {raw_question!r}")
    question = question_rewrites[raw_question]

    tokenizer = model.model.tokenizer
    text_encoded = tokenizer.encode(example["reading_text"])
    question_encoded = tokenizer.encode(question)
    ra_encoded = tokenizer.encode(example["right_answer"])
    overall_length = len(text_encoded) + len(question_encoded) + len(ra_encoded)

    text = example["reading_text"]
    diff = overall_length - max_length
    if diff > 0:
        # Drop the first token (presumably the start marker added by encode();
        # confirm for the tokenizer in use) and the last `diff` tokens, then
        # turn the remaining ids back into text.
        text = tokenizer.decode(text_encoded[1:-diff])

    return f"{text} </s> {question} </s> {example['right_answer']}"

In [5]:
# Build the prompt for the first translated example as a sanity check.
input_ = create_model_input(example=ege_eng[0], model=bdg_model)

In [6]:
# Display the prompt string built from ege_eng[0].
input_

' On the back of our village, there was a long, boarded room. This is the first time I\'ve heard music in my life, the violin. She was played by Vasya Poliac. What did the music tell me? Something very big. What did she complain about, who she was angry with? Worried and bitter to me. I want to cry because I feel sorry for myself, and I feel sorry for those who sleep in the graveyard! Wasa continued to play, saying: "This music was written by a man who had been deprived of the most precious one. If a man has no mother, no father, but a homeland, he\'s not an orphan yet. Everything goes by: love, regret, grief, even pain from wounds — but it never goes away, and it doesn\'t go away. This music was written by my countryman Oginsky. He wrote at the border saying goodbye to his motherland. He sent her last hello. There has been no composer in the world for a long time, but his pain, his sorrow, love for his own land, which no one can take away, is still alive." "Thank you, Uncle," I whispe

In [7]:
# Run every loaded BDG variant over the translated set and collect, per
# variant, the source fields together with the predicted distractors.
model_outputs = {name: [] for name in ("bdg", "bdg_pm", "bdg_anpm")}
loaded_models = (bdg_model, bdg_pm_model, bdg_anpm_model)

for (model_name, records), model in zip(model_outputs.items(), loaded_models):
    for item in tqdm(ege_eng):
        # Copy the reference fields first so the record keeps its key order.
        record = {
            field: item[field]
            for field in ("reading_text", "question", "right_answer", "distractors")
        }
        prompt = create_model_input(item, model)
        record["predicted_distractors"] = model.predict(
            {"input": prompt}, decodenum=3
        )["result"]
        records.append(record)

100%|██████████| 55/55 [01:25<00:00,  1.55s/it]
100%|██████████| 55/55 [01:30<00:00,  1.64s/it]
100%|██████████| 55/55 [01:24<00:00,  1.53s/it]


In [8]:
# Spot-check the first record collected under the "bdg" key.
model_outputs["bdg"][0]

{'reading_text': ' On the back of our village, there was a long, boarded room. This is the first time I\'ve heard music in my life, the violin. She was played by Vasya Poliac. What did the music tell me? Something very big. What did she complain about, who she was angry with? Worried and bitter to me. I want to cry because I feel sorry for myself, and I feel sorry for those who sleep in the graveyard! Wasa continued to play, saying: "This music was written by a man who had been deprived of the most precious one. If a man has no mother, no father, but a homeland, he\'s not an orphan yet. Everything goes by: love, regret, grief, even pain from wounds — but it never goes away, and it doesn\'t go away. This music was written by my countryman Oginsky. He wrote at the border saying goodbye to his motherland. He sent her last hello. There has been no composer in the world for a long time, but his pain, his sorrow, love for his own land, which no one can take away, is still alive." "Thank you,

In [9]:
# Spot-check the first record collected under the "bdg_pm" key.
model_outputs["bdg_pm"][0]

{'reading_text': ' On the back of our village, there was a long, boarded room. This is the first time I\'ve heard music in my life, the violin. She was played by Vasya Poliac. What did the music tell me? Something very big. What did she complain about, who she was angry with? Worried and bitter to me. I want to cry because I feel sorry for myself, and I feel sorry for those who sleep in the graveyard! Wasa continued to play, saying: "This music was written by a man who had been deprived of the most precious one. If a man has no mother, no father, but a homeland, he\'s not an orphan yet. Everything goes by: love, regret, grief, even pain from wounds — but it never goes away, and it doesn\'t go away. This music was written by my countryman Oginsky. He wrote at the border saying goodbye to his motherland. He sent her last hello. There has been no composer in the world for a long time, but his pain, his sorrow, love for his own land, which no one can take away, is still alive." "Thank you,

In [10]:
# Spot-check the first record collected under the "bdg_anpm" key.
model_outputs["bdg_anpm"][0]

{'reading_text': ' On the back of our village, there was a long, boarded room. This is the first time I\'ve heard music in my life, the violin. She was played by Vasya Poliac. What did the music tell me? Something very big. What did she complain about, who she was angry with? Worried and bitter to me. I want to cry because I feel sorry for myself, and I feel sorry for those who sleep in the graveyard! Wasa continued to play, saying: "This music was written by a man who had been deprived of the most precious one. If a man has no mother, no father, but a homeland, he\'s not an orphan yet. Everything goes by: love, regret, grief, even pain from wounds — but it never goes away, and it doesn\'t go away. This music was written by my countryman Oginsky. He wrote at the border saying goodbye to his motherland. He sent her last hello. There has been no composer in the world for a long time, but his pain, his sorrow, love for his own land, which no one can take away, is still alive." "Thank you,

In [11]:
# Persist all collected records; non-ASCII characters are written verbatim.
with open("model_outputs_bart.json", "w", encoding="utf8") as outp:
    serialized_outputs = json.dumps(model_outputs, ensure_ascii=False, indent=4)
    outp.write(serialized_outputs)