In [1]:
from collections import defaultdict

import torch
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer, pipeline

from config import PROJECT_DIR
from dataset.def_dataset import DefinitionDataset
from dataset.def_dataset import process_sentence
from general_utils.reader import JSONReader
from general_utils.utils import plot_graph
from general_utils.utils import pretty_string_list
from models.evidence_selection_model import EvidenceSelectionModel
from pipeline.pipeline import WikiPipeline

In [2]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The tokenizer is loaded from the Snowflake checkpoint, the weights from the
# evidence-selection checkpoint.
selection_model_tokenizer = AutoTokenizer.from_pretrained(
    'Snowflake/snowflake-arctic-embed-m-long'
)
# NOTE: trust_remote_code=True executes Python code shipped with the checkpoint.
model = AutoModel.from_pretrained(
    'lukasellinger/evidence_selection_model-v1',
    trust_remote_code=True,
    add_pooling_layer=False,
    safe_serialization=True,
)
selection_model = EvidenceSelectionModel(model).to(device)

# No custom verification model is configured here (original note: "still using base").
# These two names are not passed to WikiPipeline below.
verification_model = None
verification_model_tokenizer = None

wiki_pipeline = WikiPipeline(
    selection_model=selection_model,
    selection_model_tokenizer=selection_model_tokenizer,
)

<All keys matched successfully>


# Nudossi Examples

In [3]:
# Load the example file: element 0 carries the word under test, element 1 maps
# to the candidate definitions (only its values are used).
example_path = PROJECT_DIR.joinpath('dataset/nudossi_example.json')
data = JSONReader().read(example_path)

header = data[0]
definitions = data[1]
word = header.get('context_word')
examples = definitions.values()

# Show the word followed by all candidate definitions, one per line.
print(word, examples, sep='\n')

Nudossi
dict_values(['nicht-existierendes Wort (trinkt man wahrscheinlich Kakao)', 'Nudossi ist eine Marke von Milchmädcheneis.\n\n(Anmerkung: Nudossi ist ein historisches deutsches Eisprodukt, das nicht mehr hergestellt wird. Im Beispielsatz ist es eine Marke von Milchmädcheneis.)', 'Nudossi ist eine Marke von Kaugummi in Deutschland.\n\n(Anmerkung: Nudossi ist ein historischer Bezug, da es in den 1980er-Jahren in der DDR sehr beliebt war. Heutzutage ist die Marke nicht mehr weit verbreitet.)', 'Nudossi ist eine Marke von Schokocreme aus der DDR.\nDie Antwort wäre also: "Nudossi ist eine Marke von Schokocreme" oder einfach "DDR-Schokocreme".', 'fiktive, magische Substanz, ohne Bedeutung außerhalb des Beispiels', 'Es gibt keine bekannte Verwendung des Begriffs „Nudossi“. Möglicherweise ist dies ein Tippfehler oder ein nicht allgemein bekanntes Wort.', 'Im Satz "Kann ich bitte die (das, den) Nudossi haben?" bezieht sich das Wort "Nudossi" auf ein noch zu bestimmtes Objekt, da es in dies

In [9]:
# Translation pipeline used to turn each definition into English before it is
# handed to the verification pipeline.
pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-de-en")

for example in examples:
    translation = pipe(example)
    # Prefix the translated text with the word under test. The prefix is built
    # from `word` (read from the example file) instead of a hard-coded literal,
    # so the cell keeps working when a different example file is loaded.
    translated_example = f'{word}: ' + translation[0].get('translation_text')
    output = wiki_pipeline.verify(word, translated_example)
    # One report per example: original text, translation, per-fact labels.
    print(f"""
-------------------------------
Example: {example}
Translated: {translated_example}
Labels: 
{pretty_string_list(output['factualities'])}
""")


-------------------------------
Example: nicht-existierendes Wort (trinkt man wahrscheinlich Kakao)
Translated: Nudossi: non-existent word (likely to drink cocoa)
Labels: 
('Nudossi is a non-existent word.', <Fact.NOT_SUPPORTED: 1>)
('Nudossi is likely associated with drinking cocoa.', <Fact.SUPPORTED: 0>)



-------------------------------
Example: Nudossi ist eine Marke von Milchmädcheneis.

(Anmerkung: Nudossi ist ein historisches deutsches Eisprodukt, das nicht mehr hergestellt wird. Im Beispielsatz ist es eine Marke von Milchmädcheneis.)
Translated: Nudossi: Nudossi is a brand of milk girl ice cream. (Note: Nudossi is a historical German ice cream product that is no longer produced. In the example sentence, it is a brand of milk girl ice cream.)
Labels: 
('Nudossi is a brand.', <Fact.SUPPORTED: 0>)
('Nudossi is a brand of milk girl ice cream.', <Fact.NOT_SUPPORTED: 1>)



-------------------------------
Example: Nudossi ist eine Marke von Kaugummi in Deutschland.

(Anmerkung: Nud

In [None]:
# Verify one hand-picked definition. Unlike the loop over all examples, the
# translated text is passed on without a "<word>: " prefix.
example1 = 'Nudossi ist eine Marke von Schokocreme aus der DDR.'
translation = pipe(example1)
first_result = translation[0]
translated_example1 = first_result.get('translation_text')
# Bare last expression so the notebook displays the verification result.
wiki_pipeline.verify(word, translated_example1)

# Selected Documents (BM-25)

In [None]:
# Wrap the full test split of the atomic claim-verification dataset.
dataset_raw = load_dataset("lukasellinger/claim_verification_atomic-v1")
test_split = dataset_raw['test']

dataset = DefinitionDataset(
    test_split,
    None,
    mode='train',
    model='claim_verification',
)
# Number of evidence sentences requested from select_evidence per claim.
max_evidence_count = 3

In [None]:
# Count the entries for which evidence selection picks the same set of documents
# whether evidence is fetched with only_intro=False or only_intro=True.
same = 0
for entry in tqdm(dataset):
    word = process_sentence(entry['document_id'])
    ev_sentences_long = wiki_pipeline.fetch_evidence(word, only_intro=False)
    ev_sentences_short = wiki_pipeline.fetch_evidence(word, only_intro=True)

    # Entries where either fetch returned nothing are skipped (they still count
    # in the denominator below).
    if not (ev_sentences_long and ev_sentences_short):
        continue

    selected_evidences_long = wiki_pipeline.select_evidence(
        entry['claim'], ev_sentences_long, max_evidence_count=max_evidence_count
    )
    selected_evidences_short = wiki_pipeline.select_evidence(
        entry['claim'], ev_sentences_short, max_evidence_count=max_evidence_count
    )
    # Only the document component of each (doc, line, sentence) triple matters.
    long_docs = {doc for doc, _, _ in selected_evidences_long}
    short_docs = {doc for doc, _, _ in selected_evidences_short}

    same += int(long_docs == short_docs)

print(f'Same top{max_evidence_count} documents selected: {same / len(dataset)}')

# Selected Sentences in Summary

In [None]:
# Wrap the first 50 rows of the test split for the summary analysis.
dataset_raw = load_dataset("lukasellinger/claim_verification_atomic-v1")
test_subset = dataset_raw['test'].select(range(50))

dataset = DefinitionDataset(
    test_subset,
    None,
    mode='train',
    model='claim_verification',
)

In [None]:
# For every claim, select evidence from the full article and check each selected
# sentence against the per-document value returned by mark_summary_sents.
#   in_summary             - selected sentences classified as inside the summary
#   not_found              - selected sentences whose document has no (truthy)
#                            entry in the mark_summary_sents result
#   entries_not_in_summary - claims with at least one selected sentence that was
#                            classified as outside the summary
in_summary = 0
not_found = 0
entries_not_in_summary = []
for entry in tqdm(dataset):
    word = process_sentence(entry['document_id'])
    max_summary_line_numbers = wiki_pipeline.mark_summary_sents(word)
    ev_sentences_long = wiki_pipeline.fetch_evidence(word, only_intro=False)
    if not (ev_sentences_long and max_summary_line_numbers):
        continue

    selected_evidences_long = wiki_pipeline.select_evidence(entry['claim'], ev_sentences_long, max_evidence_count=3)
    # Kept in its own variable so the loop variable `entry` is not overwritten.
    # The key is spelled 'selected_sentences' consistently; the previous
    # 'selected_setences' typo made the append below raise KeyError.
    flagged_entry = {
        'claim': entry['claim'],
        'selected_sentences': [],
    }
    for (doc, line_number, sentence) in selected_evidences_long:
        # A missing or falsy value (including 0) is counted as "not found".
        if max_summary := max_summary_line_numbers.get(doc):
            # NOTE(review): a sentence counts as "in summary" when its line
            # number is >= the stored value. If that value is the last line of
            # the summary, this comparison is likely inverted - confirm against
            # WikiPipeline.mark_summary_sents before trusting the numbers.
            if max_summary <= int(line_number):
                in_summary += 1
            else:
                flagged_entry['selected_sentences'].append((doc, line_number, sentence))
        else:
            not_found += 1
    if flagged_entry['selected_sentences']:
        entries_not_in_summary.append(flagged_entry)

# NOTE(review): the counters are per selected sentence (up to 3 per claim) while
# the denominator is the number of claims, so these ratios can exceed 100 %.
# The format spec now sits inside the braces; before, ':.2f' was printed literally.
print(f'Selected Sentences from summary: {(in_summary / len(dataset))*100:.2f} %')
print(f'Not found elements should be 0 is: {(not_found / len(dataset))*100:.2f} %')
print(f'Sentences selected but not in summary:\n{entries_not_in_summary}')

In [None]:
# Raw counters, one per line: in_summary first, then not_found.
# TODO: original open question on in_summary - "35 / 51 ???"
print(in_summary, not_found, sep='\n')

# Sentence 0 is among the selected sentences most of the time

In [None]:
# Wrap the first 50 rows of the test split for the sentence-position analysis.
dataset_raw = load_dataset("lukasellinger/claim_verification_atomic-v1")
first_fifty = dataset_raw['test'].select(range(50))

dataset = DefinitionDataset(
    first_fifty,
    None,
    mode='train',
    model='claim_verification',
)

In [None]:
# Histogram of the line numbers of the selected evidence sentences (fetched with
# only_intro=True), plus a count of how often line 0 is among them.
# `defaultdict` and `plot_graph` are imported in the notebook's import cell.
sentence_distribution = defaultdict(int)
zero_in_selections = 0
for entry in tqdm(dataset):
    word = process_sentence(entry['document_id'])
    ev_sentences = wiki_pipeline.fetch_evidence(word, only_intro=True)
    if not ev_sentences:
        continue

    selected_evidences = wiki_pipeline.select_evidence(entry['claim'], ev_sentences, max_evidence_count=3)
    for (doc, line_number, sentence) in selected_evidences:
        # Normalise to int, as the summary analysis does for the same
        # select_evidence output; a string line number would never equal 0 and
        # would give the histogram non-numeric keys.
        line_idx = int(line_number)
        if line_idx == 0:
            zero_in_selections += 1
        sentence_distribution[line_idx] += 1

# The format spec now sits inside the braces; before, ':.2f' was printed literally.
print(f'Zero in selections: {(zero_in_selections / len(dataset))*100:.2f} %')

# Plot in ascending line-number order instead of first-seen order.
sorted_line_numbers = sorted(sentence_distribution)
sorted_counts = [sentence_distribution[number] for number in sorted_line_numbers]
plot_graph(sorted_line_numbers, sorted_counts, x_label='Sentence Number', y_label='Amount')

# Wikipedia vs Wiktionary