In [1]:
!pip install pandas transformers guidance numpy evaluate rouge-score nltk py-rouge accelerate

Collecting guidance
  Downloading guidance-0.1.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (223 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.6/223.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting py-rouge
  Downloading py_rouge-1.1-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.30.0-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.4/302.4 kB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
Collecting diskcache (from guidance

In [2]:
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import json
import nltk
import numpy as np
from guidance import gen
from evaluate import load
import csv
import nltk.translate.bleu_score as bleu
from rouge import Rouge
from transformers import BertTokenizer, BertModel
import torch
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import warnings
from typing import Any, Dict, Tuple
import re

warnings.filterwarnings("ignore", category=UserWarning, message="A new version of the following files was downloaded from")

In [3]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


# Load Dataset

- id: unique identifier of the example
- source: original model input for paraphrase generation
- hyp1: first alternative paraphrase of the source
- hyp2: second alternative paraphrase of the source
- label: hyp1 or hyp2, based on which of those has been annotated as hallucination
- type: hallucination category assigned.
  - Possible values: addition, named-entity, number, conversion, date, tense, negation, gender, pronoun, antonym, natural

## Trial dataset

This is a small list of examples, provided to help the participants get familiar with the task. Each example contains the following fields:
- id
- source
- hyp1
- hyp2
- label
- type

In [4]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


In [5]:
from datasets import load_dataset

#load the trial data for both English and Swedish
trial_ds = load_dataset("Eloquent/HalluciGen-PG", name="trial")

#load the trial data only for Swedish
trial_ds_sv = load_dataset("Eloquent/HalluciGen-PG", name="trial", split="trial_swedish")

print(trial_ds.keys())

trial_df_en = trial_ds['trial_english'].to_pandas()
trial_df_se= trial_ds['trial_swedish'].to_pandas()

print('English:')

display(trial_df_en.head())

print('Swedish')
display(trial_df_se.head())

Downloading readme:   0%|          | 0.00/3.64k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/8.19k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Generating trial_swedish split:   0%|          | 0/20 [00:00<?, ? examples/s]

Generating trial_english split:   0%|          | 0/16 [00:00<?, ? examples/s]

dict_keys(['trial_swedish', 'trial_english'])
English:


Unnamed: 0,id,source,type,hyp1,hyp2,label
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2


Swedish


Unnamed: 0,id,source,type,hyp1,hyp2,label
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1


In [6]:
print(trial_df_en['type'].unique())


['antonym' 'negation' 'named entity' 'natural' 'addition' 'gender'
 'number' 'pronoun' 'conversion' 'date']


In [7]:
print('English:')

display(trial_df_en.head())

English:


Unnamed: 0,id,source,type,hyp1,hyp2,label
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2


In [8]:
print('English')
row = trial_df_en.iloc[6]  # Selecting the first row
print("Source:", row['source'])
print("hyp1:", row['hyp1'])
print("hyp2:", row['hyp2'])
print("label:",row['label'] )
print("type:",row['type'] )

row = trial_df_en.iloc[5]
print("Source:", row['source'])
print("hyp1:", row['hyp1'])
print("hyp2:", row['hyp2'])
print("label:",row['label'] )
print("type:",row['type'] )

row = trial_df_en.iloc[4]
print("Source:", row['source'])
print("hyp1:", row['hyp1'])
print("hyp2:", row['hyp2'])
print("label:",row['label'] )
print("type:",row['type'] )

English
Source: Madam President, I am speaking on behalf of our colleague, Mr Francis Decourrière, who drafted one of the motions for a resolution.
hyp1: One of the motions for a resolution was drafted by Mr Francis Decourrière.
hyp2: One of the motions for a resolution was drafted by Mrs Francis Decourrière.
label: hyp2
type: gender
Source: This state of affairs has not changed in more than 100 years, but hopefully at some stage - and perhaps soon - change will come.
hyp1: There has been no change in the status quo in over 100 years, but there is hope that change will soon come. 
hyp2: The state of affairs is1-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-6556
label: hyp2
type: addition
Source: We need quite specific legisl

In [9]:
print('Swedish')
row = trial_df_se.iloc[0]  # Selecting the first row
print("Source:", row['source'])
print("hyp1:", row['hyp1'])
print("hyp2:", row['hyp2'])
print("label:",row['label'] )
print("type:",row['type'] )

row = trial_df_se.iloc[1]
print("Source:", row['source'])
print("hyp1:", row['hyp1'])
print("hyp2:", row['hyp2'])
print("label:",row['label'] )
print("type:",row['type'] )

row = trial_df_se.iloc[2]
print("Source:", row['source'])
print("hyp1:", row['hyp1'])
print("hyp2:", row['hyp2'])
print("label:",row['label'] )
print("type:",row['type'] )

Swedish
Source: Men intäkterna från mjukvarulicenser, ett mått som finansanalytiker följer noga, minskade med 21 procent till 107,6 miljoner dollar.
hyp1: Intäkter från programvarulicenser, en metrik som noggrant övervakas av finansiella analytiker, minskade med 21 procent till ett belopp av 107,6 miljoner dollar.
hyp2: Intäkter från programvarulicenser, en metrik som noggrant övervakas av finansiella analytiker, minskade med 42 procent till ett belopp av 107,6 miljoner dollar.
label: hyp2
type: number
Source: Hongkong-universitet samarbetar med universitet, företag och statliga sektorer i Kina för att samordna utbildningsprogram och forskningscentra för att främja högteknologisk forskning, kommersialisering och tekniköverföring.
hyp1: University of Hong Kong samarbetar med olika kinesiska universitet, företag och regeringsdepartement för att samordna utbildningsprogram och forskningscentra med syftet att främja avancerad teknologisk forskning, kommersialisering och tekniköverföring.
h

In [10]:
#load the test data for the generation step in both English and Swedish
data = load_dataset("Eloquent/HalluciGen-PG", "test_detection")

print(data.keys())

data_df_en = data['test_detection_english'].to_pandas()
data_df_se= data['test_detection_swedish'].to_pandas()


Downloading data:   0%|          | 0.00/41.8k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/38.5k [00:00<?, ?B/s]

Generating test_detection_swedish split:   0%|          | 0/119 [00:00<?, ? examples/s]

Generating test_detection_english split:   0%|          | 0/119 [00:00<?, ? examples/s]

dict_keys(['test_detection_swedish', 'test_detection_english'])


In [11]:
data_df_en

Unnamed: 0,id,source,hyp1,hyp2
0,0,It has enabled us to support and encourage an ...,You can support and encourage an exchange of e...,We can support and encourage an exchange of ex...
1,1,"Therefore, I am calling for an increase in the...",I want to see a decrease in the premiums for a...,I want to see an increase in the premiums for ...
2,2,"In other words, a person may be prevented from...",When there is reason to fear that a violent ac...,When there is reason to fear that a violent ac...
3,3,None of my 34 amendments were adopted and my a...,Our arguments against the ridiculous administr...,My arguments against the ridiculous administra...
4,4,The final key feature of the report is its ori...,The final main feature of the report is its or...,The final features of the report are its orien...
...,...,...,...,...
114,114,I am referring to the current vote but in rela...,The vote on Amendment No 98 will take place be...,The vote on Amendment No 98 will take place af...
115,115,When I became a Member of the European Parliam...,I was struck by the abundance of knowledge whe...,I was struck by the lack of knowledge when I j...
116,116,"Mr President, let me express my thanks for the...","Mr President, I would like to extend a warm we...","Madam President, I would like to extend a warm..."
117,117,We can replace 16 out of 626 Members of the Eu...,"As voters we cannot decide on a new law, but w...","As voters decide on a new law, we can replace ..."


In [12]:
data_df_se

Unnamed: 0,id,source,hyp1,hyp2
0,0,Kvinnor kommer att möta högre bilförsäkringspr...,Det betyder att kvinnor kan förvänta sig att b...,Kvinnor kommer att få högre premier för bilför...
1,1,"Rörelseintäkterna var 1,45 miljarder dollar, e...","Intäkterna från rörelsen var 1,45 miljarder do...","Intäkterna från rörelsen var 1,45 miljarder do..."
2,2,"Mandela tillbaka på sjukhus i ""allvarligt men ...",Mandela återvänder inte till sjukhuset med all...,Mandela återvänder till sjukhuset med allvarli...
3,3,Egypten fryser Muslimska brödraskapets tillgån...,Den egyptiska regeringen har fryst tillgångar ...,Egypten fryser tillgångarna för Muslimska bröd...
4,4,"P-pillret, som de kallar ""polypiller"", skulle ...","Medicinen som vanligtvis kallas ""poly pill"" sä...","Medicinen som vanligtvis kallas ""poly pill"" sä..."
...,...,...,...,...
114,114,Texaner som dignar under skyhöga hemförsäkring...,Invånare i Texas som har belastats med orimlig...,Invånare i Texas som har belastats med orimlig...
115,115,Uppdaterad - Två explosioner nära mållinjen i ...,Uppdatering: Nära målet i Boston Marathon intr...,Två explosioner inträffade under loppet av Bos...
116,116,Anthony Weiner halkar ned till fjärde plats i ...,Anthony Weiner halkar till fjärde plats i ny o...,Enligt en ny opinionsundersökning från Quinnip...
117,117,Östtimor förbjuder kampsportsklubbar på grund ...,I Östtimor ska man förbjuda klubbar som utövar...,I Östtimor har man förbjudit klubbar som utöva...


In [13]:
# Replace "'" with '//"'
data_df_en['source'] = data_df_en['source'].str.replace("'", "\\'")

In [14]:
data_df_se['source'] = data_df_se['source'].str.replace("'", "\\'")

In [15]:
display(data_df_se)

Unnamed: 0,id,source,hyp1,hyp2
0,0,Kvinnor kommer att möta högre bilförsäkringspr...,Det betyder att kvinnor kan förvänta sig att b...,Kvinnor kommer att få högre premier för bilför...
1,1,"Rörelseintäkterna var 1,45 miljarder dollar, e...","Intäkterna från rörelsen var 1,45 miljarder do...","Intäkterna från rörelsen var 1,45 miljarder do..."
2,2,"Mandela tillbaka på sjukhus i ""allvarligt men ...",Mandela återvänder inte till sjukhuset med all...,Mandela återvänder till sjukhuset med allvarli...
3,3,Egypten fryser Muslimska brödraskapets tillgån...,Den egyptiska regeringen har fryst tillgångar ...,Egypten fryser tillgångarna för Muslimska bröd...
4,4,"P-pillret, som de kallar ""polypiller"", skulle ...","Medicinen som vanligtvis kallas ""poly pill"" sä...","Medicinen som vanligtvis kallas ""poly pill"" sä..."
...,...,...,...,...
114,114,Texaner som dignar under skyhöga hemförsäkring...,Invånare i Texas som har belastats med orimlig...,Invånare i Texas som har belastats med orimlig...
115,115,Uppdaterad - Två explosioner nära mållinjen i ...,Uppdatering: Nära målet i Boston Marathon intr...,Två explosioner inträffade under loppet av Bos...
116,116,Anthony Weiner halkar ned till fjärde plats i ...,Anthony Weiner halkar till fjärde plats i ny o...,Enligt en ny opinionsundersökning från Quinnip...
117,117,Östtimor förbjuder kampsportsklubbar på grund ...,I Östtimor ska man förbjuda klubbar som utövar...,I Östtimor har man förbjudit klubbar som utöva...


In [None]:
!pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:
!pip install -U "transformers==4.38.1" --upgrade
!pip install accelerate

# Load the model

In [16]:
#load model Llama3
from transformers import AutoTokenizer, pipeline
import torch

model = "meta-llama/Meta-Llama-3-8B-Instruct"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)



tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# Prompt Version 1

In [24]:
def detect_hallu(source, hyp1, hyp2):
   # Generate the prompt

  answer_format = {"label": " "}

  user_prompt = f'''
    You are a researcher investigating a new phenomenon.
    You have gathered data (source) and formulated two competing hypotheses (hyp1 and hyp2) to explain it.
    Identify the hypothesis that contradicts the information provided in the given source.
    Provide the result in the following format: {answer_format}.

    source: {source}
    hyp1: {hyp1}
    hyp2: {hyp2}

    Result:
  '''

  messages = [
    {"role": "system", "content": "You are a hallucination detector. Your task is to detect which hypothesis is a hallucination."},
    {"role": "user", "content": user_prompt},
  ]

  prompt = pipeline.tokenizer.apply_chat_template(
          messages,
          tokenize=False,
          add_generation_prompt=True
  )

  terminators = [
      pipeline.tokenizer.eos_token_id,
      pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
  ]

  outputs = pipeline(
      prompt,
      max_new_tokens=150,
      eos_token_id=terminators,
      do_sample=True,
      temperature=0.6,
      top_p=0.9,
  )
  print("Label: ", outputs[0]["generated_text"][len(prompt):])
  hallu_label = outputs[0]["generated_text"][len(prompt):]

  return hallu_label

In [None]:
def detect_hallu_se(source, hyp1, hyp2):
   # Generate the prompt

    answer_format = {"label": ""}

    user_prompt = f'''
    Givet en ”src” och två hypoteser ”hyp1” och ”hyp2” är din uppgift att upptäcka vilken av de två hypoteserna (”label”) som inte stöds av källan.
    Ge resultatet i följande format: {answer_format}.

    Src: {source}
    hyp1 : {hyp1}
    hyp2 : {hyp2}

    Resultat:
    '''

    messages = [
      {"role": "system", "content": "Du är en hallucinationsdetektor. Din uppgift är att upptäcka vilken hypotes som är en hallucination."},
      {"role": "user", "content": user_prompt},
    ]

    prompt = pipeline.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
    )

    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    outputs = pipeline(
        prompt,
        max_new_tokens=150,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    print("Label: ", outputs[0]["generated_text"][len(prompt):])
    hallu_label = outputs[0]["generated_text"][len(prompt):]


    return hallu_label

In [None]:
def detect_hallu_2(source, hyp1, hyp2):

    # Generate the prompt

    answer_format = {"label": ""}
    user_prompt =  f'''
    As an AI model, your task is to determine which of the two hypotheses, "hyp1" and "hyp2," is not supported by the given source, labeled as either "hyp1" or "hyp2."

    Now, with the provided "src" and two hypotheses "hyp1" and "hyp2," identify the hypothesis that is not supported by the source.

    Src: {source}
    hyp1: {hyp1}
    hyp2: {hyp2}

    DO NOT GENERATE CODE AND ONLY ANSWER WITH THE GIVEN FORMAT USING THE FOLLOWING FORMAT: {answer_format}.



    Example 1:
    Src: The population has declined in some 210 of the 280 municipalities in Sweden, mainly in inland central and northern Sweden.
    hyp1: In the majority of Sweden's 280 municipalities, the population has gone up.
    hyp2: In the majority of Sweden's 280 municipalities, the population has gone down.

    Result:
    {"label": "hyp1"}

    Explanation: Hyp1 suggests that the population has increased in most municipalities, which contradicts the information provided in the source. The source clearly states that the population has declined in some municipalities, not increased.
    '''

    messages = [
      {"role": "system", "content": "You are a hallucination detector. Your task is to detect which hypothesis is a hallucination."},
      {"role": "user", "content": user_prompt},
    ]

    prompt = pipeline.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
    )

    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    outputs = pipeline(
        prompt,
        max_new_tokens=150,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    print("Label: ", outputs[0]["generated_text"][len(prompt):])
    hallu_label = outputs[0]["generated_text"][len(prompt):]


    return hallu_label

In [18]:
import json
import re

def extract_json_data(result_text):

    print(result_text)
    # Find the start and end indices of the JSON data
    start_index = result_text.find('{')
    end_index = result_text.find('}') + 1  # Add 1 to include the closing brace

    # Extract the JSON data from the string
    result_text = result_text[start_index:end_index]
    #print("result text: ", result_text)

    # Replace single quotes used as delimiters with double quotes
    result_text = json.loads(result_text.replace("'", "\""))
    print("result_text:",result_text)
    return result_text

In [19]:
source = "My mom's food is better than my sister's."
hyp1= "My mom's food is amazing"
hyp2="My sister's food is better than my mom's"
result = detect_hallu(source,hyp1,hyp2)
print()
print(result)
result = extract_json_data(result)
print()

print("result:",result)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}

result: {'label': 'hyp2'}


## Generate dataset

In [20]:
import os
import numpy as np
import pandas as pd

#save results
def save_results(detect_hallu, df, csv_filename):
    skipped_rows = []
    # Split DataFrame into chunks of 10 rows
    chunks = np.array_split(df, len(df) // 10 if len(df) % 10 == 0 else len(df) // 10 + 1)

    for chunk in chunks:
        # Iterate through each row in the chunk
        for index, row in chunk.iterrows():
            source = row['source']
            hyp1 = row["hyp1"]
            hyp2 = row["hyp2"]

            try:
                # Generate translation
                result = detect_hallu(source, hyp1, hyp2)

                print(result)
                extracted_hallu = extract_json_data(result)

                hallu_label = extracted_hallu["label"]


                # Update DataFrame with new values
                chunk.at[index, "prediction"] = hallu_label
            except:
                print(f"Skipping row {index} due to error in data extraction.")
                skipped_rows.append(index)
                continue

        # Append chunk to CSV file
        if not os.path.isfile(csv_filename):
            chunk.to_csv(csv_filename, mode='w', header=True, index=False)
        else:
            chunk.to_csv(csv_filename, mode='a', header=False, index=False)

        # Load CSV file
        df = pd.read_csv(csv_filename)
        #display(df)
    return skipped_rows

# Evaluate prompts on the trial dataset

In [None]:
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/results_llama3_prompt_narjes_en_v1_trial.csv"

save_results(detect_hallu, trial_df_en, csv_filename)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 ("I think I need to listen to everyone's views in order to be an ineffective President") contradicts the information provided in the source, which states that the speaker believes listening to everyone's views is necessary to be an effective President.
{'label': 'hyp1'}

Hypothesis 1 ("I think I need to listen to everyone's views in order to be an ineffective President") contradicts the information provided in the source, which states that the speaker believes listening to everyone's views is necessary to be an effective President.
{'label': 'hyp1'}

Hypothesis 1 ("I think I need to listen to everyone's views in order to be an ineffective President") contradicts the information provided in the source, which states that the speaker believes listening to everyone's views is necessary to be an effective President.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, which states that Agenda 2000 lacks a chapter on renewable energy. Agenda 2030 is not mentioned in the source, so it's impossible to determine if it includes a chapter on renewable energy or not. Therefore, hypothesis 1 is the one that contradicts the given information.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, which states that Agenda 2000 lacks a chapter on renewable energy. Agenda 2030 is not mentioned in the source, so it's impossible to determine if it includes a chapter on renewable energy or not. Therefore, hypothesis 1 is the one that contradicts the given information.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, which states that Agenda 2000 lacks a chapter on renewable energy. Agenda 2030 is not mentioned in the source, so it's impossible to determine if it includes a chapter on renewable energy or not. Th

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the speaker is speaking on behalf of Mr Francis Decourrière, implying that the drafter of the motion is a male, not a female.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the speaker is speaking on behalf of Mr Francis Decourrière, implying that the drafter of the motion is a male, not a female.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the speaker is speaking on behalf of Mr Francis Decourrière, implying that the drafter of the motion is a male, not a female.
result_text: {'label': 'hyp2'}
Label:  {'label': 'hyp2'}

Explanation: According to the source, Amendment No 4 omits the expression 'police authorities', which means that the expression is still present in the French version. Hyp2 contradicts this information by stating that the expression is excluded from 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the EU wants this information to enter into force within a period of three years from 1 July 1998, not thirty years.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the EU wants this information to enter into force within a period of three years from 1 July 1998, not thirty years.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the EU wants this information to enter into force within a period of three years from 1 July 1998, not thirty years.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the speaker explicitly states "I did prepare a speech" and "many of my points have already been excellently made by previous speakers", indicating that they are the one who prepared the speech, not "they".
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the speaker explicitly states "I did prepare a speech" and "many of my points have already been excellently made by previous speakers", indicating that they are the one who prepared the speech, not "they".
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the speaker explicitly states "I did prepare a speech" and "many of my points have already been excellently made by previous speakers", indicating that they are the one who prepared the speech, not "they".
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("We published a press release that dealt with the questions you raised.") contradicts the information provided in the source, which states "dealing with the questions you have raised", not "you raised".
{'label': 'hyp2'}

Hypothesis 2 ("We published a press release that dealt with the questions you raised.") contradicts the information provided in the source, which states "dealing with the questions you have raised", not "you raised".
{'label': 'hyp2'}

Hypothesis 2 ("We published a press release that dealt with the questions you raised.") contradicts the information provided in the source, which states "dealing with the questions you have raised", not "you raised".
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, which only mentions "study days", not "study months".
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, which only mentions "study days", not "study months".
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, which only mentions "study days", not "study months".
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, as it states the losses were in the amount of approximately 15 million dollars, whereas the source mentions the losses were in the amount of approximately 15 million euros.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, as it states the losses were in the amount of approximately 15 million dollars, whereas the source mentions the losses were in the amount of approximately 15 million euros.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, as it states the losses were in the amount of approximately 15 million dollars, whereas the source mentions the losses were in the amount of approximately 15 million euros.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source mentions a dual ceiling, which implies that there are two options: a 20% rate of taxation on interest payments or the supply of information. Hypothesis 2 only mentions one option, which is a 20% rate of taxation on interest payments.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source mentions a dual ceiling, which implies that there are two options: a 20% rate of taxation on interest payments or the supply of information. Hypothesis 2 only mentions one option, which is a 20% rate of taxation on interest payments.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source mentions a dual ceiling, which implies that there are two options: a 20% rate of taxation on interest payments or the supply of information. Hypothesis 2 only mentions one option, which is a 20% rate of taxation on interest paymen

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 1 (hyp1) is supported by the source, as it states that there is no other way to clean up the beaches on the North Sea and Baltic coasts, which matches the information provided.

Hypothesis 2 (hyp2) contradicts the source, as it mentions the Bothnian Bay coastline instead of the Baltic coasts, which is not mentioned in the source.
{'label': 'hyp2'}

Hypothesis 1 (hyp1) is supported by the source, as it states that there is no other way to clean up the beaches on the North Sea and Baltic coasts, which matches the information provided.

Hypothesis 2 (hyp2) contradicts the source, as it mentions the Bothnian Bay coastline instead of the Baltic coasts, which is not mentioned in the source.
{'label': 'hyp2'}

Hypothesis 1 (hyp1) is supported by the source, as it states that there is no other way to clean up the beaches on the North Sea and Baltic coasts, which matches the information provided.

Hypothesis 2 (hyp2) contradicts the source, as it mentions t

[]

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/results_llama3_prompt_narjes_en_v1_trial.csv")

display(df)

# Erstelle den Klassifikationsbericht
report = classification_report(df['label'], df['prediction'])

print(report)

# Speichere den Klassifikationsbericht in eine Textdatei
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/classification_report_llama3_prompt_narjes_en_v1_trial.txt", "w") as text_file:
    text_file.write(report)


Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp2
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


              precision    recall  f1-score   support

        hyp1       1.00      0.67      0.80         9
        hyp2       0.70      1.00      0.82         7

    accuracy                           0.81        16
   macro avg       0.85      0.83      0.81        16
weighted avg       0.87      0.81      0.81        16



326

In [None]:
#csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/results_llama3_en_v2_trial.csv"


#save_results(detect_hallu_2, trial_df_en, csv_filename)

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/results_llama3_prompt_narjes_en_v2_trial.csv")

display(df)

# Erstelle den Klassifikationsbericht
report = classification_report(df['label'], df['prediction'])

print(report)

# Speichere den Klassifikationsbericht in eine Textdatei
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/classification_report_llama3_prompt_narjes_en_v2_trial.txt", "w") as text_file:
    text_file.write(report)

In [None]:
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/results_llama3_prompt_narjes_se_v1_trial.csv"


save_results(detect_hallu, trial_df_se, csv_filename)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the revenue from software licenses decreased by 21%, not 42%.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the revenue from software licenses decreased by 21%, not 42%.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the revenue from software licenses decreased by 21%, not 42%.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2 is a hallucination'}

The source text does not mention "regeringsdepartement" (government department) which is present in hyp2 but not in hyp1 or the source text. Therefore, hyp2 contradicts the information provided in the source and is considered a hallucination.
{'label': 'hyp2 is a hallucination'}

The source text does not mention "regeringsdepartement" (government department) which is present in hyp2 but not in hyp1 or the source text. Therefore, hyp2 contradicts the information provided in the source and is considered a hallucination.
{'label': 'hyp2 is a hallucination'}

The source text does not mention "regeringsdepartement" (government department) which is present in hyp2 but not in hyp1 or the source text. Therefore, hyp2 contradicts the information provided in the source and is considered a hallucination.
result_text: {'label': 'hyp2 is a hallucination'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the counties with population decline are Vermillion, Posey, and Madison.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the counties with population decline are Vermillion, Posey, and Madison.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the counties with population decline are Vermillion, Posey, and Madison.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which only mentions that Google is presenting a prototype for a self-driving car, without mentioning any details about the car being able to navigate complex traffic environments without human oversight.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which only mentions that Google is presenting a prototype for a self-driving car, without mentioning any details about the car being able to navigate complex traffic environments without human oversight.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which only mentions that Google is presenting a prototype for a self-driving car, without mentioning any details about the car being able to navigate complex traffic environments without human oversight.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

The source states that the proposal says that a woman who undergoes such an abortion cannot be prosecuted. Hyp1 contradicts this information by stating that the proposal says a woman who undergoes such an abortion can be prosecuted, which is the opposite of what the source claims. Hyp2, on the other hand, is consistent with the source and states that the proposal says a woman who undergoes such an abortion cannot be prosecuted, which is in line with the information provided in the source.
{'label': 'hyp1'}

The source states that the proposal says that a woman who undergoes such an abortion cannot be prosecuted. Hyp1 contradicts this information by stating that the proposal says a woman who undergoes such an abortion can be prosecuted, which is the opposite of what the source claims. Hyp2, on the other hand, is consistent with the source and states that the proposal says a woman who undergoes such an abortion cannot be prosecuted, which is in line with the in

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

The source states "Spaniens prinsessa" (Spain's princess), which implies that the subject is a female. Hyp2, on the other hand, states "Spanska prinsen" (Spain's prince), which implies that the subject is a male. Since the source does not mention a prince, hyp2 contradicts the information provided in the source and is therefore the hallucination.
{'label': 'hyp2'}

The source states "Spaniens prinsessa" (Spain's princess), which implies that the subject is a female. Hyp2, on the other hand, states "Spanska prinsen" (Spain's prince), which implies that the subject is a male. Since the source does not mention a prince, hyp2 contradicts the information provided in the source and is therefore the hallucination.
{'label': 'hyp2'}

The source states "Spaniens prinsessa" (Spain's princess), which implies that the subject is a female. Hyp2, on the other hand, states "Spanska prinsen" (Spain's prince), which implies that the subject is a male. Since the source does no

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states "Mannen använder en slägga för att bryta betongblocket som finns på den andre mannen", which implies that the concrete block is on top of the other man. Hyp1 is consistent with this information, as it states that the man is using a sledgehammer to break the concrete block that is lying on top of the other man. On the other hand, Hyp2 contradicts the information in the source, as it suggests that the concrete block is under the other man, which is not mentioned in the source.
{'label': 'hyp2'}

Explanation: The source states "Mannen använder en slägga för att bryta betongblocket som finns på den andre mannen", which implies that the concrete block is on top of the other man. Hyp1 is consistent with this information, as it states that the man is using a sledgehammer to break the concrete block that is lying on top of the other man. On the other hand, Hyp2 contradicts the information in the source, as it suggests that the concrete 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states that Nordkorea "carnar utlänningar i söder" which translates to "warns foreigners in the south". This information directly contradicts hypothesis 2, which states that Nordkorea "varnar" (warns) foreigners in the south. Therefore, hypothesis 2 is the one that contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source states that Nordkorea "carnar utlänningar i söder" which translates to "warns foreigners in the south". This information directly contradicts hypothesis 2, which states that Nordkorea "varnar" (warns) foreigners in the south. Therefore, hypothesis 2 is the one that contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source states that Nordkorea "carnar utlänningar i söder" which translates to "warns foreigners in the south". This information directly contradicts hypothesis 2, which states that Nordkorea "varnar" (warns) foreigners in the south. T

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states that the man with a gun killed at least 7 people and injured several others, whereas the source states that the armed man was among the 7 dead.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states that the man with a gun killed at least 7 people and injured several others, whereas the source states that the armed man was among the 7 dead.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states that the man with a gun killed at least 7 people and injured several others, whereas the source states that the armed man was among the 7 dead.
result_text: {'label': 'hyp2'}
Label:  {'label': 'hyp2'}

Explanation: The source states that Iran's nuclear weapon negotiations are entering their third day, which implies that the discussions have not yet reached their fourth day. Therefore, hypothesis 2, which state

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that AstraZeneca is buying out Bristol-Myers Squibb from the diabetes alliance, not the other way around.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that AstraZeneca is buying out Bristol-Myers Squibb from the diabetes alliance, not the other way around.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that AstraZeneca is buying out Bristol-Myers Squibb from the diabetes alliance, not the other way around.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source because the source only mentions that a Greek right-wing leader has been arrested in anticipation of a trial, without mentioning any involvement in violent protests, a murder, or being a former member of a nationalist party.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source because the source only mentions that a Greek right-wing leader has been arrested in anticipation of a trial, without mentioning any involvement in violent protests, a murder, or being a former member of a nationalist party.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source because the source only mentions that a Greek right-wing leader has been arrested in anticipation of a trial, without mentioning any involvement in violent protests, a murder, or being a former member of a nationalist party.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 ("Peres i Israel ger en uppmaning att återgå till fredssamtalen") contradicts the information provided in the source, which states that Peres "uppmanade på måndagen världssamfundet att återuppta fredsförhandlingarna med palestinierna", indicating that Peres addressed the world community, not just Israel.
{'label': 'hyp1'}

Hypothesis 1 ("Peres i Israel ger en uppmaning att återgå till fredssamtalen") contradicts the information provided in the source, which states that Peres "uppmanade på måndagen världssamfundet att återuppta fredsförhandlingarna med palestinierna", indicating that Peres addressed the world community, not just Israel.
{'label': 'hyp1'}

Hypothesis 1 ("Peres i Israel ger en uppmaning att återgå till fredssamtalen") contradicts the information provided in the source, which states that Peres "uppmanade på måndagen världssamfundet att återuppta fredsförhandlingarna med palestinierna", indicating that Peres addressed the world commun

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source mentions "kongressen" (Swedish for "Congress"), but hyp1 uses "underhuset" (Swedish for "House of Representatives"), which is a different part of the legislative branch. Hyp2 is the only one that matches the source's terminology, making it the correct hypothesis.
{'label': 'hyp2'}

Explanation: The source mentions "kongressen" (Swedish for "Congress"), but hyp1 uses "underhuset" (Swedish for "House of Representatives"), which is a different part of the legislative branch. Hyp2 is the only one that matches the source's terminology, making it the correct hypothesis.
{'label': 'hyp2'}

Explanation: The source mentions "kongressen" (Swedish for "Congress"), but hyp1 uses "underhuset" (Swedish for "House of Representatives"), which is a different part of the legislative branch. Hyp2 is the only one that matches the source's terminology, making it the correct hypothesis.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

The source states that Pentagon för första gången pekade ut en officer, Dallager, för att han inte tog upp skandalen, which implies that it was the Pentagon that identified Dallager, not the FBI. Therefore, hypothesis 1, which states that FBI offentligt identifierade en officer, Dallager, för första gången, contradicts the information provided in the source and is the hallucination.
{'label': 'hyp1'}

The source states that Pentagon för första gången pekade ut en officer, Dallager, för att han inte tog upp skandalen, which implies that it was the Pentagon that identified Dallager, not the FBI. Therefore, hypothesis 1, which states that FBI offentligt identifierade en officer, Dallager, för första gången, contradicts the information provided in the source and is the hallucination.
{'label': 'hyp1'}

The source states that Pentagon för första gången pekade ut en officer, Dallager, för att han inte tog upp skandalen, which implies that it was the Pentagon that i

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the microphone was cut off twice, not five times.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the microphone was cut off twice, not five times.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the microphone was cut off twice, not five times.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("Förare i spansk tågolycka utfrågas av domare.") is a hallucination because the original source ("Förare i spansk tågolycka frågas ut av domaren.") specifically states that the driver is being questioned by a judge, not just any domare (which is a general term for a judge or magistrate).
{'label': 'hyp2'}

Hypothesis 2 ("Förare i spansk tågolycka utfrågas av domare.") is a hallucination because the original source ("Förare i spansk tågolycka frågas ut av domaren.") specifically states that the driver is being questioned by a judge, not just any domare (which is a general term for a judge or magistrate).
{'label': 'hyp2'}

Hypothesis 2 ("Förare i spansk tågolycka utfrågas av domare.") is a hallucination because the original source ("Förare i spansk tågolycka frågas ut av domaren.") specifically states that the driver is being questioned by a judge, not just any domare (which is a general term for a judge or magistrate).
result_text: {'label': 'hy

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 states that the Iranian government declared that the country's nuclear energy program "used to be" peaceful, which contradicts the information provided in the source, which states that the Iranian government declared that the country's nuclear energy program "will remain" peaceful.
{'label': 'hyp2'}

Hypothesis 2 states that the Iranian government declared that the country's nuclear energy program "used to be" peaceful, which contradicts the information provided in the source, which states that the Iranian government declared that the country's nuclear energy program "will remain" peaceful.
{'label': 'hyp2'}

Hypothesis 2 states that the Iranian government declared that the country's nuclear energy program "used to be" peaceful, which contradicts the information provided in the source, which states that the Iranian government declared that the country's nuclear energy program "will remain" peaceful.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states that the speaker was employed by a company that the listener hired to erase their own memory, whereas the source states that the listener hired a company to erase the speaker's memory.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states that the speaker was employed by a company that the listener hired to erase their own memory, whereas the source states that the listener hired a company to erase the speaker's memory.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states that the speaker was employed by a company that the listener hired to erase their own memory, whereas the source states that the listener hired a company to erase the speaker's memory.
result_text: {'label': 'hyp2'}
Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states that Abu B

[]

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/results_llama3_prompt_narjes_se_v1_trial.csv")

display(df)

# Erstelle den Klassifikationsbericht
report = classification_report(df['label'], df['prediction'])

print(report)

# Speichere den Klassifikationsbericht in eine Textdatei
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/trial/classification_report_llama3_prompt_narjes_se_v1_trial.txt", "w") as text_file:
    text_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2,hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1,hyp2 is a hallucination
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2,hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2,hyp2
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1,hyp1
5,5,Spaniens prinsessa vittnar i historisk bedräge...,gender,Spanska prinsessan blir en del av en utredning...,Spanska prinsen blir en del av en utredning om...,hyp2,hyp2
6,6,Mannen använder en slägga för att bryta betong...,antonym,Mannen använder en slägga för att krossa beton...,Mannen använder en slägga för att krossa beton...,hyp2,hyp2
7,7,Nordkorea carnar utlänningar i söder och ger r...,negation,Nordkorea varnar inte utländska medborgare i s...,Nordkorea varnar utländska medborgare i södra ...,hyp1,hyp2
8,8,Beväpnad man bland 7 döda efter lägenhetsskjut...,addition,En beväpnad man var bland de sju döda efter en...,En man med ett skjutvapen har dödat minst sju ...,hyp2,hyp2
9,9,Irans kärnvapenförhandlingar går in på tredje ...,number,Diskussioner om irans kärnvapenprogram har nåt...,Diskussioner om irans kärnvapenprogram har nåt...,hyp2,hyp2


                         precision    recall  f1-score   support

                   hyp1       0.67      0.22      0.33         9
                   hyp2       0.62      0.91      0.74        11
hyp2 is a hallucination       0.00      0.00      0.00         0

               accuracy                           0.60        20
              macro avg       0.43      0.38      0.36        20
           weighted avg       0.64      0.60      0.56        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


457

# Generate predictions on the test data

## English

In [None]:
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/results_llama3_prompt_narjes_en_v1.csv"

save_results(detect_hallu, data_df_en, csv_filename)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("We can support...") contradicts the information provided in the source, which states "It has enabled us to support...". The use of "It has enabled" implies a past action or state, whereas "We can support" implies a future or present possibility, which is not supported by the source.
{'label': 'hyp2'}

Hypothesis 2 ("We can support...") contradicts the information provided in the source, which states "It has enabled us to support...". The use of "It has enabled" implies a past action or state, whereas "We can support" implies a future or present possibility, which is not supported by the source.
{'label': 'hyp2'}

Hypothesis 2 ("We can support...") contradicts the information provided in the source, which states "It has enabled us to support...". The use of "It has enabled" implies a past action or state, whereas "We can support" implies a future or present possibility, which is not supported by the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states "their victim", indicating that the person is male. Hyp2, however, states "her victim", which contradicts the information provided in the source and is a hallucination.
{'label': 'hyp2'}

Explanation: The source states "their victim", indicating that the person is male. Hyp2, however, states "her victim", which contradicts the information provided in the source and is a hallucination.
{'label': 'hyp2'}

Explanation: The source states "their victim", indicating that the person is male. Hyp2, however, states "her victim", which contradicts the information provided in the source and is a hallucination.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("The final features of the report are its orientation and two instances") contradicts the information provided in the source, which states that the final key feature of the report is its orientation, and it aims to protest against certain drifts. Hypothesis 1 accurately summarizes the information in the source, whereas Hypothesis 2 introduces new information that is not present in the source.
{'label': 'hyp2'}

Hypothesis 2 ("The final features of the report are its orientation and two instances") contradicts the information provided in the source, which states that the final key feature of the report is its orientation, and it aims to protest against certain drifts. Hypothesis 1 accurately summarizes the information in the source, whereas Hypothesis 2 introduces new information that is not present in the source.
{'label': 'hyp2'}

Hypothesis 2 ("The final features of the report are its orientation and two instances") contradicts the information

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 specifically mentions firefighters or security officers, which is not mentioned in the source. The source only talks about "those whose work does not fit into the usual pattern", without providing any specific examples. Hypothesis 1 is consistent with the source, as it only repeats the same idea without adding any additional information.
{'label': 'hyp2'}

Hypothesis 2 specifically mentions firefighters or security officers, which is not mentioned in the source. The source only talks about "those whose work does not fit into the usual pattern", without providing any specific examples. Hypothesis 1 is consistent with the source, as it only repeats the same idea without adding any additional information.
{'label': 'hyp2'}

Hypothesis 2 specifically mentions firefighters or security officers, which is not mentioned in the source. The source only talks about "those whose work does not fit into the usual pattern", without providing any specific exampl

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

The source states that the substantial fall in the average earnings of the Palestinians is proof that the sums have not produced the desired effect. Hyp1, on the other hand, suggests that the fall in average earnings of Egyptians is proof that the sums have not produced the desired effect, which contradicts the information provided in the source.
{'label': 'hyp1'}

The source states that the substantial fall in the average earnings of the Palestinians is proof that the sums have not produced the desired effect. Hyp1, on the other hand, suggests that the fall in average earnings of Egyptians is proof that the sums have not produced the desired effect, which contradicts the information provided in the source.
{'label': 'hyp1'}

The source states that the substantial fall in the average earnings of the Palestinians is proof that the sums have not produced the desired effect. Hyp1, on the other hand, suggests that the fall in average earnings of Egyptians is proo

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("The Berlin summit is important to the future of the European Union") contradicts the information provided in the source, which states that the Berlin summit is of particular importance for the future of the European Union, implying that the future of the European Union is important to the Berlin summit, not the other way around.
{'label': 'hyp2'}

Hypothesis 2 ("The Berlin summit is important to the future of the European Union") contradicts the information provided in the source, which states that the Berlin summit is of particular importance for the future of the European Union, implying that the future of the European Union is important to the Berlin summit, not the other way around.
{'label': 'hyp2'}

Hypothesis 2 ("The Berlin summit is important to the future of the European Union") contradicts the information provided in the source, which states that the Berlin summit is of particular importance for the future of the European Union, imply

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("Because of this, it's not possible for me to promise the chairman of the Committee on Budgets that there would be 210 % funding for the financial programme.") contradicts the information provided in the source, which states that the speaker cannot promise 100% funding, but does not mention anything about 210% funding.
{'label': 'hyp2'}

Hypothesis 2 ("Because of this, it's not possible for me to promise the chairman of the Committee on Budgets that there would be 210 % funding for the financial programme.") contradicts the information provided in the source, which states that the speaker cannot promise 100% funding, but does not mention anything about 210% funding.
{'label': 'hyp2'}

Hypothesis 2 ("Because of this, it's not possible for me to promise the chairman of the Committee on Budgets that there would be 210 % funding for the financial programme.") contradicts the information provided in the source, which states that the speaker cannot pr

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2 is a hallucination'}

Explanation: The source states "as from 2005", which implies that the type approval rules started being effective from 2005, whereas hyp2 states "as of 2005", which implies that the rules were already in place before 2005. This is a contradiction with the information provided in the source, making hyp2 a hallucination.
{'label': 'hyp2 is a hallucination'}

Explanation: The source states "as from 2005", which implies that the type approval rules started being effective from 2005, whereas hyp2 states "as of 2005", which implies that the rules were already in place before 2005. This is a contradiction with the information provided in the source, making hyp2 a hallucination.
{'label': 'hyp2 is a hallucination'}

Explanation: The source states "as from 2005", which implies that the type approval rules started being effective from 2005, whereas hyp2 states "as of 2005", which implies that the rules were already in place before 2005. This is a con

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

The source mentions Leonard Peltier, not Greg Summers, and does not mention Gregory Winters. Therefore, hyp2 contradicts the information provided in the source.
{'label': 'hyp2'}

The source mentions Leonard Peltier, not Greg Summers, and does not mention Gregory Winters. Therefore, hyp2 contradicts the information provided in the source.
{'label': 'hyp2'}

The source mentions Leonard Peltier, not Greg Summers, and does not mention Gregory Winters. Therefore, hyp2 contradicts the information provided in the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 ("Since then, we have restricted air transport, road transport and sea transport.") contradicts the information provided in the source, which states that "we have liberalised sea transport, air transport and road transport". This suggests that the opposite of restriction, which is liberalisation, has occurred.
{'label': 'hyp1'}

Hypothesis 1 ("Since then, we have restricted air transport, road transport and sea transport.") contradicts the information provided in the source, which states that "we have liberalised sea transport, air transport and road transport". This suggests that the opposite of restriction, which is liberalisation, has occurred.
{'label': 'hyp1'}

Hypothesis 1 ("Since then, we have restricted air transport, road transport and sea transport.") contradicts the information provided in the source, which states that "we have liberalised sea transport, air transport and road transport". This suggests that the opposite of restriction,

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("The countries that are still in membership negotiations are being discriminated against.") contradicts the information provided in the source, which states that these countries are "being discriminated against" because they are "still in the process of membership negotiations, but which in the meantime are being discriminated against." This implies that the discrimination is not solely because of the membership negotiations, but also because of the fact that they are still in the process, which is not captured by Hypothesis 2.
{'label': 'hyp2'}

Hypothesis 2 ("The countries that are still in membership negotiations are being discriminated against.") contradicts the information provided in the source, which states that these countries are "being discriminated against" because they are "still in the process of membership negotiations, but which in the meantime are being discriminated against." This implies that the discrimination is not solely be

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2 is a hallucination'}

The source explicitly asks about the initiative of Rambouillet, and both hypotheses (hyp1 and hyp2) are trying to answer that question. However, the source does not provide any information that contradicts either of the hypotheses. Therefore, hyp1 is a valid hypothesis that is in line with the source. On the other hand, hyp2 is essentially a duplicate of hyp1 and does not add any new information. It can be considered a hallucination as it does not provide any additional insight or contradict the source in any way.
{'label': 'hyp2 is a hallucination'}

The source explicitly asks about the initiative of Rambouillet, and both hypotheses (hyp1 and hyp2) are trying to answer that question. However, the source does not provide any information that contradicts either of the hypotheses. Therefore, hyp1 is a valid hypothesis that is in line with the source. On the other hand, hyp2 is essentially a duplicate of hyp1 and does not add any new informati

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

The source states "Mr Fabre-Aubrespy referred" to the third group of amendments, which contradicts hypothesis 2 that states "The third group of amendments was referred to by Mr Fabre- Aubrespy". This is a hallucination as it adds an extra layer of information that is not present in the source.
{'label': 'hyp2'}

The source states "Mr Fabre-Aubrespy referred" to the third group of amendments, which contradicts hypothesis 2 that states "The third group of amendments was referred to by Mr Fabre- Aubrespy". This is a hallucination as it adds an extra layer of information that is not present in the source.
{'label': 'hyp2'}

The source states "Mr Fabre-Aubrespy referred" to the third group of amendments, which contradicts hypothesis 2 that states "The third group of amendments was referred to by Mr Fabre- Aubrespy". This is a hallucination as it adds an extra layer of information that is not present in the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'None','result': 'Both hypotheses (hyp1 and hyp2) are consistent with the information provided in the source.'}
{'label': 'None','result': 'Both hypotheses (hyp1 and hyp2) are consistent with the information provided in the source.'}
{'label': 'None','result': 'Both hypotheses (hyp1 and hyp2) are consistent with the information provided in the source.'}
result_text: {'label': 'None', 'result': 'Both hypotheses (hyp1 and hyp2) are consistent with the information provided in the source.'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, which states that the report was "put together" under Susan Waddington's leadership, implying a positive or neutral tone. The word "critical" in hyp2 suggests a negative tone, which is not supported by the source.
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, which states that the report was "put together" under Susan Waddington's leadership, implying a positive or neutral tone. The word "critical" in hyp2 suggests a negative tone, which is not supported by the source.
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, which states that the report was "put together" under Susan Waddington's leadership, implying a positive or neutral tone. The word "critical" in hyp2 suggests a negative tone, which is not supported by the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that these (referring to the phenomenon being investigated) have largely been behind the development of economic policies, which is good for employment. This implies that the economic policies are good for employment because of the phenomenon, not because of the phenomenon itself.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that these (referring to the phenomenon being investigated) have largely been behind the development of economic policies, which is good for employment. This implies that the economic policies are good for employment because of the phenomenon, not because of the phenomenon itself.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that these (referring to the phenomenon being investigated) have largely been behind the development of economic 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("even though") contradicts the information provided in the source, which states that the Commission cannot accept Amendments 27 and 28 because they "go beyond" the scope of the programme. The word "beyond" implies a scope that is exceeded, which is the opposite of "even though" which implies a scope that is not exceeded.
{'label': 'hyp2'}

Hypothesis 2 ("even though") contradicts the information provided in the source, which states that the Commission cannot accept Amendments 27 and 28 because they "go beyond" the scope of the programme. The word "beyond" implies a scope that is exceeded, which is the opposite of "even though" which implies a scope that is not exceeded.
{'label': 'hyp2'}

Hypothesis 2 ("even though") contradicts the information provided in the source, which states that the Commission cannot accept Amendments 27 and 28 because they "go beyond" the scope of the programme. The word "beyond" implies a scope that is exceeded, which i

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Explanation: Hyp1 mentions "Hercules stable", which is not present in the source. The source specifically mentions "Augean stable", which is consistent with Hyp2.
{'label': 'hyp1'}

Explanation: Hyp1 mentions "Hercules stable", which is not present in the source. The source specifically mentions "Augean stable", which is consistent with Hyp2.
{'label': 'hyp1'}

Explanation: Hyp1 mentions "Hercules stable", which is not present in the source. The source specifically mentions "Augean stable", which is consistent with Hyp2.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: Hyp2 contradicts the information provided in the source by stating that "The European Capital of Culture" was presented by the Commission, whereas the source specifically mentions a "Community initiative for the European Capital of Culture". The source does not suggest that the concept of "European Capital of Culture" itself was presented, but rather a community initiative related to it.
{'label': 'hyp2'}

Explanation: Hyp2 contradicts the information provided in the source by stating that "The European Capital of Culture" was presented by the Commission, whereas the source specifically mentions a "Community initiative for the European Capital of Culture". The source does not suggest that the concept of "European Capital of Culture" itself was presented, but rather a community initiative related to it.
{'label': 'hyp2'}

Explanation: Hyp2 contradicts the information provided in the source by stating that "The European Capital of Culture" was pres

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the Committee on Agriculture and Rural Development has made changes.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the Committee on Agriculture and Rural Development has made changes.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the Committee on Agriculture and Rural Development has made changes.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source mentions "Amendment No 1" and "this" (referring to the phenomenon being investigated), whereas Hypothesis 2 states that there are excellent examples of the Amendment itself in the Nordic countries.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source mentions "Amendment No 1" and "this" (referring to the phenomenon being investigated), whereas Hypothesis 2 states that there are excellent examples of the Amendment itself in the Nordic countries.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source mentions "Amendment No 1" and "this" (referring to the phenomenon being investigated), whereas Hypothesis 2 states that there are excellent examples of the Amendment itself in the Nordic countries.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("Politicians don't understand the link between CO2 emissions and energy consumption") contradicts the information provided in the source, which states that "It is we politicians who are most concerned, because we understand the link between energy consumption and CO2 emissions". This implies that politicians do understand the link between CO2 emissions and energy consumption, which is the opposite of what hypothesis 2 claims.
{'label': 'hyp2'}

Hypothesis 2 ("Politicians don't understand the link between CO2 emissions and energy consumption") contradicts the information provided in the source, which states that "It is we politicians who are most concerned, because we understand the link between energy consumption and CO2 emissions". This implies that politicians do understand the link between CO2 emissions and energy consumption, which is the opposite of what hypothesis 2 claims.
{'label': 'hyp2'}

Hypothesis 2 ("Politicians don't understand the

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1','result': 'hallucination'}

Hypothesis 1 ("Tobacco growing will benefit by a 3 % phased reduction in aid") contradicts the information provided in the source, which states that a uniform phased reduction of 3 % will primarily penalize the sectors of production which receive the most aid, including tobacco growing. This implies that tobacco growing will be negatively affected, not benefited.
{'label': 'hyp1','result': 'hallucination'}

Hypothesis 1 ("Tobacco growing will benefit by a 3 % phased reduction in aid") contradicts the information provided in the source, which states that a uniform phased reduction of 3 % will primarily penalize the sectors of production which receive the most aid, including tobacco growing. This implies that tobacco growing will be negatively affected, not benefited.
{'label': 'hyp1','result': 'hallucination'}

Hypothesis 1 ("Tobacco growing will benefit by a 3 % phased reduction in aid") contradicts the information provided in the so

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the women are "eliminated right from the stage of conception". This suggests that the elimination occurs at the earliest stage possible, which is before conception. Hypothesis 2, on the other hand, proposes that the elimination occurs directly after conception, which contradicts the source's statement.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the women are "eliminated right from the stage of conception". This suggests that the elimination occurs at the earliest stage possible, which is before conception. Hypothesis 2, on the other hand, proposes that the elimination occurs directly after conception, which contradicts the source's statement.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the women are "eliminated right from the stage of conception". This suggests t

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source mentions "I hope that the Euro-Mediterranean Conference to held in Stuttgart between 4 and 6 April will enable us to make progress in that direction", which implies the speaker is referring to themselves (using "us"). Hyp2, on the other hand, uses "you" instead of "us", which contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source mentions "I hope that the Euro-Mediterranean Conference to held in Stuttgart between 4 and 6 April will enable us to make progress in that direction", which implies the speaker is referring to themselves (using "us"). Hyp2, on the other hand, uses "you" instead of "us", which contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source mentions "I hope that the Euro-Mediterranean Conference to held in Stuttgart between 4 and 6 April will enable us to make progress in that direction", which implies the speaker is referring to themselves (

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which only mentions that Turkey was included in the proposal for three specific programmes (Socrates, Youth for Europe, and Leonardo).
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which only mentions that Turkey was included in the proposal for three specific programmes (Socrates, Youth for Europe, and Leonardo).
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which only mentions that Turkey was included in the proposal for three specific programmes (Socrates, Youth for Europe, and Leonardo).
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states that Amendments Nos 19, 21 and 51 are aimed at regulating the import and export of genetically modified organisms. Hyp2 specifically mentions Amendments Nos 19 and 21, which contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source states that Amendments Nos 19, 21 and 51 are aimed at regulating the import and export of genetically modified organisms. Hyp2 specifically mentions Amendments Nos 19 and 21, which contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source states that Amendments Nos 19, 21 and 51 are aimed at regulating the import and export of genetically modified organisms. Hyp2 specifically mentions Amendments Nos 19 and 21, which contradicts the information provided in the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source specifically mentions that the amendments are related to public inquiries or consultation in connection with trial releases, which implies that there is more to the amendments than just trial releases. Hyp1 accurately captures this information, whereas hyp2 oversimplifies it by stating that the amendments are only related to trial releases, contradicting the source.
{'label': 'hyp2'}

Explanation: The source specifically mentions that the amendments are related to public inquiries or consultation in connection with trial releases, which implies that there is more to the amendments than just trial releases. Hyp1 accurately captures this information, whereas hyp2 oversimplifies it by stating that the amendments are only related to trial releases, contradicting the source.
{'label': 'hyp2'}

Explanation: The source specifically mentions that the amendments are related to public inquiries or consultation in connection with trial releases, 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that this great country has a billion inhabitants, not a million.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that this great country has a billion inhabitants, not a million.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that this great country has a billion inhabitants, not a million.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source mentions that it would "perhaps be possible to set up surveillance operations to detect the flimsy craft used to make the crossing", which implies that detection is possible. Hyp1 aligns with this information, stating that detection is possible. On the other hand, hyp2 contradicts the source by stating that it would be possible to overlook the flimsy craft, which is not supported by the provided information.
{'label': 'hyp2'}

Explanation: The source mentions that it would "perhaps be possible to set up surveillance operations to detect the flimsy craft used to make the crossing", which implies that detection is possible. Hyp1 aligns with this information, stating that detection is possible. On the other hand, hyp2 contradicts the source by stating that it would be possible to overlook the flimsy craft, which is not supported by the provided information.
{'label': 'hyp2'}

Explanation: The source mentions that it would "perhaps be poss

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 states that every individual Commissioner's accountability must be known by the public, which contradicts the source that states the public must be aware of the collective responsibility borne by the whole Commission, but also of every individual Commissioner's accountability. The source does not suggest that the public should know every individual Commissioner's accountability, but rather that they should be aware of the accountability of each Commissioner.
{'label': 'hyp2'}

Hypothesis 2 states that every individual Commissioner's accountability must be known by the public, which contradicts the source that states the public must be aware of the collective responsibility borne by the whole Commission, but also of every individual Commissioner's accountability. The source does not suggest that the public should know every individual Commissioner's accountability, but rather that they should be aware of the accountability of each Commissioner.
{'

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("We need to make sure that the investigations at the EU's external borders are not effective") contradicts the information provided in the source, which states that the goal is to ensure that the investigations are effective.
{'label': 'hyp2'}

Hypothesis 2 ("We need to make sure that the investigations at the EU's external borders are not effective") contradicts the information provided in the source, which states that the goal is to ensure that the investigations are effective.
{'label': 'hyp2'}

Hypothesis 2 ("We need to make sure that the investigations at the EU's external borders are not effective") contradicts the information provided in the source, which states that the goal is to ensure that the investigations are effective.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the Malta forum will take place in March, but does not specify the year.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the Malta forum will take place in March, but does not specify the year.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the Malta forum will take place in March, but does not specify the year.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: Hyp1 is a paraphrased version of the source text, while hyp2 introduces a third-person pronoun ("they") that is not present in the source text.
{'label': 'hyp2'}

Explanation: Hyp1 is a paraphrased version of the source text, while hyp2 introduces a third-person pronoun ("they") that is not present in the source text.
{'label': 'hyp2'}

Explanation: Hyp1 is a paraphrased version of the source text, while hyp2 introduces a third-person pronoun ("they") that is not present in the source text.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the taxable interest payments will be outside the 15 Member States, implying that interest payments within the 15 Member States will not be taxed. Hypothesis 2, on the other hand, suggests that only interest payments outside of the 15 Member States can be taxed, which is not supported by the source.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the taxable interest payments will be outside the 15 Member States, implying that interest payments within the 15 Member States will not be taxed. Hypothesis 2, on the other hand, suggests that only interest payments outside of the 15 Member States can be taxed, which is not supported by the source.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that the taxable interest payments will be outside the 15 Member States, implying that in

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 states that the directive would not apply from the year 2020, which contradicts the information provided in the source. The source states that the directive would only apply from the year 2020, which is consistent with hypothesis 2.
{'label': 'hyp1'}

Hypothesis 1 states that the directive would not apply from the year 2020, which contradicts the information provided in the source. The source states that the directive would only apply from the year 2020, which is consistent with hypothesis 2.
{'label': 'hyp1'}

Hypothesis 1 states that the directive would not apply from the year 2020, which contradicts the information provided in the source. The source states that the directive would only apply from the year 2020, which is consistent with hypothesis 2.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("The way in which the framework directive was dealt with was very confusing") contradicts the information provided in the source, which states that the way it was dealt with was "extremely confusing". This suggests that the actual experience was one of confusion, which is the opposite of what Hypothesis 2 proposes.
{'label': 'hyp2'}

Hypothesis 2 ("The way in which the framework directive was dealt with was very confusing") contradicts the information provided in the source, which states that the way it was dealt with was "extremely confusing". This suggests that the actual experience was one of confusion, which is the opposite of what Hypothesis 2 proposes.
{'label': 'hyp2'}

Hypothesis 2 ("The way in which the framework directive was dealt with was very confusing") contradicts the information provided in the source, which states that the way it was dealt with was "extremely confusing". This suggests that the actual experience was one of confus

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as there is no mention of paragraph 11.4 in the source.
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as there is no mention of paragraph 11.4 in the source.
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as there is no mention of paragraph 11.4 in the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that the reduction is "compared to 1990 levels", whereas hypothesis 2 mentions a specific quantity of "8 metric tons of CO2", which is not mentioned in the source.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that the reduction is "compared to 1990 levels", whereas hypothesis 2 mentions a specific quantity of "8 metric tons of CO2", which is not mentioned in the source.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as the source states that the reduction is "compared to 1990 levels", whereas hypothesis 2 mentions a specific quantity of "8 metric tons of CO2", which is not mentioned in the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: Hyp2 contradicts the information provided in the source, which states that the tax burden is eased on companies and large conglomerates. Hyp2, on the other hand, suggests that the tax burden is lowered on companies and large conglomerates, which is a contradiction.
{'label': 'hyp2'}

Explanation: Hyp2 contradicts the information provided in the source, which states that the tax burden is eased on companies and large conglomerates. Hyp2, on the other hand, suggests that the tax burden is lowered on companies and large conglomerates, which is a contradiction.
{'label': 'hyp2'}

Explanation: Hyp2 contradicts the information provided in the source, which states that the tax burden is eased on companies and large conglomerates. Hyp2, on the other hand, suggests that the tax burden is lowered on companies and large conglomerates, which is a contradiction.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: Hyp2 states that the Community plan of action has already been put in place, which contradicts the information in the source that the plan has already been established. The source does not mention that it has been put in place successfully or with only a few delays, which is mentioned in hyp1. Therefore, hyp2 is the hypothesis that contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: Hyp2 states that the Community plan of action has already been put in place, which contradicts the information in the source that the plan has already been established. The source does not mention that it has been put in place successfully or with only a few delays, which is mentioned in hyp1. Therefore, hyp2 is the hypothesis that contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: Hyp2 states that the Community plan of action has already been put in place, which contradicts the information in the source

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Explanation: The source states that "he" returned to Berlin in 1945, which contradicts hypothesis 1 that "she" came back. Hypothesis 2, which states that "he" came back, is consistent with the source and therefore is not a hallucination.
{'label': 'hyp1'}

Explanation: The source states that "he" returned to Berlin in 1945, which contradicts hypothesis 1 that "she" came back. Hypothesis 2, which states that "he" came back, is consistent with the source and therefore is not a hallucination.
{'label': 'hyp1'}

Explanation: The source states that "he" returned to Berlin in 1945, which contradicts hypothesis 1 that "she" came back. Hypothesis 2, which states that "he" came back, is consistent with the source and therefore is not a hallucination.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 (hyp1) contradicts the information provided in the source, which specifically mentions the ISO, whereas hyp1 mentions the MSZT.
{'label': 'hyp1'}

Hypothesis 1 (hyp1) contradicts the information provided in the source, which specifically mentions the ISO, whereas hyp1 mentions the MSZT.
{'label': 'hyp1'}

Hypothesis 1 (hyp1) contradicts the information provided in the source, which specifically mentions the ISO, whereas hyp1 mentions the MSZT.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 (hyp1) contradicts the information provided in the source, which states that the Single Market Council adopted provisions regarding the development of a single market in medicinal products in May 1998, not in 1989.
{'label': 'hyp1'}

Hypothesis 1 (hyp1) contradicts the information provided in the source, which states that the Single Market Council adopted provisions regarding the development of a single market in medicinal products in May 1998, not in 1989.
{'label': 'hyp1'}

Hypothesis 1 (hyp1) contradicts the information provided in the source, which states that the Single Market Council adopted provisions regarding the development of a single market in medicinal products in May 1998, not in 1989.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as the source states that the report is "part of the consequences of our withholding the discharge for 1996", implying that the report is a consequence, not a part of a consequence.
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as the source states that the report is "part of the consequences of our withholding the discharge for 1996", implying that the report is a consequence, not a part of a consequence.
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as the source states that the report is "part of the consequences of our withholding the discharge for 1996", implying that the report is a consequence, not a part of a consequence.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("It is important that innovation becomes a more essential part of interregional and cross border cooperation.") contradicts the source, which only states that innovation should "become a more integral part", not "more essential".
{'label': 'hyp2'}

Hypothesis 2 ("It is important that innovation becomes a more essential part of interregional and cross border cooperation.") contradicts the source, which only states that innovation should "become a more integral part", not "more essential".
{'label': 'hyp2'}

Hypothesis 2 ("It is important that innovation becomes a more essential part of interregional and cross border cooperation.") contradicts the source, which only states that innovation should "become a more integral part", not "more essential".
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states that "Environmental considerations must feature prominently in this process, particularly sustainable development, as it is one of the distinguishing features of the European Union." This implies that sustainable development is a distinguishing feature of the European Union, not the environment. Therefore, hyp2, which states that the environment is a distinguishing feature, contradicts the information provided in the source and is the hallucination.
{'label': 'hyp2'}

Explanation: The source states that "Environmental considerations must feature prominently in this process, particularly sustainable development, as it is one of the distinguishing features of the European Union." This implies that sustainable development is a distinguishing feature of the European Union, not the environment. Therefore, hyp2, which states that the environment is a distinguishing feature, contradicts the information provided in the source and is the

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

The source states "Mrs Haug's report", which contradicts hypothesis 2 which states "Mx Haug's report".
{'label': 'hyp2'}

The source states "Mrs Haug's report", which contradicts hypothesis 2 which states "Mx Haug's report".
{'label': 'hyp2'}

The source states "Mrs Haug's report", which contradicts hypothesis 2 which states "Mx Haug's report".
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source. The source states that 80% of people who carry out secretarial and office work are women, while men occupy 87% of managerial positions. Hypothesis 1 combines these two groups and states that 80% of people who work in the office are women, while 87% are men. This is not supported by the source, which provides separate information about secretarial and managerial positions.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source. The source states that 80% of people who carry out secretarial and office work are women, while men occupy 87% of managerial positions. Hypothesis 1 combines these two groups and states that 80% of people who work in the office are women, while 87% are men. This is not supported by the source, which provides separate information about secretarial and managerial positions.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in th

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("The US is overstepping its bounds by threatening to boycott trade") contradicts the information provided in the source, which specifically mentions "the oil trade".
{'label': 'hyp2'}

Hypothesis 2 ("The US is overstepping its bounds by threatening to boycott trade") contradicts the information provided in the source, which specifically mentions "the oil trade".
{'label': 'hyp2'}

Hypothesis 2 ("The US is overstepping its bounds by threatening to boycott trade") contradicts the information provided in the source, which specifically mentions "the oil trade".
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states that people would think we were mad if we said drivers were allowed to take their cars on the road without an insurance certificate. Hyp1 contradicts this information by stating that people would not think we were crazy, which is the opposite of what the source says. Hyp2, on the other hand, aligns with the source's statement, suggesting that people would indeed think we were crazy if we said drivers could take their cars on the road without insurance.
{'label': 'hyp2'}

Explanation: The source states that people would think we were mad if we said drivers were allowed to take their cars on the road without an insurance certificate. Hyp1 contradicts this information by stating that people would not think we were crazy, which is the opposite of what the source says. Hyp2, on the other hand, aligns with the source's statement, suggesting that people would indeed think we were crazy if we said drivers could take their cars on the ro

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

The source explicitly states that the adoption of the stance is incompatible with the aims of the Treaty, which is only compatible with hypothesis 2. Hypothesis 1 contradicts the information provided in the source, suggesting that the aims of the Treaty are compatible with the stance adopted, which is not supported by the evidence.
{'label': 'hyp1'}

The source explicitly states that the adoption of the stance is incompatible with the aims of the Treaty, which is only compatible with hypothesis 2. Hypothesis 1 contradicts the information provided in the source, suggesting that the aims of the Treaty are compatible with the stance adopted, which is not supported by the evidence.
{'label': 'hyp1'}

The source explicitly states that the adoption of the stance is incompatible with the aims of the Treaty, which is only compatible with hypothesis 2. Hypothesis 1 contradicts the information provided in the source, suggesting that the aims of the Treaty are compatibl

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 ("The majority of the national authorities in Sweden have lost population") contradicts the information provided in the source, which states that the authorities that have lost population are all local authorities, not national authorities.
{'label': 'hyp1'}

Hypothesis 1 ("The majority of the national authorities in Sweden have lost population") contradicts the information provided in the source, which states that the authorities that have lost population are all local authorities, not national authorities.
{'label': 'hyp1'}

Hypothesis 1 ("The majority of the national authorities in Sweden have lost population") contradicts the information provided in the source, which states that the authorities that have lost population are all local authorities, not national authorities.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source explicitly states that Amendment No 25 and Amendments Nos 46 and 54 cannot be accepted, whereas hyp2 suggests that we can accept them. This contradicts the information provided in the source, making hyp2 the hallucination.
{'label': 'hyp2'}

Explanation: The source explicitly states that Amendment No 25 and Amendments Nos 46 and 54 cannot be accepted, whereas hyp2 suggests that we can accept them. This contradicts the information provided in the source, making hyp2 the hallucination.
{'label': 'hyp2'}

Explanation: The source explicitly states that Amendment No 25 and Amendments Nos 46 and 54 cannot be accepted, whereas hyp2 suggests that we can accept them. This contradicts the information provided in the source, making hyp2 the hallucination.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: Hyp1 states that the section "news from 50 years ago" appeared in yesterday's International Herald Tribune, which is a factual statement supported by the source. On the other hand, hyp2 adds an extra layer of information ("This appeared") which is not present in the source, making it a hallucination.
{'label': 'hyp2'}

Explanation: Hyp1 states that the section "news from 50 years ago" appeared in yesterday's International Herald Tribune, which is a factual statement supported by the source. On the other hand, hyp2 adds an extra layer of information ("This appeared") which is not present in the source, making it a hallucination.
{'label': 'hyp2'}

Explanation: Hyp1 states that the section "news from 50 years ago" appeared in yesterday's International Herald Tribune, which is a factual statement supported by the source. On the other hand, hyp2 adds an extra layer of information ("This appeared") which is not present in the source, making it a hallu

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source mentions a 12-mile limit, not a 12-kilometer limit. Therefore, hypothesis 2, which states a 12-mile limit, contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source mentions a 12-mile limit, not a 12-kilometer limit. Therefore, hypothesis 2, which states a 12-mile limit, contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source mentions a 12-mile limit, not a 12-kilometer limit. Therefore, hypothesis 2, which states a 12-mile limit, contradicts the information provided in the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source by not specifying the exact date of the conclave, which is mentioned in the source as "on 21 February".
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source by not specifying the exact date of the conclave, which is mentioned in the source as "on 21 February".
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source by not specifying the exact date of the conclave, which is mentioned in the source as "on 21 February".
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 explicitly states that the report excludes cigarettes, which contradicts the source information that cigarettes account for 95% of the data.
{'label': 'hyp1'}

Hypothesis 1 explicitly states that the report excludes cigarettes, which contradicts the source information that cigarettes account for 95% of the data.
{'label': 'hyp1'}

Hypothesis 1 explicitly states that the report excludes cigarettes, which contradicts the source information that cigarettes account for 95% of the data.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source because it suggests a monetary value (70 dollars) instead of a percentage (70%) which is mentioned in the source.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source because it suggests a monetary value (70 dollars) instead of a percentage (70%) which is mentioned in the source.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source because it suggests a monetary value (70 dollars) instead of a percentage (70%) which is mentioned in the source.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states that Member States are being asked to "harmonise" their economic policies, which means to bring them into line or make them consistent. Hyp2 suggests that Member States are being asked to "reconcile" their economic policies, which means to settle or resolve differences or conflicts. This is a contradictory idea, as reconciling implies resolving differences, whereas harmonising implies making consistent.
{'label': 'hyp2'}

Explanation: The source states that Member States are being asked to "harmonise" their economic policies, which means to bring them into line or make them consistent. Hyp2 suggests that Member States are being asked to "reconcile" their economic policies, which means to settle or resolve differences or conflicts. This is a contradictory idea, as reconciling implies resolving differences, whereas harmonising implies making consistent.
{'label': 'hyp2'}

Explanation: The source states that Member States are being

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states that the vehicles should only be exempt from Articles 4 and 7 of the proposal. Hyp1 is consistent with this information, as it specifically mentions exempting articles 4 and 7. On the other hand, hyp2 is contradictory, as it does not specify which articles the vehicles should be exempt from, and implies a broader exemption that contradicts the source's information.
{'label': 'hyp2'}

Explanation: The source states that the vehicles should only be exempt from Articles 4 and 7 of the proposal. Hyp1 is consistent with this information, as it specifically mentions exempting articles 4 and 7. On the other hand, hyp2 is contradictory, as it does not specify which articles the vehicles should be exempt from, and implies a broader exemption that contradicts the source's information.
{'label': 'hyp2'}

Explanation: The source states that the vehicles should only be exempt from Articles 4 and 7 of the proposal. Hyp1 is consistent with thi

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("If large containers can't be brought to the trains, how can rail transport compete with road transportation?") contradicts the information provided in the source, which states that large containers cannot be brought to the trains, implying that rail transport cannot compete with road transportation.
{'label': 'hyp2'}

Hypothesis 2 ("If large containers can't be brought to the trains, how can rail transport compete with road transportation?") contradicts the information provided in the source, which states that large containers cannot be brought to the trains, implying that rail transport cannot compete with road transportation.
{'label': 'hyp2'}

Hypothesis 2 ("If large containers can't be brought to the trains, how can rail transport compete with road transportation?") contradicts the information provided in the source, which states that large containers cannot be brought to the trains, implying that rail transport cannot compete with road tra

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states "several operators have given way", whereas the source mentions "the initial birth of several operators", implying that the several operators already exist and are not "giving way" to anything. Hypothesis 1, on the other hand, is consistent with the source, as it describes the initial establishment of several operators giving way to their reconcentration in the hands of one company.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, as it states "several operators have given way", whereas the source mentions "the initial birth of several operators", implying that the several operators already exist and are not "giving way" to anything. Hypothesis 1, on the other hand, is consistent with the source, as it describes the initial establishment of several operators giving way to their reconcentration in the hands of one company.
{'label': 'hyp2'}

Hypothe

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("I agree with my colleague that more efforts could be made in this direction.") contradicts the information provided in the source, which mentions "my honourable colleague" and "we are demanding progress here", implying that the speaker is not just agreeing with a colleague, but also emphasizing their own involvement in the effort.
{'label': 'hyp2'}

Hypothesis 2 ("I agree with my colleague that more efforts could be made in this direction.") contradicts the information provided in the source, which mentions "my honourable colleague" and "we are demanding progress here", implying that the speaker is not just agreeing with a colleague, but also emphasizing their own involvement in the effort.
{'label': 'hyp2'}

Hypothesis 2 ("I agree with my colleague that more efforts could be made in this direction.") contradicts the information provided in the source, which mentions "my honourable colleague" and "we are demanding progress here", implying that 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states that the phenomenon occurs in frontier regions where the inhabitants do not understand their neighbour's language. This implies that the language barrier is the key factor. Hyp2, on the other hand, states that the phenomenon occurs in regions where the inhabitants do understand their neighbour's language, which contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source states that the phenomenon occurs in frontier regions where the inhabitants do not understand their neighbour's language. This implies that the language barrier is the key factor. Hyp2, on the other hand, states that the phenomenon occurs in regions where the inhabitants do understand their neighbour's language, which contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source states that the phenomenon occurs in frontier regions where the inhabitants do not understand their neighbour's languag

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hyp1 contradicts the information provided in the source, which states that the Council of Ministers adopted general guidelines on the issue of the death penalty in June 1998, not late 1998.
{'label': 'hyp1'}

Hyp1 contradicts the information provided in the source, which states that the Council of Ministers adopted general guidelines on the issue of the death penalty in June 1998, not late 1998.
{'label': 'hyp1'}

Hyp1 contradicts the information provided in the source, which states that the Council of Ministers adopted general guidelines on the issue of the death penalty in June 1998, not late 1998.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 states that television campaigns would be designed to persuade would-be immigrants from crossing the border, which contradicts the information provided in the source. The source mentions "dissuade" would-be immigrants, implying that the campaigns would discourage them from crossing, not persuade them.
{'label': 'hyp2'}

Hypothesis 2 states that television campaigns would be designed to persuade would-be immigrants from crossing the border, which contradicts the information provided in the source. The source mentions "dissuade" would-be immigrants, implying that the campaigns would discourage them from crossing, not persuade them.
{'label': 'hyp2'}

Hypothesis 2 states that television campaigns would be designed to persuade would-be immigrants from crossing the border, which contradicts the information provided in the source. The source mentions "dissuade" would-be immigrants, implying that the campaigns would discourage them from crossing, not pe

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, as it omits the phrase "à liquider" which is present in the source.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, as it omits the phrase "à liquider" which is present in the source.
{'label': 'hyp1'}

Hypothesis 1 contradicts the information provided in the source, as it omits the phrase "à liquider" which is present in the source.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 ("The negotiations regarding the reform of agricultural policy have been finalized by the governments.") contradicts the information provided in the source, which states that the negotiations are still in progress and will be resumed today.
{'label': 'hyp1'}

Hypothesis 1 ("The negotiations regarding the reform of agricultural policy have been finalized by the governments.") contradicts the information provided in the source, which states that the negotiations are still in progress and will be resumed today.
{'label': 'hyp1'}

Hypothesis 1 ("The negotiations regarding the reform of agricultural policy have been finalized by the governments.") contradicts the information provided in the source, which states that the negotiations are still in progress and will be resumed today.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that Declaration No 39 was annexed to the Treaty of Amsterdam. Hypothesis 1 is consistent with the source, as it mentions the annexation of Declaration No 39 to the Treaty of Amsterdam for the purpose of discussing drafting quality.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that Declaration No 39 was annexed to the Treaty of Amsterdam. Hypothesis 1 is consistent with the source, as it mentions the annexation of Declaration No 39 to the Treaty of Amsterdam for the purpose of discussing drafting quality.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that Declaration No 39 was annexed to the Treaty of Amsterdam. Hypothesis 1 is consistent with the source, as it mentions the annexation of Declaration No 39 to the Treaty of Amsterdam for the purpose of discussing drafting quality.
re

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source states that the European Council meeting in Berlin should "cut the Gordian knot on Agenda 2000", which implies that Agenda 2000 should be handled or addressed during the meeting, not cut. Therefore, hyp2, which states that Agenda 2000 should be cut, contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source states that the European Council meeting in Berlin should "cut the Gordian knot on Agenda 2000", which implies that Agenda 2000 should be handled or addressed during the meeting, not cut. Therefore, hyp2, which states that Agenda 2000 should be cut, contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source states that the European Council meeting in Berlin should "cut the Gordian knot on Agenda 2000", which implies that Agenda 2000 should be handled or addressed during the meeting, not cut. Therefore, hyp2, which states that Agenda 2000 should be cut, contradic

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source because the source states "That is precisely why we cannot give way to the unjustified US demand", which implies that the US is the one demanding something, not the other way around. Hypothesis 2 flips this around, stating that "they" (presumably the researcher's team) are the ones threatening sanctions, which is not supported by the source.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source because the source states "That is precisely why we cannot give way to the unjustified US demand", which implies that the US is the one demanding something, not the other way around. Hypothesis 2 flips this around, stating that "they" (presumably the researcher's team) are the ones threatening sanctions, which is not supported by the source.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source because the source states "That is precisely why we can

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 ("A thorough investigation of all aspects of aquaculture may be warranted") contradicts the information provided in the source, which states that "all aspects of the question need to be brought together". The source explicitly states that a thorough investigation is needed, whereas Hypothesis 2 presents it as a possibility ("may be warranted").
{'label': 'hyp2'}

Hypothesis 2 ("A thorough investigation of all aspects of aquaculture may be warranted") contradicts the information provided in the source, which states that "all aspects of the question need to be brought together". The source explicitly states that a thorough investigation is needed, whereas Hypothesis 2 presents it as a possibility ("may be warranted").
{'label': 'hyp2'}

Hypothesis 2 ("A thorough investigation of all aspects of aquaculture may be warranted") contradicts the information provided in the source, which states that "all aspects of the question need to be brought together

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 ("I hope that the European cities of culture will be closed tomorrow.") contradicts the information provided in the source, which states that the dossier on European cities of culture will be closed, not the European cities of culture themselves.
{'label': 'hyp1'}

Hypothesis 1 ("I hope that the European cities of culture will be closed tomorrow.") contradicts the information provided in the source, which states that the dossier on European cities of culture will be closed, not the European cities of culture themselves.
{'label': 'hyp1'}

Hypothesis 1 ("I hope that the European cities of culture will be closed tomorrow.") contradicts the information provided in the source, which states that the dossier on European cities of culture will be closed, not the European cities of culture themselves.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as it uses the word "harmonisation" which is not present in the source. The source only mentions "proposal for the harmonisation of copyright law", whereas hyp2 uses "harmonisation proposal".
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as it uses the word "harmonisation" which is not present in the source. The source only mentions "proposal for the harmonisation of copyright law", whereas hyp2 uses "harmonisation proposal".
{'label': 'hyp2'}

Hyp2 contradicts the information provided in the source, as it uses the word "harmonisation" which is not present in the source. The source only mentions "proposal for the harmonisation of copyright law", whereas hyp2 uses "harmonisation proposal".
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source explicitly mentions "we", which implies that the speaker is the researcher, whereas hyp2 uses the pronoun "we" in a way that suggests a collective or third-person perspective. This subtle difference in pronoun usage makes hyp2 a hallucination that contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source explicitly mentions "we", which implies that the speaker is the researcher, whereas hyp2 uses the pronoun "we" in a way that suggests a collective or third-person perspective. This subtle difference in pronoun usage makes hyp2 a hallucination that contradicts the information provided in the source.
{'label': 'hyp2'}

Explanation: The source explicitly mentions "we", which implies that the speaker is the researcher, whereas hyp2 uses the pronoun "we" in a way that suggests a collective or third-person perspective. This subtle difference in pronoun usage makes hyp2 a hallucination that contradicts the

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 ("The pay and working conditions of those employed in navigation should be better than those of other jobs.") contradicts the information provided in the source, which states that the pay and working conditions of those employed in navigation should be "in line with" those of other occupations, implying equality rather than superiority.
{'label': 'hyp1'}

Hypothesis 1 ("The pay and working conditions of those employed in navigation should be better than those of other jobs.") contradicts the information provided in the source, which states that the pay and working conditions of those employed in navigation should be "in line with" those of other occupations, implying equality rather than superiority.
{'label': 'hyp1'}

Hypothesis 1 ("The pay and working conditions of those employed in navigation should be better than those of other jobs.") contradicts the information provided in the source, which states that the pay and working conditions of thos

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source explicitly states that the Commission is no longer willing to accept ever-increasing tasks without receiving the means to execute them. This implies that the Commission is prepared to accept tasks without means, which contradicts hypothesis 2. On the other hand, hypothesis 1 does not contradict the source, as it only states that the Commission is not prepared to accept tasks without means, which is consistent with the source.
{'label': 'hyp2'}

Explanation: The source explicitly states that the Commission is no longer willing to accept ever-increasing tasks without receiving the means to execute them. This implies that the Commission is prepared to accept tasks without means, which contradicts hypothesis 2. On the other hand, hypothesis 1 does not contradict the source, as it only states that the Commission is not prepared to accept tasks without means, which is consistent with the source.
{'label': 'hyp2'}

Explanation: The source exp

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Explanation: The source mentions "the enlargement of the EU", which is consistent with hypothesis 1. Hypothesis 2, on the other hand, mentions "the enlargement of the EEAS", which is not mentioned in the source.
{'label': 'hyp2'}

Explanation: The source mentions "the enlargement of the EU", which is consistent with hypothesis 1. Hypothesis 2, on the other hand, mentions "the enlargement of the EEAS", which is not mentioned in the source.
{'label': 'hyp2'}

Explanation: The source mentions "the enlargement of the EU", which is consistent with hypothesis 1. Hypothesis 2, on the other hand, mentions "the enlargement of the EEAS", which is not mentioned in the source.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 states that the vote on Amendment No 98 will take place after the current vote, which contradicts the information provided in the source that mentions the vote on Amendment No 98 in relation to the current vote.
{'label': 'hyp2'}

Hypothesis 2 states that the vote on Amendment No 98 will take place after the current vote, which contradicts the information provided in the source that mentions the vote on Amendment No 98 in relation to the current vote.
{'label': 'hyp2'}

Hypothesis 2 states that the vote on Amendment No 98 will take place after the current vote, which contradicts the information provided in the source that mentions the vote on Amendment No 98 in relation to the current vote.
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}

Hypothesis 1 ("I was struck by the abundance of knowledge when I joined the European Parliament") contradicts the information provided in the source, which states that the speaker was "struck by the lack of knowledge" when they joined the European Parliament.
{'label': 'hyp1'}

Hypothesis 1 ("I was struck by the abundance of knowledge when I joined the European Parliament") contradicts the information provided in the source, which states that the speaker was "struck by the lack of knowledge" when they joined the European Parliament.
{'label': 'hyp1'}

Hypothesis 1 ("I was struck by the abundance of knowledge when I joined the European Parliament") contradicts the information provided in the source, which states that the speaker was "struck by the lack of knowledge" when they joined the European Parliament.
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that "we can replace 16 out of 626 Members of the European Parliament, but we can never again as voters decide to bring in a new law." This implies that voters cannot decide on a new law, which is the opposite of what hypothesis 2 claims.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that "we can replace 16 out of 626 Members of the European Parliament, but we can never again as voters decide to bring in a new law." This implies that voters cannot decide on a new law, which is the opposite of what hypothesis 2 claims.
{'label': 'hyp2'}

Hypothesis 2 contradicts the information provided in the source, which states that "we can replace 16 out of 626 Members of the European Parliament, but we can never again as voters decide to bring in a new law." This implies that voters cannot decide on a new law, which is the opposite of what hypot

[]

In [None]:
# save a csv file in the task format

import pandas as pd

# Load the CSV file
df = pd.read_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/results_llama3_prompt_narjes_en_v1.csv')

# Select only the columns 'id', 'hyp+', and 'hyp-'
df = df[['id', 'prediction']]
df = df.rename(columns={'prediction': 'label'})

# Save the new DataFrame to a CSV file
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/final_llama3_prompt_narjes_en_v1_detection.csv', index=False)

## Swedish

In [25]:
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/results_llama3_prompt_narjes_se_v2.csv"

save_results(detect_hallu, data_df_se, csv_filename)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}
Label:  {'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


[]

In [26]:
# Load the CSV file
df = pd.read_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_llama3/results_llama3_prompt_narjes_se_v2.csv')

# Select only the columns 'id', 'hyp+', and 'hyp-'
df = df[['id', 'prediction']]
df = df.rename(columns={'prediction': 'label'})

# Save the new DataFrame to a CSV file
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/final_llama3_prompt_narjes_se_v2_detection.csv', index=False)

In [None]:
display(df)

Unnamed: 0,id,label
0,0,hyp2
1,1,hyp1
2,2,hyp2
3,3,hyp2
4,4,hyp2
...,...,...
114,114,hyp2
115,115,hyp1
116,116,hyp2
117,117,hyp2
