In [1]:
!pip install pandas transformers guidance numpy evaluate rouge-score nltk py-rouge accelerate openai

Collecting guidance
  Downloading guidance-0.1.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (234 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m234.4/234.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting py-rouge
  Downloading py_rouge-1.1-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.30.1-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloadin

In [2]:
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import json
import nltk
import numpy as np
from guidance import gen
from evaluate import load
import csv
import nltk.translate.bleu_score as bleu
from rouge import Rouge
from transformers import BertTokenizer, BertModel
import torch
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import warnings
from typing import Any, Dict, Tuple
import re
import os

# Silence UserWarnings whose message starts with the text below (`message` is matched against the start of the warning text).
warnings.filterwarnings("ignore", category=UserWarning, message="A new version of the following files was downloaded from")

In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the result CSVs and reports further down are read from and written to paths under it.
drive.mount('/content/drive')


Mounted at /content/drive


# Load Dataset

- id: unique identifier of the example
- source: original model input for paraphrase generation
- hyp1: first alternative paraphrase of the source
- hyp2: second alternative paraphrase of the source
- label: hyp1 or hyp2, based on which of those has been annotated as hallucination
- type: hallucination category assigned.
  - Possible values: addition, named-entity, number, conversion, date, tense, negation, gender, pronoun, antonym, natural

## Trial dataset

This is a small list of examples, provided to help the participants get familiar with the task. Each example contains the following fields:
- id
- source
- hyp1
- hyp2
- label
- type

In [4]:
from datasets import load_dataset

# Trial configuration with every split it contains (English and Swedish).
trial_ds = load_dataset("Eloquent/HalluciGen-PG", name="trial")

# The Swedish trial split requested on its own.
trial_ds_sv = load_dataset("Eloquent/HalluciGen-PG", name="trial", split="trial_swedish")

print(trial_ds.keys())

# One pandas frame per language, converted in the order English, Swedish.
trial_df_en, trial_df_se = (
    trial_ds[split_name].to_pandas()
    for split_name in ('trial_english', 'trial_swedish')
)

# Preview the first rows of each language.
print('English:')
display(trial_df_en.head())

print('Swedish')
display(trial_df_se.head())

Downloading readme:   0%|          | 0.00/3.64k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/8.19k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Generating trial_swedish split:   0%|          | 0/20 [00:00<?, ? examples/s]

Generating trial_english split:   0%|          | 0/16 [00:00<?, ? examples/s]

dict_keys(['trial_swedish', 'trial_english'])
English:


Unnamed: 0,id,source,type,hyp1,hyp2,label
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2


Swedish


Unnamed: 0,id,source,type,hyp1,hyp2,label
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1


In [5]:
# Test data for the cross-model evaluation, English and Swedish splits together.
data = load_dataset("Eloquent/HalluciGen-PG", "cross_model_evaluation")

print(data.keys())

# One pandas frame per language, converted in the order English, Swedish.
data_df_en, data_df_se = (
    data[split_name].to_pandas()
    for split_name in ('cross_model_evaluation_english', 'cross_model_evaluation_swedish')
)


Downloading data:   0%|          | 0.00/117k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/229k [00:00<?, ?B/s]

Generating cross_model_evaluation_swedish split:   0%|          | 0/380 [00:00<?, ? examples/s]

Generating cross_model_evaluation_english split:   0%|          | 0/595 [00:00<?, ? examples/s]

dict_keys(['cross_model_evaluation_swedish', 'cross_model_evaluation_english'])


In [6]:
# Show the English cross-model evaluation frame (the output below lists the columns id, source, hyp1, hyp2).
data_df_en

Unnamed: 0,id,source,hyp1,hyp2
0,0,I would also like to draw attention to Amendme...,the speaker wants to highlight amendments nos...,the speaker mentioned some random amendments ...
1,1,"It is we politicians who are most concerned, b...",Politicians are the most concerned because the...,Politicians are not really worried about the r...
2,2,An important factor here is that the pay and o...,It is crucial that the salary and benefits of ...,One key aspect to consider is that the compens...
3,3,There are four points in relation to which the...,The Committee on Agriculture and Rural Develop...,The Committee on Agriculture and Rural Develop...
4,4,People would think we were mad if we said tomo...,If we were to announce tomorrow that drivers c...,It would be considered normal if we suddenly d...
...,...,...,...,...
590,590,"The Member States are, in other words, being a...",The Member States are being asked to harmonize...,The Member States are being asked to harmonise...
591,591,We struggle with water on a daily basis in the...,"In the Netherlands, water is a daily struggle,...","Water is readily available in the Netherlands,..."
592,592,"On the contrary, things came to a head in the ...","In contrast, the issue escalated during the cr...","Conversely, the situation reached a critical p..."
593,593,It is extremely important that the forthcoming...,It is imperative that the upcoming European Co...,It is crucial that the upcoming European Counc...


In [7]:
# Show the Swedish cross-model evaluation frame (the output below lists the columns id, source, hyp1, hyp2).
data_df_se

Unnamed: 0,id,source,hyp1,hyp2
0,0,Romney tillkännager Paul Ryan som vicepresiden...,romney utser paul ryan som sin running mate i...,romney offentliggör att paul ryan blir hans m...
1,1,Syriens militärpolischef hoppade av till oppos...,Syriens militärpolischef hoppade av till oppos...,Syriens militärpolischef hoppade av till oppos...
2,2,"Yahoo går med på att köpa Tumblr för 1,1 milja...",Yahoo har för avsikt att sälja Tumblr till en ...,Yahoo har avtalat att förvärva Tumblr för en s...
3,3,"Önationen rapporterade 65 nya fall den 22 maj,...",Taiwans epidemi har inte sett någon ökning av ...,Taiwans epidemi har sett en ökning av 65 nya f...
4,4,Nordkorea säger att en amerikansk medborgare s...,Nordkoreas ledare Kim Jong-un säger att en ame...,Nordkoreas ledare Kim Jong-un säger att en ame...
...,...,...,...,...
375,375,Påven uppmanar till åtgärder mot klimatförändr...,Påven uppmanar till åtgärder mot klimatförändr...,Påven uppmanar till åtgärder mot klimatförändr...
376,376,"Andy Murray förtjänaren adelstitel, säger Davi...","Andy Murray förtjänaren adelstitel, säger Davi...","Andy Murray förtjänaren adelstitel, säger Davi..."
377,377,Beväpnad man dödar 6 i skottlossning mot Wisco...,En ensam gärningsman har skjutit sex personer ...,En beväpnad individ har dödat sex personer i e...
378,378,Ska jag skriva på det här papperet? Ska jag fö...,Om man skriver på det här papperet och förneka...,Det är inte alltid rätt att säga att någon som...


In [8]:
from google.colab import userdata
# Read the OpenAI key from Colab's user secrets instead of hard-coding it in the notebook.
api_key = userdata.get('OPENAI_API_KEY')

In [9]:
# Export the key as an environment variable — presumably where the OpenAI-backed model created below reads it from (TODO confirm).
os.environ['OPENAI_API_KEY'] = api_key

In [10]:
from guidance import models, instruction, system, user, assistant
# Shared model handle; the detect_hallu* functions below start every conversation from this object.
gpt = models.OpenAI("gpt-3.5-turbo")

# Prompt Version 1

In [11]:
def detect_hallu(source,hyp1,hyp2):
    """Ask the chat model which of two hypotheses is not supported by the source (English prompt).

    Args:
        source: Source text, interpolated into the prompt as "Src".
        hyp1: First hypothesis, interpolated into the prompt as "hyp1".
        hyp2: Second hypothesis, interpolated into the prompt as "hyp2".

    Returns:
        The text generated by the model under the key "answer". It is also
        printed before being returned. The text is returned unparsed.
    """
    # Interpolated into the prompt through its Python repr, i.e. {'label': ''}.
    answer_format = dict(label="")

    prompt_text = f'''
    Given a "src" and two hypotheses "hyp1" and "hyp2" your task is to detect which of the two hypotheses ("label") is not supported by the source.
    Provide the result in the following format: {answer_format}.

    Src: {source}
    hyp1 : {hyp1}
    hyp2 : {hyp2}

    Result:
    '''

    # The conversation is built role by role on top of the shared `gpt` model.
    with system():
        chat = gpt + "You are a classifier. You specialise in comparing sentences"

    with user():
        chat += prompt_text

    with assistant():
        # The generation is stored under the key "answer".
        chat += gen("answer",temperature=0.2)

    answer = chat["answer"]
    print(answer)
    return answer

In [None]:
def detect_hallu_se(source,hyp1,hyp2):
    """Ask the chat model which of two hypotheses is not supported by the source (Swedish prompt).

    Args:
        source: Source text, interpolated into the prompt as "Src".
        hyp1: First hypothesis, interpolated into the prompt as "hyp1".
        hyp2: Second hypothesis, interpolated into the prompt as "hyp2".

    Returns:
        The text generated by the model under the key "answer". It is also
        printed before being returned. The text is returned unparsed.
    """
    # Interpolated into the prompt through its Python repr, i.e. {'label': ''}.
    answer_format = dict(label="")

    prompt_text = f'''
      Givet en ”src” och två hypoteser ”hyp1” och ”hyp2” är din uppgift att upptäcka vilken av de två hypoteserna (”label”) som inte stöds av källan.
      Ge resultatet i följande format: {answer_format}.

      Src: {source}
      hyp1 : {hyp1}
      hyp2 : {hyp2}

      Resultat:
        '''

    # The conversation is built role by role on top of the shared `gpt` model.
    with system():
        chat = gpt + "Du är en klassificerare. Du är specialiserad på att jämföra meningar"

    with user():
        chat += prompt_text

    with assistant():
        # NOTE(review): no temperature is passed here, whereas detect_hallu passes
        # temperature=0.2 — confirm the difference between the two is intended.
        chat += gen("answer")

    answer = chat["answer"]
    print(answer)
    return answer

In [12]:
import json
import re

def extract_json_data(result_text):
    """Extract the first ``{...}`` object from a model answer and parse it into a dict.

    The prompts show the answer format as a Python dict repr (``{'label': ''}``),
    so answers may come back either as strict JSON or with single-quoted
    strings. Parsing is attempted in this order:

    1. strict JSON;
    2. JSON after replacing every single quote with a double quote (the
       original behaviour, kept so previously accepted answers parse the same);
    3. a Python literal via ``ast.literal_eval``, which copes with apostrophes
       inside values (e.g. ``"it's"``) that step 2 would corrupt.

    Args:
        result_text: Raw text returned by the model.

    Returns:
        dict: The parsed object.

    Raises:
        json.JSONDecodeError: If no ``{...}`` span is found or none of the
            parsing strategies succeeds.
    """
    import ast  # local import: only needed for the Python-literal fallback

    print(result_text)

    # Locate the opening brace and the first closing brace that follows it.
    start_index = result_text.find('{')
    end_index = result_text.find('}', start_index) + 1 if start_index != -1 else 0
    if start_index == -1 or end_index == 0:
        raise json.JSONDecodeError("No {...} object found in model output", result_text, 0)

    candidate = result_text[start_index:end_index]

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        try:
            # Legacy path: treat single quotes as string delimiters.
            parsed = json.loads(candidate.replace("'", "\""))
        except json.JSONDecodeError as json_error:
            try:
                parsed = ast.literal_eval(candidate)
            except (ValueError, SyntaxError, TypeError):
                raise json_error from None
            # "{...}" can also be a set literal; only a dict is a valid answer.
            if not isinstance(parsed, dict):
                raise json_error from None

    print("result_text:", parsed)
    return parsed

In [13]:
# Set display options to show full content of columns and rows.
# These options are global: every later DataFrame display in this kernel is affected, including the display(...) call inside save_results.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)


In [14]:
# Print 'source', 'hyp1' and 'hyp2' for the row whose index label is 108 (.loc selects by label, not by position).
print(data_df_en.loc[108, ['source', 'hyp1', 'hyp2']])

source       In May 1998, the Single Market Council adopted provisions regarding the development of a single market in medicinal products.
hyp1          The Single Market Council adopted provisions regarding the development of a single market in medicinal products in May 1998.
hyp2      The Single Market Council adopted provisions regarding the development of a single market in medicinal products in January 1998.
Name: 108, dtype: object


In [15]:
# Smoke test on a single example: the texts of row 108 printed above, without the trailing full stops.
source = "In May 1998, the Single Market Council adopted provisions regarding the development of a single market in medicinal products"
hyp1= "The Single Market Council adopted provisions regarding the development of a single market in medicinal products in May 1998"
hyp2="The Single Market Council adopted provisions regarding the development of a single market in medicinal products in January 1998"
# Raw model answer (detect_hallu also prints it itself).
result = detect_hallu(source,hyp1,hyp2)
print()
print(result)

# Parse the raw answer into a dict; `result` is rebound from str to dict here.
result = extract_json_data(result)
print(result)

{'label': 'hyp2'}

{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}
{'label': 'hyp2'}


## Generate dataset

In [16]:

import os
import numpy as np
import pandas as pd

def save_results(generate_paraphrase_func, df, csv_filename, chunk_size=10):
    """Run a detector over ``df`` and checkpoint its predictions to a CSV file.

    Rows are processed in chunks of ``chunk_size``. After each chunk the rows,
    extended with a ``prediction`` column, are appended to ``csv_filename``
    and the whole file is re-read and displayed. If the file already contains
    rows, only rows whose ``id`` is greater than the largest stored ``id`` are
    processed, so an interrupted run can be resumed by calling again.

    Args:
        generate_paraphrase_func: Callable ``(source, hyp1, hyp2)`` returning
            the raw model answer as text. Despite its name it is used here as
            the detector; the name is kept for existing callers.
        df: DataFrame with ``id``, ``source``, ``hyp1`` and ``hyp2`` columns.
        csv_filename: Path of the CSV file to create or extend.
        chunk_size: Number of rows written per checkpoint (default 10).

    Returns:
        None.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.

    Notes:
        A row whose answer cannot be obtained or parsed is still written, with
        an empty prediction. Because resuming is based on the largest stored
        ``id``, such rows are not retried by a later call.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Defaults cover a missing file, a zero-byte file and a header-only file:
    # start from the first row and process everything.
    next_id = 0
    remainder_df = df

    if os.path.exists(csv_filename) and os.path.getsize(csv_filename) > 0:
        existing_csv = pd.read_csv(csv_filename)
        if not existing_csv.empty:
            # Resume after the highest id that has already been written.
            next_id = existing_csv['id'].max() + 1
            remainder_df = df[df['id'] >= next_id]

    # The value printed is the first id that will be processed.
    print("Highest ID: ", next_id)

    if len(remainder_df) == 0:
        print("No rows left to process.")
        return

    for chunk_start in range(0, len(remainder_df), chunk_size):
        # Work on a copy so the caller's DataFrame is never modified.
        chunk = remainder_df.iloc[chunk_start:chunk_start + chunk_size].copy()

        # Create the column up front so every chunk has the same columns,
        # even when all of its rows fail; appended rows then stay aligned.
        if "prediction" not in chunk.columns:
            chunk["prediction"] = None

        for index, row in chunk.iterrows():
            source = row["source"]
            print("source",source)
            hyp1 = row["hyp1"]
            print("hyp1",hyp1)
            hyp2 = row["hyp2"]
            print("hyp2",hyp2)

            try:
                result = generate_paraphrase_func(source, hyp1, hyp2)
                print(result)

                extracted_hallu = extract_json_data(result)
                chunk.at[index, "prediction"] = extracted_hallu["label"]
            except Exception as error:
                # Best effort: keep going, but say why the row was skipped.
                # KeyboardInterrupt is not an Exception, so Ctrl-C still stops the run.
                print(f"Skipping row {index} due to error in data extraction.")
                print(f"Reason: {error!r}")
                continue

        # Write the header only when the file is new or still empty.
        write_header = (not os.path.isfile(csv_filename)
                        or os.path.getsize(csv_filename) == 0)
        chunk.to_csv(csv_filename, mode='a', header=write_header, index=False)

        # Show everything saved so far.
        saved_df = pd.read_csv(csv_filename)
        display(saved_df)

# Evaluation on trial data

In [None]:
# Run the detector over the English trial split; predictions are checkpointed to this CSV on Drive.
# NOTE(review): the file name says "narjes" but the detector passed is detect_hallu, the same one used for the "v1" English file further down — confirm which prompt produced this file.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_en_narjes_trial.csv"

save_results(detect_hallu, trial_df_en, csv_filename)

{'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp1
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


In [None]:
# Score the saved English trial predictions against the gold labels and store the report next to the results CSV.
import pandas as pd
from sklearn.metrics import classification_report

df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_en_narjes_trial.csv")

display(df)

# Build the classification report
report = classification_report(df['label'], df['prediction'])

print(report)

# Save the classification report to a text file
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/classification_report_gpt_en_narjes_trial.txt", "w") as text_file:
    text_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp1
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


              precision    recall  f1-score   support

        hyp1       0.88      0.78      0.82         9
        hyp2       0.75      0.86      0.80         7

    accuracy                           0.81        16
   macro avg       0.81      0.82      0.81        16
weighted avg       0.82      0.81      0.81        16



In [None]:
# Run a detector over the Swedish trial split; predictions are checkpointed to this CSV on Drive.
# NOTE(review): detect_hallu_narjes is not defined in the part of the notebook visible here; on a fresh kernel this cell raises NameError unless it is defined elsewhere — confirm, or restore its definition.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_se_narjes_trial.csv"

save_results(detect_hallu_narjes, trial_df_se, csv_filename)

Result:
{'label': 'hyp2'}
Result:
{'label': 'hyp2'}
Result:
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2,hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1,hyp2
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2,hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2,hyp1
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1,hyp1
5,5,Spaniens prinsessa vittnar i historisk bedräge...,gender,Spanska prinsessan blir en del av en utredning...,Spanska prinsen blir en del av en utredning om...,hyp2,hyp2
6,6,Mannen använder en slägga för att bryta betong...,antonym,Mannen använder en slägga för att krossa beton...,Mannen använder en slägga för att krossa beton...,hyp2,hyp2
7,7,Nordkorea carnar utlänningar i söder och ger r...,negation,Nordkorea varnar inte utländska medborgare i s...,Nordkorea varnar utländska medborgare i södra ...,hyp1,hyp1
8,8,Beväpnad man bland 7 döda efter lägenhetsskjut...,addition,En beväpnad man var bland de sju döda efter en...,En man med ett skjutvapen har dödat minst sju ...,hyp2,hyp1
9,9,Irans kärnvapenförhandlingar går in på tredje ...,number,Diskussioner om irans kärnvapenprogram har nåt...,Diskussioner om irans kärnvapenprogram har nåt...,hyp2,hyp2


In [None]:
# Score the saved Swedish trial predictions against the gold labels and store the report next to the results CSV.
import pandas as pd
from sklearn.metrics import classification_report

df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_se_narjes_trial.csv")

display(df)

# Build the classification report
report = classification_report(df['label'], df['prediction'])

print(report)

# Save the classification report to a text file
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/classification_report_gpt_se_narjes_trial.txt", "w") as text_file:
    text_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2,hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1,hyp2
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2,hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2,hyp1
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1,hyp1
5,5,Spaniens prinsessa vittnar i historisk bedräge...,gender,Spanska prinsessan blir en del av en utredning...,Spanska prinsen blir en del av en utredning om...,hyp2,hyp2
6,6,Mannen använder en slägga för att bryta betong...,antonym,Mannen använder en slägga för att krossa beton...,Mannen använder en slägga för att krossa beton...,hyp2,hyp2
7,7,Nordkorea carnar utlänningar i söder och ger r...,negation,Nordkorea varnar inte utländska medborgare i s...,Nordkorea varnar utländska medborgare i södra ...,hyp1,hyp1
8,8,Beväpnad man bland 7 döda efter lägenhetsskjut...,addition,En beväpnad man var bland de sju döda efter en...,En man med ett skjutvapen har dödat minst sju ...,hyp2,hyp1
9,9,Irans kärnvapenförhandlingar går in på tredje ...,number,Diskussioner om irans kärnvapenprogram har nåt...,Diskussioner om irans kärnvapenprogram har nåt...,hyp2,hyp2


              precision    recall  f1-score   support

        hyp1       0.50      0.33      0.40         9
        hyp2       0.57      0.73      0.64        11

    accuracy                           0.55        20
   macro avg       0.54      0.53      0.52        20
weighted avg       0.54      0.55      0.53        20



In [None]:
# Prompt version 1 (detect_hallu) on the English trial split; predictions are checkpointed to this CSV on Drive.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_en_v1_trial.csv"

save_results(detect_hallu, trial_df_en, csv_filename)

{'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp2
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


In [None]:
# Prompt version 1 (detect_hallu_se) on the Swedish trial split; predictions are checkpointed to this CSV on Drive.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_se_v1_trial.csv"

save_results(detect_hallu_se, trial_df_se, csv_filename)

In [None]:
# Score the saved English trial predictions (prompt version 1) against the gold labels and store the report next to the results CSV.
import pandas as pd
from sklearn.metrics import classification_report

df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_en_v1_trial.csv")

display(df)

# Build the classification report
report = classification_report(df['label'], df['prediction'])

print(report)

# Save the classification report to a text file
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/classification_report_gpt_en_v1_trial.txt", "w") as text_file:
    text_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp2
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


              precision    recall  f1-score   support

        hyp1       1.00      0.67      0.80         9
        hyp2       0.70      1.00      0.82         7

    accuracy                           0.81        16
   macro avg       0.85      0.83      0.81        16
weighted avg       0.87      0.81      0.81        16



In [None]:
import pandas as pd
from sklearn.metrics import classification_report

# Locations of the detection results to evaluate and of the report to write.
results_path = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_se_v1_trial.csv"
report_path = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/classification_report_gpt_se_v1_trial.txt"

df = pd.read_csv(results_path)
display(df)

# Build the classification report: gold 'label' column vs. the 'prediction' column.
report = classification_report(y_true=df['label'], y_pred=df['prediction'])
print(report)

# Save the classification report to a text file.
with open(report_path, "w") as report_file:
    report_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2,hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1,hyp2
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2,hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2,hyp1
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1,hyp1
5,5,Spaniens prinsessa vittnar i historisk bedräge...,gender,Spanska prinsessan blir en del av en utredning...,Spanska prinsen blir en del av en utredning om...,hyp2,hyp2
6,6,Mannen använder en slägga för att bryta betong...,antonym,Mannen använder en slägga för att krossa beton...,Mannen använder en slägga för att krossa beton...,hyp2,hyp2
7,7,Nordkorea carnar utlänningar i söder och ger r...,negation,Nordkorea varnar inte utländska medborgare i s...,Nordkorea varnar utländska medborgare i södra ...,hyp1,hyp2
8,8,Beväpnad man bland 7 döda efter lägenhetsskjut...,addition,En beväpnad man var bland de sju döda efter en...,En man med ett skjutvapen har dödat minst sju ...,hyp2,hyp2
9,9,Irans kärnvapenförhandlingar går in på tredje ...,number,Diskussioner om irans kärnvapenprogram har nåt...,Diskussioner om irans kärnvapenprogram har nåt...,hyp2,hyp2


              precision    recall  f1-score   support

        hyp1       0.80      0.44      0.57         9
        hyp2       0.67      0.91      0.77        11

    accuracy                           0.70        20
   macro avg       0.73      0.68      0.67        20
weighted avg       0.73      0.70      0.68        20



## Generate English data

In [17]:
# Output CSV for this run (the file name marks it as the GPT-3.5 cross-model
# detection results for the English data, version 2).
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/cross_model_detection_task/cross_model_detection_task_gpt/results_gpt35_cross_model_en_v2.csv"

# save_results, detect_hallu and data_df_en are defined outside this section.
# Judging by the recorded output, this call stores one 'prediction' (hyp1/hyp2)
# per row of data_df_en in csv_filename — confirm against the save_results definition.
save_results(detect_hallu, data_df_en, csv_filename)

{'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Unnamed: 0,id,source,hyp1,hyp2,prediction
0,0,"I would also like to draw attention to Amendments Nos 27, 29 and 32 on public inquiries or consultation in connection with trial releases.","the speaker wants to highlight amendments nos. 27, 29, and 32, which are related to public involvement in trials before their release.",the speaker mentioned some random amendments with no specific connection to trials or public participation.,hyp2
1,1,"It is we politicians who are most concerned, because we understand the link between energy consumption and CO2 emissions.",Politicians are the most concerned because they comprehend the connection between energy usage and CO2 emissions.,Politicians are not really worried about the relationship between energy consumption and CO2 emissions.,hyp2
2,2,An important factor here is that the pay and other working conditions of those employed in navigation should be in line with those of other occupations.,It is crucial that the salary and benefits of workers in the navigation industry are comparable to those in other professions.,One key aspect to consider is that the compensation and perks of employees in the field of navigation must be significantly higher than those in other lines of work.,hyp2
3,3,There are four points in relation to which the Committee on Agriculture and Rural Development has made changes which we believe will be adopted.,The Committee on Agriculture and Rural Development has made changes to four points that will be adopted.,The Committee on Agriculture and Rural Development has made changes to four points that will not be adopted.,hyp2
4,4,People would think we were mad if we said tomorrow that drivers were allowed to take their cars on the road without an insurance certificate.,"If we were to announce tomorrow that drivers could drive without an insurance certificate, people would consider us crazy.",It would be considered normal if we suddenly declared that drivers could operate their vehicles without insurance certificates.,hyp2
5,5,An important report was also put together under the leadership of Susan Waddington extending the debate to cover the issue of the trade in women.,Susan Waddington was solely responsible for compiling the report on the trade in women.,Susan Waddington led the effort to compile a significant report that expanded the discussion to include the topic of trafficking women.,hyp1
6,6,"Madam President, in yesterday's International Herald Tribune, the following appeared in the section 'News from 50 years ago'.",A recent article in the International Herald Tribune revisited historical news from the past.,The International Herald Tribune has been publishing fake news for decades.,hyp2
7,7,"Thirdly, the Commission agrees with the preoccupation of the rapporteur as far as the famous RAL, reste à liquider , the backlog, is concerned.",The Commission agrees with the preoccupation of the rapporteur as far as the backlog is concerned.,The Commission agrees with the preoccupation of the rapporteur as far as the famous RAL is concerned.,hyp2
8,8,I hope that the Euro-Mediterranean Conference to held in Stuttgart between 4 and 6 April will enable us to make progress in that direction.,the euro-mediterranean conference taking place in stuttgart from 4 to 6 april is expected to help us move forward in this regard.,the euro-mediterranean conference planned in stuttgart during april 4th to 6th will definitely lead us to success.,hyp2
9,9,"Mr President, let me express my thanks for the preceding remarks and extend a special welcome to my honourable colleague the Transport Commissioner.",I would like to strongly disagree with the previous comments and dismiss the Transport Commissioner's role.,I would like to take a moment to express my gratitude for the previous comments and warmly welcome the esteemed Transport Commissioner.,hyp1


In [18]:
# Read the detection results, keep only the example id and the predicted
# label, and expose the prediction under the column name 'label'.
df = (
    pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/cross_model_detection_task/cross_model_detection_task_gpt/results_gpt35_cross_model_en_v2.csv")
    .loc[:, ['id', 'prediction']]
    .rename(columns={'prediction': 'label'})
)

# Write the two-column frame to CSV without the row index.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/cross_model_detection_task/final_gpt35_en_v2_cross_model_detection.csv', index=False)

## Generate Swedish data

In [19]:
# Output CSV for this run (the file name marks it as the GPT-3.5 cross-model
# detection results for the Swedish data, version 2).
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/cross_model_detection_task/cross_model_detection_task_gpt/results_gpt35_cross_model_se_v2.csv"

# save_results, detect_hallu and data_df_se are defined outside this section.
# Judging by the recorded output, this call stores one 'prediction' per row of
# data_df_se in csv_filename — confirm against the save_results definition.
# NOTE(review): the recorded output contains a row with an empty prediction.
save_results(detect_hallu, data_df_se, csv_filename)

{'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Unnamed: 0,id,source,hyp1,hyp2,prediction
0,0,Romney tillkännager Paul Ryan som vicepresidentkandidat.,"romney utser paul ryan som sin running mate i valet. (denna parafras är mindre strikt eftersom ""utse"" kan ha en något annorlunda konnotation än ""tillkännage"".)",romney offentliggör att paul ryan blir hans medkandidat i presidentvalet.,hyp2
1,1,Syriens militärpolischef hoppade av till opposition,"Syriens militärpolischef hoppade av till opposition, vilket minskade intäkterna från mjukvarulicenser med 21 procent till 107,6 miljoner dollar.","Syriens militärpolischef hoppade av till opposition, vilket ökade intäkterna från mjukvarulicenser med 21 procent till 107,6 miljoner dollar.",hyp2
2,2,"Yahoo går med på att köpa Tumblr för 1,1 miljarder dollar kontant.","Yahoo har för avsikt att sälja Tumblr till en annan företag för en högre summa än 1,1 miljarder dollar.","Yahoo har avtalat att förvärva Tumblr för en summa av 1,1 miljarder dollar i kontanter.",hyp1
3,3,"Önationen rapporterade 65 nya fall den 22 maj, ett endagsrekord, och 55 nya fall den 23 maj, vilket gör Taiwans epidemi till den snabbast växande i världen.",Taiwans epidemi har inte sett någon ökning av nya fall under de senaste dagarna och situationen förblir stabil.,"Taiwans epidemi har sett en ökning av 65 nya fall den 22 maj, vilket var det högsta antalet på en dag hittills. Dessutom rapporterades 55 nya fall den 23 maj, vilket visar på en fortsatt ökningstrend i landet.",hyp1
4,4,Nordkorea säger att en amerikansk medborgare ska ställas inför rätta.,Nordkoreas ledare Kim Jong-un säger att en amerikansk medborgare ska ställas inför rätta.,Nordkoreas ledare Kim Jong-un säger att en amerikansk medborgare ska ställas inför rätta.,
5,5,Bush planerar att träffa Israels premiärminister Ariel Sharon och den nya palestinske premiärministern Mahmoud Abbas i den jordanska hamnen Aqaba på onsdag.,Bush planerar att träffa Israels premiärminister Ariel Sharon och den nya palestinske premiärministern Mahmoud Abbas i den jordanska hamnen Aqaba på onsdag.,Bush planerar att träffa Israels premiärminister Ariel Sharon och den nya palestinske premiärministern Mahmoud Abbas i den jordanska hamnen Aqaba på en nära framtid.,hyp2
6,6,"Rörelseintäkterna var 1,45 miljarder dollar, en ökning jämfört med förra årets resultat på 1,38 miljarder dollar.","Rörelseintäkterna var 1,45 miljarder dollar, vilket innebär en tillväxt på 11% jämfört med föregående år.","Rörelseintäkterna var 1,38 miljarder dollar, vilket innebär en tillväxt på 4% jämfört med föregående år.",hyp2
7,7,Indiska trupper som redan kämpar mot rebeller i Kashmir är nu engagerade i en massiv kampanj för att förstöra vallmogrödor i regionen.,Indiska trupper som redan kämpar mot rebeller i Kashmir är nu engagerade i en massiv kampanj för att förstöra vallmogrödor i regionen.,"Indiska trupper som redan kämpar mot rebeller i Kashmir är nu engagerade i en massiv kampanj för att förstöra vallmogrödor i regionen, vilket kommer att öka intäkterna i de flesta kommunerna.",hyp2
8,8,Syrien förnekar att de har ett odeklarerat kärnkraftsprogram.,Syrien förnekar inte att de har ett odeklarerat kärnkraftsprogram.,Syrien förnekar att de har ett odeklarerat kärnkraftsprogram.,hyp1
9,9,En flicka som leker i en hög med färgglada bollar.,Leksaken blir mer upphetsad när bollarna är mindre.,Leksaken leker på bollarna och blir mer upphetsad när bollarna är färgglada.,hyp1


In [21]:
# Read the detection results, keep only the example id and the predicted
# label, and expose the prediction under the column name 'label'.
df = (
    pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/cross_model_detection_task/cross_model_detection_task_gpt/results_gpt35_cross_model_se_v2.csv")
    .loc[:, ['id', 'prediction']]
    .rename(columns={'prediction': 'label'})
)

# Write the two-column frame to CSV without the row index.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/cross_model_detection_task/final_gpt35_se_v2_cross_model_detection.csv', index=False)

In [None]:
# update failed rows
def process_csv(file_path, generate_paraphrase_func, prompt):
    """Re-generate paraphrases for rows of a results CSV whose generation failed.

    A row counts as failed when its 'prompt', 'hyp+' and 'hyp-' cells are all
    empty. For each such row, ``generate_paraphrase_func`` is called with the
    row's 'source' text, the response is parsed with ``extract_json_data`` and
    the 'hyp+', 'hyp-' and 'prompt' cells are filled in. The attempted rows are
    displayed and the whole frame is written back to ``file_path``.

    Args:
        file_path: Path of the CSV file; it is read and then overwritten.
        generate_paraphrase_func: Callable that takes the source text and
            returns a response that ``extract_json_data`` can parse.
        prompt: Text recorded in the 'prompt' column of repaired rows. It is
            only stored; it is not passed to ``generate_paraphrase_func``.

    Returns:
        None. Results are shown with ``display`` and saved to ``file_path``.
    """
    df = pd.read_csv(file_path)

    # A column that is entirely empty is parsed as float64. Cast the columns
    # that receive text to object so strings can be written into them without
    # triggering pandas' incompatible-dtype warning/error.
    df = df.astype({column: object for column in ('hyp+', 'hyp-', 'prompt')})

    # IDs of every row for which a repair was attempted (successful or not).
    missing_ids = []

    for index, row in df.iterrows():
        # Only rows where all three generated cells are empty are repaired.
        if not (pd.isna(row['prompt']) and pd.isna(row['hyp+']) and pd.isna(row['hyp-'])):
            continue

        source = row['source']
        missing_ids.append(row['id'])

        try:
            # Generate paraphrase
            result = generate_paraphrase_func(source)
            extracted_hyp = extract_json_data(result)

            hyp_support = extracted_hyp['hyp+']
            print("Hyp+ : ", hyp_support)
            hyp_hallu = extracted_hyp['hyp-']
            print("Hyp- : ", hyp_hallu)

            print('source: ', source)

            # Update DataFrame with new values
            df.at[index, 'hyp+'] = hyp_support
            df.at[index, 'hyp-'] = hyp_hallu
            df.at[index, 'prompt'] = prompt

            print("ADDED")
        except Exception as exc:
            # Best effort: report the failure and move on to the next row.
            # Catching Exception (not a bare except) keeps Ctrl-C / kernel
            # interrupts working while the generation calls are running.
            print(f"Skipping row {index} due to error in data extraction: {exc!r}")
            continue

    rows_with_missing_ids = df[df['id'].isin(missing_ids)]

    display(rows_with_missing_ids)

    df.to_csv(file_path, header=True, index=False)

In [None]:
# Results CSV of the English generation task; process_csv reads it and writes it back.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_en.csv"

# Prompt template text. process_csv only stores this string in the 'prompt'
# column of repaired rows; the {answer_format} and {source} placeholders are
# not filled in here.
prompt = '''
    Given the src below, generate a paraphrase hypothesis  hyp+ that is supported by src and a second paraphrase hyp- that is not supported by src.

    Provide the result in the following format: {answer_format}

    Src: {source}

    Result:
'''

# Retry generation for rows whose 'prompt', 'hyp+' and 'hyp-' cells are all empty.
# process_csv has no return statement, so `result` is None and the trailing
# expression shows nothing; the table in the output comes from display()
# inside process_csv.
result = process_csv(csv_filename, generate_paraphrase, prompt)
result

Unnamed: 0,id,source,hyp+,hyp-,prompt


In [None]:
# save a csv file in the task format

import pandas as pd

# Read the generation results and keep only the columns 'id', 'hyp+' and 'hyp-'.
df = pd.read_csv(
    '/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_en.csv'
).loc[:, ['id', 'hyp+', 'hyp-']]

# Write the three-column frame to CSV without the row index.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/final_gpt35_en.csv', index=False)

# Swedish

In [None]:
# Results CSV of the Swedish generation task.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_se.csv"

# Swedish prompt template text with {answer_format} and {source} placeholders.
prompt = '''
    användare
    Med tanke på källan nedan, generera en parafras-hypotes hyp+ som stöds av källan och en andra parafras hyp- som inte stöds av källan.
    Ge resultatet i följande format: {answer_format}
    Källa: {source}
    Resultat:
      '''

# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'generate_paraphrase_se' is not defined"; that function must
# be defined in an earlier cell before this one can run.
# NOTE(review): save_results is called with four arguments here (prompt
# included) but with three in the detection cells — presumably a different
# save_results definition is expected at this point; confirm.
save_results(generate_paraphrase_se, prompt, data_df_se, csv_filename)

NameError: name 'generate_paraphrase_se' is not defined

In [None]:
# Results CSV of the Swedish generation task; process_csv reads it and writes it back.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_se.csv"

# Swedish prompt template text; process_csv only stores it in the 'prompt'
# column of repaired rows.
prompt = '''
    användare
    Med tanke på källan nedan, generera en parafras-hypotes hyp+ som stöds av källan och en andra parafras hyp- som inte stöds av källan.
    Ge resultatet i följande format: {answer_format}
    Källa: {source}
    Resultat:
      '''

# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'process_csv' is not defined", i.e. the cell defining
# process_csv had not been executed in that kernel session. Run that cell
# first; generate_paraphrase_se must also be defined.
# process_csv has no return statement, so `result` is None.
result = process_csv(csv_filename, generate_paraphrase_se, prompt)
result

NameError: name 'process_csv' is not defined

In [None]:
# Read the generation results and keep only the columns 'id', 'hyp+' and 'hyp-'.
df = pd.read_csv(
    "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_se.csv"
).loc[:, ['id', 'hyp+', 'hyp-']]

# Write the three-column frame to CSV without the row index.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/final_gpt35_se.csv', index=False)