In [None]:
!pip install pandas transformers guidance numpy evaluate rouge-score nltk py-rouge accelerate

Collecting guidance
  Downloading guidance-0.1.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (223 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.6/223.6 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting py-rouge
  Downloading py_rouge-1.1-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.30.0-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.4/302.4 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
Collecting diskcache (from guidance

In [None]:
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import json
import nltk
import numpy as np
from guidance import gen
from evaluate import load
import csv
import nltk.translate.bleu_score as bleu
from rouge import Rouge
from transformers import BertTokenizer, BertModel
import torch
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import warnings
from typing import Any, Dict, Tuple
import re
import os

warnings.filterwarnings("ignore", category=UserWarning, message="A new version of the following files was downloaded from")

In [None]:
# Mount Google Drive; the result CSVs and reports further down are written under /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


# Load Dataset

- id: unique identifier of the example
- source: original model input for paraphrase generation
- hyp1: first alternative paraphrase of the source
- hyp2: second alternative paraphrase of the source
- label: hyp1 or hyp2, based on which of those has been annotated as hallucination
- type: hallucination category assigned.
  - Possible values: addition, named-entity, number, conversion, date, tense, negation, gender, pronoun, antonym, natural

## Trial dataset

This is a small list of examples, provided to help the participants get familiar with the task. Each example contains the following fields:
- id
- source
- hyp1
- hyp2
- label
- type

In [None]:
from datasets import load_dataset

# Load the "trial" configuration, which contains both the English and the Swedish split.
trial_ds = load_dataset("Eloquent/HalluciGen-PG", name="trial")

# Load only the Swedish trial split.
# NOTE(review): trial_df_se below is built from trial_ds, not from this object.
trial_ds_sv = load_dataset("Eloquent/HalluciGen-PG", name="trial", split="trial_swedish")

# Show which splits are available.
print(trial_ds.keys())

# Convert each split to a pandas DataFrame for the rest of the notebook.
trial_df_en = trial_ds['trial_english'].to_pandas()
trial_df_se= trial_ds['trial_swedish'].to_pandas()

print('English:')

display(trial_df_en.head())

print('Swedish')
display(trial_df_se.head())

Downloading readme:   0%|          | 0.00/3.64k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/8.19k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Generating trial_swedish split:   0%|          | 0/20 [00:00<?, ? examples/s]

Generating trial_english split:   0%|          | 0/16 [00:00<?, ? examples/s]

dict_keys(['trial_swedish', 'trial_english'])
English:


Unnamed: 0,id,source,type,hyp1,hyp2,label
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2


Swedish


Unnamed: 0,id,source,type,hyp1,hyp2,label
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1


In [None]:
# Load the "test_detection" configuration (test data for the detection step) in both English and Swedish.
data = load_dataset("Eloquent/HalluciGen-PG", "test_detection")

# Show which splits are available.
print(data.keys())

# Convert each split to a pandas DataFrame.
data_df_en = data['test_detection_english'].to_pandas()
data_df_se= data['test_detection_swedish'].to_pandas()


Downloading data:   0%|          | 0.00/41.8k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/38.5k [00:00<?, ?B/s]

Generating test_detection_swedish split:   0%|          | 0/119 [00:00<?, ? examples/s]

Generating test_detection_english split:   0%|          | 0/119 [00:00<?, ? examples/s]

dict_keys(['test_detection_swedish', 'test_detection_english'])


In [None]:
data_df_en

Unnamed: 0,id,source,hyp1,hyp2
0,0,It has enabled us to support and encourage an ...,You can support and encourage an exchange of e...,We can support and encourage an exchange of ex...
1,1,"Therefore, I am calling for an increase in the...",I want to see a decrease in the premiums for a...,I want to see an increase in the premiums for ...
2,2,"In other words, a person may be prevented from...",When there is reason to fear that a violent ac...,When there is reason to fear that a violent ac...
3,3,None of my 34 amendments were adopted and my a...,Our arguments against the ridiculous administr...,My arguments against the ridiculous administra...
4,4,The final key feature of the report is its ori...,The final main feature of the report is its or...,The final features of the report are its orien...
...,...,...,...,...
114,114,I am referring to the current vote but in rela...,The vote on Amendment No 98 will take place be...,The vote on Amendment No 98 will take place af...
115,115,When I became a Member of the European Parliam...,I was struck by the abundance of knowledge whe...,I was struck by the lack of knowledge when I j...
116,116,"Mr President, let me express my thanks for the...","Mr President, I would like to extend a warm we...","Madam President, I would like to extend a warm..."
117,117,We can replace 16 out of 626 Members of the Eu...,"As voters we cannot decide on a new law, but w...","As voters decide on a new law, we can replace ..."


In [None]:
data_df_se

Unnamed: 0,id,source,hyp1,hyp2
0,0,Kvinnor kommer att möta högre bilförsäkringspr...,Det betyder att kvinnor kan förvänta sig att b...,Kvinnor kommer att få högre premier för bilför...
1,1,"Rörelseintäkterna var 1,45 miljarder dollar, e...","Intäkterna från rörelsen var 1,45 miljarder do...","Intäkterna från rörelsen var 1,45 miljarder do..."
2,2,"Mandela tillbaka på sjukhus i ""allvarligt men ...",Mandela återvänder inte till sjukhuset med all...,Mandela återvänder till sjukhuset med allvarli...
3,3,Egypten fryser Muslimska brödraskapets tillgån...,Den egyptiska regeringen har fryst tillgångar ...,Egypten fryser tillgångarna för Muslimska bröd...
4,4,"P-pillret, som de kallar ""polypiller"", skulle ...","Medicinen som vanligtvis kallas ""poly pill"" sä...","Medicinen som vanligtvis kallas ""poly pill"" sä..."
...,...,...,...,...
114,114,Texaner som dignar under skyhöga hemförsäkring...,Invånare i Texas som har belastats med orimlig...,Invånare i Texas som har belastats med orimlig...
115,115,Uppdaterad - Två explosioner nära mållinjen i ...,Uppdatering: Nära målet i Boston Marathon intr...,Två explosioner inträffade under loppet av Bos...
116,116,Anthony Weiner halkar ned till fjärde plats i ...,Anthony Weiner halkar till fjärde plats i ny o...,Enligt en ny opinionsundersökning från Quinnip...
117,117,Östtimor förbjuder kampsportsklubbar på grund ...,I Östtimor ska man förbjuda klubbar som utövar...,I Östtimor har man förbjudit klubbar som utöva...


In [None]:
# Read the OpenAI API key from the Colab user secrets rather than hard-coding it in the notebook.
from google.colab import userdata
api_key = userdata.get('OPENAI_API_KEY')

In [None]:
# Expose the key through the process environment.
# NOTE(review): presumably this is where the guidance OpenAI client looks for it — confirm.
os.environ['OPENAI_API_KEY'] = api_key

In [None]:
from guidance import models, instruction, system, user, assistant
# Shared chat-model handle; every detect_hallu* prompt function starts its conversation from `gpt`.
gpt = models.OpenAI("gpt-3.5-turbo")

# Prompt Version 1

In [None]:
def detect_hallu_narjes(source, hyp1, hyp2):
    """Ask the chat model which hypothesis contradicts the source ("researcher" prompt).

    Args:
        source: The source text, interpolated into the prompt as the gathered data.
        hyp1: First hypothesis text.
        hyp2: Second hypothesis text.

    Returns:
        The raw text the model generated for the "answer" slot. The prompt asks
        for the form ``{'label': ''}``, but the reply is returned unparsed.

    Side effects:
        Prints the raw reply.
    """
    # Rendered into the prompt through the dict's repr, i.e. {'label': ''}.
    response_template = {"label": ""}

    # NOTE(review): the ")" after {hyp2} looks like a typo. It is left untouched
    # because changing the prompt would change what is sent to the model.
    prompt = f'''
    You have gathered data ({source}) and formulated two competing hypotheses to explain it.
    hyp1: {hyp1}
    hyp2: {hyp2})
    Identify the hypothesis that contradicts the information provided in the given source.

    Provide the result in the following format: {response_template}.

    Result:
    '''

    # Build the conversation role by role, starting from the shared model handle.
    with system():
        chat = gpt + "You are a researcher investigating a new phenomenon."

    with user():
        chat += prompt

    with assistant():
        chat += gen("answer",temperature=0.2)

    answer = chat["answer"]
    print(answer)
    return answer

In [None]:
def detect_hallu(source, hyp1, hyp2):
    """Ask the chat model which hypothesis is not supported by the source (English "classifier" prompt).

    Args:
        source: The source text.
        hyp1: First hypothesis text.
        hyp2: Second hypothesis text.

    Returns:
        The raw text the model generated for the "answer" slot. The prompt asks
        for the form ``{'label': ''}``, but the reply is returned unparsed.
    """
    # Rendered into the prompt through the dict's repr, i.e. {'label': ''}.
    response_template = {"label": ""}

    prompt = f'''
    Given a "src" and two hypotheses "hyp1" and "hyp2" your task is to detect which of the two hypotheses ("label") is not supported by the source.
    Provide the result in the following format: {response_template}.

    Src: {source}
    hyp1 : {hyp1}
    hyp2 : {hyp2}

    Result:
    '''

    # Build the conversation role by role, starting from the shared model handle.
    with system():
        chat = gpt + "You are a classifier. You specialise in comparing sentences"

    with user():
        chat += prompt

    with assistant():
        chat += gen("answer")

    return chat["answer"]

In [None]:
def detect_hallu_se(source, hyp1, hyp2):
    """Swedish-language variant of the "classifier" prompt.

    Args:
        source: The source text.
        hyp1: First hypothesis text.
        hyp2: Second hypothesis text.

    Returns:
        The raw text the model generated for the "answer" slot. The prompt asks
        for the form ``{'label': ''}``, but the reply is returned unparsed.

    Side effects:
        Prints the raw reply.
    """
    # Rendered into the prompt through the dict's repr, i.e. {'label': ''}.
    response_template = {"label": ""}

    prompt = f'''
      Givet en ”src” och två hypoteser ”hyp1” och ”hyp2” är din uppgift att upptäcka vilken av de två hypoteserna (”label”) som inte stöds av källan.
      Ge resultatet i följande format: {response_template}.

      Src: {source}
      hyp1 : {hyp1}
      hyp2 : {hyp2}

      Resultat:
        '''

    # Build the conversation role by role, starting from the shared model handle.
    with system():
        chat = gpt + "Du är en klassificerare. Du är specialiserad på att jämföra meningar"

    with user():
        chat += prompt

    with assistant():
        chat += gen("answer")

    answer = chat["answer"]
    print(answer)
    return answer

In [None]:
import json
import re

def extract_json_data(result_text):
    """Extract and parse the first ``{...}`` object embedded in a model reply.

    The model is asked to answer in the form ``{'label': 'hyp1'}``, which is a
    Python dict repr rather than strict JSON, and it may wrap the object in
    extra text such as ``Result:``. Parsing is attempted in this order:

    1. strict JSON;
    2. a Python literal (single-quoted keys/values, apostrophes inside
       double-quoted strings);
    3. the legacy fallback of replacing every ``'`` with ``"`` and retrying
       as JSON (covers single-quoted text that uses JSON literals like ``true``).

    Args:
        result_text: Raw text returned by the model.

    Returns:
        The parsed object as a ``dict``.

    Raises:
        ValueError: If no ``{...}`` span is present, or the span cannot be
            parsed into a dict by any of the strategies above.

    Side effects:
        Prints the raw text and the parsed result.
    """
    import ast  # local import: only needed for the Python-literal fallback

    print(result_text)

    start_index = result_text.find('{')
    # Look for the closing brace only *after* the opening one, so a stray '}'
    # earlier in the reply cannot produce an empty slice.
    end_index = result_text.find('}', start_index) + 1 if start_index != -1 else 0
    if start_index == -1 or end_index == 0:
        raise ValueError(f"No JSON object found in model output: {result_text!r}")

    candidate = result_text[start_index:end_index]

    parsed = None
    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        try:
            # literal_eval only evaluates literals, so it is safe on model output.
            parsed = ast.literal_eval(candidate)
        except (ValueError, SyntaxError, TypeError):
            try:
                parsed = json.loads(candidate.replace("'", "\""))
            except json.JSONDecodeError as exc:
                raise ValueError(
                    f"Could not parse model output as JSON: {candidate!r}"
                ) from exc

    if not isinstance(parsed, dict):
        raise ValueError(f"Expected a JSON object, got: {candidate!r}")

    print("result_text:", parsed)
    return parsed

In [None]:
# Set display options to show full content of columns and rows.
# Passing None lifts the limit, so DataFrames displayed after this cell are not truncated.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)


In [None]:
# Print 'source', 'hyp1' and 'hyp2' for the row whose index label is 108
# (.loc selects by label, not by position).
print(data_df_en.loc[108, ['source', 'hyp1', 'hyp2']])

source    I had proposed to you that the Euro-11 be recognised in the Treaty, but my understanding was that Mr Spiers did not agree.
hyp1                                        I wanted the Euro-11 to be recognised in the Treaty but Mr. Spiers didn't agree with me.
hyp2                                              I wanted the Euro-11 to be recognised in the Treaty and Mr. Spiers agreed with me.
Name: 108, dtype: object


In [None]:
# Smoke test on a single hand-copied example: query the model, then parse its reply.
source = "I had proposed to you that the Euro-11 be recognised in the Treaty, but my understanding was that Mr Spiers did not agree."
hyp1= "I wanted the Euro-11 to be recognised in the Treaty but Mr. Spiers didn't agree with me."
hyp2="I wanted the Euro-11 to be recognised in the Treaty and Mr. Spiers agreed with me."
# Raw model reply (a string).
result = detect_hallu_narjes(source,hyp1,hyp2)
print()
print(result)

# From here on `result` holds the parsed dict, no longer the raw string.
result = extract_json_data(result)
print(result)

{'label': 'hyp2'}

{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}
{'label': 'hyp2'}


## Generate predictions and save results

In [None]:

import os
import numpy as np
import pandas as pd

def save_results(generate_paraphrase_func, df, csv_filename, chunk_size=10):
    """Run a detection prompt over ``df`` and checkpoint the predictions to a CSV file.

    Rows are processed in consecutive chunks of ``chunk_size``. After each
    chunk the rows, plus a ``prediction`` column, are appended to
    ``csv_filename``, so an interrupted run can be resumed: if the file already
    contains rows, only rows whose ``id`` is greater than the largest stored
    ``id`` are processed.

    Differences from the previous implementation:

    * Chunks are exactly ``chunk_size`` rows (last one may be shorter) instead
      of ``np.array_split``'s roughly equal parts. Only the checkpoint
      boundaries change; the final CSV content is the same.
    * Calling the function when nothing is left to process, or when the CSV
      exists but holds no rows, is now handled instead of raising.
    * ``df`` is never modified; each chunk is copied before ``prediction`` is set.
    * ``prediction`` always exists in the written chunk, so the CSV header stays
      consistent even if every row of the first chunk fails.

    Args:
        generate_paraphrase_func: Callable ``(source, hyp1, hyp2) -> str``
            returning the raw model reply (e.g. one of the ``detect_hallu*``
            functions). The name is historical; it is kept for compatibility.
        df: DataFrame with at least the columns ``id``, ``source``, ``hyp1``
            and ``hyp2``.
        csv_filename: Path of the CSV file to create or append to.
        chunk_size: Number of rows processed between two checkpoints.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.

    Side effects:
        Prints progress information, writes/appends to ``csv_filename`` and
        displays the accumulated CSV after every chunk.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    # Work out where to resume. A missing, empty or header-only file means "start from scratch".
    next_id = 0
    resuming = False
    if os.path.exists(csv_filename):
        try:
            existing_csv = pd.read_csv(csv_filename)
        except pd.errors.EmptyDataError:
            existing_csv = pd.DataFrame()
        if not existing_csv.empty:
            next_id = existing_csv['id'].max() + 1
            resuming = True

    print("Highest ID: ", next_id)

    remainder_df = df[df['id'] >= next_id] if resuming else df

    for start in range(0, len(remainder_df), chunk_size):
        # Copy so that adding/setting "prediction" never touches the caller's frame.
        chunk = remainder_df.iloc[start:start + chunk_size].copy()
        if "prediction" not in chunk.columns:
            # Pre-create the column so rows that fail still get an (empty) cell.
            chunk["prediction"] = None

        for index, row in chunk.iterrows():
            source = row["source"]
            print("source",source)
            hyp1 = row["hyp1"]
            print("hyp1",hyp1)
            hyp2 = row["hyp2"]
            print("hyp2",hyp2)

            try:
                result = generate_paraphrase_func(source, hyp1, hyp2)
                print(result)

                extracted_hallu = extract_json_data(result)
                chunk.at[index, "prediction"] = extracted_hallu["label"]
            except Exception as exc:
                # Best effort: keep going, but say why the row was skipped.
                # KeyboardInterrupt is deliberately not caught so a run can be aborted.
                print(f"Skipping row {index} due to error in data extraction: {exc!r}")
                continue

        # Write a header only when starting a new (or empty) file; otherwise append.
        write_header = (not os.path.isfile(csv_filename)
                        or os.path.getsize(csv_filename) == 0)
        chunk.to_csv(csv_filename,
                     mode='w' if write_header else 'a',
                     header=write_header,
                     index=False)

        # Show everything saved so far.
        saved_df = pd.read_csv(csv_filename)
        display(saved_df)

In [None]:
# English trial data with the "researcher" prompt (detect_hallu_narjes); predictions are saved to this CSV on Drive.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_en_narjes_trial.csv"

save_results(detect_hallu_narjes, trial_df_en, csv_filename)

{'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp1
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


In [None]:
import pandas as pd
from sklearn.metrics import classification_report

# Reload the saved predictions for the English trial run (researcher prompt).
df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_en_narjes_trial.csv")

display(df)

# Build the classification report (gold 'label' vs. model 'prediction').
report = classification_report(df['label'], df['prediction'])

print(report)

# Save the classification report to a text file.
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/classification_report_gpt_en_narjes_trial.txt", "w") as text_file:
    text_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp1
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


              precision    recall  f1-score   support

        hyp1       0.88      0.78      0.82         9
        hyp2       0.75      0.86      0.80         7

    accuracy                           0.81        16
   macro avg       0.81      0.82      0.81        16
weighted avg       0.82      0.81      0.81        16



In [None]:
# Swedish trial data, run through the same English-language "researcher" prompt (detect_hallu_narjes).
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_se_narjes_trial.csv"

save_results(detect_hallu_narjes, trial_df_se, csv_filename)

Result:
{'label': 'hyp2'}
Result:
{'label': 'hyp2'}
Result:
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2,hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1,hyp2
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2,hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2,hyp1
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1,hyp1
5,5,Spaniens prinsessa vittnar i historisk bedräge...,gender,Spanska prinsessan blir en del av en utredning...,Spanska prinsen blir en del av en utredning om...,hyp2,hyp2
6,6,Mannen använder en slägga för att bryta betong...,antonym,Mannen använder en slägga för att krossa beton...,Mannen använder en slägga för att krossa beton...,hyp2,hyp2
7,7,Nordkorea carnar utlänningar i söder och ger r...,negation,Nordkorea varnar inte utländska medborgare i s...,Nordkorea varnar utländska medborgare i södra ...,hyp1,hyp1
8,8,Beväpnad man bland 7 döda efter lägenhetsskjut...,addition,En beväpnad man var bland de sju döda efter en...,En man med ett skjutvapen har dödat minst sju ...,hyp2,hyp1
9,9,Irans kärnvapenförhandlingar går in på tredje ...,number,Diskussioner om irans kärnvapenprogram har nåt...,Diskussioner om irans kärnvapenprogram har nåt...,hyp2,hyp2


In [None]:
import pandas as pd
from sklearn.metrics import classification_report

# Reload the saved predictions for the Swedish trial run (researcher prompt).
df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_se_narjes_trial.csv")

display(df)

# Build the classification report (gold 'label' vs. model 'prediction').
report = classification_report(df['label'], df['prediction'])

print(report)

# Save the classification report to a text file.
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/classification_report_gpt_se_narjes_trial.txt", "w") as text_file:
    text_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2,hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1,hyp2
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2,hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2,hyp1
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1,hyp1
5,5,Spaniens prinsessa vittnar i historisk bedräge...,gender,Spanska prinsessan blir en del av en utredning...,Spanska prinsen blir en del av en utredning om...,hyp2,hyp2
6,6,Mannen använder en slägga för att bryta betong...,antonym,Mannen använder en slägga för att krossa beton...,Mannen använder en slägga för att krossa beton...,hyp2,hyp2
7,7,Nordkorea carnar utlänningar i söder och ger r...,negation,Nordkorea varnar inte utländska medborgare i s...,Nordkorea varnar utländska medborgare i södra ...,hyp1,hyp1
8,8,Beväpnad man bland 7 döda efter lägenhetsskjut...,addition,En beväpnad man var bland de sju döda efter en...,En man med ett skjutvapen har dödat minst sju ...,hyp2,hyp1
9,9,Irans kärnvapenförhandlingar går in på tredje ...,number,Diskussioner om irans kärnvapenprogram har nåt...,Diskussioner om irans kärnvapenprogram har nåt...,hyp2,hyp2


              precision    recall  f1-score   support

        hyp1       0.50      0.33      0.40         9
        hyp2       0.57      0.73      0.64        11

    accuracy                           0.55        20
   macro avg       0.54      0.53      0.52        20
weighted avg       0.54      0.55      0.53        20



In [None]:
# English trial data with the English "classifier" prompt (detect_hallu).
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_en_v1_trial.csv"

save_results(detect_hallu, trial_df_en, csv_filename)

{'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp2
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


In [None]:
# Swedish trial data with the Swedish "classifier" prompt (detect_hallu_se).
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_se_v1_trial.csv"

save_results(detect_hallu_se, trial_df_se, csv_filename)

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

# Reload the saved predictions for the English trial run (classifier prompt).
df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_en_v1_trial.csv")

display(df)

# Build the classification report (gold 'label' vs. model 'prediction').
report = classification_report(df['label'], df['prediction'])

print(report)

# Save the classification report to a text file.
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/classification_report_gpt_en_v1_trial.txt", "w") as text_file:
    text_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,The population has declined in some 210 of the...,antonym,In the majority of Sweden's 280 municipalities...,In the majority of Sweden's 280 municipalities...,hyp1,hyp1
1,1,The draft agenda as drawn up by the Conference...,negation,The Conference of Presidents hasn't distribute...,The Conference of Presidents has distributed t...,hyp1,hyp1
2,2,I am always grateful for comments and suggesti...,antonym,I think I need to listen to everyone's views i...,I think I need to listen to everyone's views i...,hyp1,hyp1
3,3,The fact is that a key omission from the propo...,named entity,Agenda 2030 does not include a chapter on rene...,Agenda 2000 does not include a chapter on rene...,hyp1,hyp1
4,4,"We need quite specific legislative proposals, ...",natural,Legislative proposals based on the Treaty of A...,Legislative proposals based solely on the Trea...,hyp2,hyp2
5,5,This state of affairs has not changed in more ...,addition,There has been no change in the status quo in ...,The state of affairs is1-65561-65561-65561-655...,hyp2,hyp2
6,6,"Madam President, I am speaking on behalf of ou...",gender,One of the motions for a resolution was drafte...,One of the motions for a resolution was drafte...,hyp2,hyp2
7,7,Amendment No 1 in the French version deletes i...,natural,The French version excludes the expression'pol...,The French version excludes the expression 'po...,hyp1,hyp2
8,8,The European Commission proposes that this inf...,number,The EU wants this information to enter into fo...,The EU wants this information to enter into fo...,hyp2,hyp2
9,9,"Mr President, I did prepare a speech but I hav...",pronoun,Many of the points they were going to make in ...,Many of the points I was going to make in my s...,hyp1,hyp2


              precision    recall  f1-score   support

        hyp1       1.00      0.67      0.80         9
        hyp2       0.70      1.00      0.82         7

    accuracy                           0.81        16
   macro avg       0.85      0.83      0.81        16
weighted avg       0.87      0.81      0.81        16



In [None]:
import pandas as pd
from sklearn.metrics import classification_report

# Reload the saved predictions for the Swedish trial run (classifier prompt).
df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/results_gpt_se_v1_trial.csv")

display(df)

# Build the classification report (gold 'label' vs. model 'prediction').
report = classification_report(df['label'], df['prediction'])

print(report)

# Save the classification report to a text file.
with open("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/trial/classification_report_gpt_se_v1_trial.txt", "w") as text_file:
    text_file.write(report)

Unnamed: 0,id,source,type,hyp1,hyp2,label,prediction
0,0,"Men intäkterna från mjukvarulicenser, ett mått...",number,"Intäkter från programvarulicenser, en metrik s...","Intäkter från programvarulicenser, en metrik s...",hyp2,hyp2
1,1,Hongkong-universitet samarbetar med universite...,natural,University of Hong Kong samarbetar med olika k...,University of Hong Kong samarbetar med olika k...,hyp1,hyp2
2,2,Län med befolkningsminskning kommer att vara V...,named entity,"Vermillion, Posey och Madison är län som komme...","Vermillion, Posey och Marion är län som kommer...",hyp2,hyp2
3,3,Google presenterar en prototyp för en självkör...,addition,En prototyp för en självkörande bil presentera...,Enligt ett blogginlägg från Google har företag...,hyp2,hyp1
4,4,Lagförslaget säger att en kvinna som genomgår ...,negation,Förslaget stadgar att en kvinna som genomgår e...,Förslaget stadgar att en kvinna som genomgår e...,hyp1,hyp1
5,5,Spaniens prinsessa vittnar i historisk bedräge...,gender,Spanska prinsessan blir en del av en utredning...,Spanska prinsen blir en del av en utredning om...,hyp2,hyp2
6,6,Mannen använder en slägga för att bryta betong...,antonym,Mannen använder en slägga för att krossa beton...,Mannen använder en slägga för att krossa beton...,hyp2,hyp2
7,7,Nordkorea carnar utlänningar i söder och ger r...,negation,Nordkorea varnar inte utländska medborgare i s...,Nordkorea varnar utländska medborgare i södra ...,hyp1,hyp2
8,8,Beväpnad man bland 7 döda efter lägenhetsskjut...,addition,En beväpnad man var bland de sju döda efter en...,En man med ett skjutvapen har dödat minst sju ...,hyp2,hyp2
9,9,Irans kärnvapenförhandlingar går in på tredje ...,number,Diskussioner om irans kärnvapenprogram har nåt...,Diskussioner om irans kärnvapenprogram har nåt...,hyp2,hyp2


              precision    recall  f1-score   support

        hyp1       0.80      0.44      0.57         9
        hyp2       0.67      0.91      0.77        11

    accuracy                           0.70        20
   macro avg       0.73      0.68      0.67        20
weighted avg       0.73      0.70      0.68        20



In [None]:
# Run the v1 detector (detect_hallu) over the English data frame and store
# the outcome at csv_filename. save_results, detect_hallu and data_df_en are
# defined in earlier cells; this cell only works after those have been run.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/results_gpt_en_v1.csv"

save_results(detect_hallu, data_df_en, csv_filename)

{'label': 'hyp2'}
{'label': 'hyp2'}
{'label': 'hyp2'}
result_text: {'label': 'hyp2'}


Unnamed: 0,id,source,hyp1,hyp2,prediction
0,0,It has enabled us to support and encourage an ...,You can support and encourage an exchange of e...,We can support and encourage an exchange of ex...,hyp2
1,1,"Therefore, I am calling for an increase in the...",I want to see a decrease in the premiums for a...,I want to see an increase in the premiums for ...,hyp1
2,2,"In other words, a person may be prevented from...",When there is reason to fear that a violent ac...,When there is reason to fear that a violent ac...,hyp2
3,3,None of my 34 amendments were adopted and my a...,Our arguments against the ridiculous administr...,My arguments against the ridiculous administra...,hyp1
4,4,The final key feature of the report is its ori...,The final main feature of the report is its or...,The final features of the report are its orien...,hyp2
...,...,...,...,...,...
114,114,I am referring to the current vote but in rela...,The vote on Amendment No 98 will take place be...,The vote on Amendment No 98 will take place af...,hyp2
115,115,When I became a Member of the European Parliam...,I was struck by the abundance of knowledge whe...,I was struck by the lack of knowledge when I j...,hyp1
116,116,"Mr President, let me express my thanks for the...","Mr President, I would like to extend a warm we...","Madam President, I would like to extend a warm...",hyp2
117,117,We can replace 16 out of 626 Members of the Eu...,"As voters we cannot decide on a new law, but w...","As voters decide on a new law, we can replace ...",hyp2


In [None]:
# Reduce the saved English v1 detection results to the two columns that are
# exported: the example 'id' and the model 'prediction', renamed to 'label'.
df = (
    pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/results_gpt_en_v1.csv")
    .loc[:, ['id', 'prediction']]
    .rename(columns={'prediction': 'label'})
)

# Write the reduced table; the pandas index is left out of the file.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/final_gpt_en_v1_detection.csv', index=False)

In [None]:
# Run the Swedish v1 detector (detect_hallu_se) over the Swedish data frame
# and store the outcome at csv_filename. save_results, detect_hallu_se and
# data_df_se come from earlier cells and must already exist in the kernel.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/results_gpt_se_v1.csv"

save_results(detect_hallu_se, data_df_se, csv_filename)

{'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Unnamed: 0,id,source,hyp1,hyp2,prediction
0,0,Kvinnor kommer att möta högre bilförsäkringspr...,Det betyder att kvinnor kan förvänta sig att b...,Kvinnor kommer att få högre premier för bilför...,hyp2
1,1,"Rörelseintäkterna var 1,45 miljarder dollar, e...","Intäkterna från rörelsen var 1,45 miljarder do...","Intäkterna från rörelsen var 1,45 miljarder do...",hyp1
2,2,"Mandela tillbaka på sjukhus i ""allvarligt men ...",Mandela återvänder inte till sjukhuset med all...,Mandela återvänder till sjukhuset med allvarli...,hyp1
3,3,Egypten fryser Muslimska brödraskapets tillgån...,Den egyptiska regeringen har fryst tillgångar ...,Egypten fryser tillgångarna för Muslimska bröd...,hyp2
4,4,"P-pillret, som de kallar ""polypiller"", skulle ...","Medicinen som vanligtvis kallas ""poly pill"" sä...","Medicinen som vanligtvis kallas ""poly pill"" sä...",hyp2
...,...,...,...,...,...
114,114,Texaner som dignar under skyhöga hemförsäkring...,Invånare i Texas som har belastats med orimlig...,Invånare i Texas som har belastats med orimlig...,hyp2
115,115,Uppdaterad - Två explosioner nära mållinjen i ...,Uppdatering: Nära målet i Boston Marathon intr...,Två explosioner inträffade under loppet av Bos...,hyp2
116,116,Anthony Weiner halkar ned till fjärde plats i ...,Anthony Weiner halkar till fjärde plats i ny o...,Enligt en ny opinionsundersökning från Quinnip...,hyp2
117,117,Östtimor förbjuder kampsportsklubbar på grund ...,I Östtimor ska man förbjuda klubbar som utövar...,I Östtimor har man förbjudit klubbar som utöva...,hyp1


In [None]:
# Reduce the saved Swedish v1 detection results to the two columns that are
# exported: the example 'id' and the model 'prediction', renamed to 'label'.
df = (
    pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/results_gpt_se_v1.csv")
    .loc[:, ['id', 'prediction']]
    .rename(columns={'prediction': 'label'})
)

# Write the reduced table; the pandas index is left out of the file.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/final_gpt_se_v1_detection.csv', index=False)

In [None]:
# Run the alternative detector (detect_hallu_narjes) over the English data
# frame and store the outcome at csv_filename. save_results,
# detect_hallu_narjes and data_df_en come from earlier cells.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/results_gpt_en_narjes.csv"

save_results(detect_hallu_narjes, data_df_en, csv_filename)

{'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Unnamed: 0,id,source,hyp1,hyp2,prediction
0,0,It has enabled us to support and encourage an ...,You can support and encourage an exchange of e...,We can support and encourage an exchange of ex...,hyp2
1,1,"Therefore, I am calling for an increase in the...",I want to see a decrease in the premiums for a...,I want to see an increase in the premiums for ...,hyp1
2,2,"In other words, a person may be prevented from...",When there is reason to fear that a violent ac...,When there is reason to fear that a violent ac...,hyp2
3,3,None of my 34 amendments were adopted and my a...,Our arguments against the ridiculous administr...,My arguments against the ridiculous administra...,hyp2
4,4,The final key feature of the report is its ori...,The final main feature of the report is its or...,The final features of the report are its orien...,hyp2
...,...,...,...,...,...
114,114,I am referring to the current vote but in rela...,The vote on Amendment No 98 will take place be...,The vote on Amendment No 98 will take place af...,hyp1
115,115,When I became a Member of the European Parliam...,I was struck by the abundance of knowledge whe...,I was struck by the lack of knowledge when I j...,hyp1
116,116,"Mr President, let me express my thanks for the...","Mr President, I would like to extend a warm we...","Madam President, I would like to extend a warm...",hyp2
117,117,We can replace 16 out of 626 Members of the Eu...,"As voters we cannot decide on a new law, but w...","As voters decide on a new law, we can replace ...",hyp2


In [None]:
# Reduce the saved English "narjes" detection results to the two columns that
# are exported: the example 'id' and the model 'prediction', renamed to 'label'.
df = (
    pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/results_gpt_en_narjes.csv")
    .loc[:, ['id', 'prediction']]
    .rename(columns={'prediction': 'label'})
)

# Write the reduced table; the pandas index is left out of the file.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/final_gpt_en_narjes_detection.csv', index=False)

In [None]:
# Run detect_hallu_narjes over the Swedish data frame and store the outcome
# at csv_filename. save_results, detect_hallu_narjes and data_df_se come from
# earlier cells.
# NOTE(review): this passes the same detector function as the English run,
# while the v1 Swedish run used a dedicated detect_hallu_se. Confirm that
# detect_hallu_narjes is meant to be applied to Swedish input unchanged.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/results_gpt_se_narjes.csv"

save_results(detect_hallu_narjes, data_df_se, csv_filename)

{'label': 'hyp1'}
{'label': 'hyp1'}
{'label': 'hyp1'}
result_text: {'label': 'hyp1'}


Unnamed: 0,id,source,hyp1,hyp2,prediction
0,0,Kvinnor kommer att möta högre bilförsäkringspr...,Det betyder att kvinnor kan förvänta sig att b...,Kvinnor kommer att få högre premier för bilför...,hyp2
1,1,"Rörelseintäkterna var 1,45 miljarder dollar, e...","Intäkterna från rörelsen var 1,45 miljarder do...","Intäkterna från rörelsen var 1,45 miljarder do...",hyp1
2,2,"Mandela tillbaka på sjukhus i ""allvarligt men ...",Mandela återvänder inte till sjukhuset med all...,Mandela återvänder till sjukhuset med allvarli...,hyp1
3,3,Egypten fryser Muslimska brödraskapets tillgån...,Den egyptiska regeringen har fryst tillgångar ...,Egypten fryser tillgångarna för Muslimska bröd...,hyp2
4,4,"P-pillret, som de kallar ""polypiller"", skulle ...","Medicinen som vanligtvis kallas ""poly pill"" sä...","Medicinen som vanligtvis kallas ""poly pill"" sä...",hyp2
...,...,...,...,...,...
114,114,Texaner som dignar under skyhöga hemförsäkring...,Invånare i Texas som har belastats med orimlig...,Invånare i Texas som har belastats med orimlig...,hyp2
115,115,Uppdaterad - Två explosioner nära mållinjen i ...,Uppdatering: Nära målet i Boston Marathon intr...,Två explosioner inträffade under loppet av Bos...,hyp2
116,116,Anthony Weiner halkar ned till fjärde plats i ...,Anthony Weiner halkar till fjärde plats i ny o...,Enligt en ny opinionsundersökning från Quinnip...,hyp1
117,117,Östtimor förbjuder kampsportsklubbar på grund ...,I Östtimor ska man förbjuda klubbar som utövar...,I Östtimor har man förbjudit klubbar som utöva...,hyp2


In [None]:
# Reduce the saved Swedish "narjes" detection results to the two columns that
# are exported: the example 'id' and the model 'prediction', renamed to 'label'.
df = (
    pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/detection_task_gpt/results_gpt_se_narjes.csv")
    .loc[:, ['id', 'prediction']]
    .rename(columns={'prediction': 'label'})
)

# Write the reduced table; the pandas index is left out of the file.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/detection_task/final_gpt_se_narjes_detection.csv', index=False)

In [None]:
# update failed rows
def process_csv(file_path, generate_paraphrase_func, prompt):
    """Regenerate paraphrases for rows of a results CSV that are still empty.

    A row is treated as failed when its 'prompt', 'hyp+' and 'hyp-' cells are
    all missing. For every such row the source text is passed to
    ``generate_paraphrase_func``, the reply is parsed with
    ``extract_json_data`` and the three cells are filled in. Afterwards the
    rows that were considered are displayed and the complete table is written
    back to ``file_path``.

    Args:
        file_path: Path of the CSV file to repair; it is overwritten.
        generate_paraphrase_func: Callable that receives the source text and
            returns the raw generation result.
        prompt: Text stored in the 'prompt' column of every repaired row.

    Returns:
        None. The outcome is the rewritten CSV file and the displayed rows.
    """
    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(file_path)

    # A column without a single value is parsed as float64. Cast the columns
    # that receive text to object so strings can be stored without relying on
    # implicit dtype upcasting.
    text_columns = ['prompt', 'hyp+', 'hyp-']
    df = df.astype({name: object for name in text_columns if name in df.columns})

    # IDs of every row that was found empty, whether or not the retry worked
    missing_ids = []

    # Iterate over each row
    for index, row in df.iterrows():
        # Only rows where all three generated columns are empty are retried
        if pd.isna(row['prompt']) and pd.isna(row['hyp+']) and pd.isna(row['hyp-']):
            source = row['source']

            missing_ids.append(row['id'])
            try:
                # Generate paraphrase
                result = generate_paraphrase_func(source)
                extracted_hyp = extract_json_data(result)

                # Both keys are read before anything is written, so a reply
                # that lacks one of them leaves the row untouched.
                hyp_support = extracted_hyp['hyp+']
                print("Hyp+ : ", hyp_support)
                hyp_hallu = extracted_hyp['hyp-']
                print("Hyp- : ", hyp_hallu)

                print('source: ',source)

                # Update DataFrame with new values
                df.at[index, 'hyp+'] = hyp_support
                df.at[index, 'hyp-'] = hyp_hallu
                df.at[index, 'prompt'] = prompt

                print("ADDED")
            except Exception as error:
                # Best effort: one bad row must not abort the run, but the
                # reason is reported. KeyboardInterrupt and SystemExit are
                # deliberately not caught so the run can still be stopped.
                print(f"Skipping row {index} due to error in data extraction.")
                print(f"Reason: {error!r}")
                continue

    rows_with_missing_ids = df[df['id'].isin(missing_ids)]

    display(rows_with_missing_ids)

    df.to_csv(file_path, header=True, index=False)

In [None]:
# Retry the English GPT-3.5 generation for rows of the results file that are
# still empty. generate_paraphrase is defined in an earlier cell.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_en.csv"

# Prompt text recorded in the 'prompt' column of repaired rows. The
# {answer_format} and {source} placeholders are stored as written here;
# process_csv does not fill them in.
prompt = '''
    Given the src below, generate a paraphrase hypothesis  hyp+ that is supported by src and a second paraphrase hyp- that is not supported by src.

    Provide the result in the following format: {answer_format}

    Src: {source}

    Result:
'''

# process_csv has no return statement, so `result` is None and the bare
# expression below renders nothing; the table shown under this cell comes
# from the display() call inside process_csv.
result = process_csv(csv_filename, generate_paraphrase, prompt)
result

Unnamed: 0,id,source,hyp+,hyp-,prompt


In [None]:
# Export the English GPT-3.5 generation results in the task format.

import pandas as pd

# Read the full results file and keep just the example id together with the
# two generated paraphrases.
df = pd.read_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_en.csv').loc[:, ['id', 'hyp+', 'hyp-']]

# Write the reduced table; the pandas index is left out of the file.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/final_gpt35_en.csv', index=False)

# Swedish

In [None]:
# Run the Swedish GPT-3.5 paraphrase generation and store the results at
# csv_filename.
# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'generate_paraphrase_se' is not defined", so the cell
# defining generate_paraphrase_se has to be executed first.
# NOTE(review): save_results receives four positional arguments here, while
# the detection cells call it with three (function, data frame, file name).
# Confirm which save_results definition is active before running.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_se.csv"

# Swedish prompt template with {answer_format} and {source} placeholders.
prompt = '''
    användare
    Med tanke på källan nedan, generera en parafras-hypotes hyp+ som stöds av källan och en andra parafras hyp- som inte stöds av källan.
    Ge resultatet i följande format: {answer_format}
    Källa: {source}
    Resultat:
      '''

save_results(generate_paraphrase_se, prompt, data_df_se, csv_filename)

NameError: name 'generate_paraphrase_se' is not defined

In [None]:
# Retry the Swedish GPT-3.5 generation for rows of the results file that are
# still empty.
# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'process_csv' is not defined", so the cell defining
# process_csv (and the one defining generate_paraphrase_se) has to be
# executed before this one.
csv_filename = "/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_se.csv"

# Swedish prompt text recorded in the 'prompt' column of repaired rows.
prompt = '''
    användare
    Med tanke på källan nedan, generera en parafras-hypotes hyp+ som stöds av källan och en andra parafras hyp- som inte stöds av källan.
    Ge resultatet i följande format: {answer_format}
    Källa: {source}
    Resultat:
      '''

# process_csv has no return statement, so `result` is None and the bare
# expression below renders nothing.
result = process_csv(csv_filename, generate_paraphrase_se, prompt)
result

NameError: name 'process_csv' is not defined

In [None]:
# Export the Swedish GPT-3.5 generation results: read the full results file
# and keep just the example id together with the two generated paraphrases.
df = pd.read_csv("/content/drive/MyDrive/Master Project-Hallucination/Group2_results/generation_task/generation_task_gpt35/results_gpt35_se.csv").loc[:, ['id', 'hyp+', 'hyp-']]

# Write the reduced table; the pandas index is left out of the file.
df.to_csv('/content/drive/MyDrive/Master Project-Hallucination/Group2_results/final_gpt35_se.csv', index=False)