# Llama 2 and Llama 3

## Requirements and Imports

In [None]:
!pip install transformers accelerate bitsandbytes datasets

Collecting accelerate
  Downloading accelerate-0.32.1-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.1/314.1 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━

In [None]:
import os
import json
import torch
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          pipeline)
import pandas as pd

## Hugging Face Login

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Access tokens must never be stored in the notebook itself. The token is read
# from the HF_TOKEN environment variable, falling back to a non-echoing prompt.
# SECURITY: the token that was previously hardcoded in this cell has been
# exposed and should be revoked at https://huggingface.co/settings/tokens.
access_token_read = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=access_token_read)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Config

In [None]:
# Hugging Face checkpoint to evaluate. To run Llama 2 instead, use the commented line.
# model_name = "meta-llama/Llama-2-7b-chat-hf"
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# Dataset roots, given as relative paths; read_news_files looks for one
# sub-folder per label beneath the root it is given.
politifact_path, gossipcop_path = (
    f"drive/MyDrive/LLM/FinalProject/{dataset}"
    for dataset in ("politifact", "gossipcop")
)

# Label names double as the sub-folder names scanned by read_news_files.
labels = list(("fake", "real"))
# Name of the JSON file read from inside every sample folder.
json_file_name = "news content.json"

In [None]:
# Mount Google Drive at /content/drive; force_remount=True asks for a re-mount
# even if a mount is already present.
# NOTE(review): the dataset paths in the config cell are relative
# ('drive/MyDrive/...'), so this presumably relies on the working directory
# being /content — confirm.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


## Dataset

### For CPU

In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19

In [None]:
import os
import json
# from datasets import Dataset, DatasetDict
import pandas as pd

### Create Data Frame of Dataset

In [None]:
def read_news_files(base_path, dataset_name, title_col, content_col, *cols,
                    label_names=None, file_name=None):
    """Collect one record per news sample stored under ``base_path``.

    The directory layout read is ``<base_path>/<label>/<sample_dir>/<file_name>``,
    where each ``<file_name>`` is a JSON document describing one article.

    Args:
        base_path: Root directory of the dataset.
        dataset_name: Text split out of each sample directory name to build the
            record id (whatever follows its last occurrence is kept).
        title_col: JSON key holding the article title.
        content_col: JSON key holding the article body.
        *cols: Extra JSON keys copied into every record under the same name.
        label_names: Label sub-directories to scan, in order. Defaults to the
            notebook-level ``labels`` list.
        file_name: Name of the JSON file inside each sample directory. Defaults
            to the notebook-level ``json_file_name``.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``content`` and
        ``label`` plus one key per entry in ``cols``. JSON keys that are absent
        become ``''``. Samples whose file cannot be opened or parsed are
        reported with ``print`` and skipped.
    """
    # Fall back to the notebook globals so existing calls keep working unchanged.
    if label_names is None:
        label_names = labels
    if file_name is None:
        file_name = json_file_name

    data = []

    for label in label_names:
        label_path = os.path.join(base_path, label)

        for sample_dir in os.listdir(label_path):
            sample_json_file_path = os.path.join(label_path, sample_dir, file_name)

            try:
                with open(sample_json_file_path, 'r', encoding='utf-8') as file:
                    sample_content = json.load(file)

                entry = {
                    # e.g. 'politifact13501' split on 'politifact' -> '13501'
                    'id': sample_dir.split(dataset_name)[-1],
                    'title': sample_content.get(title_col, ''),
                    'content': sample_content.get(content_col, ''),
                    'label': label
                }

                for col in cols:
                    entry[col] = sample_content.get(col, '')

                data.append(entry)

            except Exception as e:
                # Deliberately best-effort: one unreadable sample must not abort the scan.
                print(f"Unexpected error with file {sample_json_file_path}: {e}")

    return data

In [None]:
# Parse the PolitiFact folder tree; 'title' and 'text' are the JSON keys read
# for the article title and body.
politifact_data = read_news_files(
    base_path=politifact_path,
    dataset_name='politifact',
    title_col='title',
    content_col='text',
)
# gossipcop_data = read_news_files(gossipcop_path, 'gossipcop', 'title', 'content')

# One row per parsed article.
politifact_df = pd.DataFrame(data=politifact_data)

## Data Analysis and Preprocessing

In [None]:
# Preview the first rows of the freshly built frame.
politifact_df.head()

Unnamed: 0,id,title,content,label
0,13501,ALEXISTOGEL 🔥 Situs Togel Online dan Slot Onli...,Situs Togel Online dan Slot Online Gacor Trust...,fake
1,13468,Denzel Washington hit by Facebook fake news st...,A fabricated news story claiming Hollywood act...,fake
2,13038,Log into Facebook,Notice\n\nYou must log in to continue.,fake
3,13467,Mental Images,Mental Images\n\n“My pictures ask where does t...,fake
4,11773,Virginia Republican Wants Schools To Check Chi...,Republican attacks on transgendered Americans ...,fake


### Removing Empty Rows

In [None]:
# Boolean masks marking rows whose title / content is exactly the empty string.
empty_title = politifact_df['title'].eq('')
empty_content = politifact_df['content'].eq('')

empty_title_rows_count = empty_title.sum()
empty_content_rows_count = empty_content.sum()
empty_rows_count = (empty_content & empty_title).sum()

print(
    f"Number of rows where title is empty: {empty_title_rows_count}",
    f"Number of rows where content is empty: {empty_content_rows_count}",
    f"Number of rows where both title and content are empty: {empty_rows_count}",
    sep="\n",
)

In [None]:
# Drop only the rows where title AND content are both empty; rows missing just
# one of the two are kept.
politifact_df_cleaned = politifact_df.drop(
    index=politifact_df.index[(empty_title & empty_content).to_numpy()]
)

### Removing Duplicated Rows

In [None]:
# Display every row whose (title, content) pair occurs more than once, sorted
# so that identical pairs sit next to each other.
(
    politifact_df_cleaned
    .loc[politifact_df_cleaned.duplicated(subset=['title', 'content'], keep=False)]
    .sort_values(by=['title', 'content'])
)

In [None]:
# Keep the first occurrence of each (title, content) pair and discard the rest.
politifact_df_cleaned_2 = politifact_df_cleaned.drop_duplicates(
    subset=['title', 'content'],
    keep='first',
)

## Save and Load Data Frame

### PolitiFact

To save:

In [None]:
# Persist the cleaned frame as CSV; index=False leaves the row index out of the file.
politifact_df_cleaned_2.to_csv('drive/MyDrive/LLM/FinalProject/politifact_cleaned.csv', index=False)

To load:

In [None]:
# Reload the cleaned PolitiFact frame from CSV.
# read_csv parses empty fields as NaN, and the cleaning step keeps rows that
# have only an empty title or only an empty content. Without the fillna those
# values would be interpolated into the prompts as the literal text "nan".
politifact_df_cleaned_2 = (
    pd.read_csv('drive/MyDrive/LLM/FinalProject/politifact_cleaned.csv')
    .fillna({'title': '', 'content': ''})
)

### GossipCop

To load:

In [None]:
# NOTE(review): the recorded output of this cell is a FileNotFoundError — the
# CSV is looked up in the current working directory. Presumably it belongs in
# the Drive project folder like the PolitiFact CSV; confirm the location.
gossip_cop = pd.read_csv('gossipcop_complete_cleaned_less512.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'gossipcop_complete_cleaned_less512.csv'

## Configuring Model

In [None]:
# bitsandbytes settings passed to from_pretrained when the model is loaded.
quantization_config = BitsAndBytesConfig(
    # 4-bit loading is the mode in use: NF4 quant type, double quantisation on,
    # bfloat16 as the compute dtype.
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    # 8-bit options are explicitly switched off.
    load_in_8bit=False,
    llm_int8_has_fp16_weight=False,
)

In [None]:
# Device placement passed to from_pretrained (presumably: put the whole model on device 0 — confirm).
device_map = {"": 0}
# Load the checkpoint named by `model_name` using the quantisation config defined earlier.
# NOTE(review): use_cache=False presumably disables the key/value cache, which is
# normally only wanted for training — confirm it is intended for this
# inference-only notebook.
model = AutoModelForCausalLM.from_pretrained(model_name,
                    quantization_config=quantization_config,
                    device_map=device_map,
                    use_cache = False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [None]:
# Load the tokenizer that belongs to `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token and pad on the right.
# NOTE(review): decoder-only models are usually left-padded for batched
# generation; right padding looks harmless here because the evaluation loops
# send one prompt at a time — revisit if batching is added.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Text-generation pipeline used by the smoke tests and evaluation loops below.
# max_new_tokens=10 is passed as the limit on newly generated tokens per call.
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=10
)

## Zero-shot



```
# This is formatted as code
```

### Define Prompt Format

In [None]:
def create_zero_shot_prompt(sample, model_family="llama3"):
    """Build the zero-shot fake-news classification prompt for one sample.

    Args:
        sample: Mapping (dict or DataFrame row) with the keys ``'title'`` and
            ``'content'``. A value of ``None`` or float NaN is rendered as
            empty text.
        model_family: ``"llama3"`` (default) for the Llama 3 header-token
            template, or ``"llama2"`` for the ``[INST]`` / ``<<SYS>>`` template.

    Returns:
        The complete prompt string, ending where the model's answer begins.

    Raises:
        KeyError: if ``sample`` has no ``'title'`` or ``'content'`` entry.
        ValueError: if ``model_family`` is not ``"llama3"`` or ``"llama2"``.
    """
    def _as_text(value):
        # Missing fields arrive as None or as float NaN (e.g. after a CSV round
        # trip); interpolating them would put "None"/"nan" into the prompt.
        if value is None or (isinstance(value, float) and value != value):
            return ''
        return value

    title = _as_text(sample['title'])
    context = _as_text(sample['content'])

    # NOTE(review): the instruction text ("twit", "in fake or real") is kept
    # byte-for-byte so results stay comparable with earlier runs.

    ### Llama 2 ###
    if model_family == "llama2":
        return f"""<s>[INST] <<SYS>>
You are an expert in identifying fake news and disinformation. Please identify whether the piece of twit is real or fake.
Please think step-by-step as you answer the question. However, please only respond with \'real\' if the news is real or \'fake\' if the news is fake.
Do not respond with any other words or phrases.
<</SYS>>

{title}
{context} [/INST]
"""

    if model_family != "llama3":
        raise ValueError(f"Unsupported model_family: {model_family!r}")

    ### Llama 3 ###
    # NOTE(review): the template starts with a literal <|begin_of_text|>; if the
    # tokenizer also prepends its own BOS token the prompt would carry it
    # twice — verify against the tokenizer settings.
    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert in identifying fake news and disinformation. Please identify whether the piece of twit is real or fake.
Please think step-by-step as you answer the question. However, please only respond with \'real\' if the news is real or \'fake\' if the news is fake.
Do not respond with any other words or phrases.
<|eot_id|>

<|start_header_id|>user<|end_header_id|>

Identify whether the following piece of news in fake or real

{title}
{context}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>

"""

    return prompt

#### Testing Llama 2

In [None]:
# Smoke test: a sensational headline with an empty article body.
# NOTE: the recorded output of this cell shows the Llama 2 prompt, i.e. it was
# produced while create_zero_shot_prompt had its Llama 2 template enabled.
fake_news = dict(
    title="OMG! Watch the Horrible Consequences of the War Between Sweden and Yemen!!!",
    content="",
)

p = create_zero_shot_prompt(fake_news)

# Print the generated text of the first returned sequence.
print(text_generator(p)[0]['generated_text'])

<s>[INST] <<SYS>>
You are an expert in identifying fake news and disinformation. Please identify whether the twit is real or fake.
Please think step-by-step as you answer the question. However, please only respond with 'real' if the news is real or 'fake' if the news is fake.
Do not respond with any other words or phrases.
<</SYS>>

OMG! Watch the Horrible Consequences of the War Between Sweden and Yemen!!!
 [/INST]
Fake


In [None]:
# Same fabricated event as the previous test, phrased as a neutral headline.
fake_news_edited = dict(
    title="Consequences of the War Between Sweden and Yemen",
    content="",
)

p = create_zero_shot_prompt(fake_news_edited)

# Print the generated text of the first returned sequence.
print(text_generator(p)[0]['generated_text'])

<s>[INST] <<SYS>>
You are an expert in identifying fake news and disinformation. Please identify whether the twit is real or fake.
Please think step-by-step as you answer the question. However, please only respond with 'real' if the news is real or 'fake' if the news is fake.
Do not respond with any other words or phrases.
<</SYS>>

Consequences of the War Between Sweden and Yemen
 [/INST]
Real


#### Llama 3 Testing

In [None]:
# Smoke test for the Llama 3 prompt: sensational headline, empty article body.
fake_news = dict(
    title="OMG! Watch the Horrible Consequences of the War Between Sweden and Yemen!!!",
    content="",
)

p = create_zero_shot_prompt(fake_news)

# Print the generated text of the first returned sequence.
print(text_generator(p)[0]['generated_text'])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert in identifying fake news and disinformation. Please identify whether the piece of twit is real or fake.
Please think step-by-step as you answer the question. However, please only respond with 'real' if the news is real or 'fake' if the news is fake.
Do not respond with any other words or phrases.
<|eot_id|>

<|start_header_id|>user<|end_header_id|>

Identify whether the following piece of news in fake or real

OMG! Watch the Horrible Consequences of the War Between Sweden and Yemen!!!

<|eot_id|>
<|start_header_id|>assistant<|end_header_id|> 

fake


In [None]:
# Same fabricated event, phrased in a calmer tone.
fake_news_edited = dict(
    title="The sad consequences of the War Between Sweden and Yemen",
    content="",
)

p = create_zero_shot_prompt(fake_news_edited)

# Print the generated text of the first returned sequence.
print(text_generator(p)[0]['generated_text'])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert in identifying fake news and disinformation. Please identify whether the piece of twit is real or fake.
Please think step-by-step as you answer the question. However, please only respond with 'real' if the news is real or 'fake' if the news is fake.
Do not respond with any other words or phrases.
<|eot_id|>

<|start_header_id|>user<|end_header_id|>

Identify whether the following piece of news in fake or real

The sad consequences of the War Between Sweden and Yemen

<|eot_id|>
<|start_header_id|>assistant<|end_header_id|> 

fake


### Evaluation on first Dataset

Add the column:

For Llama 2 predictions:

In [None]:
# Placeholder column for the Llama 2 zero-shot predictions. It is created with
# object dtype because the evaluation loop writes strings into it; assigning
# strings into an int64 column is a deprecated incompatible-dtype operation.
politifact_df_cleaned_2['zero_shot_1'] = pd.Series(
    0, index=politifact_df_cleaned_2.index, dtype=object
)

For Llama 3 predictions:

In [None]:
# Placeholder column for the Llama 3 zero-shot predictions. It is created with
# object dtype because the evaluation loop writes strings into it; assigning
# strings into an int64 column is a deprecated incompatible-dtype operation.
politifact_df_cleaned_2['zero_shot_llama_3'] = pd.Series(
    0, index=politifact_df_cleaned_2.index, dtype=object
)

In [None]:
# Preview the first rows, including the placeholder prediction column.
politifact_df_cleaned_2.head()

Unnamed: 0,id,title,content,label,zero_shot_llama_3
0,13501,ALEXISTOGEL 🔥 Situs Togel Online dan Slot Onli...,Situs Togel Online dan Slot Online Gacor Trust...,fake,0
1,13468,Denzel Washington hit by Facebook fake news st...,A fabricated news story claiming Hollywood act...,fake,0
2,13038,Log into Facebook,Notice\n\nYou must log in to continue.,fake,0
3,13467,Mental Images,Mental Images\n\n“My pictures ask where does t...,fake,0
4,11773,Virginia Republican Wants Schools To Check Chi...,Republican attacks on transgendered Americans ...,fake,0


#### Llama 2

In [None]:
# Zero-shot evaluation that parses Llama 2 style output: the answer is whatever
# follows "[/INST]\n" in the generated text.
# NOTE(review): create_zero_shot_prompt must be producing the Llama 2 template
# for that marker to be found; with the Llama 3 template every row ends up as
# 'Invalid answer format'.
counter = 0  # initialised here so the cell also runs on a fresh kernel

for idx, sample in politifact_df_cleaned_2.iterrows():
    # `sample_id` rather than `id`: at notebook top level `id = ...` would
    # overwrite the builtin for the rest of the session.
    sample_id = sample['id']
    prompt = create_zero_shot_prompt(sample)

    counter += 1

    # NOTE(review): len(prompt) counts characters, not tokens — presumably a
    # conservative stand-in for the model's context limit; confirm the unit.
    if len(prompt) > 4096:
        politifact_df_cleaned_2.at[idx, 'zero_shot_1'] = 'Invalid length'
        print(f'[{counter}] Invalid length for {sample_id}: {len(prompt)}')
        continue

    # NOTE(review): do_sample=True makes each prediction stochastic and no seed
    # is set in this cell, so reruns can give different metrics.
    output = text_generator(
        prompt,
        do_sample=True
    )[0]

    # generated_text holds the prompt followed by the completion; only the part
    # after the marker is the model's answer.
    marker = "[/INST]\n"
    marker_index = output["generated_text"].find(marker)

    if marker_index != -1:
        final_answer = output["generated_text"][marker_index + len(marker) :].lower()

        # Decide by whichever label word appears first, so an answer such as
        # "fake, not real" is not counted as 'real'.
        real_pos = final_answer.find('real')
        fake_pos = final_answer.find('fake')

        if real_pos != -1 and (fake_pos == -1 or real_pos < fake_pos):
            politifact_df_cleaned_2.at[idx, 'zero_shot_1'] = 'real'
            print(f"[{counter}] ID: {sample_id}: real, {final_answer}")
        elif fake_pos != -1:
            # Fake predictions are stored as 'false'; the metrics cell compares
            # against exactly that value.
            politifact_df_cleaned_2.at[idx, 'zero_shot_1'] = 'false'
            print(f"[{counter}] ID: {sample_id}: fake, {final_answer}")
        else:
            politifact_df_cleaned_2.at[idx, 'zero_shot_1'] = 'Invalid final answer format'
            print(f'[{counter}] Invalid final answer format for {sample_id}: {final_answer}')
    else:
        politifact_df_cleaned_2.at[idx, 'zero_shot_1'] = 'Invalid answer format'
        print(f'[{counter}] Invalid answer format for {sample_id}')

[1] Invalid length for 13501: 6559
[2] ID: 13468: real, real
[3] ID: 13038: real, real
[4] ID: 13467: real, real
[5] ID: 11773: real, real
[6] ID: 13475: real, real
[7] ID: 13496: real, real
[8] ID: 13484: real, real
[9] ID: 13584: real, real
[10] ID: 13561: real, real
[11] ID: 13570: real, real
[12] ID: 13557: real, real
[13] ID: 13589: real, real
[14] ID: 13577: real, real
[15] ID: 13565: real, real
[16] ID: 13560: real, real
[17] ID: 13576: fake, fake.
[18] ID: 13559: real, real
[19] ID: 13617: real, real
[20] ID: 13591: real, real
[21] ID: 13593: real, real
[22] ID: 13600: real, real
[23] ID: 13601: fake, 
fake
[24] ID: 13698: real, real
[25] ID: 13677: real, real
[26] ID: 13663: real, real
[27] ID: 13608: real, real
[28] ID: 13747: real, real
[29] ID: 13744: real, 
real.
[30] ID: 13703: real, real
[31] ID: 13765: real, real
[32] ID: 13711: real, real
[33] ID: 13764: real, real
[34] ID: 13751: real, real
[35] ID: 13731: real, 
real.
[36] ID: 13745: real, 
real
[37] ID: 13771: real,

In [None]:
# Confusion-matrix counts with 'real' as the positive class. The evaluation
# loop stores fake predictions as 'false'; rows holding any other marker
# (length / format errors) are not counted in the four cells.
tp = (politifact_df_cleaned_2['label'].eq('real') & politifact_df_cleaned_2['zero_shot_1'].eq('real')).sum()
fn = (politifact_df_cleaned_2['label'].eq('real') & politifact_df_cleaned_2['zero_shot_1'].eq('false')).sum()
fp = (politifact_df_cleaned_2['label'].eq('fake') & politifact_df_cleaned_2['zero_shot_1'].eq('real')).sum()
tn = (politifact_df_cleaned_2['label'].eq('fake') & politifact_df_cleaned_2['zero_shot_1'].eq('false')).sum()
long_prompts = politifact_df_cleaned_2.loc[politifact_df_cleaned_2['zero_shot_1'].eq('Invalid length')]

print(
    f"True Negative: {tn}",
    f"False Positive: {fp}",
    f"True Positive: {tp}",
    f"False Negative: {fn}",
    f"Long Prompts: {long_prompts.shape[0]}",
    sep="\n",
)

# Metrics are computed over the rows that received a real/fake prediction only.
accuracy = (tp + tn) / (tp + fp + tn + fn)
precision = tp / (fp + tp)
recall = tp / (fn + tp)

print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    sep="\n",
)

True Negative: 29
False Positive: 256
True Positive: 196
False Negative: 7
Long Prompts: 276
Accuracy: 0.4610655737704918
Precision: 0.4336283185840708
Recall: 0.9655172413793104


#### Llama 3

In [None]:
# Zero-shot evaluation that parses Llama 3 style output: the answer is whatever
# follows the assistant header in the generated text.
counter = 0  # 1-based progress counter used in the log lines

for idx, sample in politifact_df_cleaned_2.iterrows():
    # `sample_id` rather than `id`: at notebook top level `id = ...` would
    # overwrite the builtin for the rest of the session.
    sample_id = sample['id']
    prompt = create_zero_shot_prompt(sample)

    counter += 1

    # NOTE(review): len(prompt) counts characters, not tokens — presumably a
    # conservative stand-in for the model's context limit; confirm the unit.
    if len(prompt) > 8000:
        politifact_df_cleaned_2.at[idx, 'zero_shot_llama_3'] = 'Invalid length'
        print(f'[{counter}] Invalid length for {sample_id}: {len(prompt)}')
        continue

    # NOTE(review): do_sample=True makes each prediction stochastic and no seed
    # is set in this cell, so reruns can give different metrics.
    output = text_generator(
        prompt,
        do_sample=True
    )[0]

    # generated_text holds the prompt followed by the completion; only the part
    # after the marker is the model's answer.
    marker = "<|start_header_id|>assistant<|end_header_id|>"
    marker_index = output["generated_text"].find(marker)

    if marker_index != -1:
        final_answer = output["generated_text"][marker_index + len(marker) :].lower()

        # Decide by whichever label word appears first, so an answer such as
        # "fake, not real" is not counted as 'real'.
        real_pos = final_answer.find('real')
        fake_pos = final_answer.find('fake')

        if real_pos != -1 and (fake_pos == -1 or real_pos < fake_pos):
            politifact_df_cleaned_2.at[idx, 'zero_shot_llama_3'] = 'real'
            print(f"[{counter}] ID: {sample_id}: real, {final_answer}")
        elif fake_pos != -1:
            # Fake predictions are stored as 'false'; the metrics cell compares
            # against exactly that value.
            politifact_df_cleaned_2.at[idx, 'zero_shot_llama_3'] = 'false'
            print(f"[{counter}] ID: {sample_id}: fake, {final_answer}")
        else:
            politifact_df_cleaned_2.at[idx, 'zero_shot_llama_3'] = 'Invalid final answer format'
            print(f'[{counter}] Invalid final answer format for {sample_id}: {final_answer}')
    else:
        politifact_df_cleaned_2.at[idx, 'zero_shot_llama_3'] = 'Invalid answer format'
        print(f'[{counter}] Invalid answer format for {sample_id}')

[1] ID: 13501: fake,  

fake
[2] ID: 13468: fake,  

fake
[3] ID: 13038: fake,  

fake
[4] ID: 13467: fake,  

fake
[5] ID: 11773: fake,  

fake
[6] ID: 13475: fake,  

fake
[7] ID: 13496: fake,  

fake
[8] ID: 13484: fake,  

fake
[9] ID: 13584: fake,  

fake
[10] ID: 13561: fake,  

fake
[11] ID: 13570: fake,  

fake
[12] ID: 13557: fake,  

fake
[13] ID: 13589: fake,  

fake
[14] ID: 13577: fake,  

fake
[15] ID: 13565: real,  

real
[16] ID: 13560: fake,  

fake
[17] ID: 13576: fake,  

fake
[18] ID: 13559: fake,  

fake
[19] ID: 13617: fake,  

fake
[20] ID: 13591: fake,  

fake
[21] ID: 13593: real,  

real
[22] ID: 13600: fake,  

fake
[23] ID: 13601: fake,  

fake
[24] ID: 13698: fake,  

fake
[25] ID: 13677: fake,  

fake
[26] ID: 13663: fake,  

fake
[27] ID: 13608: fake,  

fake
[28] ID: 13747: fake,  

fake
[29] ID: 13744: real,  

real
[30] ID: 13703: fake,  

fake
[31] ID: 13765: fake,  

fake
[32] ID: 13711: fake,  

fake
[33] ID: 13764: fake,  

fake
[34] ID: 13751: fak

In [None]:
# Confusion-matrix counts with 'real' as the positive class. The evaluation
# loop stores fake predictions as 'false'; rows holding any other marker
# (length / format errors) are not counted in the four cells.
tp = (politifact_df_cleaned_2['label'].eq('real') & politifact_df_cleaned_2['zero_shot_llama_3'].eq('real')).sum()
fn = (politifact_df_cleaned_2['label'].eq('real') & politifact_df_cleaned_2['zero_shot_llama_3'].eq('false')).sum()
fp = (politifact_df_cleaned_2['label'].eq('fake') & politifact_df_cleaned_2['zero_shot_llama_3'].eq('real')).sum()
tn = (politifact_df_cleaned_2['label'].eq('fake') & politifact_df_cleaned_2['zero_shot_llama_3'].eq('false')).sum()
long_prompts = politifact_df_cleaned_2.loc[politifact_df_cleaned_2['zero_shot_llama_3'].eq('Invalid length')]

print(
    f"True Negative: {tn}",
    f"False Positive: {fp}",
    f"True Positive: {tp}",
    f"False Negative: {fn}",
    f"Long Prompts: {long_prompts.shape[0]}",
    sep="\n",
)

# Metrics are computed over the rows that received a real/fake prediction only.
accuracy = (tp + tn) / (tp + fp + tn + fn)
precision = tp / (fp + tp)
recall = tp / (fn + tp)

print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    sep="\n",
)

True Negative: 275
False Positive: 45
True Positive: 183
False Negative: 115
Long Prompts: 146
Accuracy: 0.7411003236245954
Precision: 0.8026315789473685
Recall: 0.6140939597315436


### Evaluation of Paraphrased Dataset

In [None]:
# Load the original GossipCop rows and their paraphrased counterparts.
# NOTE(review): the recorded output of this cell is a FileNotFoundError — both
# CSVs are looked up in the current working directory, and the second name
# carries a browser-style " (2)" suffix. Confirm the intended paths.
normal_df = pd.read_csv('gossipcop_complete_cleaned_less512.csv')
paraphrased_df = pd.read_csv('gossipcop_paraphrased (2).csv')

FileNotFoundError: [Errno 2] No such file or directory: 'gossipcop_complete_cleaned_less512.csv'

In [None]:
# Rows whose 'paraphrased' field is anything other than the string '0'.
filtered_paraphrased_df = paraphrased_df.loc[paraphrased_df['paraphrased'].ne('0')]

# How many rows passed the filter
num_rows = filtered_paraphrased_df.shape[0]

# Take the same number of 'real' rows, in their existing order, from normal_df
filtered_normal_df = normal_df.loc[normal_df['label'].eq('real')].iloc[:num_rows]

In [None]:
from datasets import Dataset

# Bring both halves to the same columns: id, title, content, labels.
# For the paraphrased rows the 'paraphrased' text becomes the content.
paraphrased_data = (
    filtered_paraphrased_df
    .loc[:, ['id', 'title', 'paraphrased', 'label']]
    .rename(columns={'paraphrased': 'content', 'label': 'labels'})
)
normal_data = (
    filtered_normal_df
    .loc[:, ['id', 'title', 'content', 'label']]
    .rename(columns={'label': 'labels'})
)

# Stack the paraphrased rows first, then the normal rows, with a fresh index.
merged_df = pd.concat([paraphrased_data, normal_data], ignore_index=True)




In [None]:
# Placeholder column for the zero-shot predictions. It is created with object
# dtype because the evaluation loop writes strings into it; assigning strings
# into an int64 column is a deprecated incompatible-dtype operation.
merged_df['zero_shot_1'] = pd.Series(0, index=merged_df.index, dtype=object)

In [None]:
# Preview the first rows of the merged frame, including the placeholder prediction column.
merged_df.head()

Unnamed: 0,id,title,content,labels,zero_shot_1
0,-4051111882,Justin Bieber’s Showering Selena Gomez With TL...,justin bieber has been going above and beyond ...,fake,0
1,-4081333587,Zayn Malik shared the gas happy birthday video...,"\nryan reynolds, known for his playful sense o...",fake,0
2,-4060575239,Kourtney Kardashian Breaks Down Over 'Evil Hum...,\nkourtney kardashian and kim kardashian's lon...,fake,0
3,-4050914559,"The Weeknd, Selena Gomez Quotes Posted By Holl...",\nthe weeknd and selena gomez's romance has be...,fake,0
4,-4024992311,"Tom Holland Not Trying To Date Marisa Tomei, D...",\nas an reputable and trustworthy source of en...,fake,0


In [None]:
# Zero-shot evaluation of the merged (paraphrased + normal) frame, parsing
# Llama 2 style output: the answer is whatever follows "[/INST]\n".
# NOTE(review): create_zero_shot_prompt must be producing the Llama 2 template
# for that marker to be found; with the Llama 3 template every row ends up as
# 'Invalid answer format'.
counter = 0  # 1-based progress counter used in the log lines

for idx, sample in merged_df.iterrows():
    # `sample_id` rather than `id`: at notebook top level `id = ...` would
    # overwrite the builtin for the rest of the session.
    sample_id = sample['id']
    prompt = create_zero_shot_prompt(sample)

    counter += 1

    # NOTE(review): len(prompt) counts characters, not tokens — presumably a
    # conservative stand-in for the model's context limit; confirm the unit.
    if len(prompt) > 4096:
        merged_df.at[idx, 'zero_shot_1'] = 'Invalid length'
        print(f'[{counter}] Invalid length for {sample_id}: {len(prompt)}')
        continue

    # NOTE(review): do_sample=True makes each prediction stochastic and no seed
    # is set in this cell, so reruns can give different metrics.
    output = text_generator(
        prompt,
        do_sample=True
    )[0]

    # generated_text holds the prompt followed by the completion; only the part
    # after the marker is the model's answer.
    marker = "[/INST]\n"
    marker_index = output["generated_text"].find(marker)

    if marker_index != -1:
        final_answer = output["generated_text"][marker_index + len(marker) :].lower()

        # Decide by whichever label word appears first, so an answer such as
        # "fake, not real" is not counted as 'real'.
        real_pos = final_answer.find('real')
        fake_pos = final_answer.find('fake')

        if real_pos != -1 and (fake_pos == -1 or real_pos < fake_pos):
            merged_df.at[idx, 'zero_shot_1'] = 'real'
            print(f"[{counter}] ID: {sample_id}: real, {final_answer}")
        elif fake_pos != -1:
            # Fake predictions are stored as 'false'; the metrics cell compares
            # against exactly that value.
            merged_df.at[idx, 'zero_shot_1'] = 'false'
            print(f"[{counter}] ID: {sample_id}: fake, {final_answer}")
        else:
            merged_df.at[idx, 'zero_shot_1'] = 'Invalid final answer format'
            print(f'[{counter}] Invalid final answer format for {sample_id}: {final_answer}')
    else:
        merged_df.at[idx, 'zero_shot_1'] = 'Invalid answer format'
        print(f'[{counter}] Invalid answer format for {sample_id}')

[1] ID: -4051111882: real, 
real
[2] ID: -4081333587: real, real
[3] ID: -4060575239: real, 
real
[4] ID: -4050914559: real, 
real
[5] ID: -4024992311: real, real
[6] ID: -4038677075: real, real
[7] ID: -4005203173: real, real
[8] ID: -4047356713: real, real
[9] ID: -4037512632: real, 
real


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[10] ID: -40168647: real, real
[11] ID: -40109120: real, real
[12] ID: -4006218901: real, real
[13] ID: -3967904178: real, real
[14] ID: -3998725211: real, 
real
[15] ID: -3996753978: real, 
real
[16] ID: -3976046218: real, 
real
[17] ID: -3970231049: real, 
real
[18] ID: -3975938427: real, real
[19] ID: -3977674152: real, real
[20] ID: -3999881528: real, real
[21] ID: -3968227653: real, real
[22] ID: -3976443648: real, 
real
[23] ID: -3941312971: fake, 
fake.
[24] ID: -3952009039: real, real
[25] ID: -3946652977: real, 
real
[26] ID: -394386553: real, 
real
[27] ID: -3956979222: real, real
[28] ID: -3930824218: real, real
[29] ID: -3937009586: fake, 
fake.
[30] ID: -3942119395: real, 
real
[31] ID: -3944010770: real, real
[32] ID: -3953802371: fake, 
fake.
[33] ID: -388924876: real, real
[34] ID: -3904848154: real, 
real
[35] ID: -3899200812: real, real
[36] ID: -3912513096: real, real
[37] ID: -392954107: real, 
real
[38] ID: -3909836107: real, 
real
[39] ID: -390778342: real, 
real


In [None]:
# Confusion-matrix counts with 'real' as the positive class. The evaluation
# loop stores fake predictions as 'false'; rows holding any other marker
# (length / format errors) are not counted in the four cells.
tp = (merged_df['labels'].eq('real') & merged_df['zero_shot_1'].eq('real')).sum()
fn = (merged_df['labels'].eq('real') & merged_df['zero_shot_1'].eq('false')).sum()
fp = (merged_df['labels'].eq('fake') & merged_df['zero_shot_1'].eq('real')).sum()
tn = (merged_df['labels'].eq('fake') & merged_df['zero_shot_1'].eq('false')).sum()
long_prompts = merged_df.loc[merged_df['zero_shot_1'].eq('Invalid length')]

print(
    f"True Negative: {tn}",
    f"False Positive: {fp}",
    f"True Positive: {tp}",
    f"False Negative: {fn}",
    f"Long Prompts: {long_prompts.shape[0]}",
    sep="\n",
)

# Metrics are computed over the rows that received a real/fake prediction only.
accuracy = (tp + tn) / (tp + fp + tn + fn)
precision = tp / (fp + tp)
recall = tp / (fn + tp)

print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    sep="\n",
)

True Negative: 24
False Positive: 475
True Positive: 488
False Negative: 11
Long Prompts: 0
Accuracy: 0.5130260521042084
Precision: 0.5067497403946002
Recall: 0.9779559118236473


## Few-shot

### Define Prompt Format ( 1 real 1 normal real )

In [None]:
def _llama2_two_shot_prompt(title, context):
    """Render the two-shot prompt in the Llama 2 chat layout (<s>[INST] <<SYS>> ... [/INST])."""
    return f"""<s>[INST] <<SYS>>
You are an expert in identifying fake news and disinformation. Please identify whether the piece of twit is real or fake.
Please think step-by-step as you answer the question. However, please only respond with \'real\' if the news is real or \'fake\' if the news is fake.
Do not respond with any other words or phrases.
Here are some examples for you:
title: Justin Bieber’s Showering Selena Gomez With TLC To Help Her Prepare For AMAs
context: Justin Bieber has been ‘so amazing’ for Selena Gomez since their reunion. Not only has he been giving her ‘so much support’ ahead of her AMAs performance, but he’s also been taking care of her. Here’s how!

“[Selena Gomez] has been rehearsing like crazy, but she’s still dealing with a lot of nerves ahead of the [American Music Awards]. Justin [Bieber]‘s been amazing… he’s giving her so much support, telling her how great she’s going to do. He’s been pumping her up, saying exactly what she needs to hear,” a source close to the singer tells HollywoodLife.com EXCLUSIVELY. “Justin hates seeing Selena stress out. She’s been so busy getting ready for the AMAs, she doesn’t have as much time to take care of herself. Justin worries that she doesn’t eat enough, so the other day when she was in rehearsals, he had her favorite deli in Beverly Hills deliver some soup. It was incredibly thoughtful — you can see why Selena’s fallen for him again.”

OMG! We’re swooning over how cute they are together. It sounds like Justin has certainly stepped up his game, and he’s treating Selena right this time around. And if you ask us, the timing couldn’t be any better. Performing at the American Music Awards, for the first time since her kidney transplant, can’t be an easy task for Selena, so having Justin there to support her and give her some TLC must be nice. Plus, with Justin helping her every step of the way, we can only imagine her performance on Nov. 19 may just be her best one ever!

A press release by ABC recently announced Selena’s performance and said she’ll be singing “Wolves”. “The influential and inspiring pop superstar, who continues to redefine her role as an artist, is a fan favorite at the American Music Awards having taken home the award for Favorite Female Artist – Pop/Rock in 2016, as well as providing show-stopping performances of her powerful track ‘Same Old Love’ in 2015 and her passionate song ‘The Heart Wants What It Wants’ in 2014. A truly gifted singer, Gomez makes her return to the AMA stage with the world television premiere of her brand new single ‘Wolves’,” they said in a statement. Are you excited?

HollywoodLifers, how do YOU feel about Justin Bieber pampering Selena Gomez before the AMAs? Don’t you just love them together? Tell us below!
label: fake
title: Mindy Kaling makes first post-baby appearance at Disneyland with her 'Wrinkle in Time' co-stars
context: Motherhood looks good on Mindy Kaling!

The new mom made her first public appearance on Thursday when she joined her A Wrinkle in Time co-stars -- Oprah Winfrey, Reese Witherspoon and Storm Reid -- at Disneyland in Anaheim, California.

Kaling, 38, looked slim and stunning in a polka dot dress and strappy heels that went well with her mouse ears. The Mindy Project star clearly had a great time with her co-stars at the happiest place on Earth as she posted several pics on Instagram.

'Mindy Mouse,' she captioned one of the images.

Kaling gave birth to daughter Katherine in December and in the birth certificate obtained by ET, she did not list her baby's father's name.

The birth certificate does list Katherine's birthday as Dec. 15, and in a sweet tribute to Kaling's late mother, Swati Roysircar, she decided to give her daughter the middle name Swati.

A Wrinkle in Time hits theaters on March 9.
label: real
<</SYS>>

title: {title}
context: {context}
label: [/INST]
"""


def _llama3_two_shot_prompt(title, context):
    """Render the two-shot prompt in the plain '### News' / '### Label' layout used for Llama 3."""
    return f"""### News
Title: Justin Bieber’s Showering Selena Gomez With TLC To Help Her Prepare For AMAs

Context: Justin Bieber has been ‘so amazing’ for Selena Gomez since their reunion. Not only has he been giving her ‘so much support’ ahead of her AMAs performance, but he’s also been taking care of her. Here’s how!

“[Selena Gomez] has been rehearsing like crazy, but she’s still dealing with a lot of nerves ahead of the [American Music Awards]. Justin [Bieber]‘s been amazing… he’s giving her so much support, telling her how great she’s going to do. He’s been pumping her up, saying exactly what she needs to hear,” a source close to the singer tells HollywoodLife.com EXCLUSIVELY. “Justin hates seeing Selena stress out. She’s been so busy getting ready for the AMAs, she doesn’t have as much time to take care of herself. Justin worries that she doesn’t eat enough, so the other day when she was in rehearsals, he had her favorite deli in Beverly Hills deliver some soup. It was incredibly thoughtful — you can see why Selena’s fallen for him again.”

OMG! We’re swooning over how cute they are together. It sounds like Justin has certainly stepped up his game, and he’s treating Selena right this time around. And if you ask us, the timing couldn’t be any better. Performing at the American Music Awards, for the first time since her kidney transplant, can’t be an easy task for Selena, so having Justin there to support her and give her some TLC must be nice. Plus, with Justin helping her every step of the way, we can only imagine her performance on Nov. 19 may just be her best one ever!

A press release by ABC recently announced Selena’s performance and said she’ll be singing “Wolves”. “The influential and inspiring pop superstar, who continues to redefine her role as an artist, is a fan favorite at the American Music Awards having taken home the award for Favorite Female Artist – Pop/Rock in 2016, as well as providing show-stopping performances of her powerful track ‘Same Old Love’ in 2015 and her passionate song ‘The Heart Wants What It Wants’ in 2014. A truly gifted singer, Gomez makes her return to the AMA stage with the world television premiere of her brand new single ‘Wolves’,” they said in a statement. Are you excited?

HollywoodLifers, how do YOU feel about Justin Bieber pampering Selena Gomez before the AMAs? Don’t you just love them together? Tell us below!

### Label
fake

### News
Title: Mindy Kaling makes first post-baby appearance at Disneyland with her 'Wrinkle in Time' co-stars

Context: Motherhood looks good on Mindy Kaling!

The new mom made her first public appearance on Thursday when she joined her A Wrinkle in Time co-stars -- Oprah Winfrey, Reese Witherspoon and Storm Reid -- at Disneyland in Anaheim, California.

Kaling, 38, looked slim and stunning in a polka dot dress and strappy heels that went well with her mouse ears. The Mindy Project star clearly had a great time with her co-stars at the happiest place on Earth as she posted several pics on Instagram.

'Mindy Mouse,' she captioned one of the images.

Kaling gave birth to daughter Katherine in December and in the birth certificate obtained by ET, she did not list her baby's father's name.

The birth certificate does list Katherine's birthday as Dec. 15, and in a sweet tribute to Kaling's late mother, Swati Roysircar, she decided to give her daughter the middle name Swati.

A Wrinkle in Time hits theaters on March 9.

### Label
real

### News
Title: {title}

Context: {context}

### Label
"""


def create_two_shot_prompt(sample, model_family="llama3"):
    """Build a two-shot (one fake example, one real example) classification prompt.

    Args:
        sample: Mapping-like row that provides 'title' and 'content' entries
            (a dict or a DataFrame row both work).
        model_family: Which prompt layout to render.
            "llama3" (default, the previous behaviour) produces the
            '### News' / '### Label' layout that ends with an open
            '### Label' header.
            "llama2" produces the '<s>[INST] <<SYS>> ... [/INST]' chat layout
            that ends with 'label: [/INST]' followed by a newline.

    Returns:
        The prompt string with the sample's title and content substituted in.

    Raises:
        ValueError: If ``model_family`` is not "llama2" or "llama3".
    """
    title = sample['title']
    context = sample['content']

    if model_family == "llama3":
        return _llama3_two_shot_prompt(title, context)
    if model_family == "llama2":
        return _llama2_two_shot_prompt(title, context)
    raise ValueError(
        f"Unknown model_family {model_family!r}; expected 'llama2' or 'llama3'"
    )

#### Llama 3 Testing

In [None]:
# Smoke test: a sensational, fabricated headline with an empty article body.
fake_news = dict(
    title="OMG! Watch the Horrible Consequences of the War Between Sweden and Yemen!!!",
    content="",
)

p = create_two_shot_prompt(fake_news)

# Show the generated text of the first result returned by the generator.
print(text_generator(p)[0]["generated_text"])

### News
Title: Justin Bieber’s Showering Selena Gomez With TLC To Help Her Prepare For AMAs

Context: Justin Bieber has been ‘so amazing’ for Selena Gomez since their reunion. Not only has he been giving her ‘so much support’ ahead of her AMAs performance, but he’s also been taking care of her. Here’s how!

“[Selena Gomez] has been rehearsing like crazy, but she’s still dealing with a lot of nerves ahead of the [American Music Awards]. Justin [Bieber]‘s been amazing… he’s giving her so much support, telling her how great she’s going to do. He’s been pumping her up, saying exactly what she needs to hear,” a source close to the singer tells HollywoodLife.com EXCLUSIVELY. “Justin hates seeing Selena stress out. She’s been so busy getting ready for the AMAs, she doesn’t have as much time to take care of herself. Justin worries that she doesn’t eat enough, so the other day when she was in rehearsals, he had her favorite deli in Beverly Hills deliver some soup. It was incredibly thoughtful 

In [None]:
# Same fabricated story, but with a calmer headline and still no article body.
fake_news_edited = dict(
    title="The sad consequences of the War Between Sweden and Yemen",
    content="",
)

p = create_two_shot_prompt(fake_news_edited)

# Show the generated text of the first result returned by the generator.
print(text_generator(p)[0]["generated_text"])

### News
Title: Justin Bieber’s Showering Selena Gomez With TLC To Help Her Prepare For AMAs

Context: Justin Bieber has been ‘so amazing’ for Selena Gomez since their reunion. Not only has he been giving her ‘so much support’ ahead of her AMAs performance, but he’s also been taking care of her. Here’s how!

“[Selena Gomez] has been rehearsing like crazy, but she’s still dealing with a lot of nerves ahead of the [American Music Awards]. Justin [Bieber]‘s been amazing… he’s giving her so much support, telling her how great she’s going to do. He’s been pumping her up, saying exactly what she needs to hear,” a source close to the singer tells HollywoodLife.com EXCLUSIVELY. “Justin hates seeing Selena stress out. She’s been so busy getting ready for the AMAs, she doesn’t have as much time to take care of herself. Justin worries that she doesn’t eat enough, so the other day when she was in rehearsals, he had her favorite deli in Beverly Hills deliver some soup. It was incredibly thoughtful 

### Evaluation on first Dataset

Add the column:

In [None]:
# Placeholder column for the Llama 2 two-shot predictions. It is created with
# object dtype (value 0, as before) because the evaluation loop writes strings
# such as 'real' / 'false' into it; a plain `= 0` creates an int64 column, and
# writing a string into an int64 column is a deprecated silent upcast in
# pandas >= 2.1.
politifact_df_cleaned_2['two_shot_1'] = pd.Series(
    0, index=politifact_df_cleaned_2.index, dtype=object
)

In [None]:
# Placeholder column for the Llama 3 two-shot predictions. It is created with
# object dtype (value 0, as before) because the evaluation loop writes strings
# such as 'real' / 'false' into it; a plain `= 0` creates an int64 column, and
# writing a string into an int64 column is a deprecated silent upcast in
# pandas >= 2.1.
politifact_df_cleaned_2['two_shot_llama_3'] = pd.Series(
    0, index=politifact_df_cleaned_2.index, dtype=object
)

In [None]:
# Preview the first five rows to confirm the new prediction columns exist.
politifact_df_cleaned_2.head(n=5)

Unnamed: 0,id,title,content,label,zero_shot_llama_3,two_shot_llama_3,two_shot_1
0,13501,ALEXISTOGEL 🔥 Situs Togel Online dan Slot Onli...,Situs Togel Online dan Slot Online Gacor Trust...,fake,False,0,0
1,13468,Denzel Washington hit by Facebook fake news st...,A fabricated news story claiming Hollywood act...,fake,False,0,0
2,13038,Log into Facebook,Notice\n\nYou must log in to continue.,fake,False,0,0
3,13467,Mental Images,Mental Images\n\n“My pictures ask where does t...,fake,False,0,0
4,11773,Virginia Republican Wants Schools To Check Chi...,Republican attacks on transgendered Americans ...,fake,False,0,0


### Llama 2

In [None]:
# Two-shot evaluation over the PolitiFact frame; one prediction per row is
# written to the 'two_shot_1' column ('real', 'false', or an 'Invalid ...' tag).
#
# NOTE(review): the answer is located via the "[/INST]\n" tag, so this cell
# expects create_two_shot_prompt to be producing the Llama 2 chat layout when
# it runs — confirm before re-running.
# NOTE(review): the 4096 limit is applied to the prompt's character count, not
# its token count — confirm that is the intended check.
# NOTE(review): do_sample=True makes the predictions non-deterministic between
# runs — confirm sampling is wanted for a classification benchmark.
counter = 0
for idx, sample in politifact_df_cleaned_2.iterrows():
    sample_id = sample['id']  # renamed from `id` so the builtin is not shadowed
    prompt = create_two_shot_prompt(sample)

    counter += 1

    if len(prompt) > 4096:
        politifact_df_cleaned_2.at[idx, 'two_shot_1'] = 'Invalid length'
        print(f'[{counter}] Invalid length for {sample_id}: {len(prompt)}')
        continue

    output = text_generator(
        prompt,
        do_sample=True
    )[0]

    marker = "[/INST]\n"
    marker_index = output["generated_text"].find(marker)

    if marker_index == -1:
        politifact_df_cleaned_2.at[idx, 'two_shot_1'] = 'Invalid answer format'
        print(f'[{counter}] Invalid answer format for {sample_id}')
        continue

    # Everything after the instruction tag is treated as the model's answer.
    final_answer = output["generated_text"][marker_index + len(marker) :].lower()

    # Decide by whichever label word appears FIRST. Testing 'real' before
    # 'fake' unconditionally would store 'real' for any answer that mentions
    # both words (e.g. "fake, not real").
    real_pos = final_answer.find('real')
    fake_pos = final_answer.find('fake')

    if real_pos != -1 and (fake_pos == -1 or real_pos < fake_pos):
        politifact_df_cleaned_2.at[idx, 'two_shot_1'] = 'real'
        print(f"[{counter}] ID: {sample_id}: real, {final_answer}")
    elif fake_pos != -1:
        # Fake predictions are stored as 'false'; the metrics cell compares
        # against that exact value.
        politifact_df_cleaned_2.at[idx, 'two_shot_1'] = 'false'
        print(f"[{counter}] ID: {sample_id}: fake, {final_answer}")
    else:
        politifact_df_cleaned_2.at[idx, 'two_shot_1'] = 'Invalid final answer format'
        print(f'[{counter}] Invalid final answer format for {sample_id}: {final_answer}')

In [None]:
# Confusion-matrix counts for the 'two_shot_1' predictions on the PolitiFact
# frame, with 'real' as the positive class. Predictions are stored as
# 'real' / 'false'; skipped prompts carry 'Invalid length'.
tn = (politifact_df_cleaned_2['label'].eq('fake') & politifact_df_cleaned_2['two_shot_1'].eq('false')).sum()
fp = (politifact_df_cleaned_2['label'].eq('fake') & politifact_df_cleaned_2['two_shot_1'].eq('real')).sum()
tp = (politifact_df_cleaned_2['label'].eq('real') & politifact_df_cleaned_2['two_shot_1'].eq('real')).sum()
fn = (politifact_df_cleaned_2['label'].eq('real') & politifact_df_cleaned_2['two_shot_1'].eq('false')).sum()
long_prompts = politifact_df_cleaned_2.loc[politifact_df_cleaned_2['two_shot_1'].eq('Invalid length')]

print(f"True Negative: {tn}")
print(f"False Positive: {fp}")
print(f"True Positive: {tp}")
print(f"False Negative: {fn}")
print(f"Long Prompts: {len(long_prompts)}")

# Only rows with a parsed 'real' / 'false' prediction enter the ratios below;
# rows carrying any other value are left out of the denominators.
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (fp + tp)
recall = tp / (fn + tp)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

True Negative: 0
False Positive: 33
True Positive: 66
False Negative: 0
Long Prompts: 665
Accuracy: 0.6666666666666666
Precision: 0.6666666666666666
Recall: 1.0


### Llama 3

In [None]:
# Two-shot evaluation over the PolitiFact frame; one prediction per row is
# written to the 'two_shot_llama_3' column ('real', 'false', or an
# 'Invalid ...' tag).
#
# NOTE(review): the 8000 limit is applied to the prompt's character count, not
# its token count — confirm that is the intended check.
# NOTE(review): do_sample=True makes the predictions non-deterministic between
# runs — confirm sampling is wanted for a classification benchmark.
counter = 0
for idx, sample in politifact_df_cleaned_2.iterrows():
    sample_id = sample['id']  # renamed from `id` so the builtin is not shadowed
    prompt = create_two_shot_prompt(sample)

    counter += 1

    if len(prompt) > 8000:
        politifact_df_cleaned_2.at[idx, 'two_shot_llama_3'] = 'Invalid length'
        print(f'[{counter}] Invalid length for {sample_id}: {len(prompt)}')
        continue

    output = text_generator(
        prompt,
        do_sample=True
    )[0]
    generated_text = output["generated_text"]

    # The answer follows the THIRD "### Label" header: the first two belong to
    # the in-prompt examples and the third is the open slot for this sample.
    # marker_index ends up as that third position, or -1 if there are fewer.
    marker = "### Label"
    marker_index = -1
    search_from = 0
    for _ in range(3):
        marker_index = generated_text.find(marker, search_from)
        if marker_index == -1:
            break
        search_from = marker_index + 1

    if marker_index == -1:
        politifact_df_cleaned_2.at[idx, 'two_shot_llama_3'] = 'Invalid answer format'
        print(f'[{counter}] Invalid answer format for {sample_id}')
        continue

    final_answer = generated_text[marker_index + len(marker) :].lower()

    # The model tends to keep writing further "### news" sections after its
    # answer. Only the text before the next "###" header is the answer;
    # scanning the continuation misreads e.g. "fake ... title: the real
    # reason" as 'real'.
    answer = final_answer.split('###', 1)[0].strip()

    # Decide by whichever label word appears first in the trimmed answer.
    real_pos = answer.find('real')
    fake_pos = answer.find('fake')

    if real_pos != -1 and (fake_pos == -1 or real_pos < fake_pos):
        politifact_df_cleaned_2.at[idx, 'two_shot_llama_3'] = 'real'
        print(f"[{counter}] ID: {sample_id}: real, {answer}")
    elif fake_pos != -1:
        # Fake predictions are stored as 'false'; the metrics cells compare
        # against that exact value.
        politifact_df_cleaned_2.at[idx, 'two_shot_llama_3'] = 'false'
        print(f"[{counter}] ID: {sample_id}: fake, {answer}")
    else:
        politifact_df_cleaned_2.at[idx, 'two_shot_llama_3'] = 'Invalid final answer format'
        print(f'[{counter}] Invalid final answer format for {sample_id}: {answer}')

[1] Invalid length for 13501: 9707
[2] ID: 13468: fake, 
fake

### news
title: miley cyrus
[3] ID: 13038: fake, 
fake

### news
title: the 10
[4] ID: 13467: real, 
real

### news
title: the 10
[5] ID: 11773: real, 
real

### news
title: justin bieber's
[6] ID: 13475: fake, 
fake

### news
title: justin bieber and
[7] ID: 13496: fake, 
fake

### news
title: the 10
[8] ID: 13484: real, 
real

### news
title: justin bieber's
[9] ID: 13584: fake, 
fake

### news
title: justin bieber and
[10] ID: 13561: fake, 
fake

### news
title: breaking:
[11] ID: 13570: fake, 
fake

### news
title: scientists discover cure
[12] ID: 13557: fake, 
fake

### news
title: scientists discover cure
[13] ID: 13589: fake, 
fake

### news
title: the most shock
[14] ID: 13577: fake, 
fake

### news
title: the secret to
[15] ID: 13565: fake, 
fake

### news
title: the most popular
[16] ID: 13560: real, 
fake

### news
title: the real reason
[17] ID: 13576: fake, 
fake

### news
title: nasa discovers
[18] ID: 13559:

In [None]:
# Confusion-matrix counts for the Llama 3 two-shot run on the PolitiFact frame,
# with 'real' as the positive class.
# These read 'two_shot_llama_3', the column the Llama 3 evaluation loop writes.
# Reading 'two_shot_1' here would just re-report the Llama 2 results.
# Predictions are stored as 'real' / 'false'; skipped prompts carry
# 'Invalid length'.
tn = ((politifact_df_cleaned_2['label'] == 'fake') & ('false' == politifact_df_cleaned_2['two_shot_llama_3'])).sum()
fp = ((politifact_df_cleaned_2['label'] == 'fake') & ('real' == politifact_df_cleaned_2['two_shot_llama_3'])).sum()
tp = ((politifact_df_cleaned_2['label'] == 'real') & ('real' == politifact_df_cleaned_2['two_shot_llama_3'])).sum()
fn = ((politifact_df_cleaned_2['label'] == 'real') & ('false' == politifact_df_cleaned_2['two_shot_llama_3'])).sum()
long_prompts = politifact_df_cleaned_2[politifact_df_cleaned_2['two_shot_llama_3'] == 'Invalid length']

print(f"True Negative: {tn}")
print(f"False Positive: {fp}")
print(f"True Positive: {tp}")
print(f"False Negative: {fn}")
print(f"Long Prompts: {len(long_prompts)}")

# Only rows with a parsed 'real' / 'false' prediction enter the ratios below;
# rows carrying any other value are left out of the denominators.
accuracy = (tn + tp) / (tn + fp + tp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

### Evaluation of Paraphrased Dataset

In [None]:
# Load the original and the paraphrased GossipCop CSVs, in that order
# (paths are relative to the current working directory).
normal_df, paraphrased_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "gossipcop_complete_cleaned_less512.csv",
        "gossipcop_paraphrased (2).csv",
    )
)

In [None]:
# Drop rows whose 'paraphrased' value is the literal string '0'
# (presumably a placeholder for "no paraphrase available" — TODO confirm).
filtered_paraphrased_df = paraphrased_df.loc[paraphrased_df['paraphrased'].ne('0')]

# How many paraphrased rows survived the filter.
num_rows = filtered_paraphrased_df.shape[0]

# Take at most that many 'real' rows from the original dataset, in file order.
filtered_normal_df = normal_df.loc[normal_df['label'].eq('real')].iloc[:num_rows]

In [None]:
from datasets import Dataset

# Bring both frames to the same four columns: id, title, content, labels.
# For the paraphrased rows the paraphrased text takes the place of 'content'.
paraphrased_data = (
    filtered_paraphrased_df
    .loc[:, ['id', 'title', 'paraphrased', 'label']]
    .rename(columns={'paraphrased': 'content', 'label': 'labels'})
)
normal_data = (
    filtered_normal_df
    .loc[:, ['id', 'title', 'content', 'label']]
    .rename(columns={'label': 'labels'})
)

# Stack paraphrased rows first, then the original rows, with a fresh 0..n-1 index.
merged_df = pd.concat([paraphrased_data, normal_data], ignore_index=True)




In [None]:
# Placeholder column for the two-shot predictions. It is created with object
# dtype (value 0, as before) because the evaluation loop writes strings such
# as 'real' / 'false' into it; a plain `= 0` creates an int64 column, and
# writing a string into an int64 column is a deprecated silent upcast in
# pandas >= 2.1.
merged_df['two_shot_1'] = pd.Series(0, index=merged_df.index, dtype=object)

In [None]:
# Preview the first five rows of the merged frame, including the new column.
merged_df.head(n=5)

Unnamed: 0,id,title,content,labels,two_shot_1
0,-4051111882,Justin Bieber’s Showering Selena Gomez With TL...,justin bieber has been going above and beyond ...,fake,0
1,-4081333587,Zayn Malik shared the gas happy birthday video...,"\nryan reynolds, known for his playful sense o...",fake,0
2,-4060575239,Kourtney Kardashian Breaks Down Over 'Evil Hum...,\nkourtney kardashian and kim kardashian's lon...,fake,0
3,-4050914559,"The Weeknd, Selena Gomez Quotes Posted By Holl...",\nthe weeknd and selena gomez's romance has be...,fake,0
4,-4024992311,"Tom Holland Not Trying To Date Marisa Tomei, D...",\nas an reputable and trustworthy source of en...,fake,0


In [None]:
counter = 0
for idx, sample in merged_df.iterrows():
    id = sample['id']
    prompt = create_two_shot_prompt(sample)

    counter += 1

    if len(prompt) > 4096:
        merged_df.at[idx, 'two_shot_1'] = 'Invalid length'
        print(f'[{counter}] Invalid length for {id}: {len(prompt)}')
        continue

    output = text_generator(
        prompt,
        do_sample=True
    )[0]

    marker = "[/INST]\n"
    marker_index = output["generated_text"].find(marker)

    if marker_index != -1:
        final_answer = output["generated_text"][marker_index + len(marker) :].lower()

        # print(output["generated_text"])
        # print('-------------------------------------------------------------------------------------------------------------------')

        if 'real' in final_answer:
            merged_df.at[idx, 'two_shot_1'] = 'real'
            print(f"[{counter}] ID: {id}: real, {final_answer}")
        elif 'fake' in final_answer:
            merged_df.at[idx, 'two_shot_1'] = 'false'
            print(f"[{counter}] ID: {id}: fake, {final_answer}")
        else:
            merged_df.at[idx, 'two_shot_1'] = 'Invalid final answer format'
            print(f'[{counter}] Invalid final answer format for {id}: {final_answer}')
    else:
        merged_df.at[idx, 'two_shot_1'] = 'Invalid answer format'
        print(f'[{counter}] Invalid answer format for {id}')

[1] Invalid length for -4051111882: 5521
[2] Invalid length for -4081333587: 4839
[3] Invalid length for -4060575239: 5129
[4] Invalid length for -4050914559: 4794
[5] Invalid length for -4024992311: 4557
[6] Invalid length for -4038677075: 4998
[7] ID: -4005203173: real, real
[8] Invalid length for -4047356713: 4638
[9] Invalid length for -4037512632: 5682
[10] ID: -40168647: real, real
[11] Invalid length for -40109120: 4490
[12] Invalid length for -4006218901: 4548
[13] Invalid length for -3967904178: 5282
[14] Invalid length for -3998725211: 5713
[15] Invalid length for -3996753978: 5256
[16] Invalid length for -3976046218: 4853
[17] Invalid length for -3970231049: 6079
[18] Invalid length for -3975938427: 5352
[19] ID: -3977674152: real, real
[20] Invalid length for -3999881528: 4643
[21] ID: -3968227653: real, real
[22] Invalid length for -3976443648: 5075
[23] Invalid length for -3941312971: 4998
[24] ID: -3952009039: real, real
[25] Invalid length for -3946652977: 5962
[26] Inv

In [None]:
# Confusion-matrix counts for the 'two_shot_1' predictions on the merged
# frame, with 'real' as the positive class. Predictions are stored as
# 'real' / 'false'; skipped prompts carry 'Invalid length'.
tn = (merged_df['labels'].eq('fake') & merged_df['two_shot_1'].eq('false')).sum()
fp = (merged_df['labels'].eq('fake') & merged_df['two_shot_1'].eq('real')).sum()
tp = (merged_df['labels'].eq('real') & merged_df['two_shot_1'].eq('real')).sum()
fn = (merged_df['labels'].eq('real') & merged_df['two_shot_1'].eq('false')).sum()
long_prompts = merged_df.loc[merged_df['two_shot_1'].eq('Invalid length')]

print(f"True Negative: {tn}")
print(f"False Positive: {fp}")
print(f"True Positive: {tp}")
print(f"False Negative: {fn}")
print(f"Long Prompts: {len(long_prompts)}")

# Only rows with a parsed 'real' / 'false' prediction enter the ratios below;
# rows carrying any other value are left out of the denominators.
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (fp + tp)
recall = tp / (fn + tp)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

True Negative: 0
False Positive: 53
True Positive: 68
False Negative: 1
Long Prompts: 876
Accuracy: 0.5573770491803278
Precision: 0.5619834710743802
Recall: 0.9855072463768116


## Soft Prompt Tuning

In [None]:
# TO DO