In [11]:
import random
from transformers import pipeline
import pandas as pd
from tqdm import tqdm


# Load the dataset
data = pd.read_csv("trim.csv")  # Replace with your file name
assert "title" in data.columns and "label" in data.columns

# Shuffle and split the dataset
random.seed(42)
data = data.sample(frac=1, random_state=42).reset_index(drop=True)
train_data = data.iloc[:3500]
test_data = data.iloc[4000:]

In [17]:
# Randomly select 5 examples for ICL
five_shot_examples = train_data.sample(n=5, random_state=40)

# Mapping function
def map_label_to_text(label):
    return "Fake" if label == 1 else "Real"

# Format the 5-shot prompt
def format_icl_prompt(five_shot, test_title):
    prompt = ""
    for _, row in five_shot.iterrows():
        label_text = map_label_to_text(row['label'])
        prompt += f"Title: {row['title']}\nLabel: {label_text}\n\n"
    prompt += f"Title: {test_title}\nLabel:"
    return prompt

format_icl_prompt(five_shot_examples, "")

"Title: BOOM! WATCH TREY GOWDY Scorch FBI Director Comey In House Hearing On Hillary’s E-mail Scandal [Video]\nLabel: Fake\n\nTitle: Noah Wall: After Years of GOP Repeal Bills Under Obama, Now ’They Back Down’\nLabel: Real\n\nTitle: 10 REASONS A VOTE RECOUNT Is A Really Bad Idea For America\nLabel: Fake\n\nTitle: California's Death Penalty: Mike Ramos v. Kevin Cooper and Proposition 62\nLabel: Fake\n\nTitle: U.S. not coordinating Syria military operations with Russia: White House\nLabel: Real\n\nTitle: \nLabel:"

In [None]:
icl_model = pipeline("text-generation", model="meta-llama/Meta-Llama-3-8B", device_map="auto")  # Use the appropriate Llama model path


In [7]:
# Run ICL on the test set
correct = 0
total = 0

for _, test_row in tqdm(test_data.iterrows()):
    test_prompt = format_icl_prompt(five_shot_examples, test_row['title'])
    # input_tokens = tokenizer.tokenizer(test_prompt)
    
    result = icl_model(test_prompt, max_new_tokens=4, do_sample=False)[0]['generated_text']
    
    
    # Extract the prediction and map it back to label
    predicted_label_text = result.split("Label:")[-1].strip().split()[0]
    predicted_label = 1 if predicted_label_text == "Fake" else 0
    if predicted_label == test_row['label']:
        correct += 1
    total += 1
    
    print(result, correct/total)

# Calculate accuracy
accuracy = correct / total
print(f"ICL Test Set Accuracy: {accuracy * 100:.2f}%")

0it [00:00, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
1it [00:06,  6.74s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Title: Busted: Bill Recorded Telling Mistress To Deny That Clintons Helped Her Get A State Job
Label: Fake

Title:  Paul Ryan: There Will Be No Mass Deportations (VIDEO)
Label: Fake

Title: EPIC RESPONSE AFTER THE BOSTON GLOBE Runs Fake Cover Bashing Trump…THIS IS GREAT!
Label: Fake

Title: At least 20 hostages dead in siege of hotel in Mali, official says
Label: Real

Title: Cecile Richards Credits Planned Parenthood Supporters with Stopping AHCA - Breitbart
Label: Real

Title: Alvin Ailey’s Robert Battle on His First Real Dance Shoes - The New York Times
Label: Real

Title: 1.0


2it [00:13,  6.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Title: Busted: Bill Recorded Telling Mistress To Deny That Clintons Helped Her Get A State Job
Label: Fake

Title:  Paul Ryan: There Will Be No Mass Deportations (VIDEO)
Label: Fake

Title: EPIC RESPONSE AFTER THE BOSTON GLOBE Runs Fake Cover Bashing Trump…THIS IS GREAT!
Label: Fake

Title: At least 20 hostages dead in siege of hotel in Mali, official says
Label: Real

Title: Cecile Richards Credits Planned Parenthood Supporters with Stopping AHCA - Breitbart
Label: Real

Title: FLASHBACK VIDEO: JESSE JACKSON Praises Donald Trump For His Commitment To Bringing Blacks, Minorities Into Corporate America
Label: Real

Title: 0.5


3it [00:20,  6.74s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Title: Busted: Bill Recorded Telling Mistress To Deny That Clintons Helped Her Get A State Job
Label: Fake

Title:  Paul Ryan: There Will Be No Mass Deportations (VIDEO)
Label: Fake

Title: EPIC RESPONSE AFTER THE BOSTON GLOBE Runs Fake Cover Bashing Trump…THIS IS GREAT!
Label: Fake

Title: At least 20 hostages dead in siege of hotel in Mali, official says
Label: Real

Title: Cecile Richards Credits Planned Parenthood Supporters with Stopping AHCA - Breitbart
Label: Real

Title: Poll: Donald Trump, Ben Carson dominate GOP field as Fiorina falters
Label: Real

Title: 0.6666666666666666


4it [00:26,  6.74s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Title: Busted: Bill Recorded Telling Mistress To Deny That Clintons Helped Her Get A State Job
Label: Fake

Title:  Paul Ryan: There Will Be No Mass Deportations (VIDEO)
Label: Fake

Title: EPIC RESPONSE AFTER THE BOSTON GLOBE Runs Fake Cover Bashing Trump…THIS IS GREAT!
Label: Fake

Title: At least 20 hostages dead in siege of hotel in Mali, official says
Label: Real

Title: Cecile Richards Credits Planned Parenthood Supporters with Stopping AHCA - Breitbart
Label: Real

Title: KATHY GRIFFIN Lawyers Up with Ridiculous Claim Against the Trump Family
Label: Real

Title: 0.5


4it [00:28,  7.20s/it]


KeyboardInterrupt: 