In [None]:
# Delete every transformers* entry under the system dist-packages directories;
# the next cell installs the package again.
!rm -rf /usr/local/lib/python*/dist-packages/transformers*

In [None]:
# Install (or upgrade to the latest release of) transformers, datasets, evaluate and scikit-learn
!pip install -U transformers datasets evaluate scikit-learn

Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m76.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (1

In [None]:
import kagglehub
import pandas as pd
import numpy as np
import transformers
import torch
import os
# Set WANDB_DISABLED for this process (presumably to switch off Weights & Biases
# logging in the training code that follows — TODO confirm against the consumer).
os.environ["WANDB_DISABLED"] = "true"

## Load COVID dataset

In [None]:
# Fetch the Kaggle dataset "elvinagammed/covid19-fake-news-dataset-nlp";
# `path` is used by the following cells as the directory that holds its files.
path = kagglehub.dataset_download("elvinagammed/covid19-fake-news-dataset-nlp")

In [None]:
# Show which files the downloaded dataset directory contains.
files = os.listdir(path)
print(files)

['Constraint_Val.csv', 'Constraint_Train.xlsx', 'Constraint_Test.csv', 'Constraint_Test.xlsx', 'english_test_with_labels.csv', 'test_ernie2.0_results.csv', 'Constraint_Train.csv']


In [None]:
# Train data
# The tweets printed further down in this notebook show typical UTF-8-read-as-latin-1
# mojibake ("â...", "ï¿½"), so the file appears to be UTF-8 encoded. Decode it as
# UTF-8 and substitute U+FFFD for any invalid byte instead of garbling the text.
csv_file = f"{path}/Constraint_Train.csv"
covid_train = pd.read_csv(csv_file, encoding='utf-8', encoding_errors='replace')
# Keep only the tweet text and its real/fake label.
covid_train = covid_train[['tweet', 'label']]
covid_train.label.value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
real,3360
fake,3060


In [None]:
# Test data
# The tweets printed further down in this notebook show typical UTF-8-read-as-latin-1
# mojibake ("â...", "ï¿½"), so the file appears to be UTF-8 encoded. Decode it as
# UTF-8 and substitute U+FFFD for any invalid byte instead of garbling the text.
csv_file = f"{path}/english_test_with_labels.csv"  # replace with the actual file name
covid_test = pd.read_csv(csv_file, encoding='utf-8', encoding_errors='replace')
# Keep only the tweet text and its real/fake label.
covid_test = covid_test[['tweet', 'label']]
covid_test.label.value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
real,1120
fake,1020


In [None]:
# Validation data
# The tweets printed further down in this notebook show typical UTF-8-read-as-latin-1
# mojibake ("â...", "ï¿½"), so the file appears to be UTF-8 encoded. Decode it as
# UTF-8 and substitute U+FFFD for any invalid byte instead of garbling the text.
csv_file = f"{path}/Constraint_Val.csv"
covid_val = pd.read_csv(csv_file, encoding='utf-8', encoding_errors='replace')
# Keep only the tweet text and its real/fake label.
covid_val = covid_val[['tweet', 'label']]

covid_val.label.value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
real,1120
fake,1020


In [None]:
# Stack the three splits into one frame with a fresh 0..n-1 index.
corona_df = pd.concat([covid_train, covid_test, covid_val], ignore_index=True)

# Translate the dataset's labels: 'fake' -> 'false', 'real' -> 'true';
# any other value is kept as its string form.
label_map = {'fake': 'false', 'real': 'true'}
corona_df['label'] = [label_map.get(str(raw), str(raw)) for raw in corona_df['label']]

# The text column is called 'claim' from here on.
corona_df = corona_df.rename(columns={'tweet': 'claim'})

corona_df['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
True,5600
False,5100


# Azure - gpt-4-turbo

In [None]:
# Install the openai package, from which the AzureOpenAI client is imported below.
!pip install openai



In [None]:
# Install backoff, used below to wrap the API call in a retry decorator.
!pip install backoff

Collecting backoff
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Installing collected packages: backoff
Successfully installed backoff-2.2.1


## Generate synthetic false claims using gpt-4-turbo

In [None]:
from openai import AzureOpenAI
import pandas as pd
import time
import backoff
import os
import random

# === connecting Azure OpenAI ===
# Read the credentials from the environment so the real key never has to be pasted
# into (and saved with) the notebook. The original placeholders remain as fallbacks,
# so the cell behaves as before when the variables are not set.
client = AzureOpenAI(
    api_key=os.environ.get("AZURE_OPENAI_API_KEY", "<API-KEY>"),  # ← YOUR API KEY
    api_version="2024-04-01-preview",
    azure_endpoint=os.environ.get(
        "AZURE_OPENAI_ENDPOINT",
        "https://<AZURE-OPENAI-ENDPOINT>.openai.azure.com/"  # ← YOUR RESOURCE NAME
    )
)

# === files and settings ===
# Generation targets
target_claims = 4000            # total number of claims to collect
batch_size = 10                 # claims requested per API call
max_duration = 20 * 60 * 60     # wall-clock budget in seconds (20 hours)

# Persistent files
output_file = "/content/drive/MyDrive/GPT4turbo_false_claims_4K.csv"    # generated claims
corona_examples_file = "/content/drive/MyDrive/used_corona_claims.csv"  # priming claims already used

# === Resume from claims saved by an earlier run (if any) ===
if os.path.exists(output_file):
    # Continue where the previous run stopped: one dict per saved row.
    existing_df = pd.read_csv(output_file)
    generated_data = existing_df.to_dict(orient='records')
    print(f"🔄 Loaded {len(generated_data)} existing claims from file.")
else:
    # Nothing saved yet: start with an empty collection.
    generated_data = []

# === Restrict the covid dataset to false claims that have claim text ===
# The label comparison is case-insensitive and done on the string form of the label.
is_false_claim = corona_df["label"].astype(str).str.lower().eq("false")
corona_df = corona_df.loc[is_false_claim].dropna(subset=["claim"])

# === Restore the set of priming claims that earlier runs already used ===
if not os.path.exists(corona_examples_file):
    # First run: create the tracking file with just a "claim" header.
    pd.DataFrame(columns=["claim"]).to_csv(corona_examples_file, index=False)
    used_samples = set()
else:
    used_df = pd.read_csv(corona_examples_file)
    # Missing entries are dropped before building the set.
    used_samples = set(used_df["claim"].dropna().tolist())

# === Draw priming samples that have not been used before ===
def get_unique_priming_samples():
    """Sample four rows of `corona_df` whose claim is not yet in `used_samples`.

    When fewer than four unused claims are left, the usage history is cleared and
    the sample is drawn from the whole of `corona_df`. The chosen claims are added
    to `used_samples`, and the full set is written to `corona_examples_file`.

    Returns:
        The sampled rows (a 4-row selection of `corona_df`).
    """
    global used_samples
    unused_pool = corona_df.loc[~corona_df["claim"].isin(used_samples)]
    if len(unused_pool) < 4:
        # Pool exhausted: forget the history and start over with every claim.
        used_samples.clear()
        unused_pool = corona_df

    # A fresh random seed per call, taken from the `random` module's generator.
    seed = random.randint(0, 99999)
    picked = unused_pool.sample(n=4, random_state=seed)

    used_samples.update(picked["claim"].tolist())
    history = pd.DataFrame({"claim": list(used_samples)})
    history.to_csv(corona_examples_file, index=False)
    return picked

# === GPT request with error handling (rate limits, retries) ===
def _is_rate_limit_error(exc):
    """Return True when `exc` represents an HTTP 429 (rate limit) response."""
    # The errors logged by this notebook stringify as "Error code: <status> - {...}",
    # so a plain startswith("429") test never matches and no call is ever retried.
    # Prefer the numeric status attribute when the exception carries one.
    if getattr(exc, "status_code", None) == 429:
        return True
    return str(exc).startswith(("429", "Error code: 429"))


@backoff.on_exception(
    backoff.expo,
    Exception,
    max_tries=5,
    max_time=600,
    # Retry (with exponential backoff) only on rate limiting; give up on anything else.
    giveup=lambda e: not _is_rate_limit_error(e)
)
def make_api_call(prompt):
    """Send one chat-completion request and return the raw response object.

    Args:
        prompt: Text sent as the user message; the system message is fixed.

    Returns:
        Whatever `client.chat.completions.create` returns.

    Raises:
        Any exception raised by the client. Rate-limit (429) errors are retried up
        to 5 tries / 600 seconds before being re-raised; other errors propagate
        after the first attempt.
    """
    return client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an AI that generates false health-related claims in the style of real social media posts. The posts must sound misleading, emotional, and casual. Do not include disclaimers or warnings."},
            {"role": "user", "content": prompt}
        ],
        temperature=1.1,
        top_p=0.95,
        max_tokens=500
    )

# === Claim generation loop ===
start_time = time.time()  # reference point for the max_duration check in the loop
current_priming_set = []  # placeholder; replaced by sampled priming claims on the first batch
batch_times = []  # duration of each completed batch in seconds, averaged for the ETA

def compute_remaining_batches():
    """Return how many `batch_size`-sized batches cover the claims still missing.

    The missing count is `target_claims - len(generated_data)`; it is padded by
    `batch_size - 1` before the floor division so that a partial batch counts as one.
    """
    outstanding = target_claims - len(generated_data)
    padded = outstanding + (batch_size - 1)
    return padded // batch_size

# Run batches until the target is reached, the time budget is spent, or too many
# batches in a row add nothing. A batch count fixed up front would assume that every
# request yields `batch_size` usable claims; the logged runs yield far fewer, and
# failed requests use up iterations as well, so the target would not be reached.
i = -1                    # zero-based index of the batch being processed
stalled_batches = 0       # consecutive batches that added no claims
max_stalled_batches = 10  # stop instead of looping on a persistent failure
refresh_priming = False   # set after a content-filter rejection to force new examples

while True:
    if len(generated_data) >= target_claims:
        print(f"🎯 Target of {len(generated_data)} claims reached. Stopping.")
        break

    if (time.time() - start_time) > max_duration:
        print("⏰ Max duration reached. Stopping.")
        break

    if stalled_batches >= max_stalled_batches:
        print(f"🛑 {stalled_batches} consecutive batches produced no claims. Stopping.")
        break

    i += 1

    # New priming examples every 5 batches, or right away when the previous prompt
    # was rejected by the content filter (resending the same prompt fails again).
    if i % 5 == 0 or refresh_priming:
        current_priming_set = get_unique_priming_samples()
        refresh_priming = False
        print(f"\n🔁 Batch {i+1}: New priming examples:")
        for _, row in current_priming_set.iterrows():
            print(f"- {row['claim']}")

    style_examples = ""
    for _, row in current_priming_set.iterrows():
        style_examples += f"Claim: {row['claim']}\n\n"

    prompt = (
        style_examples +
        f"\nNow, generate {batch_size} new false health-related claims, "
        "in the same style as above. The claims should be about a wide variety of **non-COVID** health topics, covering diverse public health topics, written like social media posts. "
        "They should sound casual, emotional, misleading, and realistic."
    )

    claims_before = len(generated_data)
    try:
        batch_start_time = time.time()
        response = make_api_call(prompt)
        # The message content may be missing; treat that as an empty answer.
        output_text = response.choices[0].message.content or ""
        lines = [line.strip() for line in output_text.strip().split("\n") if line.strip()]

        # NOTE(review): only lines containing ":" are kept, and everything up to the
        # first ":" is discarded. Lines without a colon are dropped, which may explain
        # the low per-batch yield — confirm against the model's actual output format.
        for line in lines:
            if ":" in line and len(generated_data) < target_claims:
                _, claim = line.split(":", 1)
                claim = claim.strip()
                if claim and not claim.startswith("FALSE"):
                    claim_id = f"gpt_{len(generated_data)+1:05d}"
                    generated_data.append({
                        "id": claim_id,
                        "claim": claim,
                        "label": "FALSE"
                    })

        # saving output
        pd.DataFrame(generated_data).to_csv(output_file, index=False)

        # Track progress so that a run of empty answers ends the loop.
        if len(generated_data) > claims_before:
            stalled_batches = 0
        else:
            stalled_batches += 1

        # calculating ETA
        batch_duration = time.time() - batch_start_time
        batch_times.append(batch_duration)
        avg_batch_time = sum(batch_times) / len(batch_times)
        # compute_remaining_batches() already reflects the claims collected so far, so
        # the batch index must not be subtracted again. The estimate stays optimistic:
        # it assumes every remaining batch yields `batch_size` claims.
        batches_remaining = compute_remaining_batches()
        eta = batches_remaining * avg_batch_time
        eta_str = time.strftime('%H:%M:%S', time.gmtime(eta))

        print(f"✅ Batch {i+1} done. Total: {len(generated_data)} | Time: {batch_duration:.2f}s | ETA: {eta_str}")
        time.sleep(30)

    except Exception as e:
        print(f"⚠️ Error in batch {i+1}: {e}")
        error_text = str(e)
        if "429" in error_text:
            if "try again in 86400s" in error_text:
                # The loop ends here; it does not actually sleep for a day.
                print("🚨 Daily quota limit hit – pausing for 24h.")
                break
            print("🕒 Rate limit – sleeping for 5 minutes.")
            time.sleep(300)
        else:
            # Rate limits are waited out above; every other failure counts as a stall.
            stalled_batches += 1
            if "content_filter" in error_text:
                refresh_priming = True
        continue

# === Write the final result and report the outcome ===
df_out = pd.DataFrame(generated_data)
df_out.to_csv(output_file, index=False)

total_saved = len(df_out)
print(f"🎉 Finished: {total_saved} synthetic claims saved to {output_file}")

# Warn when the run ended before the requested number of claims was collected.
if total_saved < target_claims:
    print(f"⚠️ Final count below target ({target_claims}). Check model rate limits or other issues.")

🔄 Loaded 2656 existing claims from file.

🔁 Batch 1: New priming examples:
- Says Democrats are on vacation until May 4 and ???refuse to come back??ï¿½ to sign a bill to help small businesses.
- Nancy Pelosi Suggests That The Coronavirus (C-19) Be Renamed The (C-45) in Honor of President Trump https://t.co/sj9al7BaWb #donaldtrump
- A post about the doctor Marcos Vargas of the Santojanni Hospital died from treating patients with coronavirus.
- A video shows Italians or French people singing Madonna Katy Perry Britney Spears or Aya Nakamura songs on their balcony because of the lockdown measure in France and Italy.
✅ Batch 1 done. Total: 2660 | Time: 11.58s | ETA: 00:25:39
✅ Batch 2 done. Total: 2662 | Time: 30.78s | ETA: 00:46:35
✅ Batch 3 done. Total: 2663 | Time: 31.76s | ETA: 00:53:56
✅ Batch 4 done. Total: 2666 | Time: 33.02s | ETA: 00:58:02
✅ Batch 5 done. Total: 2667 | Time: 38.33s | ETA: 01:02:33

🔁 Batch 6: New priming examples:
- #Covaxin, India's indigenous #Covid19 vaccine is

ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 6: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 7: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 8: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 9: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 10: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'low'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}

🔁 Batch 11: New priming examples:
- Muslims intentionally licking the cutlery and plates to spread COVID-19.
- _Kenya's 2022 general elections have been postponed by five years until 2027 and the government will use the election budget to fight coronaviru

ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 21: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 22: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 23: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 24: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 25: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}

🔁 Batch 26: New priming examples:
- The internet will collapse globally due to the new coronavirus outbreak.
- Quarantine âNorth Koreanâ style... Shot dead. https://t.co/Lp6aTjU0QS #coronavirus #covid19 #covid_19 #coronavirus #covid19 #covid_19
- Wit

ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 31: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 32: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 33: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 34: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 35: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}

🔁 Batch 36: New priming examples:
- A Facebook post said âNot one politician has died from the virus.â This is False. Former Republican presidential candidate Herman Cain is perhaps the most well-known U.S. politician to die of complications from COV

ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 111: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 112: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 113: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 114: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


ERROR:backoff:Giving up make_api_call(...) after 1 tries (openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}})


⚠️ Error in batch 115: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': True, 'detected': True}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}

🔁 Batch 116: New priming examples:
- All the governmentâs coronavirus f**k-ups, a timeline https://t.co/ieKM99al3t
- Some countries throw Covid19 infected dead bodies into the seas. Advice to stop eating seafood.The World is really coming to an end.De

## Load and preprocess GPT-4-generated synthetic false claims

In [None]:
# Read the GPT-4-generated synthetic false claims from the CSV on Drive into a
# DataFrame; the bare name on the last line renders it as the cell output.
generated_claims = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/GPT4turbo_false_claims_4K.csv"
)
generated_claims

Unnamed: 0,id,claim,label
0,gpt_00001,Top nutritionists reveal that skipping breakfa...,False
1,gpt_00002,Scientists discovered that listening to classi...,False
2,gpt_00003,Eating dark chocolate every morning increases ...,False
3,gpt_00004,top dermatologists recommend washing hair with...,False
4,gpt_00005,Doctors are now prescribing listening to class...,False
...,...,...,...
3113,gpt_03114,New study reveals that turning off your Wi-Fi ...,False
3114,gpt_03115,Health experts declare that vegan diets cause ...,False
3115,gpt_03116,Harvard research shows that people who drink 3...,False
3116,gpt_03117,Standing barefoot on grass for 5 mins a day ca...,False


In [None]:
# Give the identifier column its final name: 'id' becomes 'claim_id'.
# Ending the cell on .columns displays the resulting column index.
generated_claims = generated_claims.rename(columns=dict(id="claim_id"))
generated_claims.columns

Index(['claim_id', 'claim', 'label'], dtype='object')

In [None]:
# Keep just the ID and text columns, drop rows where either value is missing,
# and keep the first row seen for each claim_id.
# NOTE(review): de-duplication is keyed on claim_id only, so identical claim
# texts stored under different IDs are retained — confirm that is intended.
generated_claims = (
    generated_claims
    .loc[:, ["claim_id", "claim"]]
    .dropna()
    .drop_duplicates(subset=["claim_id"])
)
generated_claims

Unnamed: 0,claim_id,claim
0,gpt_00001,Top nutritionists reveal that skipping breakfa...
1,gpt_00002,Scientists discovered that listening to classi...
2,gpt_00003,Eating dark chocolate every morning increases ...
3,gpt_00004,top dermatologists recommend washing hair with...
4,gpt_00005,Doctors are now prescribing listening to class...
...,...,...
3113,gpt_03114,New study reveals that turning off your Wi-Fi ...
3114,gpt_03115,Health experts declare that vegan diets cause ...
3115,gpt_03116,Harvard research shows that people who drink 3...
3116,gpt_03117,Standing barefoot on grass for 5 mins a day ca...


In [None]:
# Write the first 3000 rows (claim_id + claim) to Drive as the evaluation set.
# index=False keeps the positional row index out of the file.
generated_claims.iloc[:3000].to_csv(
    path_or_buf="/content/drive/MyDrive/claims_for_eval.csv",
    index=False,
)

In [None]:
# Read back the saved evaluation subset and display it.
# NOTE(review): the variable name `eval` shadows Python's builtin of the same
# name; the evaluation cell reads it via `eval.copy()`, so any rename has to be
# made in both places.
eval = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/claims_for_eval.csv"
)
eval

Unnamed: 0,claim_id,claim
0,gpt_00001,Top nutritionists reveal that skipping breakfa...
1,gpt_00002,Scientists discovered that listening to classi...
2,gpt_00003,Eating dark chocolate every morning increases ...
3,gpt_00004,top dermatologists recommend washing hair with...
4,gpt_00005,Doctors are now prescribing listening to class...
...,...,...
2995,gpt_02996,A naturopathic doctor reveals on her blog that...
2996,gpt_02997,A recent study found that drinking two glasses...
2997,gpt_02998,Experts now confirm that switching to a raw di...
2998,gpt_02999,Breaking news! Researchers in California have ...


## Evaluate the GPT-4-generated synthetic false claims

In [None]:
from openai import AzureOpenAI
import pandas as pd
import time
import backoff
import os
import json

# === Azure OpenAI setup ===
# The key and endpoint are read from the environment when set, so a real secret
# never has to be typed into (and saved with) the notebook. The placeholders are
# kept as fallbacks for anyone who still edits the values inline.
client = AzureOpenAI(
    api_key=os.environ.get("AZURE_OPENAI_API_KEY", "<API-KEY>"),  # ← YOUR API KEY
    api_version="2024-04-01-preview",
    azure_endpoint=os.environ.get(
        "AZURE_OPENAI_ENDPOINT",
        "https://<AZURE-OPENAI-ENDPOINT>.openai.azure.com/",  # ← YOUR RESOURCE NAME
    ),
)

# === Files and limits ===
output_file = "/content/drive/MyDrive/GPT4turbo_eval_claims.csv"
batch_size = 10  # claims processed between two saves / pauses
max_claims_to_score = 3000  # only used in the progress message below
max_duration = 12 * 3600  # up to 12 hours

claims = eval.copy()
evaluated_data = []    # one dict per scored claim; rewritten to output_file after each batch
evaluated_ids = set()  # claim_ids that already have a score and are skipped on resume

# Load previously saved evaluation results (if exist)
if os.path.exists(output_file):
    try:
        existing_df = pd.read_csv(output_file)
    except pd.errors.EmptyDataError:
        # The file exists but contains no header and no rows.
        existing_df = pd.DataFrame()

    # A file with neither rows nor a claim_id column carries no results (for
    # example one written by a run in which no claim was scored); start fresh
    # instead of failing on the column lookup. A non-empty file with an
    # unexpected schema still raises, so it is never silently overwritten.
    if not existing_df.empty or "claim_id" in existing_df.columns:
        evaluated_data = existing_df.to_dict('records')
        evaluated_ids = set(existing_df["claim_id"])
    print(f"🔄 Already evaluated: {len(evaluated_ids)} of {max_claims_to_score}")

def build_prompt(claim):
    """Build the user prompt that asks the model to score one claim.

    Args:
        claim: Text of the claim; it is inserted verbatim between double quotes
            on the ``Claim:`` line.

    Returns:
        A multi-line string that starts and ends with a newline. It lists the
        five 1–5 criteria and ends with an example JSON object whose keys are
        ``realism``, ``deceptiveness``, ``relevance``, ``topical_diversity``
        and ``expression_diversity``.
    """
    criteria = (
        "1. Realism: Does it sound like a real social media post by a human?",
        "2. Deceptiveness: Could someone reasonably be misled into believing it?",
        "3. Medical relevance: Is the claim related to a real health or medical topic, even if presented in a misleading or vague way?",
        "4. Topical diversity: Does this claim help diversify the dataset of false health claims by addressing a topic that is less typical or frequently covered?",
        "5. Expression diversity: Is the phrasing and tone of the claim somewhat distinct, avoiding obvious repetition or templated language?",
    )
    json_example = (
        "{",
        '  "realism": 4,',
        '  "deceptiveness": 5,',
        '  "relevance": 4,',
        '  "topical_diversity": 3,',
        '  "expression_diversity": 4',
        "}",
    )
    # The empty first and last entries reproduce the leading and trailing
    # newline of the prompt once everything is joined.
    prompt_lines = [
        "",
        "Evaluate the following *false* health-related claim using a 1–5 score for each criterion:",
        "",
        f'Claim: "{claim}"',
        "",
        *criteria,
        "",
        "Return a JSON like this:",
        *json_example,
        "",
    ]
    return "\n".join(prompt_lines)

def _is_rate_limit_error(exc):
    """Return True when `exc` represents an HTTP 429 (rate limit) response.

    The numeric ``status_code`` attribute is used when the exception has one.
    Otherwise the message text is inspected: OpenAI status errors are rendered
    as "Error code: <status> - ...", so a plain ``startswith("429")`` check
    never matches them.
    """
    status = getattr(exc, "status_code", None)
    if status is not None:
        return status == 429
    message = str(exc)
    return message.startswith("429") or "Error code: 429" in message


@backoff.on_exception(
    backoff.expo,
    Exception,
    max_tries=5,
    max_time=600,
    # Retry only rate-limit failures; anything else (e.g. a 400 content-filter
    # rejection) will not succeed on a retry and is raised immediately.
    giveup=lambda e: not _is_rate_limit_error(e)
)
def call_gpt(prompt):
    """Send one evaluation prompt to the chat model and return its reply text.

    Args:
        prompt: User message content, as produced by ``build_prompt``.

    Returns:
        The content of the first choice with surrounding whitespace stripped.

    Raises:
        Exception: Whatever the client raises. Rate-limit (429) errors are
            retried with exponential backoff, up to 5 tries or 600 seconds,
            before being re-raised; all other errors propagate on the first
            failure.
    """
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an expert evaluator of synthetic health misinformation claims."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.4,
        max_tokens=75
    )
    return response.choices[0].message.content.strip()

# === Evaluation loop ===
# Scores every not-yet-evaluated claim batch by batch, saving after each batch so
# that an interrupted run can be resumed from output_file.

# Per-criterion weights for the combined score. They sum to 1.0, so total_score
# stays on the same 1–5 scale as the individual criteria.
weights = {
    'realism': 0.3,
    'deceptiveness': 0.25,
    'relevance': 0.2,
    'topical_diversity': 0.15,
    'expression_diversity': 0.1
}

start_time = time.time()
scored_this_run = 0  # claims scored since start_time; results loaded from disk are excluded

for i in range(0, len(claims), batch_size):
    if (time.time() - start_time) > max_duration:
        print("⏰ Max duration reached. Stopping.")
        break

    # Drop claims that already have a saved score.
    batch = claims.iloc[i:i + batch_size]
    batch = batch[~batch["claim_id"].isin(evaluated_ids)]

    if batch.empty:
        continue

    for _, row in batch.iterrows():
        claim_id = row["claim_id"]
        claim_text = row["claim"]

        try:
            prompt = build_prompt(claim_text)
            response = call_gpt(prompt)

            try:
                # Parse only the outermost {...} span so that a reply wrapped in
                # Markdown fences or extra prose is still usable; a reply
                # without braces is passed to the parser unchanged.
                obj_start, obj_end = response.find("{"), response.rfind("}")
                payload = response[obj_start:obj_end + 1] if 0 <= obj_start < obj_end else response
                parsed = json.loads(payload)

                # A missing criterion raises KeyError here and is reported by
                # the handler below; the claim is then left unscored.
                total_score = sum(parsed[k] * w for k, w in weights.items())

                evaluated_data.append({
                    "claim_id": claim_id,
                    "realism": parsed.get("realism", None),
                    "deceptiveness": parsed.get("deceptiveness", None),
                    "relevance": parsed.get("relevance", None),
                    "topical_diversity": parsed.get("topical_diversity", None),
                    "expression_diversity": parsed.get("expression_diversity", None),
                    "total_score": total_score
                })
                evaluated_ids.add(claim_id)
                scored_this_run += 1

                print(f"✅ Scored: {claim_id} | Score: {total_score:.2f}")

            except Exception as parse_error:
                print(f"⚠️ JSON parsing error: {parse_error} \nRaw response: {response}")

        except Exception as e:
            print(f"🚨 GPT call error: {e}")
            if "429" in str(e):
                print("⏳ Rate limit – waiting 5 minutes...")
                time.sleep(300)
            continue

    # Save intermediate results with all criteria. Nothing is written while the
    # result list is empty: that would create a file without a column header,
    # which the resume step (it reads the claim_id column) cannot use.
    if evaluated_data:
        pd.DataFrame(evaluated_data).to_csv(output_file, index=False)
        print(f"💾 Saved {len(evaluated_data)} entries to {output_file}")

    # ETA: average time per claim scored in THIS run, multiplied by the number
    # of unscored claims in the batches that have not been visited yet.
    elapsed = time.time() - start_time
    upcoming = claims.iloc[i + batch_size:]
    claims_left = int((~upcoming["claim_id"].isin(evaluated_ids)).sum())
    avg_time = elapsed / scored_this_run if scored_this_run > 0 else 0
    eta = claims_left * avg_time
    print(f"⏱️ ETA remaining: {eta/60:.1f} minutes")

    # Fixed pause between batches.
    time.sleep(55)

print("🎯 Evaluation completed.")

🔄 Already evaluated: 3000 of 3000
🎯 Evaluation completed.


## Load, merge, and filter GPT-4-generated claims with scores > 4

In [None]:
# Read the per-claim evaluation scores written by the evaluation step
# (one row per claim_id, five criterion columns plus total_score) and display them.
scores_df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/GPT4turbo_eval_claims.csv"
)
scores_df

Unnamed: 0,claim_id,realism,deceptiveness,relevance,topical_diversity,expression_diversity,total_score
0,gpt_00001,4,5,4,3,4,4.10
1,gpt_00002,4,5,4,3,4,4.10
2,gpt_00003,4,3,3,3,4,3.40
3,gpt_00004,4,3,3,4,4,3.55
4,gpt_00005,4,5,4,3,4,4.10
...,...,...,...,...,...,...,...
2995,gpt_02996,4,5,5,3,4,4.30
2996,gpt_02997,4,5,4,3,4,4.10
2997,gpt_02998,4,5,4,3,4,4.10
2998,gpt_02999,4,5,5,3,4,4.30


In [None]:
# Read the claim texts that were sent for evaluation (claim_id + claim)
# and display them.
claims_df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/claims_for_eval.csv"
)
claims_df

Unnamed: 0,claim_id,claim
0,gpt_00001,Top nutritionists reveal that skipping breakfa...
1,gpt_00002,Scientists discovered that listening to classi...
2,gpt_00003,Eating dark chocolate every morning increases ...
3,gpt_00004,top dermatologists recommend washing hair with...
4,gpt_00005,Doctors are now prescribing listening to class...
...,...,...
2995,gpt_02996,A naturopathic doctor reveals on her blog that...
2996,gpt_02997,A recent study found that drinking two glasses...
2997,gpt_02998,Experts now confirm that switching to a raw di...
2998,gpt_02999,Breaking news! Researchers in California have ...


In [None]:
# Merge claims with their corresponding evaluation scores using 'claim_id' column.
# A left join keeps every claim, so a claim that has no saved score appears with
# NaN in the score columns instead of being dropped before the check below
# (with an inner join that check could never report an unscored claim).
merged = pd.merge(claims_df, scores_df, on="claim_id", how="left")
# Check for missing values in the merged dataframe
merged.isna().sum()

Unnamed: 0,0
claim_id,0
claim,0
realism,0
deceptiveness,0
relevance,0
topical_diversity,0
expression_diversity,0
total_score,0


In [None]:
# Select the text of every claim whose total score is strictly greater than 4
# and renumber the resulting Series from 0.
final_GPTclaims = (
    merged
    .loc[merged["total_score"].gt(4), "claim"]
    .reset_index(drop=True)
)
final_GPTclaims

Unnamed: 0,claim
0,Top nutritionists reveal that skipping breakfa...
1,Scientists discovered that listening to classi...
2,Doctors are now prescribing listening to class...
3,New research claims that switching your phone ...
4,Studies show that drinking three cups of green...
...,...
2309,A naturopathic doctor reveals on her blog that...
2310,A recent study found that drinking two glasses...
2311,Experts now confirm that switching to a raw di...
2312,Breaking news! Researchers in California have ...


In [None]:
# Write the selected claim texts to Drive; index=False leaves the row index out
# of the file.
final_GPTclaims.to_csv(
    path_or_buf="/content/drive/MyDrive/final_GPTclaims.csv",
    index=False,
)