In [1]:
# Load libraries
import os
import pandas as pd

In [2]:
# Set file path
# The data directory can be overridden per machine through the
# DEEPFAKE_DATA_DIR environment variable; when it is unset, the original
# location is used so existing runs behave exactly as before.
in_path = os.environ.get(
    'DEEPFAKE_DATA_DIR',
    '/Users/tylund/Library/CloudStorage/Dropbox/1. Side Projects/2025.1-Deepfake Threat Landscape/1-data/deepfake-incidents',
)

In [3]:
# Load data sets
# Read the three source CSVs from the data directory, in a fixed order:
# AIID incidents, AIID classifications, then the AAIC export.
aiid_incidents_df, aiid_class_df, aaic_df = (
    pd.read_csv(os.path.join(in_path, csv_name))
    for csv_name in (
        'aiid_incidents_deepfakes.csv',
        'aiid_class_deepfakes.csv',
        'aaic_deepfakes.csv',
    )
)

In [4]:
# Subset columns
# Keep only the identifier, text, and manual-label columns of each frame.
aaic_df = aaic_df.loc[:, ['Incident_ID', 'Headline', 'is_deepfake', 'comment']]
aiid_class_df = aiid_class_df.loc[:, ['Incident ID', 'Known AI Goal Snippets', 'is_deepfake', 'comment']]
aiid_incidents_df = aiid_incidents_df.loc[:, ['incident_id', 'description', 'title', 'is_deepfake', 'comment']]

In [5]:
# Normalize columns
# Give every frame the same 'incident_id' / 'title' column names so they can
# be combined and passed to the same classification code.

# No 'Date' mapping here: the column-subsetting step keeps only four AAIC
# columns, so there is no 'Date' column left to rename.
aaic_df = aaic_df.rename(columns={
    'Incident_ID': 'incident_id',
    'Headline': 'title',
})

# The classification export has no headline column; its
# 'Known AI Goal Snippets' text stands in as the title.
aiid_class_df = aiid_class_df.rename(columns={
    'Incident ID': 'incident_id',
    'Known AI Goal Snippets': 'title',
})

# aiid_incidents_df needs no rename: it is already selected by the names
# 'incident_id' and 'title' in the subsetting step (a mapping from
# 'incident_ID' could never match a column).

In [6]:
# Combine AIID data frames
# The classification export has no description; add an empty one so both
# frames share the same columns before stacking.
aiid_class_df['description'] = ''

aiid_df = pd.concat([aiid_class_df, aiid_incidents_df], ignore_index=True)

# Keep one row per incident, preferring a row that carries a description.
# A temporary flag marks rows with a non-empty description; sorting on it puts
# those rows first so keep="first" retains them.
# kind="stable" matters: the default sort algorithm may reorder tied rows
# arbitrarily, which would make the surviving duplicate non-reproducible.
# With a stable sort, ties keep their concat order.
aiid_df = (
    aiid_df
    .assign(has_description=lambda frame: frame["description"].notna() & frame["description"].ne(""))
    .sort_values(by="has_description", ascending=False, kind="stable")
    .drop_duplicates(subset="incident_id", keep="first")
    .drop(columns=["has_description"])
)

## OpenAI API Implementation

In [7]:
from dotenv import load_dotenv
from openai import OpenAI
from tqdm import tqdm
from openai import RateLimitError, APIError, Timeout
import time

# Pull settings from the local .env file into the process environment
load_dotenv()

# API client, keyed from the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Enable the pandas progress_apply integration with a labelled progress bar
tqdm.pandas(desc="Classifying events")

In [8]:
def _as_prompt_text(value) -> str:
    """Return `value` as prompt-ready text, mapping missing values to ""."""
    if isinstance(value, str):
        return value
    # pandas represents empty CSV cells as float NaN, which is truthy, so a
    # plain `value or ""` would let the literal text "nan" into the prompt.
    if value is None or pd.isna(value):
        return ""
    return str(value)


def gpt_classification_wrapper(title: str, description: str = "", max_retries: int = 5) -> str:
    """
    Classify whether an incident is a deepfake (TRUE/FALSE) using GPT-4o mini.

    Parameters
    ----------
    title : str
        Incident title; None/NaN is treated as an empty string.
    description : str, optional
        Incident description; None/NaN is treated as an empty string.
    max_retries : int, optional
        Maximum number of API attempts before giving up.

    Returns
    -------
    str
        "TRUE" or "FALSE" when the model gives a usable answer, "INVALID" when
        the reply is anything else, and "ERROR" when the request fails with a
        non-retryable or unexpected error or all attempts are used up.
    """
    # Handle None / NaN input gracefully
    title = _as_prompt_text(title)
    description = _as_prompt_text(description)

    prompt = f"""
You are an expert in AI ethics and threat analysis. Your task is to classify whether the following incident aligns with the definition of a deepfake provided below.

Definition:  
A deepfake is a realistic image, video, or audio recording created with AI/deep learning techniques that depicts or impersonates a person (real or synthetic). This includes:
- Identifiable individuals  
- Generic impersonations (e.g., fake professionals, fraud voices)  
- Synthetic personas designed to appear real  

Excluded cases: depictions of events, objects, or scenes (e.g., protests, buildings, disasters).  

Key criteria:  
- Must be audio, video, or image content  
- Must be realistic, believable, or seemingly authentic  
- Must be created with AI/deep learning  

Incident:
<title>
{title}
</title>
<description>
{description}
</description>

Question: Based on the definition and criteria above, does this incident qualify as a deepfake?  
Answer ONLY with TRUE or FALSE.
"""

    for attempt in range(1, max_retries + 1):
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=0  # deterministic
            )
            raw_answer = response.choices[0].message.content or ""
            # Tolerate surrounding whitespace, quotes, or a trailing period
            # ("TRUE.") so a correct answer is not discarded as INVALID.
            answer = raw_answer.strip().strip(".\"'").upper()

            # Safety check → enforce only TRUE/FALSE output
            return answer if answer in ("TRUE", "FALSE") else "INVALID"

        # `Timeout` is deliberately not listed: in the v1 SDK (the one that
        # exposes `OpenAI`) it is a timeout configuration class, not an
        # exception, and a non-exception class in an except tuple raises
        # TypeError. Request timeouts surface as an APIError subclass.
        except (RateLimitError, APIError) as e:
            # Errors that carry an HTTP status are only worth retrying for
            # 408/409/429 and 5xx; other 4xx (bad key, bad request) will fail
            # identically on every attempt.
            status = getattr(e, "status_code", None)
            if status is not None and status < 500 and status not in (408, 409, 429):
                print(f"Non-retryable API error: {e}")
                return "ERROR"
            if attempt == max_retries:
                # No point sleeping when there is no further attempt.
                print(f"API error: {e}. Giving up after {max_retries} attempts.")
                break
            wait_time = 2 ** attempt  # exponential backoff: 2, 4, 8, ...
            print(f"API error: {e}. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
        except Exception as e:
            print(f"Unexpected error: {e}")
            return "ERROR"

    # If max retries exceeded
    return "ERROR"

In [9]:
def classify_events(df: pd.DataFrame, title_col="title", desc_col="description") -> pd.DataFrame:
    """
    Run the GPT deepfake classifier over every row and score it against the manual label.

    The input frame is left untouched; a copy is returned with two extra columns:
    'gpt_classification' (the classifier's answer for the row) and 'match'
    (True where the upper-cased string form of 'is_deepfake' equals that answer).
    The description is sent along with the title only when `desc_col` exists and
    holds at least one non-null value; otherwise the title is classified alone.
    A progress bar is shown while rows are processed.

    Raises ValueError if the frame has no 'is_deepfake' column.
    """
    # The manual label is required for the comparison step
    if "is_deepfake" not in df.columns:
        raise ValueError("Dataframe must contain an 'is_deepfake' column for manual labels.")

    result = df.copy()

    # Only use descriptions when the column is present and not entirely null
    use_description = desc_col in result.columns and result[desc_col].notna().any()

    if not use_description:
        # Title-only classification
        result["gpt_classification"] = result[title_col].progress_apply(
            lambda headline: gpt_classification_wrapper(headline, "")
        )
    else:
        # Row-wise classification using both title and description
        def _classify_row(row):
            return gpt_classification_wrapper(row[title_col], row[desc_col])

        result["gpt_classification"] = result.progress_apply(_classify_row, axis=1)

    # Compare manual and GPT labels on their upper-cased string form
    manual_labels = result["is_deepfake"].astype(str).str.upper()
    result["match"] = manual_labels == result["gpt_classification"]

    return result

In [10]:
# Classifying every row takes several minutes of API calls, so reuse the saved
# results when they exist and only call the API when they do not. This keeps
# `aiid_gpt_results` defined on a fresh kernel for the cells that follow.
aiid_results_path = os.path.join(in_path, 'aiid_gpt_results.csv')
if os.path.exists(aiid_results_path):
    # The first CSV column is the index written by to_csv.
    aiid_gpt_results = pd.read_csv(aiid_results_path, index_col=0)
else:
    aiid_gpt_results = classify_events(aiid_df)

Classifying events: 100%|█████████████████████| 401/401 [05:47<00:00,  1.15it/s]


In [11]:
# Export the classification results, but never overwrite an existing file:
# the manual-comparison section reads 'final' and 'justification' columns from
# this CSV, which classify_events does not produce, so a blind re-export would
# discard them. Delete or rename the file first to force a fresh export.
aiid_results_path = os.path.join(in_path, 'aiid_gpt_results.csv')
if os.path.exists(aiid_results_path):
    print(f"Skipping export: {aiid_results_path} already exists.")
else:
    aiid_gpt_results.to_csv(aiid_results_path)

In [12]:
# Tally agreement between the manual 'is_deepfake' label and the GPT label
# ('match' is True where the two agree).
aiid_gpt_results['match'].value_counts()

match
True     359
False     42
Name: count, dtype: int64

In [13]:
# Classifying every row takes several minutes of API calls, so reuse the saved
# results when they exist and only call the API when they do not. This keeps
# `aaic_gpt_results` defined on a fresh kernel for the cells that follow.
aaic_results_path = os.path.join(in_path, 'aaic_gpt_results.csv')
if os.path.exists(aaic_results_path):
    # The first CSV column is the index written by to_csv.
    aaic_gpt_results = pd.read_csv(aaic_results_path, index_col=0)
else:
    aaic_gpt_results = classify_events(aaic_df)

Classifying events: 100%|█████████████████████| 282/282 [03:38<00:00,  1.29it/s]


In [14]:
# Export the classification results, but never overwrite an existing file:
# the manual-comparison section reads 'final' and 'justification' columns from
# this CSV, which classify_events does not produce, so a blind re-export would
# discard them. Delete or rename the file first to force a fresh export.
aaic_results_path = os.path.join(in_path, 'aaic_gpt_results.csv')
if os.path.exists(aaic_results_path):
    print(f"Skipping export: {aaic_results_path} already exists.")
else:
    aaic_gpt_results.to_csv(aaic_results_path)

In [15]:
# Tally agreement between the manual 'is_deepfake' label and the GPT label
# ('match' is True where the two agree).
aaic_gpt_results['match'].value_counts()

match
True     211
False     71
Name: count, dtype: int64

## Manual Comparison

In [9]:
# Load data sets
# Read the saved classification results for both sources back from disk.
aiid_gpt_df, aaic_gpt_df = (
    pd.read_csv(os.path.join(in_path, results_csv))
    for results_csv in ('aiid_gpt_results.csv', 'aaic_gpt_results.csv')
)

In [27]:
# Drop columns to minimize bias in comparison
cols_to_drop = ['is_deepfake', 'comment', 'gpt_classification', 'match', 'final', 'justification']

# Sampling parameters (fixed seed so the same rows are drawn on every run)
sample_size = 15
sample_seed = 42


def _blind_sample(results_df, n=sample_size, seed=sample_seed):
    """Draw up to `n` rows whose 'final' flag is True, with label columns removed."""
    # .eq(True) yields a clean boolean mask even if 'final' contains blank
    # cells, which would make plain boolean indexing raise.
    flagged = results_df[results_df['final'].eq(True)]
    # Cap n at the number of available rows; sample() raises when asked for
    # more rows than exist (without replacement).
    drawn = flagged.sample(n=min(n, len(flagged)), random_state=seed)
    return drawn.drop(columns=cols_to_drop)


# Select random sample for manual comparison
aiid_sample = _blind_sample(aiid_gpt_df)
aaic_sample = _blind_sample(aaic_gpt_df)

# Merge aaic_sample with links
# One link per incident: duplicate IDs in the link table would otherwise
# multiply sample rows in the left merge.
aaic_links = (
    pd.read_csv(os.path.join(in_path, 'aaic_deepfakes.csv'))[['Incident_ID', 'Link']]
    .rename(columns={'Incident_ID': 'incident_id'})
    .drop_duplicates(subset='incident_id')
)

aaic_sample = pd.merge(aaic_sample, aaic_links, on='incident_id', how='left')

# Export for manual comparison
aaic_sample.to_csv(os.path.join(in_path, 'aaic_sample.csv'), index=False)
aiid_sample.to_csv(os.path.join(in_path, 'aiid_sample.csv'), index=False)

Unnamed: 0,incident_id,title,is_deepfake,comment,gpt_classification,match,final,justification,Link
0,AIAAIC1425,Al video depicts Indonesian presidential hopef...,True,none,True,True,True,Agreement,https://www.aiaaic.org/aiaaic-repository/ai-al...
1,AIAAIC1100,Remini AI photo enhancer generates 'child porn',True,borderline: generic impersonation,False,False,True,Generic impersonation,https://www.aiaaic.org/aiaaic-repository/ai-al...
2,AIAAIC1922,Gabby Petito docuseries sparks backlash for us...,True,none,True,True,True,Agreement,https://www.aiaaic.org/aiaaic-repository/ai-al...
3,AIAAIC0921,Deepfake Mark Ruffalo scams manga artist Chika...,True,none,True,True,True,Agreement,https://www.aiaaic.org/aiaaic-repository/ai-al...
4,AIAAIC0676,Anthony Bourdain deepfake voice results in bac...,True,none,True,True,True,Agreement,https://www.aiaaic.org/aiaaic-repository/ai-al...
5,AIAAIC1033,Vladimir Putin declares Russia martial law dee...,True,none,True,True,True,Agreement,https://www.aiaaic.org/aiaaic-repository/ai-al...
6,AIAAIC0944,Pro-China deepfake 'spamouflage' campaign,True,none,False,False,True,Fully synthetic persona,https://www.aiaaic.org/aiaaic-repository/ai-al...
7,AIAAIC1788,"Pensioner loses NZD 224,000 to deepfake Bitcoi...",True,borderline: generic impersonation,True,True,True,Agreement,https://www.aiaaic.org/aiaaic-repository/ai-al...
8,AIAAIC1910,AI-generated video condemns Kanye West anti-se...,True,none,False,False,True,Identifiable individuals,https://www.aiaaic.org/aiaaic-repository/ai-al...
9,AIAAIC0396,Queen Elizabeth II impersonated in deepfake Ch...,True,none,True,True,True,Agreement,https://www.aiaaic.org/aiaaic-repository/ai-al...
