In [35]:
import sys
import os
import json
import pandas as pd
import numpy as np
import ast
import itertools
import random
import copy
from datetime import datetime
from collections import Counter, defaultdict

import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

sys.path.append("../")

from src.helpers import io
from src.classes.dataset import Dataset
from src.classes.annotation_set import AnnotationSet

%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [36]:
# FILL IN: input/output locations for this analysis run.
PATH_TO_DATASET = "../data/static/wildchat4k-raw.json"
DATASET_ID = "wildchat_1m"  # NOTE(review): not referenced in the visible cells — confirm it is still needed
PATH_TO_ANNOTATIONS_DIR = "../res/gpto3mini-json-wildchat"
# Plots produced by the analysis cells below are saved under OUTDIR.
# Unlike the "../"-prefixed inputs, this path has no "../" prefix, so it is
# resolved against the kernel's current working directory.
OUTDIR = "data/annotation_analysis_v0/time_geography"
os.makedirs(OUTDIR, exist_ok=True)  # exist_ok=True: re-running the cell does not fail

# Load dataset (w/o annotations)
dataset = Dataset.load(PATH_TO_DATASET)

# Load annotations into dataset: one AnnotationSet per non-hidden entry of the
# annotations directory, all registered under the source name "automatic_v0"
# (the analysis functions below look up metadata keys starting with that prefix).
for fpath in io.listdir_nohidden(PATH_TO_ANNOTATIONS_DIR):
    annotation_set = AnnotationSet.load_automatic(path=fpath, source="automatic_v0")
    dataset.add_annotations(annotation_set)

prompt-multi_turn_relationship: 0 / 10127 failed due to invalid annotations.
prompt-interaction_features: 0 / 10127 failed due to invalid annotations.
turn-sensitive_use_flags: 0 / 10127 failed due to invalid annotations.
turn-topic: 1 / 10127 failed due to invalid annotations.
response-interaction_features: 0 / 10127 failed due to invalid annotations.
prompt-function_purpose: 6 / 10127 failed due to invalid annotations.
prompt-media_format: 0 / 10127 failed due to invalid annotations.
response-media_format: 0 / 10127 failed due to invalid annotations.
response-answer_form: 0 / 10127 failed due to invalid annotations.


# Research Question: How do dialogue characteristics differ over time and geography across the whole set of conversations?

Characteristics, grouped by annotation level:
- Prompt:
    - "function_purpose"
    - "interaction_features" (with "level_id": "prompt")
    - "media_format" (with "level_id": "prompt")
    - (Multi-turn relationships?)
- Response:
    - "answer_form"
    - "interaction_features" (with "level_id": "response")
    - "media_format" (with "level_id": "response")
- Turn:
    - "topic"
    - "sensitive_use_flags"
- Conversation:
    - "model"
    - (Languages? Conversation length/stats?)


# Temporal Analysis

In [37]:
# Helper: extract month from timestamp
def extract_month(timestamp):
    """Return the ``YYYY-MM`` month of a timestamp, or ``None`` if unavailable.

    Args:
        timestamp: Either an ISO-8601 string (a ``Z`` suffix is accepted and
            treated as ``+00:00``) or a ``datetime`` instance (which includes
            subclasses such as ``pandas.Timestamp``).

    Returns:
        The month formatted as ``"%Y-%m"`` (e.g. ``"2023-04"``), or ``None``
        when the input is missing, of an unsupported type, or not parseable.
    """
    try:
        if isinstance(timestamp, str):
            # datetime.fromisoformat() only accepts the "Z" suffix on newer
            # Python versions, so normalise it to an explicit UTC offset.
            timestamp = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
        elif not isinstance(timestamp, datetime):
            return None
        return timestamp.strftime("%Y-%m")
    except ValueError:
        # Malformed string, or a datetime-like value that cannot be formatted.
        return None

# Temporal analysis function
def analyze_temporal_distribution(label_key, level):
    """Tabulate and plot the monthly distribution of one annotation dimension.

    Iterates over ``dataset.data`` (module-level ``dataset``), buckets each
    conversation by the month returned by ``extract_month(conv.time)`` and
    counts labels according to ``level``:

    * ``"conversation"``: the attribute ``label_key`` of the conversation
      object, counted once per conversation whose first message has
      ``turn == 0``; falsy attribute values are ignored.
    * ``"turn"``: ``msg.metadata["automatic_v0-turn_<label_key>"].value`` for
      every message that carries that key.
    * ``"prompt"`` / ``"response"``: the same lookup with the
      ``automatic_v0-prompt_`` / ``automatic_v0-response_`` key, restricted to
      messages whose ``role`` is ``"user"`` / ``"assistant"`` respectively.

    List-valued labels contribute one count per element. Conversations
    without a parseable month are skipped.

    Side effects: displays a "count (percent%)" table, and saves a stacked
    percentage bar chart to ``OUTDIR/temporal_<label_key>_<level>.png``. When
    nothing was counted, prints a "[Skipped]" message and returns early.

    Args:
        label_key: Name of the annotation task (or conversation attribute).
        level: One of ``"conversation"``, ``"turn"``, ``"prompt"``,
            ``"response"``.

    Returns:
        None.

    Raises:
        ValueError: If ``level`` is not one of the supported values (instead
            of silently reporting "no data").
    """
    # Role a message must have for its annotation to be counted at each
    # message-based level; None means any role is accepted.
    role_for_level = {"turn": None, "prompt": "user", "response": "assistant"}
    if level != "conversation" and level not in role_for_level:
        raise ValueError(
            f"Unsupported level {level!r}; expected one of "
            f"{['conversation', *role_for_level]}"
        )

    metadata_key = f"automatic_v0-{level}_{label_key}"
    monthly_counts = defaultdict(Counter)

    for conv in dataset.data:
        month = extract_month(conv.time)
        if not month:
            continue

        if level == "conversation":
            # Count the conversation attribute once, and only when the first
            # message is turn 0 (conversations without messages are ignored).
            first_msg = next(iter(conv.conversation), None)
            if first_msg is None or first_msg.turn != 0:
                continue
            value = getattr(conv, label_key, None)
            if value:
                for item in (value if isinstance(value, list) else [value]):
                    monthly_counts[month][item] += 1
            continue

        required_role = role_for_level[level]
        for msg in conv.conversation:
            if required_role is not None and msg.role != required_role:
                continue
            if metadata_key not in msg.metadata:
                continue
            value = msg.metadata[metadata_key].value
            for item in (value if isinstance(value, list) else [value]):
                monthly_counts[month][item] += 1

    # Convert to DataFrame: one row per month, one column per label
    df_counts = pd.DataFrame(monthly_counts).fillna(0).T.sort_index()
    df_counts.index.name = "Month"

    # Ensure numeric and drop empty
    df_counts = df_counts.apply(pd.to_numeric, errors='coerce').fillna(0).astype(int)
    df_counts = df_counts.loc[(df_counts != 0).any(axis=1)]

    if df_counts.empty:
        print(f"[Skipped] No data available to plot for {label_key} @ {level}")
        return

    # Normalize to percent (each month's row sums to 100)
    df_pct = df_counts.div(df_counts.sum(axis=1), axis=0) * 100

    # Combine into formatted table
    df_formatted = df_counts.astype(str) + " (" + df_pct.round(1).astype(str) + "%)"

    print(f"\n=== Monthly Distribution Table for {label_key} @ {level} ===")
    display(df_formatted)

    # Plot through the explicit Figure/Axes objects so nothing depends on
    # pyplot's notion of the "current" figure.
    fig, ax = plt.subplots(figsize=(14, 7))
    df_pct.plot(kind="bar", stacked=True, ax=ax, colormap="tab20")
    ax.set_title(f"Monthly Distribution of {label_key.replace('_', ' ').title()}")
    ax.set_ylabel("Percentage")
    ax.set_xlabel("Month")
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title="Labels", fontsize='small', title_fontsize='medium')
    fig.tight_layout()
    plot_path = os.path.join(OUTDIR, f"temporal_{label_key}_{level}.png")
    fig.savefig(plot_path, bbox_inches="tight")  # keep the outside legend in the image
    plt.close(fig)  # close exactly this figure to release its memory
    print(f"Saved plot: {plot_path}")

# List of all dimensions to analyze, as (label_key, level) pairs.
# `level` selects how analyze_temporal_distribution looks the label up:
# "prompt" / "response" / "turn" read message metadata, "conversation" reads
# an attribute of the conversation object itself.
targets = [
    ("function_purpose", "prompt"),
    ("interaction_features", "prompt"),
    ("media_format", "prompt"),
    ("answer_form", "response"),
    ("interaction_features", "response"),
    ("media_format", "response"),
    ("topic", "turn"),
    ("sensitive_use_flags", "turn"),
    ("model", "conversation"),
]

# Run all analyses: each call displays a table and saves one plot, or prints
# a "[Skipped]" line when no labels were found for that dimension.
for label_key, level in targets:
    analyze_temporal_distribution(label_key, level)

[Skipped] No data available to plot for function_purpose @ prompt
[Skipped] No data available to plot for interaction_features @ prompt
[Skipped] No data available to plot for media_format @ prompt
[Skipped] No data available to plot for answer_form @ response
[Skipped] No data available to plot for interaction_features @ response
[Skipped] No data available to plot for media_format @ response
[Skipped] No data available to plot for topic @ turn
[Skipped] No data available to plot for sensitive_use_flags @ turn

=== Monthly Distribution Table for model @ conversation ===


Unnamed: 0_level_0,gpt-3.5-turbo-0301,gpt-4-0314,gpt-3.5-turbo-0613,gpt-4-1106-preview,gpt-3.5-turbo-0125,gpt-4-0125-preview
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-04,218 (70.8%),90 (29.2%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
2023-05,246 (79.9%),62 (20.1%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
2023-06,239 (77.6%),38 (12.3%),31 (10.1%),0 (0.0%),0 (0.0%),0 (0.0%)
2023-07,0 (0.0%),0 (0.0%),308 (100.0%),0 (0.0%),0 (0.0%),0 (0.0%)
2023-08,0 (0.0%),0 (0.0%),308 (100.0%),0 (0.0%),0 (0.0%),0 (0.0%)
2023-09,0 (0.0%),0 (0.0%),308 (100.0%),0 (0.0%),0 (0.0%),0 (0.0%)
2023-10,0 (0.0%),0 (0.0%),308 (100.0%),0 (0.0%),0 (0.0%),0 (0.0%)
2023-11,0 (0.0%),0 (0.0%),267 (86.7%),41 (13.3%),0 (0.0%),0 (0.0%)
2023-12,0 (0.0%),0 (0.0%),180 (58.4%),128 (41.6%),0 (0.0%),0 (0.0%)
2024-01,0 (0.0%),0 (0.0%),120 (39.1%),187 (60.9%),0 (0.0%),0 (0.0%)


Saved plot: data/annotation_analysis_v0/time_geography/temporal_model_conversation.png


# Geographic Analysis

In [34]:
def extract_country(geo_string):
    """Return the country part of a ``"country; ..."`` geography string.

    The country is the text before the first ``;`` with surrounding
    whitespace removed. A falsy input (``None`` or an empty string) yields
    ``None``.
    """
    if geo_string:
        country, _sep, _rest = geo_string.partition(";")
        return country.strip()
    return None

def analyze_geographic_distribution(label_key, level, top_n=15):
    """Tabulate and plot the per-country distribution of one annotation dimension.

    Iterates over ``dataset.data`` (module-level ``dataset``), buckets each
    conversation by ``extract_country(conv.geography)`` and counts labels
    according to ``level``:

    * ``"conversation"``: the attribute ``label_key`` of the conversation
      object, counted once per conversation whose first message has
      ``turn == 0``; falsy attribute values are ignored.
    * ``"turn"``: ``msg.metadata["automatic_v0-turn_<label_key>"].value`` for
      every message that carries that key.
    * ``"prompt"`` / ``"response"``: the same lookup with the
      ``automatic_v0-prompt_`` / ``automatic_v0-response_`` key, restricted to
      messages whose ``role`` is ``"user"`` / ``"assistant"`` respectively.

    List-valued labels contribute one count per element. Conversations
    without a country are skipped. Only the ``top_n`` countries with the
    largest total label count are kept.

    Side effects: displays a "count (percent%)" table, and saves a stacked
    percentage bar chart to ``OUTDIR/geo_<label_key>_<level>.png``. When
    nothing was counted, prints a "[Skipped]" message and returns early.

    Args:
        label_key: Name of the annotation task (or conversation attribute).
        level: One of ``"conversation"``, ``"turn"``, ``"prompt"``,
            ``"response"``.
        top_n: Maximum number of countries to keep.

    Returns:
        None.

    Raises:
        ValueError: If ``level`` is not one of the supported values (instead
            of silently reporting "no data").
    """
    # Role a message must have for its annotation to be counted at each
    # message-based level; None means any role is accepted.
    role_for_level = {"turn": None, "prompt": "user", "response": "assistant"}
    if level != "conversation" and level not in role_for_level:
        raise ValueError(
            f"Unsupported level {level!r}; expected one of "
            f"{['conversation', *role_for_level]}"
        )

    metadata_key = f"automatic_v0-{level}_{label_key}"
    geo_counts = defaultdict(Counter)

    for conv in dataset.data:
        country = extract_country(conv.geography)
        if not country:
            continue

        if level == "conversation":
            # Count the conversation attribute once, and only when the first
            # message is turn 0 (conversations without messages are ignored).
            first_msg = next(iter(conv.conversation), None)
            if first_msg is None or first_msg.turn != 0:
                continue
            value = getattr(conv, label_key, None)
            if value:
                for item in (value if isinstance(value, list) else [value]):
                    geo_counts[country][item] += 1
            continue

        required_role = role_for_level[level]
        for msg in conv.conversation:
            if required_role is not None and msg.role != required_role:
                continue
            if metadata_key not in msg.metadata:
                continue
            value = msg.metadata[metadata_key].value
            for item in (value if isinstance(value, list) else [value]):
                geo_counts[country][item] += 1

    skip_message = f"[Skipped] No data available to plot for {label_key} @ {level}"
    if not geo_counts:
        print(skip_message)
        return

    # Convert to DataFrame: one row per country (alphabetical), one column per label
    df_counts = pd.DataFrame(geo_counts).fillna(0).T.sort_index()
    df_counts.index.name = "Country"

    # Keep only top countries. Totals live in a separate Series (no sentinel
    # column that could clash with a label name), and the stable sort keeps
    # tied countries in index (alphabetical) order so the selection is
    # reproducible across runs.
    totals = df_counts.sum(axis=1)
    top_countries = totals.sort_values(ascending=False, kind="stable").head(top_n).index
    df_top = df_counts.loc[top_countries]

    if df_top.empty:
        print(skip_message)
        return

    # Normalize to percent (each country's row sums to 100)
    df_pct = df_top.div(df_top.sum(axis=1), axis=0) * 100

    # Combine into formatted table
    df_formatted = df_top.astype(int).astype(str) + " (" + df_pct.round(1).astype(str) + "%)"

    # Print table
    print(f"\n=== Geographic Distribution Table for {label_key} @ {level} ===")
    display(df_formatted)

    # Plot through the explicit Figure/Axes objects so nothing depends on
    # pyplot's notion of the "current" figure.
    fig, ax = plt.subplots(figsize=(14, 8))
    df_pct.plot(kind="bar", stacked=True, ax=ax, colormap="tab20")

    ax.set_title(f"Geographic Distribution of {label_key.replace('_', ' ').title()}")
    ax.set_ylabel("Percentage")
    ax.set_xlabel("Country")
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title="Labels", fontsize='small', title_fontsize='medium')
    fig.tight_layout()
    plot_path = os.path.join(OUTDIR, f"geo_{label_key}_{level}.png")
    fig.savefig(plot_path, bbox_inches='tight')  # Ensure legend is not cut off
    plt.close(fig)  # close exactly this figure to release its memory
    print(f"Saved plot: {plot_path}")


# Full set of dimensions to analyze, as (label_key, level) pairs.
# Identical to the list in the temporal-analysis cell; `level` selects how
# analyze_geographic_distribution looks the label up ("prompt" / "response" /
# "turn" read message metadata, "conversation" reads a conversation attribute).
targets = [
    ("function_purpose", "prompt"),
    ("interaction_features", "prompt"),
    ("media_format", "prompt"),
    ("answer_form", "response"),
    ("interaction_features", "response"),
    ("media_format", "response"),
    ("topic", "turn"),
    ("sensitive_use_flags", "turn"),
    ("model", "conversation"),
]

# Each call displays a table and saves one plot (default top_n=15 countries),
# or prints a "[Skipped]" line when no labels were found for that dimension.
for label_key, level in targets:
    analyze_geographic_distribution(label_key, level)


=== Geographic Distribution Table for function_purpose @ prompt ===


Unnamed: 0_level_0,Information analysis (Content explanation / interpretation),"Advice, Guidance, & Recommendations (Activity / product recommendations)","Advice, Guidance, & Recommendations (Instructions / How-to)",Information retrieval (general info from web),Content generation (code),Content generation (academic / essay writing),"Content generation (general prose, discussion or explanation)",Editorial & formatting (Content summarization),No clear task,Content generation (administrative writing),...,Content generation (prompts for another AI system),Role-play / social simulation (platonic companion / friend),Content generation (code documentation),"Reasoning (Verbal problems, logic games, puzzles or riddles)","Advice, Guidance, & Recommendations (Social and personal advice)",Editorial & formatting (Code style and re-formatting),Information analysis (Content Classification),Information analysis (Ranking or Scoring),Information analysis (Other content analysis / description),"Advice, Guidance, & Recommendations (Action planning (scheduling, robotics))"
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
United States,100 (5.5%),10 (0.6%),73 (4.0%),164 (9.1%),57 (3.1%),46 (2.5%),125 (6.9%),19 (1.0%),143 (7.9%),26 (1.4%),...,77 (4.2%),4 (0.2%),1 (0.1%),8 (0.4%),6 (0.3%),2 (0.1%),0 (0.0%),1 (0.1%),0 (0.0%),0 (0.0%)
Russia,141 (8.9%),5 (0.3%),146 (9.2%),138 (8.7%),177 (11.1%),55 (3.5%),137 (8.6%),11 (0.7%),106 (6.7%),7 (0.4%),...,4 (0.3%),11 (0.7%),1 (0.1%),2 (0.1%),5 (0.3%),5 (0.3%),7 (0.4%),0 (0.0%),0 (0.0%),0 (0.0%)
China,118 (11.2%),8 (0.8%),121 (11.5%),148 (14.1%),92 (8.8%),38 (3.6%),72 (6.9%),16 (1.5%),94 (9.0%),16 (1.5%),...,113 (10.8%),1 (0.1%),2 (0.2%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
Hong Kong,34 (8.2%),1 (0.2%),28 (6.8%),40 (9.7%),23 (5.6%),51 (12.3%),22 (5.3%),4 (1.0%),28 (6.8%),14 (3.4%),...,66 (15.9%),0 (0.0%),0 (0.0%),0 (0.0%),1 (0.2%),0 (0.0%),0 (0.0%),0 (0.0%),2 (0.5%),0 (0.0%)
United Kingdom,12 (3.3%),0 (0.0%),8 (2.2%),31 (8.4%),6 (1.6%),14 (3.8%),54 (14.7%),1 (0.3%),18 (4.9%),6 (1.6%),...,7 (1.9%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),1 (0.3%)
Germany,28 (8.2%),4 (1.2%),17 (5.0%),34 (9.9%),14 (4.1%),4 (1.2%),19 (5.6%),4 (1.2%),75 (21.9%),9 (2.6%),...,1 (0.3%),3 (0.9%),0 (0.0%),0 (0.0%),2 (0.6%),2 (0.6%),0 (0.0%),1 (0.3%),0 (0.0%),0 (0.0%)
France,27 (8.6%),2 (0.6%),13 (4.2%),35 (11.2%),19 (6.1%),7 (2.2%),46 (14.7%),6 (1.9%),32 (10.2%),0 (0.0%),...,2 (0.6%),0 (0.0%),0 (0.0%),0 (0.0%),1 (0.3%),0 (0.0%),0 (0.0%),0 (0.0%),1 (0.3%),0 (0.0%)
Canada,12 (3.8%),0 (0.0%),20 (6.4%),35 (11.2%),5 (1.6%),15 (4.8%),37 (11.8%),7 (2.2%),25 (8.0%),3 (1.0%),...,8 (2.6%),5 (1.6%),4 (1.3%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
Philippines,13 (4.4%),0 (0.0%),12 (4.1%),19 (6.4%),1 (0.3%),3 (1.0%),28 (9.5%),2 (0.7%),32 (10.8%),3 (1.0%),...,2 (0.7%),1 (0.3%),0 (0.0%),1 (0.3%),0 (0.0%),0 (0.0%),1 (0.3%),0 (0.0%),0 (0.0%),0 (0.0%)
India,16 (6.5%),8 (3.3%),29 (11.8%),26 (10.6%),10 (4.1%),6 (2.4%),41 (16.7%),7 (2.9%),18 (7.3%),14 (5.7%),...,3 (1.2%),0 (0.0%),0 (0.0%),0 (0.0%),1 (0.4%),0 (0.0%),0 (0.0%),3 (1.2%),0 (0.0%),0 (0.0%)


Saved plot: data/annotation_analysis_v0/time_geography/geo_function_purpose_prompt.png

=== Geographic Distribution Table for interaction_features @ prompt ===


Unnamed: 0_level_0,None,Courtesy/Politeness,Role-assignment,Reinforcement/Praise/Scolding,Jailbreak attempt,Companionship,Reinforcement/Praise
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
United States,1539 (82.2%),131 (7.0%),146 (7.8%),23 (1.2%),23 (1.2%),11 (0.6%),0 (0.0%)
Russia,1398 (87.0%),68 (4.2%),104 (6.5%),17 (1.1%),13 (0.8%),5 (0.3%),1 (0.1%)
China,817 (75.7%),117 (10.8%),137 (12.7%),7 (0.6%),1 (0.1%),0 (0.0%),0 (0.0%)
Hong Kong,262 (60.0%),64 (14.6%),107 (24.5%),4 (0.9%),0 (0.0%),0 (0.0%),0 (0.0%)
United Kingdom,342 (93.2%),14 (3.8%),8 (2.2%),1 (0.3%),2 (0.5%),0 (0.0%),0 (0.0%)
Germany,281 (81.2%),25 (7.2%),12 (3.5%),22 (6.4%),4 (1.2%),2 (0.6%),0 (0.0%)
France,263 (83.8%),14 (4.5%),20 (6.4%),12 (3.8%),5 (1.6%),0 (0.0%),0 (0.0%)
Canada,263 (83.8%),20 (6.4%),21 (6.7%),5 (1.6%),5 (1.6%),0 (0.0%),0 (0.0%)
Philippines,276 (92.9%),12 (4.0%),6 (2.0%),0 (0.0%),3 (1.0%),0 (0.0%),0 (0.0%)
India,220 (90.9%),12 (5.0%),10 (4.1%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)


Saved plot: data/annotation_analysis_v0/time_geography/geo_interaction_features_prompt.png

=== Geographic Distribution Table for media_format @ prompt ===


Unnamed: 0_level_0,Natural language,Formatted enumeration/itemization (bullets/lists),Code,URLs,Formatted enumeration/itemization,HTML,Images,Math/symbols,Likely retrieved/pasted content,Other,Audio,Charts/Graphs
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Russia,1566 (63.2%),348 (14.0%),273 (11.0%),31 (1.3%),128 (5.2%),14 (0.6%),6 (0.2%),80 (3.2%),30 (1.2%),2 (0.1%),1 (0.0%),0 (0.0%)
United States,1783 (72.2%),339 (13.7%),114 (4.6%),17 (0.7%),106 (4.3%),7 (0.3%),9 (0.4%),68 (2.8%),20 (0.8%),3 (0.1%),1 (0.0%),2 (0.1%)
China,1035 (62.2%),301 (18.1%),159 (9.6%),14 (0.8%),123 (7.4%),13 (0.8%),3 (0.2%),12 (0.7%),3 (0.2%),0 (0.0%),0 (0.0%),0 (0.0%)
Hong Kong,408 (61.2%),145 (21.7%),30 (4.5%),9 (1.3%),54 (8.1%),4 (0.6%),1 (0.1%),16 (2.4%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
United Kingdom,363 (71.0%),89 (17.4%),10 (2.0%),6 (1.2%),26 (5.1%),1 (0.2%),7 (1.4%),8 (1.6%),0 (0.0%),1 (0.2%),0 (0.0%),0 (0.0%)
Germany,332 (66.5%),76 (15.2%),18 (3.6%),4 (0.8%),23 (4.6%),13 (2.6%),1 (0.2%),19 (3.8%),13 (2.6%),0 (0.0%),0 (0.0%),0 (0.0%)
Canada,305 (69.2%),74 (16.8%),18 (4.1%),4 (0.9%),28 (6.3%),0 (0.0%),1 (0.2%),8 (1.8%),2 (0.5%),0 (0.0%),0 (0.0%),1 (0.2%)
France,305 (73.1%),49 (11.8%),34 (8.2%),4 (1.0%),16 (3.8%),1 (0.2%),0 (0.0%),5 (1.2%),2 (0.5%),1 (0.2%),0 (0.0%),0 (0.0%)
India,238 (59.2%),91 (22.6%),24 (6.0%),3 (0.7%),19 (4.7%),2 (0.5%),10 (2.5%),9 (2.2%),6 (1.5%),0 (0.0%),0 (0.0%),0 (0.0%)
Philippines,292 (75.8%),55 (14.3%),1 (0.3%),4 (1.0%),19 (4.9%),1 (0.3%),1 (0.3%),9 (2.3%),0 (0.0%),2 (0.5%),1 (0.3%),0 (0.0%)


Saved plot: data/annotation_analysis_v0/time_geography/geo_media_format_prompt.png
[Skipped] No data available to plot for answer_form @ response
[Skipped] No data available to plot for interaction_features @ response
[Skipped] No data available to plot for media_format @ response

=== Geographic Distribution Table for topic @ turn ===


Unnamed: 0_level_0,"Entertainment, Hobbies & Leisure",Employment & Hiring,News & Current Affairs,"Technology, Software & Computing",Food & Dining,History,Culture,Art & Design,Business & Finances,Interpersonal Relationships & Communication,...,Nature & Environment,Insurance & Social Scoring,Politics & Elections,"Law, Criminal Justice, Law Enforcement",Adult & Illicit Content,Social Issues & Movements,Transportation,Fantasy / Fiction / Fanfiction,Immigration / Migration,"Psychology, Philosophy & Human Behavior"
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
United States,104 (4.4%),15 (0.6%),6 (0.3%),219 (9.2%),21 (0.9%),67 (2.8%),45 (1.9%),142 (6.0%),91 (3.8%),110 (4.6%),...,14 (0.6%),0 (0.0%),29 (1.2%),31 (1.3%),176 (7.4%),22 (0.9%),3 (0.1%),392 (16.4%),1 (0.0%),31 (1.3%)
Russia,78 (3.9%),16 (0.8%),8 (0.4%),479 (24.0%),13 (0.7%),41 (2.1%),31 (1.6%),44 (2.2%),88 (4.4%),43 (2.2%),...,9 (0.5%),0 (0.0%),43 (2.2%),44 (2.2%),126 (6.3%),18 (0.9%),19 (1.0%),137 (6.9%),1 (0.1%),71 (3.6%)
China,27 (2.2%),16 (1.3%),7 (0.6%),331 (27.0%),27 (2.2%),19 (1.6%),36 (2.9%),134 (10.9%),132 (10.8%),17 (1.4%),...,14 (1.1%),1 (0.1%),18 (1.5%),9 (0.7%),19 (1.6%),4 (0.3%),7 (0.6%),14 (1.1%),0 (0.0%),0 (0.0%)
Hong Kong,10 (1.9%),7 (1.3%),3 (0.6%),100 (19.0%),3 (0.6%),12 (2.3%),10 (1.9%),68 (12.9%),47 (8.9%),11 (2.1%),...,6 (1.1%),0 (0.0%),1 (0.2%),6 (1.1%),7 (1.3%),4 (0.8%),1 (0.2%),23 (4.4%),1 (0.2%),10 (1.9%)
United Kingdom,93 (18.2%),14 (2.7%),2 (0.4%),37 (7.3%),4 (0.8%),10 (2.0%),10 (2.0%),13 (2.5%),36 (7.1%),21 (4.1%),...,7 (1.4%),0 (0.0%),17 (3.3%),8 (1.6%),34 (6.7%),6 (1.2%),16 (3.1%),82 (16.1%),0 (0.0%),6 (1.2%)
Germany,10 (2.3%),2 (0.5%),1 (0.2%),77 (17.7%),4 (0.9%),24 (5.5%),5 (1.1%),12 (2.8%),9 (2.1%),7 (1.6%),...,6 (1.4%),0 (0.0%),8 (1.8%),10 (2.3%),38 (8.7%),3 (0.7%),3 (0.7%),21 (4.8%),1 (0.2%),8 (1.8%)
Philippines,40 (9.3%),7 (1.6%),25 (5.8%),15 (3.5%),3 (0.7%),24 (5.6%),5 (1.2%),2 (0.5%),12 (2.8%),8 (1.9%),...,9 (2.1%),0 (0.0%),17 (3.9%),5 (1.2%),3 (0.7%),8 (1.9%),1 (0.2%),56 (13.0%),8 (1.9%),17 (3.9%)
Canada,25 (6.4%),1 (0.3%),3 (0.8%),71 (18.1%),2 (0.5%),14 (3.6%),6 (1.5%),21 (5.4%),22 (5.6%),8 (2.0%),...,5 (1.3%),0 (0.0%),4 (1.0%),2 (0.5%),23 (5.9%),2 (0.5%),1 (0.3%),55 (14.0%),0 (0.0%),1 (0.3%)
France,42 (11.1%),3 (0.8%),1 (0.3%),80 (21.2%),2 (0.5%),2 (0.5%),9 (2.4%),3 (0.8%),20 (5.3%),15 (4.0%),...,6 (1.6%),0 (0.0%),1 (0.3%),2 (0.5%),65 (17.2%),3 (0.8%),0 (0.0%),5 (1.3%),0 (0.0%),6 (1.6%)
India,22 (6.6%),11 (3.3%),1 (0.3%),71 (21.3%),4 (1.2%),2 (0.6%),11 (3.3%),5 (1.5%),35 (10.5%),13 (3.9%),...,4 (1.2%),0 (0.0%),3 (0.9%),5 (1.5%),2 (0.6%),3 (0.9%),0 (0.0%),25 (7.5%),1 (0.3%),7 (2.1%)


Saved plot: data/annotation_analysis_v0/time_geography/geo_topic_turn.png

=== Geographic Distribution Table for sensitive_use_flags @ turn ===


Unnamed: 0_level_0,None,"Inciting violence, hateful or other harmful behavior (harassment & bullying)",Sexually explicit content (Other),"Inciting violence, hateful or other harmful behavior (physical harm)",Sexually explicit content (fictitious person),Sexually explicit content (real person),"Discriminatory practices (Misrepresentation, stereotyping, or inappropriate reference to sensitive attributes)","Possible presence of copyrighted, unreferenced material",Criminal planning or other suspected illegal activity not listed elsewhere,Privacy concerns (Possible sensitive information),...,Generating defamatory content,Privacy concerns (Possible identifiable information),Criminal planning or other suspected illegal activity not listed,"Cyberattacks (enabling/enacting malware, computer viruses, worms, malicious code, ...)","Inciting violence, hateful or other harmful behavior (self-harm)","Privacy concerns (Possible sensitive information) (e.g., API keys, passwords, other confidential information)",Cyberattacks,Potential violation of external policy / ethics,Impersonation attempts,Other
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
United States,1559 (85.9%),31 (1.7%),120 (6.6%),19 (1.0%),58 (3.2%),3 (0.2%),11 (0.6%),0 (0.0%),0 (0.0%),1 (0.1%),...,0 (0.0%),3 (0.2%),0 (0.0%),3 (0.2%),1 (0.1%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
Russia,1392 (87.3%),28 (1.8%),88 (5.5%),24 (1.5%),16 (1.0%),3 (0.2%),11 (0.7%),1 (0.1%),12 (0.8%),3 (0.2%),...,1 (0.1%),2 (0.1%),1 (0.1%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
China,1020 (98.0%),2 (0.2%),16 (1.5%),1 (0.1%),1 (0.1%),1 (0.1%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),...,0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
Hong Kong,404 (98.3%),0 (0.0%),5 (1.2%),1 (0.2%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),...,0 (0.0%),1 (0.2%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
United Kingdom,314 (85.8%),8 (2.2%),19 (5.2%),2 (0.5%),15 (4.1%),0 (0.0%),3 (0.8%),0 (0.0%),0 (0.0%),0 (0.0%),...,0 (0.0%),0 (0.0%),0 (0.0%),1 (0.3%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
Germany,278 (80.3%),24 (6.9%),29 (8.4%),4 (1.2%),7 (2.0%),1 (0.3%),1 (0.3%),0 (0.0%),0 (0.0%),0 (0.0%),...,0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),2 (0.6%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
France,239 (75.4%),3 (0.9%),42 (13.2%),0 (0.0%),5 (1.6%),7 (2.2%),9 (2.8%),1 (0.3%),1 (0.3%),1 (0.3%),...,3 (0.9%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
Canada,269 (87.6%),4 (1.3%),14 (4.6%),2 (0.7%),11 (3.6%),0 (0.0%),3 (1.0%),0 (0.0%),1 (0.3%),2 (0.7%),...,0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),1 (0.3%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
Philippines,288 (98.0%),1 (0.3%),2 (0.7%),0 (0.0%),1 (0.3%),0 (0.0%),1 (0.3%),1 (0.3%),0 (0.0%),0 (0.0%),...,0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%)
India,237 (97.9%),0 (0.0%),1 (0.4%),1 (0.4%),1 (0.4%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),...,0 (0.0%),1 (0.4%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),0 (0.0%),1 (0.4%),0 (0.0%)


Saved plot: data/annotation_analysis_v0/time_geography/geo_sensitive_use_flags_turn.png

=== Geographic Distribution Table for model @ conversation ===


Unnamed: 0_level_0,gpt-3.5-turbo-0301,gpt-4-0314,gpt-3.5-turbo-0613,gpt-4-1106-preview,gpt-3.5-turbo-0125,gpt-4-0125-preview
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
United States,105 (12.0%),26 (3.0%),449 (51.5%),138 (15.8%),83 (9.5%),71 (8.1%)
Russia,112 (18.4%),32 (5.2%),287 (47.0%),89 (14.6%),46 (7.5%),44 (7.2%)
China,148 (36.6%),16 (4.0%),186 (46.0%),8 (2.0%),40 (9.9%),6 (1.5%)
Hong Kong,24 (11.6%),4 (1.9%),135 (65.2%),12 (5.8%),29 (14.0%),3 (1.4%)
United Kingdom,18 (11.6%),2 (1.3%),69 (44.5%),24 (15.5%),13 (8.4%),29 (18.7%)
Germany,38 (27.5%),18 (13.0%),43 (31.2%),25 (18.1%),2 (1.4%),12 (8.7%)
France,17 (13.7%),10 (8.1%),51 (41.1%),33 (26.6%),1 (0.8%),12 (9.7%)
Canada,15 (16.5%),7 (7.7%),36 (39.6%),21 (23.1%),5 (5.5%),7 (7.7%)
Japan,10 (11.5%),2 (2.3%),45 (51.7%),8 (9.2%),16 (18.4%),6 (6.9%)
India,5 (7.1%),3 (4.3%),25 (35.7%),23 (32.9%),2 (2.9%),12 (17.1%)


Saved plot: data/annotation_analysis_v0/time_geography/geo_model_conversation.png
