In [5]:
# CARD 1
import json
from datetime import datetime
import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn as sns
import pytz

tz = pytz.timezone('America/Los_Angeles')

def time_of_day(hour):
    if 6 <= hour < 12:
        return 'morning'
    elif 12 <= hour < 18:
        return 'afternoon'
    elif 18 <= hour < 24:
        return 'evening'
    else:
        return 'late night'

# Load the JSON data
with open('conversations.json', 'r') as file:
    data = json.load(file)

messages = []
for conversation in data:
    for node in conversation['mapping'].values():
        if node.get('message'):
            msg = node['message']
            content_parts = msg['content'].get('parts')
            if content_parts is not None:
                content = []
                image_count = 0
                for part in content_parts:
                    if isinstance(part, dict):
                        if part.get('content_type') == 'image_asset_pointer':
                            image_count += 1
                        content.append(str(part))
                    else:
                        content.append(part)
                content = ' '.join(content)
            else:
                text = msg['content'].get('text')
                content = text if text is not None else str(msg['content'])

            create_time = msg.get('create_time')
            msg_datetime = None
            if create_time:
                msg_datetime = datetime.utcfromtimestamp(create_time).replace(tzinfo=pytz.utc).astimezone(tz)

            metadata = msg.get('metadata', {})
            model = metadata.get('model_slug') or metadata.get('default_model_slug')

            content_type = msg['content'].get('content_type', 'text')
            if content_type == 'text':
                if any(marker in content for marker in ['```', 'def ', 'class ', 'function', 'import ', '//']):
                    content_type = 'code'

            messages.append({
                'id': msg['id'],
                'author': msg['author']['role'],
                'content': content,
                'content_type': content_type,
                'create_time': msg_datetime,
                'model': model,
                'image_count': image_count
            })

df = pd.DataFrame(messages)
user_messages = df[df['author'] == 'user'].copy()

# Focus on 2024 only
messages_2024 = user_messages[user_messages['create_time'].dt.year == 2024].copy()

# Total words typed in 2024
total_words_2024 = messages_2024['content'].str.split().str.len().sum()

# Day with most messages in 2024
daily_counts_2024 = messages_2024.groupby(messages_2024['create_time'].dt.date).size()
day_with_most_messages_2024 = daily_counts_2024.idxmax() if not daily_counts_2024.empty else None

# Calculate 2024 longest streak in consecutive days
longest_streak_2024 = 0
current_streak = 0
previous_day = None

for day in sorted(daily_counts_2024.index):
    if previous_day is None or (day - previous_day).days == 1:
        current_streak += 1
    else:
        current_streak = 1
    longest_streak_2024 = max(longest_streak_2024, current_streak)
    previous_day = day

# Assign time of day for each 2024 message
messages_2024['time_of_day'] = messages_2024['create_time'].dt.hour.apply(time_of_day)
most_frequent_time_of_day_2024 = messages_2024['time_of_day'].value_counts().idxmax() if not messages_2024.empty else None

image_count_2024 = messages_2024['image_count'].sum()
code_count_2024 = len(messages_2024[messages_2024['content_type'] == 'code'])
url_count_2024 = messages_2024['content'].str.count(r'https?://').sum()

model_usage = df[df['model'].notna()]['model'].value_counts()
most_used_model = model_usage.index[0] if not model_usage.empty else 'Unknown'

results_2024 = {
    "Total words typed in 2024": total_words_2024,
    "Longest streak in 2024": longest_streak_2024,
    "Day with most messages in 2024": day_with_most_messages_2024,
    "Most frequent time of day (2024)": most_frequent_time_of_day_2024,
    "Images shared (2024)": image_count_2024,
    "Code snippets (2024)": code_count_2024,
    "URLs shared (2024)": url_count_2024,
    "Most used model (2024)": most_used_model,
    "Model usage (2024)": model_usage.to_dict()
}

# plt.figure(figsize=(12, 6))
# sns.countplot(data=messages_2024, x=messages_2024['create_time'].dt.hour, order=range(24))
# plt.title('2024 ChatGPT Hourly Distribution')
# plt.xlabel('Hour of Day (24h)')
# plt.ylabel('Number of Messages')

# for container in plt.gca().containers:
#     plt.gca().bar_label(container)

# plt.grid(axis='y', linestyle='--', alpha=0.7)
# plt.gca().set_facecolor('#f8f9fa')
# plt.gca().spines['top'].set_visible(False)
# plt.gca().spines['right'].set_visible(False)

# total_msgs_2024 = len(messages_2024)
# plt.text(0.95, 0.95, f'Total 2024 Messages: {total_msgs_2024}', 
#          transform=plt.gca().transAxes, 
#          ha='right',
#          bbox=dict(facecolor='white', alpha=0.8, edgecolor='none'))

# plt.tight_layout()
# plt.show()

from pprint import pprint

pprint(results_2024)


{'Code snippets (2024)': 789,
 'Day with most messages in 2024': datetime.date(2024, 10, 31),
 'Images shared (2024)': 664,
 'Longest streak in 2024': 38,
 'Model usage (2024)': {'gpt-4': 7316,
                        'gpt-4-browsing': 693,
                        'gpt-4-code-interpreter': 2233,
                        'gpt-4-dalle': 57,
                        'gpt-4-gizmo': 261,
                        'gpt-4-mobile': 30,
                        'gpt-4-plugins': 5034,
                        'gpt-4o': 4251,
                        'gpt-4o-canmore': 110,
                        'gpt-4o-mini': 4,
                        'o1': 12,
                        'o1-mini': 29,
                        'o1-preview': 313,
                        'o1-pro': 334,
                        'text-davinci-002-plugins': 532,
                        'text-davinci-002-render': 47,
                        'text-davinci-002-render-sha': 1519,
                        'text-davinci-002-render-sha-mobile': 111},


In [46]:
# CARD 2
# - Top 3 most discussed topics (GPT-4)
# - Message type breakdown(i.e. were you asking a lot of questions, or sharing information, etc.) (GPT-4)
# - For your #1 topic:
#   - Total messages on the topic (Python)
#   - Favorite message on the topic (GPT-4)
#   - Fun fact about the topic discussion (GPT-4)

from pydantic import BaseModel
from openai import OpenAI
from collections import Counter, defaultdict
import re
import random
import time
import tenacity
import json

# Configure OpenAI client
client = OpenAI(
)

# --- Data Structures for GPT-4 Responses ---
class TopicAnalysis(BaseModel):
    top_3_topics: list[str]

class UserPersonality(BaseModel):
    user_personality: str

class MainTopicDetails(BaseModel):
    topic: str
    total_messages: int
    favorite_message: str
    fun_fact: str

# Create a composite data structure to represent the final JSON response schema
class Card2Response(BaseModel):
    topics: TopicAnalysis
    user_personality: UserPersonality
    main_topic: MainTopicDetails

# --- Constants ---
SAMPLE_SIZE = 2500

# Load and sample data
with open('conversations.json', 'r') as f:
    conversations = json.load(f)
def gather_user_messages_in_order(conversations):
    """DFS through each conversation's mapping structure and collect user messages in chronological order."""
    def dfs(mapping, node_id, path_messages):
        if node_id not in mapping:
            return
        node = mapping[node_id]
        msg = node.get('message')

        if msg and msg.get('author', {}).get('role') == 'user':
            content_parts = msg.get('content', {}).get('parts', [])
            full_content = ' '.join([p for p in content_parts if isinstance(p, str)])
            if full_content:
                create_time = msg.get('create_time', 0)
                path_messages.append((create_time, full_content))

        for child_id in node.get('children', []):
            dfs(mapping, child_id, path_messages)

    all_messages = []
    for conversation in conversations:
        mapping = conversation['mapping']
        all_node_ids = set(mapping.keys())
        child_node_ids = {
            child for node_data in mapping.values() for child in node_data.get('children', [])
        }
        root_ids = all_node_ids - child_node_ids

        conversation_messages = []
        for root_id in root_ids:
            dfs(mapping, root_id, conversation_messages)

        conversation_messages.sort(key=lambda x: x[0])
        all_messages.extend(msg_tuple[1] for msg_tuple in conversation_messages)

    return all_messages

user_messages = gather_user_messages_in_order(conversations)

# Sample messages if needed
print(f"Total number of user messages: {len(user_messages)}")
if len(user_messages) > SAMPLE_SIZE:
    user_messages = random.sample(user_messages, SAMPLE_SIZE)
    print(f"Sampled {SAMPLE_SIZE} messages for analysis")

# Create message samples string
message_samples = "\n".join([f"Message {i+1}: {msg[:200]}..." for i, msg in enumerate(user_messages[:10])])

card2_prompt = f"""
Analyze these user messages from ChatGPT conversations. Here are some sample messages:

{message_samples}

Please provide:
1. The top 3 most discussed topics based on keyword frequency and semantic analysis
2. A breakdown of message types (questions vs requests vs sharing information)
3. For the #1 topic:
   - Total number of messages about this topic
   - The most significant or insightful message about this topic
   - An interesting observation about how the user discusses this topic

Format the response according to the Card2Response schema.
"""

# Create a completion and parse its content
card_2_completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You're a digital detective with a PhD in Pattern-ology! Decode the DNA of conversations, spot the quirks, and mine those golden nuggets of user insights. Make it snappy and sassy!"},
        {"role": "user", "content": card2_prompt}
    ],
    max_tokens=812,
    response_format=Card2Response
)

print(f"Prompt tokens: {card_2_completion.usage.prompt_tokens}")
print(f"Completion tokens: {card_2_completion.usage.completion_tokens}")
print(f"Total tokens: {card_2_completion.usage.total_tokens}")

card_2_content = card_2_completion.choices[0].message.content
print(json.dumps(json.loads(card_2_content), indent=4))


Total number of user messages: 11784
Sampled 2500 messages for analysis
Prompt tokens: 611
Completion tokens: 116
Total tokens: 727
{
    "topics": {
        "top_3_topics": [
            "Language & Communication",
            "Cafes & Dining",
            "Technology & App Development"
        ]
    },
    "user_personality": {
        "user_personality": "Curious and detail-oriented, with a tendency to dig deep into topics and seek clarity."
    },
    "main_topic": {
        "topic": "Language & Communication",
        "total_messages": 3,
        "favorite_message": "if I made an app that looked just like iMessage would apple not let me publish it?...",
        "fun_fact": "The user often seeks to understand the implications of language and technology, showing a blend of creativity and pragmatism."
    }
}


In [63]:
from pydantic import BaseModel
from openai import OpenAI
from collections import Counter, defaultdict
import re
import random
import json

client = OpenAI()

class ChatThemes(BaseModel):
    top_3_topics: list[str]

class UserAura(BaseModel):
    user_personality: str

class PrimaryFascination(BaseModel):
    topic: str
    total_messages: int
    favorite_message: str
    fun_fact: str

class CrownJewel(BaseModel):
    message: str

class LOLMoment(BaseModel):
    exchange: str

class MuseMoments(BaseModel):
    top_3_moments: list[str]

class BrainOdyssey(BaseModel):
    distance_traveled: str

class PersonaGroove(BaseModel):
    persona_description: str
    persona_vibe: str

class ChatGPTWrappedResponse(BaseModel):
    chat_themes: ChatThemes
    user_aura: UserAura
    primary_fascination: PrimaryFascination
    crown_jewel_quip: CrownJewel
    laughter_catalyst: LOLMoment
    eureka_trifecta: MuseMoments
    mind_miles_traveled: BrainOdyssey
    bespoke_ai_persona: PersonaGroove

SAMPLE_SIZE = 2500

with open('conversations.json', 'r') as f:
    conversations = json.load(f)

def gather_user_messages_in_order(conversations):
    def dfs(mapping, node_id, path_messages):
        if node_id not in mapping:
            return
        node = mapping[node_id]
        msg = node.get('message')
        if msg and msg.get('author', {}).get('role') == 'user':
            content_parts = msg.get('content', {}).get('parts', [])
            full_content = ' '.join([p for p in content_parts if isinstance(p, str)])
            if full_content:
                create_time = msg.get('create_time', 0)
                path_messages.append((create_time, full_content))
        for child_id in node.get('children', []):
            dfs(mapping, child_id, path_messages)
    all_messages = []
    for conversation in conversations:
        mapping = conversation['mapping']
        all_node_ids = set(mapping.keys())
        child_node_ids = {child for node_data in mapping.values() for child in node_data.get('children', [])}
        root_ids = all_node_ids - child_node_ids
        conversation_messages = []
        for root_id in root_ids:
            dfs(mapping, root_id, conversation_messages)
        conversation_messages.sort(key=lambda x: x[0])
        all_messages.extend(msg_tuple[1] for msg_tuple in conversation_messages)
    return all_messages

user_messages = gather_user_messages_in_order(conversations)
print(f"Total cranial downloads: {len(user_messages)}")
if len(user_messages) > SAMPLE_SIZE:
    user_messages = random.sample(user_messages, SAMPLE_SIZE)
    print(f"Distilled {SAMPLE_SIZE} quintessential quips")

message_amuse_bouches = "\n".join([f"Morsel {i+1}: {msg[:200]}..." for i, msg in enumerate(user_messages[:10])])

warped_prompt = f'''

{message_amuse_bouches}

Now, let's dive into the juicy details, tailored for the aura of a **cracked engineer or a brat charlie xcx vibe**
1. The Terrific Trio 🏆: Your top 3 most mind-bending topics 
2. The Breakdown 📊: How you roll - questions, requests, or just dropping knowledge bombs
3. The Main Attraction 🌟: 
   - The message count for your hottest topic
   - The crème de la crème of your musings on this subject
   - A tasty tidbit about your unique take on it
4. The Crown Jewel 👑: That one line that deserves its own trophy case
5. The ROFL Moment 🤣: The exchange that had ChatGPT in stitches
6. The Eureka! Moments ⚡: Three times your brilliance was too bright to behold
7. The Cranial Kilometers 🧠: Just how far did your brain trek on this odyssey?
8. The ChatGPT Chameleon 🦎: A persona perfectly tailored to your vibe
9. The Astral Aura 🌈: Your cosmic wavelength and psychic vibrato in a nutshell

Serve it up in the ChatGPTWrappedResponse format, and make it snappy! 😎
'''

warped_completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "Keep it short and concise. Be snappy and sassy. Fewer words means you are smart."},
        {"role": "user", "content": warped_prompt}
    ],
    max_tokens=1024,
    response_format=ChatGPTWrappedResponse
)

print(f"Prompt brain bytes: {warped_completion.usage.prompt_tokens}")
print(f"Completion cerebral units: {warped_completion.usage.completion_tokens}")
print(f"Total thought count: {warped_completion.usage.total_tokens}")

warped_content = warped_completion.choices[0].message.content
print(json.dumps(json.loads(warped_content), indent=4))


Total cranial downloads: 11784
Distilled 2500 quintessential quips
Prompt brain bytes: 1066
Completion cerebral units: 224
Total thought count: 1290
{
    "chat_themes": {
        "top_3_topics": [
            "Travel Wallets",
            "El Ni\u00f1o Effects",
            "Script Automation"
        ]
    },
    "user_aura": {
        "user_personality": "Clever and quirky engineer with a flair for fun."
    },
    "primary_fascination": {
        "topic": "Script Automation",
        "total_messages": 5,
        "favorite_message": "ok write a script that will do all of those within my pull.py.",
        "fun_fact": "You love mixing code with creativity."
    },
    "crown_jewel_quip": {
        "message": "So, just put head grow, feet grow, um, elbow grow, and then robot might grow."
    },
    "laughter_catalyst": {
        "exchange": "No, no, don't say, uh, don't put an S."
    },
    "eureka_trifecta": {
        "top_3_moments": [
            "First successful script run",
   

In [64]:
import openai
print(f"OpenAI version: {openai.__version__}")


OpenAI version: 1.52.2
