# Analysis of Apollo Dialogue Conversations

This notebook loads conversation data from DynamoDB and analyzes interactions.

In [19]:
import boto3
import pandas as pd
from datetime import datetime
import os
from dotenv import load_dotenv

# Load environment variables from the .env file (path is relative to the
# directory the notebook kernel runs in).
load_dotenv("../../.env")

# Connection settings; each one can be overridden through the environment.
aws_region = os.getenv('AWS_REGION', 'eu-north-1')
endpoint_url = os.getenv('AWS_ENDPOINT_URL')
DYNAMODB_TABLE = os.getenv('DYNAMODB_TABLE', 'apollolytics_dialogues')

# Initialize the DynamoDB resource. endpoint_url is passed only when it is set,
# so boto3 falls back to its default endpoint otherwise.
resource_kwargs = {'region_name': aws_region}
if endpoint_url:
    resource_kwargs['endpoint_url'] = endpoint_url
dynamodb = boto3.resource('dynamodb', **resource_kwargs)

# Get the table
table = dynamodb.Table(DYNAMODB_TABLE)

# Scan the whole table. Each scan call returns one page; as long as the
# response carries a LastEvaluatedKey there are more pages to fetch.
items = []
scan_kwargs = {}
while True:
    response = table.scan(**scan_kwargs)
    items.extend(response['Items'])
    if 'LastEvaluatedKey' not in response:
        break
    scan_kwargs['ExclusiveStartKey'] = response['LastEvaluatedKey']

# Convert to DataFrame (default integer index = position in the scan result)
df = pd.DataFrame(items)

# Fail early with a descriptive message instead of an opaque KeyError further
# down when the table is empty or the records lack the columns used below.
missing_columns = [col for col in ('session_id', 'timestamp') if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Table {DYNAMODB_TABLE!r} returned {len(df)} record(s) without "
        f"required column(s): {missing_columns}"
    )

# Convert timestamp to a numeric column. Values that cannot be parsed become
# NaN (errors='coerce'); report them so the loss is not silent.
raw_timestamps = df['timestamp']
df['timestamp'] = pd.to_numeric(raw_timestamps, errors='coerce')
unparseable_count = int((df['timestamp'].isna() & raw_timestamps.notna()).sum())
if unparseable_count:
    print(f"Warning: {unparseable_count} record(s) had a non-numeric timestamp and were set to NaN")

# Interpret the numeric timestamp as seconds since the Unix epoch
df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')

# Sort by session_id and timestamp
df = df.sort_values(['session_id', 'timestamp'])

# Display basic info
print(f"Total records: {len(df)}")
print(f"Unique sessions: {df['session_id'].nunique()}")
print("\nColumns in the dataset:")
print(df.columns.tolist())

# Display first few rows
df.head()

Total records: 224
Unique sessions: 44

Columns in the dataset:
['event_type', 'created_at', 'session_id', 'timestamp', 'dialogue_mode', 'origin_url', 'article', 'message_content', 'role', 'message_id', 'transcript', 'reason', 'propaganda_result', 'content', 'timing_info', 'datetime']


Unnamed: 0,event_type,created_at,session_id,timestamp,dialogue_mode,origin_url,article,message_content,role,message_id,transcript,reason,propaganda_result,content,timing_info,datetime
108,session_init,2025-05-19T14:41:34.903880,07c463bd-ac97-49ef-8e6f-1db044d57b80,1747666000.0,critical,http://localhost:3000/dialogue/positive,asd,,,,,,,,,2025-05-19 14:41:34
109,propaganda_analysis,2025-05-19T14:41:36.044704,07c463bd-ac97-49ef-8e6f-1db044d57b80,1747666000.0,,,,,,,,,"{'type': 'contextualization', 'data': {}, 'use...",,,2025-05-19 14:41:36
110,,2025-05-19T14:41:42.103222,07c463bd-ac97-49ef-8e6f-1db044d57b80,1747666000.0,,,,,assistant,assistant_787003bf-5480-48b8-9f6e-77c7ae6d74e6,,,,What are your thoughts on the impact of misinf...,"{'model_generation_time': 6.018832445144653, '...",2025-05-19 14:41:42
111,,2025-05-19T14:42:00.661684,07c463bd-ac97-49ef-8e6f-1db044d57b80,1747666000.0,,,,,user,user_9043ec08-a8e3-4f9c-af1f-6ed27b82c121,,,,"Hello, my name is Killian.","{'thinking_time': 1.263, 'recording_duration':...",2025-05-19 14:42:00
112,,2025-05-19T14:42:09.601034,07c463bd-ac97-49ef-8e6f-1db044d57b80,1747666000.0,,,,,assistant,assistant_3a5f2870-df3a-4081-9a59-fe7404894c6b,,,,"Hello, Kaleo. It's nice to meet you. I'd like ...","{'model_generation_time': 8.87710452079773, 'm...",2025-05-19 14:42:09


In [22]:
# Distinct session identifiers present in df
df.loc[:, 'session_id'].unique()

array(['ef721f48-d2e7-4dea-9b17-3c34815a4984',
       '3e8816dc-16b3-4ffe-9234-e04382d33303',
       '07c463bd-ac97-49ef-8e6f-1db044d57b80',
       '692250f4-4463-435b-b8b1-5bdbf9b7d1e9',
       '39dec183-2bb5-47a2-8135-a76d7b494446',
       'f8561b14-8316-4ac6-9fa8-37c53806ea20',
       '5109f8d4-9899-47f2-a5e4-9606b24c4797',
       '29267742-ae68-4252-8611-953227960050',
       'b9600c11-ffd9-4afd-9601-e1b19be4a598',
       '615e7300-f723-4133-b60f-3674daa21935',
       'b5d9bba1-b954-4aaf-83a3-9e83ab498aa4',
       '40c7c5c3-3259-4698-a405-e7f076078bf8',
       '655d31b7-2e3d-4644-b254-9a785199a10e',
       'aeab2389-b929-4add-b9c1-11da9d5a648a',
       '4f664c15-86b1-46a7-b9ee-66dd0f6c3474',
       '833a48e8-bb07-4dda-b9dd-523a02f112b6',
       '50e806f4-ce67-43fe-9b4c-4f37b31492d7',
       '526a54ca-f2f6-4380-a7f0-b83672a0f75a',
       'e0302800-6c3e-471a-9242-02366b0f5ed0',
       '7eabc7e8-5240-4585-b620-23ec65a20722',
       '21aed941-b5ba-4a94-943e-557478325fb4',
       '5e85f

In [20]:
# Reorder df so the largest created_at values come first (descending sort),
# then preview the top rows. df is rebound to the sorted frame.
df = df.sort_values(by='created_at', ascending=False)
df.head()

Unnamed: 0,event_type,created_at,session_id,timestamp,dialogue_mode,origin_url,article,message_content,role,message_id,transcript,reason,propaganda_result,content,timing_info,datetime
100,,2025-05-19T14:54:59.887659,ef721f48-d2e7-4dea-9b17-3c34815a4984,1747666000.0,,,,,assistant,assistant_c672a7e8-5730-4a52-a7ee-724214f38b5f,,,,"Hello, Kilian. It's a pleasure to meet you. I'...","{'model_generation_time': 10.26395058631897, '...",2025-05-19 14:54:59
99,,2025-05-19T14:54:49.530380,ef721f48-d2e7-4dea-9b17-3c34815a4984,1747666000.0,,,,,user,user_f7f9c79a-01ca-4d53-859e-a6759cc33f03,,,,"Hello, my name is Killian","{'thinking_time': 1.195, 'recording_duration':...",2025-05-19 14:54:49
98,,2025-05-19T14:54:30.426835,ef721f48-d2e7-4dea-9b17-3c34815a4984,1747666000.0,,,,,assistant,assistant_e57a2afa-06c7-4313-a622-fc06f262fdf5,,,,What are your thoughts on articles that influe...,"{'model_generation_time': 5.684672117233276, '...",2025-05-19 14:54:30
97,propaganda_analysis,2025-05-19T14:54:24.696174,ef721f48-d2e7-4dea-9b17-3c34815a4984,1747666000.0,,,,,,,,,"{'type': 'contextualization', 'data': {}, 'use...",,,2025-05-19 14:54:24
96,session_init,2025-05-19T14:54:23.519776,ef721f48-d2e7-4dea-9b17-3c34815a4984,1747666000.0,critical,http://localhost:3000/dialogue/positive,asd,,,,,,,,,2025-05-19 14:54:23


In [8]:
# Show the first ten rows of df in its current order
df.iloc[:10]

Unnamed: 0,event_type,created_at,session_id,timestamp,dialogue_mode,origin_url,article,message_content,role,message_id,transcript,reason,propaganda_result,content,timing_info,datetime
51,,2025-05-19T14:31:09.438604,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,assistant,assistant_e3eba667-8a09-4380-b953-6661e369e507,,,,"Hello, Kelyo. Let's engage in a meaningful con...",{'model_generation_time': 13.129829168319702},2025-05-19 14:31:09
50,,2025-05-19T14:30:56.249908,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,user,user_de2ea71b-19b5-416c-9ce7-e2d462579369,,,,I don't care what you're saying.,"{'thinking_time': -3.453, 'recording_duration'...",2025-05-19 14:30:56
49,,2025-05-19T14:30:36.745124,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,assistant,assistant_78e356d8-7290-4c06-aeff-9d8b472804bd,,,,"Hello Callum, I'm sorry to hear that you're no...",{'model_generation_time': 12.262012958526611},2025-05-19 14:30:36
48,,2025-05-19T14:30:24.428545,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,user,user_f9a613ee-18d0-4268-82f8-ede436728b81,,,,I don't feel a lot.,"{'thinking_time': 0.794, 'recording_duration':...",2025-05-19 14:30:24
47,,2025-05-19T14:30:08.632485,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,assistant,assistant_a32acdb0-1cbd-4fd3-a694-3e8202e68a15,,,,"Hello Galileo, it's a pleasure to engage in a ...",{'model_generation_time': 8.669907569885254},2025-05-19 14:30:08
46,,2025-05-19T14:29:59.910014,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,user,user_0d316822-78ba-47d8-a5d1-a482e18b859e,,,,"Oh, not really.","{'thinking_time': 0.866, 'recording_duration':...",2025-05-19 14:29:59
45,,2025-05-19T14:29:45.876117,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,assistant,assistant_7922aac0-6ea8-467b-9457-27d59b1c940d,,,,"Hello, Kelio. It's nice to meet you. I'd love ...",{'model_generation_time': 6.799267292022705},2025-05-19 14:29:45
44,,2025-05-19T14:29:39.019060,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,user,user_36f8942c-4315-4bff-90df-501ab87cb782,,,,"Hello, my name is Killian","{'thinking_time': 1.654, 'recording_duration':...",2025-05-19 14:29:39
43,,2025-05-19T14:29:24.064903,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,assistant,assistant_417d226e-0ab2-4f65-b9d0-324781173d63,,,,Let's consider the article presented to you. W...,{'model_generation_time': 6.444554567337036},2025-05-19 14:29:24
42,propaganda_analysis,2025-05-19T14:29:17.573740,692250f4-4463-435b-b8b1-5bdbf9b7d1e9,1747665000.0,,,,,,,,,"{'type': 'contextualization', 'data': {}, 'use...",,,2025-05-19 14:29:17


In [18]:
# Inspect the timing_info value of the row whose index label is 109.
# NOTE(review): the label comes from the default index assigned when the frame
# is built from the scan result, so the record behind 109 may differ between
# runs — confirm it still points at the intended message.
df.loc[109, 'timing_info']

{'model_generation_time': Decimal('11.455530881881714'),
 'model_audio_duration': Decimal('89478.48529166667'),
 'total_response_time': Decimal('89489.94082254855')}