# Data Exploration Notebook

This notebook explores the raw data from our LLM-based social network simulation.

In [None]:
import json
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
plt.style.use('seaborn')

## Load Data

In [None]:
# Load the raw network structure and per-agent data. The files are opened
# explicitly as UTF-8 so decoding does not depend on the platform's default
# locale encoding (which is not UTF-8 on every system).
with open('../data/raw/network_structure.json', 'r', encoding='utf-8') as f:
    network_data = json.load(f)

with open('../data/raw/agent_data.json', 'r', encoding='utf-8') as f:
    agent_data = json.load(f)

# Quick sanity check on the size of what was loaded.
print(f"Number of agents: {len(agent_data)}")
print(f"Number of connections: {len(network_data['edges'])}")

## Explore Network Structure

In [None]:
# Build an undirected graph from the loaded node and edge collections.
G = nx.Graph()
G.add_nodes_from(network_data['nodes'])
G.add_edges_from(network_data['edges'])

# Draw the graph with small, unlabeled nodes on a 12x8 figure.
plt.figure(figsize=(12, 8))
nx.draw(
    G,
    with_labels=False,
    node_size=20,
)
plt.title("Social Network Structure")
plt.show()

# Report two summary statistics of the graph.
density = nx.density(G)
avg_clustering = nx.average_clustering(G)
print(f"Network density: {density:.4f}")
print(f"Average clustering coefficient: {avg_clustering:.4f}")

## Analyze Agent Types

In [None]:
# Tally how many agents carry each 'type' value.
agent_types = [record['type'] for record in agent_data.values()]
type_counts = pd.Series(agent_types).value_counts()

# Bar chart of the tallies, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
type_counts.plot(kind='bar', ax=ax)
ax.set_title("Distribution of Agent Types")
ax.set_xlabel("Agent Type")
ax.set_ylabel("Count")
plt.show()

## Explore Messages

In [None]:
# Flatten every agent's 'messages' collection into a single list and measure
# each message with len().
# NOTE(review): the "characters" label assumes each message is a string —
# confirm against the data schema.
all_messages = [msg for data in agent_data.values() for msg in data['messages']]
message_lengths = [len(msg) for msg in all_messages]

# Only plot when there is something to plot; an empty dataset would
# otherwise produce a meaningless figure.
if message_lengths:
    plt.figure(figsize=(10, 6))
    sns.histplot(message_lengths, kde=True)
    plt.title("Distribution of Message Lengths")
    plt.xlabel("Message Length (characters)")
    plt.ylabel("Frequency")
    plt.show()

print(f"Total messages: {len(all_messages)}")
# Guard the mean: dividing by len() of an empty list raises ZeroDivisionError.
if message_lengths:
    average_length = sum(message_lengths) / len(message_lengths)
    print(f"Average message length: {average_length:.2f} characters")
else:
    print("Average message length: n/a (no messages found)")

## Analyze Agent Personalities

In [None]:
# Gather each agent's 'personality' entry into one table, one row per agent.
trait_rows = [record['personality'] for record in agent_data.values()]
personalities = pd.DataFrame(trait_rows)

# One box per column of the table, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=personalities, ax=ax)
ax.set_title("Distribution of Personality Traits")
ax.set_ylabel("Trait Value")
plt.show()

# Numeric summary of each column.
summary = personalities.describe()
print(summary)