# Attention Market Simulator — Notebook 01: Data Exploration

This notebook explores the synthetic time series generated by `AttentionEnv`, 
which models human attention as a market microstructure system.

We will:

- Load the simulated data
- Inspect attention, boredom, fatigue, and volatility over time
- Compute attention "returns" and rolling volatility
- Examine regime frequencies and summary statistics
- Analyze attention imbalance vs future movement
- Look for volatility clustering and basic autocorrelation

This notebook is meant to look and feel like an exploratory quant research report.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path

# Use matplotlib's "default" style sheet for every figure below.
plt.style.use("default")

# The CSV path is relative, so it resolves against the kernel's working
# directory (one level up, then into data/).
data_path = Path("..", "data", "attention_simulation.csv")
df = pd.read_csv(data_path)

# Quick look at the first rows.
df.head(5)


In [None]:
# Basic orientation: size, column names, and how often each regime occurs.
frame_shape = df.shape
column_names = df.columns.tolist()
regime_counts = df["regime"].value_counts()

print("Shape:", frame_shape)
print("\nColumns:\n", column_names)

print("\nRegime counts:")
print(regime_counts)

# Transposed so that each column of df becomes one row of the summary table.
df.describe().transpose()


In [None]:
# Four stacked panels sharing the time axis: one series in each of the first
# three panels, boredom and fatigue overlaid in the last one.
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(12, 10), sharex=True)
time_steps = df["t"]

single_series_panels = [
    ("attention_level", "Attention"),
    ("volatility", "Volatility"),
    ("attention_imbalance", "Imbalance"),
]
# zip() stops after the three configured panels, leaving axes[3] for below.
for panel, (column, y_label) in zip(axes, single_series_panels):
    panel.plot(time_steps, df[column])
    panel.set_ylabel(y_label)

bottom_panel = axes[3]
for column, legend_label in (("boredom", "Boredom"), ("fatigue", "Fatigue")):
    bottom_panel.plot(time_steps, df[column], label=legend_label)
bottom_panel.set_ylabel("Boredom / Fatigue")
bottom_panel.set_xlabel("Time step")
bottom_panel.legend()

fig.suptitle("Core State Variables Over Time", y=1.02)
plt.tight_layout()
plt.show()


In [None]:
# Attention level over time, one point per row, coloured by regime.
fig, ax = plt.subplots(figsize=(12, 4))

# Integer code per regime; the code is what gets pushed through the colormap.
regime_map = {
    "engaged": 0,
    "fatigued": 1,
    "overstimulated": 2,
    "addictive_loop": 3,
    "disengaged": 4,
    "baseline": 5,
}

# A label that is present in the data but missing above would map to NaN, and
# scatter() does not draw points with a non-finite colour value by default.
# Give such labels their own code so no observation silently disappears.
for label in df["regime"].dropna().unique():
    regime_map.setdefault(label, len(regime_map))

regime_codes = df["regime"].map(regime_map)

scatter = ax.scatter(
    df["t"],
    df["attention_level"],
    c=regime_codes,
    s=10,
    # Pin the colour scale to the full code range so a regime keeps the same
    # colour no matter which regimes happen to occur in this data set.
    vmin=min(regime_map.values()),
    vmax=max(regime_map.values()),
)

# Without a legend the colours are unreadable. Build one proxy marker per
# regime that actually occurs, coloured exactly as the scatter colours it.
regimes_present = set(df["regime"].dropna())
legend_handles = [
    plt.Line2D(
        [], [],
        linestyle="",
        marker="o",
        color=scatter.to_rgba(code),
        label=label,
    )
    for label, code in sorted(regime_map.items(), key=lambda item: item[1])
    if label in regimes_present
]
# Placed outside the axes so it cannot cover any data points.
ax.legend(
    handles=legend_handles,
    title="Regime",
    loc="upper left",
    bbox_to_anchor=(1.01, 1.0),
)

ax.set_title("Attention Level with Regimes")
ax.set_xlabel("Time step")
ax.set_ylabel("Attention level")

plt.tight_layout()
plt.show()


In [None]:
# Derive return, forward-change and rolling features from attention_level.
# Work on a copy so the new columns are not written into the object that
# `df` was previously bound to.
df = df.copy()

# NOTE(review): diff(), shift() and rolling() are positional, so this assumes
# rows are already ordered by `t` with even spacing -- confirm against the
# simulator output.
attention = df["attention_level"]

# One-step return in attention (first row is NaN).
df["attn_return_1"] = attention.diff()

# Absolute return
df["attn_abs_return_1"] = df["attn_return_1"].abs()

# Forward 5-step change (last 5 rows are NaN: their future is not in the data).
df["attn_fwd_change_5"] = attention.shift(-5) - attention

# Direction label for classification later.
# A plain `(change > 0).astype(int)` would turn the NaN tail into 0, i.e.
# label rows with an unknown future as "down". Use the nullable Int64 dtype
# and keep the label missing wherever the forward change is missing.
fwd_change = df["attn_fwd_change_5"]
df["attn_fwd_up_5"] = fwd_change.gt(0).astype("Int64").mask(fwd_change.isna())

# Rolling volatility (like realized vol): std of one-step returns over `window` rows.
window = 20
df["rolling_vol_20"] = df["attn_return_1"].rolling(window).std()

# Rolling mean of the attention level over the same window.
df["rolling_mean_20"] = attention.rolling(window).mean()

df[["attention_level", "attn_return_1", "attn_fwd_change_5", "rolling_vol_20"]].head(10)


In [None]:
# Side-by-side histograms: signed one-step returns (left) and their
# magnitudes (right), each with 50 bins.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
signed_panel, magnitude_panel = ax

# Signed returns; NaNs are dropped before binning.
signed_returns = df["attn_return_1"].dropna()
signed_panel.hist(signed_returns, bins=50)
signed_panel.set_title("Distribution of 1-step Attention Returns")
signed_panel.set_xlabel("Return")
signed_panel.set_ylabel("Frequency")

# Return magnitudes, used here as a volatility proxy.
return_magnitudes = df["attn_abs_return_1"].dropna()
magnitude_panel.hist(return_magnitudes, bins=50)
magnitude_panel.set_title("Distribution of |Return|")
magnitude_panel.set_xlabel("|Return|")

plt.tight_layout()
plt.show()


In [None]:
# Time series of |return|, to eyeball whether large moves bunch together.
fig, ax = plt.subplots(figsize=(12, 4))

time_steps = df["t"]
return_magnitudes = df["attn_abs_return_1"]
ax.plot(time_steps, return_magnitudes)

ax.set(
    title="Absolute Attention Returns Over Time",
    xlabel="Time step",
    ylabel="|Return|",
)

fig.tight_layout()
plt.show()


In [None]:
# Per-regime summary table: the statistics below for each listed column.
summary_stats = ["mean", "std", "min", "max", "count"]

group_cols = [
    "attention_level",
    "attn_return_1",
    "attn_abs_return_1",
    "rolling_vol_20",
    "volatility",
    "attention_imbalance",
]

by_regime = df.groupby("regime")
regime_summary = by_regime[group_cols].agg(summary_stats)
regime_summary


In [None]:
# One box per regime showing the spread of one-step attention returns.
fig, ax = plt.subplots(figsize=(10, 5))

sns.boxplot(x="regime", y="attn_return_1", data=df, ax=ax)

ax.set(
    title="Distribution of 1-step Attention Returns by Regime",
    xlabel="Regime",
    ylabel="Return",
)

fig.tight_layout()
plt.show()


In [None]:
# Keep only rows where both the current imbalance and the 5-step forward
# change are available.
required_columns = ["attn_fwd_change_5", "attention_imbalance"]
analysis_df = df.dropna(subset=required_columns).copy()

# Plot at most 1000 points; the fixed seed keeps the sample reproducible.
sample_size = min(1000, len(analysis_df))
plot_sample = analysis_df.sample(sample_size, random_state=42)

fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=plot_sample,
    x="attention_imbalance",
    y="attn_fwd_change_5",
    ax=ax,
)
ax.set(
    title="Attention Imbalance vs 5-step Forward Attention Change",
    xlabel="Current Imbalance",
    ylabel="Future 5-step Change",
)

fig.tight_layout()
plt.show()

# Correlation is computed on all retained rows, not just the plotted sample.
analysis_df[["attention_imbalance", "attn_fwd_change_5"]].corr()


In [None]:
# Columns whose pairwise correlations are shown in the heatmap.
feature_cols = [
    "attention_level",
    "attn_return_1",
    "attn_abs_return_1",
    "rolling_vol_20",
    "volatility",
    "attention_imbalance",
    "attention_liquidity",
    "attention_demand",
    "boredom",
    "fatigue",
]

features = df[feature_cols]
corr = features.corr()

# Diverging colormap centred at 0, so the sign of a correlation reads directly
# from the colour.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=False, cmap="coolwarm", center=0, ax=ax)
ax.set_title("Correlation Matrix of Key Features")
fig.tight_layout()
plt.show()


In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation of one-step returns out to 40 lags; the leading NaN from
# diff() is dropped first.
one_step_returns = df["attn_return_1"].dropna()

fig, ax = plt.subplots(figsize=(8, 4))
plot_acf(one_step_returns, lags=40, ax=ax)
# Set after plot_acf so this title replaces the one plot_acf puts on the axes.
ax.set_title("ACF of 1-step Attention Returns")
fig.tight_layout()
plt.show()
