In [2]:
# %% [markdown]
# # 🔧 Consolidated Patch Cell — Combined Token Degradation Analysis
# This cell fixes schema mismatches, enforces consistent indexing, strengthens modeling,
# and removes random data fallbacks. Paste AFTER your data loading/extraction cells.

import numpy as np
import pandas as pd
import re
from datetime import datetime
from typing import Dict, List, Tuple
from scipy import stats
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

# -----------------------------
# 0) Sanity checks and warnings
# -----------------------------
# ChatGPT URLs: expect 18 (update if your list is intentionally 17)
EXPECTED_URLS = 18
try:
    n_urls = len(chatgpt_workup_urls)
    assert n_urls == EXPECTED_URLS, f"Expected {EXPECTED_URLS} ChatGPT workups, found {n_urls}"
except AssertionError as e:
    print(f"⚠️ URL count check: {e}")

# ---------------------------------------------------
# 1) Normalize Claude schema: observed_tdt -> token_usage
# ---------------------------------------------------
print(f"✅ Claude session data loaded (n={len(claude_sessions)})")

claude_df = pd.DataFrame(claude_sessions).copy()
# Align to shared schema fields used later
claude_df['token_usage'] = pd.to_numeric(claude_df['observed_tdt'], errors='coerce')
claude_df['threshold']   = claude_df['token_usage']  # mirror for downstream code

# Ensure minimal required columns exist
for col in ['session', 'date', 'ai_system', 'token_usage']:
    if col not in claude_df.columns:
        claude_df[col] = np.nan

# -------------------------------------------
# 2) Ensure ChatGPT extracted frame is present
#    (No random data fallback: keep only real)
# -------------------------------------------
if 'chatgpt_sessions' not in globals():
    print("⚠️ chatgpt_sessions not found; creating empty DataFrame")
    chatgpt_df = pd.DataFrame(columns=['session','date','ai_system','token_usage','threshold','evidence'])
else:
    chatgpt_df = pd.DataFrame(chatgpt_sessions).copy()

# Coerce numeric and clean
for df_ in [claude_df, chatgpt_df]:
    for col in ['session', 'token_usage']:
        if col in df_.columns:
            df_[col] = pd.to_numeric(df_[col], errors='coerce')

# ------------------------------------
# 3) Build combined_df with global index
# ------------------------------------
def _ts_key(df):
    # robust datetime parse
    return pd.to_datetime(df['date'], errors='coerce')

claude_df['_order']  = _ts_key(claude_df)
chatgpt_df['_order'] = _ts_key(chatgpt_df)

combined_df = pd.concat([claude_df, chatgpt_df], ignore_index=True)

# Drop rows without token_usage
combined_df = combined_df.dropna(subset=['token_usage']).copy()

# Sort chronologically, then by system and session to be stable
combined_df = combined_df.sort_values(['_order', 'ai_system', 'session'], kind='mergesort').reset_index(drop=True)

# Create a unified global session index for modeling
combined_df['global_session'] = np.arange(1, len(combined_df) + 1)

# Dynamic counts
n_total   = len(combined_df)
n_claude  = int((combined_df['ai_system'] == 'Claude').sum())
n_chatgpt = int((combined_df['ai_system'] == 'ChatGPT').sum())

print("\n🔗 COMBINED DATASET")
print("=" * 40)
print(f"Total sessions: {n_total}")
print(f"Claude sessions: {n_claude}")
print(f"ChatGPT sessions: {n_chatgpt}")
print(f"Sample size (n): {n_total}")

# ----------------------------------------
# 4) Summary stats (clean NaNs beforehand)
# ----------------------------------------
all_token_usage = combined_df['token_usage'].dropna().to_numpy()
claude_tokens   = combined_df.loc[combined_df['ai_system']=='Claude','token_usage'].dropna().to_numpy()
chatgpt_tokens  = combined_df.loc[combined_df['ai_system']=='ChatGPT','token_usage'].dropna().to_numpy()

print(f"\n📊 SUMMARY BY AI SYSTEM:")
summary_stats = (combined_df
                 .groupby('ai_system')['token_usage']
                 .agg(['count','mean','std','min','max'])
                 .round(2))
display(summary_stats)

print(f"\n📋 COMBINED DATASET (head):")
display(combined_df.head())

# -----------------------------------------
# 5) Statistical tests (Chi-square, t-test)
# -----------------------------------------
print("\n📊 STATISTICAL ANALYSIS - COMBINED DATASET")
print("=" * 60)
n = len(all_token_usage)
if n >= 2:
    print(f"🎯 OVERALL STATISTICS (n={n}):")
    print(f"  Mean token usage: {np.mean(all_token_usage):.2f}%")
    print(f"  Std deviation:    {np.std(all_token_usage, ddof=1):.2f}")
    print(f"  Variance:         {np.var(all_token_usage, ddof=1):.2f}")
    print(f"  Range:            {np.min(all_token_usage)}% - {np.max(all_token_usage)}%")

    # Chi-square variance test: H0: σ² = 10
    null_variance = 10
    sample_variance = float(np.var(all_token_usage, ddof=1))
    chi2_stat = (n - 1) * sample_variance / null_variance
    p_value = 1 - stats.chi2.cdf(chi2_stat, n - 1)

    print(f"\n🧪 CHI-SQUARE VARIANCE TEST:")
    print(f"  H₀: σ² = {null_variance}")
    print(f"  Sample variance:      {sample_variance:.3f}")
    print(f"  Chi-square statistic: {chi2_stat:.3f}")
    print(f"  Degrees of freedom:   {n-1}")
    print(f"  p-value:              {p_value:.6f}")
    print(f"  Reject H₀:            {'Yes' if p_value < 0.05 else 'No'}")
else:
    print("⚠️ Not enough observations for variance test.")

# Two-sample t-test if both arms have at least 2
if len(claude_tokens) > 1 and len(chatgpt_tokens) > 1:
    t_stat, t_p_value = stats.ttest_ind(claude_tokens, chatgpt_tokens, equal_var=False)
    print(f"\n🤖 CLAUDE vs CHATGPT (two-sample t-test, Welch):")
    print(f"  Claude mean:   {np.mean(claude_tokens):.2f}% (n={len(claude_tokens)})")
    print(f"  ChatGPT mean:  {np.mean(chatgpt_tokens):.2f}% (n={len(chatgpt_tokens)})")
    print(f"  t-statistic:   {t_stat:.3f}")
    print(f"  p-value:       {t_p_value:.6f}")
    print(f"  Significant:   {'Yes' if t_p_value < 0.05 else 'No'}")
else:
    print("\n⚠️ Not enough data in both groups for t-test.")

# ------------------------------------------------
# 6) Degradation modeling using global_session idx
# ------------------------------------------------
def analyze_token_degradation(df):
    """Analyze token degradation patterns using global_session."""
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score

    df_sorted = df.sort_values('global_session')
    x = df_sorted['global_session'].to_numpy()
    y = df_sorted['token_usage'].to_numpy()

    print("\n📉 TOKEN DEGRADATION ANALYSIS")
    print("=" * 50)

    models = {}

    # Linear
    lin = LinearRegression().fit(x.reshape(-1,1), y)
    yhat = lin.predict(x.reshape(-1,1))
    r2 = r2_score(y, yhat)

    models['linear'] = {
        'slope': lin.coef_[0],
        'intercept': lin.intercept_,
        'r_squared': r2,
        'model': lin
    }
    print(f"LINEAR MODEL:")
    print(f"  y = {lin.coef_[0]:.3f} * global_session + {lin.intercept_:.3f}")
    print(f"  R² = {r2:.3f}")
    print(f"  Slope: {lin.coef_[0]:+.3f}% per global session")

    # Exponential (robust init)
    def fexp(x, A, k, C): return A * np.exp(-k * x) + C
    try:
        p0 = [float(np.ptp(y)) if len(y) else 1.0, 0.01, float(np.min(y)) if len(y) else 0.0]
        popt, _ = curve_fit(fexp, x, y, p0=p0, maxfev=10000)
        yexp = fexp(x, *popt)
        r2e = r2_score(y, yexp)
        models['exponential'] = {
            'A': popt[0], 'k': popt[1], 'C': popt[2],
            'r_squared': r2e,
            'half_life': (np.log(2)/popt[1]) if popt[1] > 0 else np.inf
        }
        print(f"\nEXPONENTIAL MODEL:")
        print(f"  R² = {r2e:.3f}")
        print(f"  Half-life: {models['exponential']['half_life']:.2f} global sessions")
    except Exception as e:
        print(f"\nEXPONENTIAL MODEL: Failed to fit - {e}")
        models['exponential'] = None

    return models, df_sorted

degradation_models, df_temporal = analyze_token_degradation(combined_df)

# ----------------
# 7) Visualization
# ----------------
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))

# Split groups
claude_data = combined_df[combined_df['ai_system'] == 'Claude']
chatgpt_data = combined_df[combined_df['ai_system'] == 'ChatGPT']

# Plot 1: token usage over time (global_session)
ax1.scatter(claude_data['global_session'], claude_data['token_usage'], 
            color='blue', s=80, alpha=0.75, label='Claude', marker='o')
ax1.scatter(chatgpt_data['global_session'], chatgpt_data['token_usage'], 
            color='red', s=80, alpha=0.75, label='ChatGPT', marker='s')

# Linear trend
if degradation_models.get('linear'):
    x_trend = np.linspace(combined_df['global_session'].min(), combined_df['global_session'].max(), 200)
    y_trend = degradation_models['linear']['model'].predict(x_trend.reshape(-1, 1))
    ax1.plot(x_trend, y_trend, '--', color='gray', alpha=0.8, label='Linear Trend')

ax1.axhline(y=85, color='orange', linestyle=':', alpha=0.8, label='Critical Threshold (85%)')
ax1.set_xlabel('Global Session')
ax1.set_ylabel('Token Usage (%)')
ax1.set_title('Token Usage Over Time (Global Index)')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Distribution comparison (histograms)
ax2.hist(claude_tokens, bins=10, alpha=0.6, label='Claude', color='blue', density=True)
ax2.hist(chatgpt_tokens, bins=10, alpha=0.6, label='ChatGPT', color='red', density=True)
ax2.set_xlabel('Token Usage (%)')
ax2.set_ylabel('Density')
ax2.set_title('Token Usage Distribution by AI System')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Plot 3: Boxplot comparison
ax3.boxplot([claude_tokens, chatgpt_tokens], labels=['Claude', 'ChatGPT'])
ax3.set_ylabel('Token Usage (%)')
ax3.set_title('Token Usage Distribution Comparison')
ax3.grid(True, alpha=0.3)

# Plot 4: Frequency distribution of token_usage values
vals, counts = np.unique(combined_df['token_usage'].round(), return_counts=True)
ax4.bar(vals, counts, alpha=0.75, color='green')
ax4.axvline(x=85, color='red', linestyle='--', label='Critical Threshold')
ax4.set_xlabel('Token Usage (%) (rounded)')
ax4.set_ylabel('Frequency')
ax4.set_title('Token Usage Frequency')
ax4.legend()
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# -------------------------------
# 8) 100-session linear prediction
# -------------------------------
def predict_100_sessions_combined(models):
    print("\n🔮 100-SESSION PREDICTION")
    print("=" * 40)
    future_sessions = np.arange(1, 101)  # future global sessions

    if models.get('linear'):
        linear_pred = models['linear']['model'].predict(future_sessions.reshape(-1, 1))
        print("Session | Token Usage")
        print("--------|------------")
        for s in [30, 50, 75, 100]:
            print(f"{s:7d} | {linear_pred[s-1]:6.1f}%")
        crit = np.where(linear_pred >= 85)[0]
        if len(crit):
            print(f"\nLast global session ≥85%: {crit[-1] + 1}")
        return linear_pred
    return None

_ = predict_100_sessions_combined(degradation_models)

# ----------------
# 9) Export results
# ----------------
combined_df.to_csv('combined_token_analysis.csv', index=False)

summary_report = {
    'total_sessions': int(len(combined_df)),
    'claude_sessions': int(n_claude),
    'chatgpt_sessions': int(n_chatgpt),
    'overall_mean_token_usage': float(np.mean(all_token_usage)) if len(all_token_usage) else None,
    'overall_std_token_usage': float(np.std(all_token_usage, ddof=1)) if len(all_token_usage)>1 else None,
    'overall_variance': float(np.var(all_token_usage, ddof=1)) if len(all_token_usage)>1 else None,
}

# Include chi-square only if computed above
if n >= 2:
    summary_report['chi_square_test'] = {
        'null_hypothesis': 'σ² = 10',
        'test_statistic': float(chi2_stat),
        'p_value': float(p_value),
        'reject_null': bool(p_value < 0.05)
    }

# Include t-test only if computed
if len(claude_tokens) > 1 and len(chatgpt_tokens) > 1:
    summary_report['claude_vs_chatgpt'] = {
        'claude_mean': float(np.mean(claude_tokens)),
        'chatgpt_mean': float(np.mean(chatgpt_tokens)),
        't_test_p_value': float(t_p_value)
    }

import json
with open('token_analysis_summary.json', 'w') as f:
    json.dump(summary_report, f, indent=2)

print("\n💾 EXPORT COMPLETE")
print("=" * 30)
print("Files created:")
print("• combined_token_analysis.csv")
print("• token_analysis_summary.json")


NameError: name 'chatgpt_workup_urls' is not defined