# Data Analysis Notebook

In [None]:
import numpy as np 
import pandas as pd 
import plotly.graph_objects as go

# Data Cleaning and Merging

## Import Data

#### Import Double Auction Model Data 

In [None]:
# Load the Double Auction simulation summary into a DataFrame.
# NOTE(review): absolute, machine-specific path — this cell fails on any other
# machine; consider a configurable data directory.
da_sim_df = pd.read_csv("/Users/nalin/Desktop/UChicago/Thesis/simulation_summary.csv")

In [None]:
da_sim_df.head()

In [None]:
da_sim_df.columns

In [None]:
# Tag every Double Auction run: the AMM treatment flag is off and the battery
# is labelled 'optimal' so the rows line up with the AMM data's battery_type.
da_sim_df = da_sim_df.assign(AMM=False, battery_type='optimal')

## Import AMM ABM model data 
Data includes simulation runs from both Informed Trader and VFI Optimized Battery treatments

In [None]:
# Load the AMM agent-based-model simulation summary into a DataFrame.
# NOTE(review): absolute, machine-specific path — this cell fails on any other
# machine; consider a configurable data directory.
amm_sim_df = pd.read_csv("/Users/nalin/Desktop/UChicago/Thesis/abm_sim_summary.csv")

In [None]:
amm_sim_df.head()


In [None]:
# Flag every ABM row as an AMM run, then split the rows by battery strategy
# into two independent copies.
amm_sim_df["AMM"] = True
is_informed_row = amm_sim_df["battery_type"] == "informed"
is_optimal_row = amm_sim_df["battery_type"] == "optimal"
amm_inf_df = amm_sim_df.loc[is_informed_row].copy()
amm_vfi_df = amm_sim_df.loc[is_optimal_row].copy()

In [None]:
# The informed-trader frame does not need the VFI-specific battery columns.
vfi_only_columns = [
    'q_b_vfi',
    'soc_vfi',
    'surplus_battery_vfi_ts',
    'total_surplus_battery_vfi',
]
amm_inf_df = amm_inf_df.drop(vfi_only_columns, axis=1)
amm_inf_df.head()

In [None]:
amm_inf_df.columns

In [None]:
# Map the informed-trader column names onto the shared, strategy-neutral names
# so this frame can be stacked with the other treatments.
informed_to_common = {
    'soc_inf': 'socs',
    'q_b_inf': 'q_b',
    'surplus_battery_inf_ts': 'surplus_battery_ts',
    'total_surplus_battery_inf': 'total_surplus_battery',
}
amm_inf_df = amm_inf_df.rename(columns=informed_to_common)
amm_inf_df.columns


In [None]:
# The VFI frame does not need the informed-trader battery columns.
# (A stray bare `amm_vfi_df` expression used to precede this line; it was a
# no-op because only a cell's last expression is displayed.)
amm_vfi_df = amm_vfi_df.drop(columns=['q_b_inf', 'soc_inf', 'surplus_battery_inf_ts', 'total_surplus_battery_inf'])

In [None]:
# Map the VFI column names onto the shared, strategy-neutral names so this
# frame can be stacked with the other treatments.
vfi_to_common = {
    'soc_vfi': 'socs',
    'q_b_vfi': 'q_b',
    'surplus_battery_vfi_ts': 'surplus_battery_ts',
    'total_surplus_battery_vfi': 'total_surplus_battery',
}
amm_vfi_df = amm_vfi_df.rename(columns=vfi_to_common)

amm_vfi_df.columns

In [None]:
# Stack the Double Auction, AMM informed-trader and AMM VFI frames row-wise,
# discarding the original indices in favour of a fresh 0..n-1 index.
# NOTE(review): assumes da_sim_df already uses the shared column names
# ('socs', 'q_b', ...) — confirm; mismatched names would yield NaN-filled columns.
combined_df = pd.concat([da_sim_df, amm_inf_df, amm_vfi_df], ignore_index=True)

In [None]:
combined_df.columns

In [None]:
pd.set_option('display.max_columns', None)
combined_df.head()

In [None]:
# Persist the combined frame. index=False keeps the RangeIndex out of the file;
# otherwise reading the CSV back adds a spurious 'Unnamed: 0' column.
combined_df.to_csv("da_amm_combined.csv", index=False)

In [None]:
combined_df = pd.read_csv("da_amm_combined.csv")

In [None]:
combined_df.head()

# Data Analysis

## Simulation Parameter Grid

| Parameter | Variable Name | Values | Description |
|-----------|---------------|--------|-------------|
| **Battery Capacity (Max)** | `C_max` | [5, 10, 15, 20] | Maximum battery storage capacity |
| **Initial Battery Charge** | `C_init` | [20%, 50%, 80%] of C_max | Starting state of charge |
| **Battery Trading Limit** | `q_b_max` | [1, 2, 3, 4, 5] | Maximum energy battery can trade per period |
| **Mean Solar Peak Power** | `mean_pmax` | [5, 10, 15, 20] | Average maximum daily solar generation |
| **Solar Variability** | `std_pmax` | [0, mean_pmax/2] | Standard deviation of solar peak power |
| **Battery Strategy** | `battery_type` | ['informed', 'optimal'] | Informed trader vs VFI optimized battery |
| **Mechanism** | `AMM` | bool | Exchange mechanism: `False` = Double Auction (DA), `True` = AMM |

### Fixed Parameters
| Parameter | Value | Description |
|-----------|-------|-------------|
| Simulation Days | 7 | Length of simulation |
| Trades per Period | 20 | Number of trades executed each hour |
| Sunrise/Sunset | 6:00/20:00 | Solar generation window |
| Demand Parameters | v_max=10, q_max=10 | Maximum willingness to pay and quantity |
| Utility Cost | c_u=5 | Marginal cost of utility generation |
| Utility Capacity | q_u_max=10 | Maximum utility generation capacity |
| Discount Factor | β=1 | No discounting in optimization |
| AMM Initial Price | 5.0 | Starting M_token/E_token exchange rate |
| AMM Reserves | x≈4.47, y≈22.36 | Initial liquidity pool reserves |

**Total Combinations:** 4 × 3 × 5 × 4 × 2 × 2 + 480 = **1440 simulations** per run (960 AMM runs across both battery strategies, plus 480 Double Auction runs)

This setup allows us to analyze how battery capacity, charging strategy, solar variability, and trading behavior interact across different market conditions in the energy trading ABM.

## Summary Statistics and Data Visualization 

In [None]:

# Create treatment variable for the three experimental conditions
def assign_treatment(row):
    """Return the treatment label for one row.

    The label is chosen from the row's 'AMM' flag and 'battery_type' value:
    (False, 'optimal') -> 'Double Auction', (True, 'optimal') ->
    'AMM + VFI Battery', (True, 'informed') -> 'AMM + Informed Trader'.
    Any other combination yields 'Unknown'.
    """
    # (AMM flag, battery type, label), checked in order; first match wins.
    treatment_rules = (
        (False, 'optimal', 'Double Auction'),
        (True, 'optimal', 'AMM + VFI Battery'),
        (True, 'informed', 'AMM + Informed Trader'),
    )
    for amm_value, battery, label in treatment_rules:
        if row['AMM'] == amm_value and row['battery_type'] == battery:
            return label
    return 'Unknown'

# Label every row with its treatment by applying assign_treatment row-wise.
combined_df['treatment'] = combined_df.apply(assign_treatment, axis=1)

# Check the treatment distribution
# A non-zero 'Unknown' count means some (AMM, battery_type) pair fell outside
# the three conditions assign_treatment recognises.
print("Treatment counts:")
print(combined_df['treatment'].value_counts())
print("\nSample of data with treatments:")
print(combined_df[['AMM', 'battery_type', 'treatment']].head(10))

In [None]:
import plotly.express as px
from plotly.subplots import make_subplots

# Surplus outcomes to visualise, one figure each.
surplus_vars = [
    'total_surplus_battery',
    'total_surplus_solar',
    'total_surplus_utility',
    'total_surplus_demand',
    'total_surplus_all',
]

# Fixed colour per treatment so figures are comparable across cells.
colors = {
    'Double Auction': '#1f77b4',
    'AMM + VFI Battery': '#ff7f0e',
    'AMM + Informed Trader': '#2ca02c',
}

# One standalone boxplot per surplus variable (easier to read than a grid).
for var in surplus_vars:
    axis_label = var.replace('_', ' ').title()
    labelled_rows = combined_df.loc[combined_df['treatment'] != 'Unknown']

    fig = px.box(
        labelled_rows,
        x='treatment',
        y=var,
        title=f'{axis_label} by Treatment',
        color='treatment',
        color_discrete_map=colors,
        points='outliers',
    )
    fig.update_layout(
        width=800,
        height=500,
        xaxis_title='Treatment',
        yaxis_title=axis_label,
        title_x=0.5,
    )
    fig.show()

In [None]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Surplus outcomes in the order they fill the 2x3 grid.
surplus_vars = [
    'total_surplus_battery',
    'total_surplus_solar',
    'total_surplus_utility',
    'total_surplus_demand',
    'total_surplus_all',
]

# Fixed colour per treatment.
colors = {
    'Double Auction': '#1f77b4',
    'AMM + VFI Battery': '#ff7f0e',
    'AMM + Informed Trader': '#2ca02c',
}

# Grid layout — row 1: Battery, Solar, Utility; row 2: Demand, Total, (empty).
subplot_titles = [
    'Battery Surplus', 'Solar Surplus', 'Utility Surplus',
    'Demand Surplus', 'Total Surplus', '',
]
positions = [(1, 1), (1, 2), (1, 3), (2, 1), (2, 2)]
y_labels = ['Battery Surplus', 'Solar Surplus', 'Utility Surplus',
            'Demand Surplus', 'Total Surplus']

fig = make_subplots(
    rows=2,
    cols=3,
    subplot_titles=subplot_titles,
    vertical_spacing=0.12,
    horizontal_spacing=0.08,
)

# Rows whose treatment could not be identified are left out of the plot.
plot_data = combined_df[combined_df['treatment'] != 'Unknown']

# One box per treatment in every panel.
for i, (var, (row, col)) in enumerate(zip(surplus_vars, positions)):
    for treatment in ('Double Auction', 'AMM + VFI Battery', 'AMM + Informed Trader'):
        treatment_data = plot_data.loc[plot_data['treatment'] == treatment, var]
        box_trace = go.Box(
            y=treatment_data,
            name=treatment,
            marker_color=colors[treatment],
            legendgroup=treatment,
            showlegend=(i == 0),  # legend entries come from the first panel only
            boxpoints='outliers',
        )
        fig.add_trace(box_trace, row=row, col=col)

fig.update_layout(
    title_text="Surplus Analysis by Treatment",
    title_x=0.5,
    height=700,
    width=1200,
    boxmode='group',
)

# Per-panel axes: label the y-axis, hide x tick labels (the legend already
# identifies the treatments).
for (row, col), label in zip(positions, y_labels):
    fig.update_yaxes(title_text=label, row=row, col=col)
    fig.update_xaxes(showticklabels=False, row=row, col=col)

fig.show()

In [None]:

# Descriptive statistics of each surplus variable, grouped by treatment.
print("Summary Statistics by Treatment:\n")

for var in surplus_vars:
    heading = var.replace('_', ' ').title()
    print(f"\n{heading}:")
    labelled_rows = combined_df[combined_df['treatment'] != 'Unknown']
    summary = labelled_rows.groupby('treatment')[var].describe()
    print(summary.round(2))

## Period Visualizations

In [None]:
import plotly.graph_objects as go
import ast

def find_row_index(df, C_max, q_b_max, mean_pmax, std_pmax, C_init, battery_type=None, AMM=None):
    """
    Find row index(es) in the dataframe based on parameter values.

    The five numeric grid parameters are compared with a floating-point
    tolerance (numpy.isclose defaults) rather than exact equality, so values
    stored as e.g. 3.0000000000000004 are still found when the caller asks
    for 3.

    Parameters:
    - df: DataFrame to search in
    - C_max, q_b_max, mean_pmax, std_pmax, C_init: Parameter values to match
    - battery_type: Optional filter for battery type ('informed', 'optimal')
    - AMM: Optional filter for AMM (True/False)

    Returns:
    - List of matching row indices (empty list when nothing matches)
    """
    numeric_targets = {
        'C_max': C_max,
        'q_b_max': q_b_max,
        'mean_pmax': mean_pmax,
        'std_pmax': std_pmax,
        'C_init': C_init,
    }

    # Start from "every row matches" and narrow the mask one parameter at a time.
    mask = np.ones(len(df), dtype=bool)
    for column, target in numeric_targets.items():
        mask &= np.isclose(df[column].to_numpy(dtype=float), float(target))

    if battery_type is not None:
        mask &= (df['battery_type'] == battery_type).to_numpy(dtype=bool)

    if AMM is not None:
        mask &= (df['AMM'] == AMM).to_numpy(dtype=bool)

    matching_indices = df.index[mask].tolist()

    if len(matching_indices) == 0:
        print(f"No rows found with parameters: C_max={C_max}, q_b_max={q_b_max}, mean_pmax={mean_pmax}, std_pmax={std_pmax}, C_init={C_init}")
        # Same `is not None` test as the filters above, so every applied
        # filter is echoed back (the old truthiness check hid falsy values).
        if battery_type is not None: print(f"  battery_type={battery_type}")
        if AMM is not None: print(f"  AMM={AMM}")
    elif len(matching_indices) == 1:
        print(f"Found 1 row at index {matching_indices[0]}")
    else:
        print(f"Found {len(matching_indices)} rows at indices: {matching_indices}")
        print("Consider adding battery_type or AMM filters to narrow down results")

    return matching_indices

def plot_summary_row_combined(df, row_index, title_suffix=""):
    """
    Plot the per-period time series stored in one row of the combined dataframe.

    Four Plotly figures are built and shown: battery dispatch, battery state
    of charge, market price, and all dispatch/demand quantities together with
    solar generation.

    Parameters:
    - df: The combined dataframe
    - row_index: Index label of the row to plot
    - title_suffix: Optional extra text placed at the start of each title's
      descriptive part; omitted cleanly when empty

    Returns:
    - None if row_index is not in df.index; otherwise a dict holding the four
      figures ('battery_dispatch', 'state_of_charge', 'market_price',
      'dispatch_quantities') and a 'row_info' dict describing the row.
    """
    if row_index not in df.index:
        print(f"Row index {row_index} not found in dataframe")
        return None

    row = df.loc[row_index]

    def parse(col):
        # Values that arrive as strings are evaluated as Python literals
        # (e.g. "[1.0, 2.0]" -> list); anything else is passed through.
        if isinstance(col, str):
            return ast.literal_eval(col)
        return col

    battery_type = row["battery_type"]
    treatment = row.get("treatment", "Unknown")

    # After the column renaming, both battery strategies share 'q_b' / 'socs'.
    # Unrecognised battery types get empty battery series.
    if battery_type in ("informed", "optimal"):
        q_b_list = parse(row['q_b'])
        socs = parse(row['socs'])
    else:
        q_b_list, socs = [], []

    # Parse other time series data
    prices = parse(row['prices'])
    q_s_list = parse(row['q_s'])
    q_u_list = parse(row['q_u'])
    q_d_list = parse(row['q_d'])
    s_t = parse(row['s_t'])

    time_index = list(range(len(prices)))

    # Title pieces; an empty title_suffix is dropped so no dangling " - " is left.
    params_text = f"C_max={row['C_max']}, C_init={row['C_init']}, q_b_max={row['q_b_max']}, mean_pmax={row['mean_pmax']}, std_pmax={row['std_pmax']}"
    title_parts = ([title_suffix] if title_suffix else []) + [str(treatment), params_text]
    full_title_suffix = " - ".join(title_parts)

    # 1. Battery Dispatch (q_b)
    fig_battery = go.Figure()
    fig_battery.add_trace(go.Scatter(
        x=time_index, y=q_b_list,
        mode='lines+markers',
        name=f'Battery Dispatch ({battery_type})',
        line=dict(color='red')
    ))
    fig_battery.add_hline(y=0, line_dash="dash", line_color="gray")
    fig_battery.update_layout(
        title=f"Battery Dispatch - {full_title_suffix}",
        xaxis_title="Time (hours)",
        yaxis_title="Battery Dispatch (q_b)",
        xaxis=dict(range=[0, len(time_index)]),
        height=400
    )

    # 2. Battery State of Charge (SOC)
    fig_soc = go.Figure()
    fig_soc.add_trace(go.Scatter(
        x=time_index, y=socs,
        mode='lines+markers',
        name='SOC',
        line=dict(color='green')
    ))
    # The initial-SOC reference line needs a first observation; skip it when
    # the series is empty instead of failing on socs[0].
    if len(socs) > 0:
        fig_soc.add_hline(
            y=socs[0], line_dash="dot", line_color="orange",
            annotation_text=f"Initial SOC: {socs[0]:.2f}",
            annotation_position="top right"
        )
    fig_soc.add_hline(
        y=row['C_max'], line_dash="dash", line_color="red",
        annotation_text=f"Max Capacity: {row['C_max']}",
        annotation_position="bottom right"
    )
    fig_soc.update_layout(
        title=f"Battery State-of-Charge - {full_title_suffix}",
        xaxis_title="Time (hours)",
        yaxis_title="SOC",
        yaxis=dict(range=[0, row['C_max']*1.1]),  # 10% headroom above capacity
        height=400
    )

    # 3. Market Clearing Price
    fig_price = go.Figure()
    fig_price.add_trace(go.Scatter(
        x=time_index, y=prices,
        mode='lines+markers',
        name='Market Price',
        line=dict(color='red')
    ))
    fig_price.update_layout(
        title=f"Market Clearing Price - {full_title_suffix}",
        xaxis_title="Time (hours)",
        yaxis_title="Price",
        height=400
    )

    # 4. Dispatch Quantities: (series, legend name, line style), drawn in order.
    dispatch_series = [
        (q_s_list, 'Solar Dispatch', dict(color='green')),
        (q_b_list, f'Battery Dispatch ({battery_type})', dict(color='red')),
        (q_u_list, 'Utility Dispatch', dict(color='purple')),
        (q_d_list, 'Quantity Demanded', dict(color='blue')),
        (s_t, 'Solar Generation', dict(color='orange', dash='dash')),
    ]
    fig_dispatch = go.Figure()
    for series, label, line_style in dispatch_series:
        fig_dispatch.add_trace(go.Scatter(
            x=time_index, y=series,
            mode='lines',
            name=label,
            line=line_style
        ))

    fig_dispatch.update_layout(
        title=f"Dispatch and Market Quantities - {full_title_suffix}",
        xaxis_title="Time (hours)",
        yaxis_title="Quantity",
        height=500
    )

    # Show all plots
    fig_battery.show()
    fig_soc.show()
    fig_price.show()
    fig_dispatch.show()

    # Return figures for further manipulation if needed
    return {
        "battery_dispatch": fig_battery,
        "state_of_charge": fig_soc,
        "market_price": fig_price,
        "dispatch_quantities": fig_dispatch,
        "row_info": {
            "index": row_index,
            "treatment": treatment,
            "battery_type": battery_type,
            "parameters": params_text
        }
    }


In [None]:

# Example usage: look up one AMM run with the 'optimal' battery for a single
# parameter combination and plot its first match. Nothing is plotted when
# find_row_index returns an empty list.
indices = find_row_index(combined_df, C_max=10, q_b_max=1, mean_pmax=15, std_pmax=0, C_init=5, battery_type='optimal', AMM=True)
if indices:
    plot_summary_row_combined(combined_df, indices[0])

In [None]:
# Example usage: same parameter combination as the 'optimal' example, but for
# the 'informed' battery, so the two strategies can be compared.
indices = find_row_index(combined_df, C_max=10, q_b_max=1, mean_pmax=15, std_pmax=0, C_init=5, battery_type='informed', AMM=True)
if indices:
    plot_summary_row_combined(combined_df, indices[0])

### Removing Outliers

In [None]:
# Identify outliers for total_surplus_battery in 'AMM + VFI Battery' treatment
import numpy as np

# Filter data for AMM + VFI Battery treatment
amm_vfi_data = combined_df[combined_df['treatment'] == 'AMM + VFI Battery'].copy()

# Calculate outliers using IQR method: values beyond 1.5 * IQR outside the
# first/third quartile are treated as outliers.
Q1 = amm_vfi_data['total_surplus_battery'].quantile(0.25)
Q3 = amm_vfi_data['total_surplus_battery'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

print("AMM + VFI Battery - total_surplus_battery statistics:")
print(f"Q1: {Q1:.2f}")
print(f"Q3: {Q3:.2f}")
print(f"IQR: {IQR:.2f}")
print(f"Lower bound (Q1 - 1.5*IQR): {lower_bound:.2f}")
print(f"Upper bound (Q3 + 1.5*IQR): {upper_bound:.2f}")

# Identify outlier rows
outliers = amm_vfi_data[
    (amm_vfi_data['total_surplus_battery'] < lower_bound) |
    (amm_vfi_data['total_surplus_battery'] > upper_bound)
]

# Guard the percentage: an empty treatment subset used to raise
# ZeroDivisionError here instead of reporting 0 outliers.
n_observations = len(amm_vfi_data)
outlier_pct = len(outliers) / n_observations * 100 if n_observations else 0.0

print(f"\nNumber of outliers: {len(outliers)}")
print(f"Total observations in AMM + VFI Battery: {n_observations}")
print(f"Percentage of outliers: {outlier_pct:.1f}%")

if len(outliers) > 0:
    print("\nOutlier values:")
    print(outliers['total_surplus_battery'].sort_values().values)

In [None]:
# List the parameter settings behind each outlier row, lowest battery surplus first.
if len(outliers) > 0:
    print("\nParameter combinations for outlier rows:")
    print("=" * 60)

    # Columns worth inspecting for every outlier.
    key_params = [
        'v_max', 'q_max', 'c_u', 'q_u_max', 'beta',
        'C_max', 'C_init', 'q_b_max',
        'total_surplus_battery', 'total_surplus_all', 'avg_price_overall',
    ]

    outlier_params = outliers[key_params].sort_values('total_surplus_battery')
    print(outlier_params.to_string(index=False))

    # The outliers keep combined_df's index labels, so these identify the source rows.
    print("\nRow indices of outliers in combined_df:")
    print(outliers.index.tolist())

In [None]:
# Show parameter combinations for outlier rows
# NOTE(review): this cell is a verbatim duplicate of the preceding cell and
# prints the same output a second time — consider deleting it.
if len(outliers) > 0:
    print("\nParameter combinations for outlier rows:")
    print("="*60)
    
    # Key parameters to examine
    key_params = ['v_max', 'q_max', 'c_u', 'q_u_max', 'beta', 'C_max', 'C_init', 'q_b_max', 
                  'total_surplus_battery', 'total_surplus_all', 'avg_price_overall']
    
    # Show outliers with key parameters
    outlier_params = outliers[key_params].copy()
    outlier_params = outlier_params.sort_values('total_surplus_battery')
    
    print(outlier_params.to_string(index=False))
    
    # Show the row indices of outliers in the original dataframe
    print(f"\nRow indices of outliers in combined_df:")
    print(outliers.index.tolist())

In [None]:
# Box plot of the AMM + VFI battery surplus with every point drawn, plus the
# IQR fences used to define outliers.
fig = px.box(
    amm_vfi_data,
    y='total_surplus_battery',
    title='Total Surplus Battery - AMM + VFI Battery Treatment (with outliers highlighted)',
    points='all',
)

for fence_name, fence_value in (("Lower", lower_bound), ("Upper", upper_bound)):
    fig.add_hline(
        y=fence_value,
        line_dash="dash",
        line_color="red",
        annotation_text=f"{fence_name} bound: {fence_value:.2f}",
    )

fig.update_layout(width=600, height=500)
fig.show()

# Build the cleaned dataset (outlier rows removed).
print("\nCreating cleaned dataset...")
print(f"Original combined_df shape: {combined_df.shape}")

# Flag the rows whose index label belongs to the detected outliers.
combined_df['is_outlier'] = combined_df.index.isin(outliers.index)

combined_df_clean = combined_df.loc[~combined_df['is_outlier']].copy()
print(f"Cleaned combined_df shape: {combined_df_clean.shape}")
print(f"Removed {len(outliers)} outlier rows")

# Treatment counts once the outliers are gone.
print("\nTreatment distribution after cleaning:")
print(combined_df_clean['treatment'].value_counts())

In [None]:
# Compare boxplots before and after outlier removal
from plotly.subplots import make_subplots

fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=['With Outliers', 'Outliers Removed'],
    horizontal_spacing=0.1,
)

treatment_order = ['Double Auction', 'AMM + VFI Battery', 'AMM + Informed Trader']

# (source frame, subplot column, show legend entries?) — left panel is the
# full data, right panel the cleaned data; only the left one feeds the legend.
panels = (
    (combined_df, 1, True),
    (combined_df_clean, 2, False),
)

for frame, panel_col, legend_on in panels:
    for treatment in treatment_order:
        data = frame.loc[frame['treatment'] == treatment, 'total_surplus_battery']
        fig.add_trace(
            go.Box(
                y=data,
                name=treatment,
                marker_color=colors[treatment],
                showlegend=legend_on,
                legendgroup=treatment,
            ),
            row=1, col=panel_col,
        )

fig.update_layout(
    title_text="Total Surplus Battery: Before vs After Outlier Removal",
    height=500,
    width=1000,
)

for panel_col in (1, 2):
    fig.update_yaxes(title_text="Total Surplus Battery", row=1, col=panel_col)

fig.show()

In [None]:
# Distinct grid-parameter combinations that produced at least one outlier row.
grid_param_columns = ['C_max', 'C_init', 'q_b_max', 'mean_pmax', 'std_pmax']
outlier_combinations = outliers[grid_param_columns].drop_duplicates()

print("Outlier parameter combinations to remove:")
print(outlier_combinations)
print(f"\nNumber of unique outlier combinations: {len(outlier_combinations)}")

# Create a function to check if a row matches any outlier combination
def matches_outlier_combination(row):
    """Return True if `row` equals any outlier combination on all five grid parameters.

    Compares against the module-level `outlier_combinations` frame, row by row.
    """
    grid_params = ('C_max', 'C_init', 'q_b_max', 'mean_pmax', 'std_pmax')
    return any(
        all(row[name] == combo[name] for name in grid_params)
        for _, combo in outlier_combinations.iterrows()
    )

# Flag every row (in any treatment) whose parameters match an outlier combination.
combined_df['matches_outlier_combo'] = combined_df.apply(matches_outlier_combination, axis=1)
combo_flag = combined_df['matches_outlier_combo']

# Keep only the rows that do not share parameters with an outlier.
combined_df_filtered = combined_df.loc[~combo_flag].copy()
rows_removed = len(combined_df) - len(combined_df_filtered)

print(f"\nOriginal dataframe shape: {combined_df.shape}")
print(f"Filtered dataframe shape: {combined_df_filtered.shape}")
print(f"Removed {rows_removed} rows")

# Treatment counts that remain after filtering.
print("\nTreatment distribution after filtering:")
print(combined_df_filtered['treatment'].value_counts())

# How many rows each treatment lost.
print("\nRows removed by treatment:")
removed_by_treatment = combined_df.loc[combo_flag, 'treatment'].value_counts()
print(removed_by_treatment)

In [None]:
# Subset for the DA vs AMM + VFI comparison: informed-trader rows are left out.
da_vfi_treatments = ['Double Auction', 'AMM + VFI Battery']
keep_row = combined_df_filtered['treatment'].isin(da_vfi_treatments)
combined_df_da_vfi = combined_df_filtered.loc[keep_row].copy()

informed_rows_dropped = len(combined_df_filtered) - len(combined_df_da_vfi)
print(f"Original filtered dataframe shape: {combined_df_filtered.shape}")
print(f"DA + VFI only dataframe shape: {combined_df_da_vfi.shape}")
print(f"Removed {informed_rows_dropped} informed trader rows")

# Sanity checks: value counts of the treatment, battery type and AMM flag.
for heading, column in (
    ("Treatment", 'treatment'),
    ("Battery type", 'battery_type'),
    ("AMM", 'AMM'),
):
    print(f"\n{heading} distribution in DA + VFI dataframe:")
    print(combined_df_da_vfi[column].value_counts())

## Comparing Double Auction with AMM + VFI optimized battery 

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd

# Outcomes to regress, one OLS model each.
outcome_vars = [
    'total_surplus_all',
    'total_surplus_demand',
    'total_surplus_battery',
    'total_surplus_solar',
    'total_surplus_utility',
]

# Right-hand side shared by every model: the AMM treatment flag plus the grid parameters.
predictors = 'AMM + C_max + C_init + q_b_max + mean_pmax + std_pmax'

# p-value cutoffs and their markers, strictest first.
significance_marks = ((0.001, "***"), (0.01, "**"), (0.05, "*"), (0.1, "."))

# Fitted models keyed by outcome name (reused by later cells).
regression_results = {}

print("Running Linear Regressions:")
print("=" * 60)

for outcome in outcome_vars:
    formula = f"{outcome} ~ {predictors}"

    model = smf.ols(formula, data=combined_df_da_vfi).fit()
    regression_results[outcome] = model

    print(f"\n{outcome.replace('_', ' ').title()}")
    print("-" * 40)
    print(f"R-squared: {model.rsquared:.4f}")
    print(f"Adj. R-squared: {model.rsquared_adj:.4f}")
    print(f"F-statistic: {model.fvalue:.4f}")
    print(f"Prob (F-statistic): {model.f_pvalue:.4e}")
    print(f"Number of observations: {int(model.nobs)}")

    # One line per coefficient, tagged with the first cutoff its p-value beats.
    print("\nCoefficients:")
    for param, coef in model.params.items():
        pval = model.pvalues[param]
        stars = next((mark for cutoff, mark in significance_marks if pval < cutoff), "")
        print(f"  {param:12s}: {coef:8.4f}{stars:3s} (p={pval:.4f})")

print("\nSignificance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")

In [None]:
# Long-format table: one record per (outcome, non-intercept coefficient).
significance_marks = ((0.001, "***"), (0.01, "**"), (0.05, "*"), (0.1, "."))
results_summary = []

for outcome, model in regression_results.items():
    for param in model.params.index:
        if param == 'Intercept':
            continue  # intercept left out for a cleaner table

        coef = model.params[param]
        pval = model.pvalues[param]
        stars = next((mark for cutoff, mark in significance_marks if pval < cutoff), "")

        results_summary.append(dict(
            Outcome=outcome,
            Variable=param,
            Coefficient=coef,
            Std_Error=model.bse[param],
            T_Value=model.tvalues[param],
            P_Value=pval,
            Significance=stars,
            Coef_with_Stars=f"{coef:.4f}{stars}",
        ))

results_df = pd.DataFrame(results_summary)

# Wide view: variables as rows, outcomes as columns, starred coefficients as cells.
pivot_results = results_df.pivot(index='Variable', columns='Outcome', values='Coef_with_Stars')

print("\nRegression Results Summary (Coefficients with Significance Stars):")
print("=" * 80)
print(pivot_results.to_string())

# Goodness-of-fit numbers, one row per model.
print("\n\nModel Fit Statistics:")
print("=" * 50)
fit_stats = [
    {
        'Outcome': outcome,
        'R_squared': f"{model.rsquared:.4f}",
        'Adj_R_squared': f"{model.rsquared_adj:.4f}",
        'F_statistic': f"{model.fvalue:.4f}",
        'F_p_value': f"{model.f_pvalue:.4e}",
        'N_obs': int(model.nobs),
    }
    for outcome, model in regression_results.items()
]

fit_df = pd.DataFrame(fit_stats)
print(fit_df.to_string(index=False))


In [None]:
# Create LaTeX table manually
def create_latex_regression_table(regression_results):
    """Create a LaTeX table from regression results with significance stars.

    Parameters:
    - regression_results: non-empty dict mapping outcome name -> fitted model.
      Each model must expose `params`, `pvalues` and `bse` (indexable by
      variable name, with `params.index` listing the names), plus
      `rsquared`, `rsquared_adj` and `nobs`.

    Returns:
    - The LaTeX source of the table as a string. Each non-intercept variable
      gets a coefficient row followed by a row of standard errors in
      parentheses; R^2, adjusted R^2 and observation counts follow.

    Raises:
    - ValueError: if `regression_results` is empty.
    """
    if not regression_results:
        raise ValueError("regression_results is empty; nothing to tabulate")

    def escape(text):
        # Underscores are special in LaTeX text mode.
        return text.replace('_', '\\_')

    def stars_for(pval):
        # Superscripts are only valid in math mode, so the stars are wrapped
        # in $...$; a bare ^{...} triggers "Missing $ inserted".
        for cutoff, mark in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
            if pval < cutoff:
                return f"$^{{{mark}}}$"
        return ""

    outcomes = list(regression_results.keys())
    models = list(regression_results.values())

    # Variable names come from the first model (intercept excluded).
    variables = [var for var in models[0].params.index if var != 'Intercept']

    lines = [
        "\\begin{table}[htbp]",
        "\\centering",
        "\\caption{Linear Regression Results}",
        "\\label{tab:regression_results}",
        # One left-aligned label column plus one centred column per outcome.
        "\\begin{tabular}{l" + "c" * len(models) + "}",
        "\\hline\\hline",
        " & " + " & ".join(escape(outcome) for outcome in outcomes) + " \\\\",
        "\\hline",
    ]

    # Coefficient row, then standard errors in parentheses underneath.
    for var in variables:
        coef_cells = "".join(
            f" & {model.params[var]:.4f}{stars_for(model.pvalues[var])}"
            for model in models
        )
        se_cells = "".join(f" & ({model.bse[var]:.4f})" for model in models)
        lines.append(escape(var) + coef_cells + " \\\\")
        lines.append(se_cells + " \\\\")

    # Model statistics
    lines.append("\\hline")
    lines.append("R$^2$" + "".join(f" & {model.rsquared:.4f}" for model in models) + " \\\\")
    lines.append("Adj. R$^2$" + "".join(f" & {model.rsquared_adj:.4f}" for model in models) + " \\\\")
    lines.append("Observations" + "".join(f" & {int(model.nobs)}" for model in models) + " \\\\")

    # End table
    # NOTE(review): tablenotes comes from the threeparttable package — confirm
    # the target document loads it.
    lines.extend([
        "\\hline\\hline",
        "\\end{tabular}",
        "\\begin{tablenotes}",
        "\\small",
        "\\item Note: Standard errors in parentheses. "
        "Significance levels: *** p$<$0.001, ** p$<$0.01, * p$<$0.05",
        "\\end{tablenotes}",
        "\\end{table}",
    ])

    return "\n".join(lines) + "\n"

# Generate LaTeX table
# Uses the models fitted in the DA vs AMM + VFI regression cell.
latex_table = create_latex_regression_table(regression_results)

print("LaTeX Regression Table:")
print("="*50)
print(latex_table)

# Save to file
# Written relative to the current working directory; an existing file of the
# same name is overwritten.
with open('regression_results.tex', 'w') as f:
    f.write(latex_table)
    
print("\nLaTeX table saved to 'regression_results.tex'")

In [None]:
# Full statsmodels summary for every fitted model, for detailed inspection.
print("Detailed Regression Summaries:")
print("=" * 60)

for outcome, model in regression_results.items():
    heading = outcome.replace('_', ' ').title()
    print(f"\n{heading} Regression:")
    print("-" * 50)
    print(model.summary())
    print("\n" + "=" * 60)

## Comparing the ATE for Informed Trader battery over VFI battery 

In [None]:
# Subset for the informed-trader vs VFI comparison: Double Auction rows are left out.
amm_treatments = ['AMM + VFI Battery', 'AMM + Informed Trader']
is_amm_treatment = combined_df_filtered['treatment'].isin(amm_treatments)
combined_df_amm_only = combined_df_filtered.loc[is_amm_treatment].copy()

da_rows_dropped = len(combined_df_filtered) - len(combined_df_amm_only)
print(f"Original filtered dataframe shape: {combined_df_filtered.shape}")
print(f"AMM only dataframe shape: {combined_df_amm_only.shape}")
print(f"Removed {da_rows_dropped} Double Auction rows")

# Binary regressor: True for 'AMM + Informed Trader', False for 'AMM + VFI Battery'.
combined_df_amm_only['informed'] = combined_df_amm_only['treatment'].eq('AMM + Informed Trader')

# Treatment counts in the subset.
print("\nTreatment distribution in AMM-only dataframe:")
print(combined_df_amm_only['treatment'].value_counts())

# Counts of the new flag.
print("\nInformed variable distribution:")
print(combined_df_amm_only['informed'].value_counts())

# The flag should line up one-to-one with the treatment label.
print("\nCross-tabulation of treatment vs informed:")
crosstab = pd.crosstab(
    combined_df_amm_only['treatment'],
    combined_df_amm_only['informed'],
    margins=True,
)
print(crosstab)

# Battery types present in the subset.
print("\nBattery type distribution in AMM-only dataframe:")
print(combined_df_amm_only['battery_type'].value_counts())

# Every remaining row should have AMM == True.
print("\nAMM distribution in AMM-only dataframe:")
print(combined_df_amm_only['AMM'].value_counts())

In [None]:
# Boxplots and descriptive statistics of each surplus measure, split by AMM
# treatment (VFI Battery vs Informed Trader).
import plotly.express as px

# Fixed colour per treatment so the arms look the same in every figure
amm_colors = {
    'AMM + VFI Battery': '#ff7f0e', 
    'AMM + Informed Trader': '#2ca02c'
}

# Surplus measures to plot and summarise
surplus_vars = [
    'total_surplus_battery',
    'total_surplus_solar',
    'total_surplus_utility',
    'total_surplus_demand',
    'total_surplus_all',
]

print("AMM Treatment Comparison - Boxplots:", "=" * 50, sep="\n")

# One figure per surplus measure; only outlier points are drawn individually.
for var in surplus_vars:
    # update_layout returns the figure it was called on, so the calls chain.
    fig = px.box(
        combined_df_amm_only,
        x='treatment',
        y=var,
        color='treatment',
        color_discrete_map=amm_colors,
        points='outliers',
        title=f'{var.replace("_", " ").title()} - AMM: VFI vs Informed Trader',
    ).update_layout(
        width=700,
        height=500,
        xaxis_title='AMM Treatment',
        yaxis_title=var.replace('_', ' ').title(),
        title_x=0.5,
    )
    fig.show()

# describe() output per treatment for every surplus measure, rounded to 2 dp
print("\nSummary Statistics by AMM Treatment:", "=" * 60, sep="\n")

for var in surplus_vars:
    print(f"\n{var.replace('_', ' ').title()}:")
    summary = combined_df_amm_only.groupby('treatment')[var].describe()
    print(summary.round(2))

In [None]:
# OLS regressions within the AMM-only sample: each surplus outcome is regressed
# on the 'informed' indicator plus the simulation parameters.
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Dependent variables, one regression each
outcome_vars = [
    'total_surplus_all',
    'total_surplus_demand',
    'total_surplus_battery',
    'total_surplus_solar',
    'total_surplus_utility',
]

# Right-hand side of every formula ('informed' takes the place of 'AMM')
amm_predictors = 'informed + C_max + C_init + q_b_max + mean_pmax + std_pmax'

# outcome name -> fitted model; read by the summary / LaTeX cells below
amm_regression_results = {}

print(
    "AMM Regression Analysis: Informed Trader vs VFI Battery",
    "=" * 70,
    "Note: 'informed' = True for Informed Trader, False for VFI Battery",
    "=" * 70,
    sep="\n",
)

for outcome in outcome_vars:
    formula = f"{outcome} ~ {amm_predictors}"

    model = smf.ols(formula, data=combined_df_amm_only).fit()
    amm_regression_results[outcome] = model

    # Headline fit statistics
    print(f"\n{outcome.replace('_', ' ').title()}")
    print("-" * 40)
    print(
        f"R-squared: {model.rsquared:.4f}",
        f"Adj. R-squared: {model.rsquared_adj:.4f}",
        f"F-statistic: {model.fvalue:.4f}",
        f"Prob (F-statistic): {model.f_pvalue:.4e}",
        f"Number of observations: {int(model.nobs)}",
        sep="\n",
    )

    print("\nCoefficients:")
    for param, coef in model.params.items():
        pval = model.pvalues[param]

        # First threshold the p-value falls under decides the marker;
        # no marker when p >= 0.1.
        stars = next(
            (
                mark
                for cutoff, mark in ((0.001, "***"), (0.01, "**"), (0.05, "*"), (0.1, "."))
                if pval < cutoff
            ),
            "",
        )

        # Only the treatment dummy gets a plain-language reading of its sign.
        if param != 'informed[T.True]':
            interpretation = ""
        elif coef > 0:
            interpretation = " (Informed Trader > VFI Battery)"
        else:
            interpretation = " (VFI Battery > Informed Trader)"

        print(f"  {param:15s}: {coef:8.4f}{stars:3s} (p={pval:.4f}){interpretation}")

print("\nSignificance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")

In [None]:
# Long-format table of every non-intercept coefficient from the AMM
# regressions, then a Variable x Outcome pivot and a table of fit statistics.
amm_results_summary = []

for outcome, model in amm_regression_results.items():
    for param in model.params.index:
        if param == 'Intercept':
            continue  # intercept is left out for a cleaner table

        coef, se = model.params[param], model.bse[param]
        tval, pval = model.tvalues[param], model.pvalues[param]

        # First threshold the p-value falls under decides the marker
        stars = next(
            (
                mark
                for cutoff, mark in ((0.001, "***"), (0.01, "**"), (0.05, "*"), (0.1, "."))
                if pval < cutoff
            ),
            "",
        )

        amm_results_summary.append(dict(
            Outcome=outcome,
            Variable=param,
            Coefficient=coef,
            Std_Error=se,
            T_Value=tval,
            P_Value=pval,
            Significance=stars,
            Coef_with_Stars=f"{coef:.4f}{stars}",
        ))

# One row per (outcome, variable) pair
amm_results_df = pd.DataFrame(amm_results_summary)

# Wide view: variables as rows, outcomes as columns, starred coefficients as cells
amm_pivot_results = amm_results_df.pivot(
    index='Variable',
    columns='Outcome',
    values='Coef_with_Stars',
)

print("\nAMM Regression Results Summary (Coefficients with Significance Stars):")
print("=" * 80)
print(amm_pivot_results.to_string())

# Fit statistics, pre-formatted as strings so they print with fixed precision
print("\n\nAMM Model Fit Statistics:")
print("=" * 50)
amm_fit_stats = [
    {
        'Outcome': fit_outcome,
        'R_squared': f"{fit_model.rsquared:.4f}",
        'Adj_R_squared': f"{fit_model.rsquared_adj:.4f}",
        'F_statistic': f"{fit_model.fvalue:.4f}",
        'F_p_value': f"{fit_model.f_pvalue:.4e}",
        'N_obs': int(fit_model.nobs),
    }
    for fit_outcome, fit_model in amm_regression_results.items()
]

amm_fit_df = pd.DataFrame(amm_fit_stats)
print(amm_fit_df.to_string(index=False))

## LaTeX Table Creation

In [None]:
# Create LaTeX table for AMM analysis
def create_amm_latex_regression_table(regression_results):
    """Render the AMM regression results as a LaTeX table.

    The table has one column per outcome and two rows per regressor: the
    coefficient with significance stars and, beneath it, the standard error in
    parentheses. R-squared, adjusted R-squared and the number of observations
    follow below the coefficients. The intercept is omitted and the row order
    follows the parameter index of the first model.

    Parameters
    ----------
    regression_results : dict
        Maps an outcome name to a fitted model exposing ``params``,
        ``pvalues``, ``bse``, ``rsquared``, ``rsquared_adj`` and ``nobs``.
        Every model must contain the regressors of the first model.

    Returns
    -------
    str
        The complete ``table`` environment, terminated by a newline.

    Raises
    ------
    ValueError
        If ``regression_results`` is empty.
    """
    if not regression_results:
        raise ValueError("regression_results must contain at least one fitted model")

    models = list(regression_results.values())

    # Regressors to report (everything except the intercept)
    variables = [var for var in models[0].params.index if var != 'Intercept']

    def significance_stars(pval):
        """Return the star superscript for ``pval``; '' when p >= 0.05."""
        # The superscript is wrapped in $...$ because a bare ^ is only legal
        # in LaTeX math mode; emitting "^{***}" directly inside a tabular cell
        # stops compilation with "Missing $ inserted".
        if pval < 0.001:
            return "$^{***}$"
        if pval < 0.01:
            return "$^{**}$"
        if pval < 0.05:
            return "$^{*}$"
        return ""

    lines = [
        "\\begin{table}[htbp]",
        "\\centering",
        "\\caption{AMM Analysis: Informed Trader vs VFI Battery}",
        "\\label{tab:amm_regression_results}",
        # One left-aligned label column plus one centred column per outcome
        "\\begin{tabular}{l" + "c" * len(models) + "}",
        "\\hline\\hline",
        # Header row: underscores escaped so outcome names are valid LaTeX text
        " & " + " & ".join(outcome.replace('_', '\\_') for outcome in regression_results) + " \\\\",
        "\\hline",
    ]

    for var in variables:
        # Row label: the treatment dummy gets a readable name, every other
        # regressor is shown with escaped underscores.
        if 'informed' in var:
            var_display = 'Informed Trader'
        else:
            var_display = var.replace('_', '\\_').replace('[T.True]', '')

        coef_cells = "".join(
            f" & {model.params[var]:.4f}{significance_stars(model.pvalues[var])}"
            for model in models
        )
        se_cells = "".join(f" & ({model.bse[var]:.4f})" for model in models)

        lines.append(var_display + coef_cells + " \\\\")
        # Standard errors sit on their own row with an empty label cell
        lines.append(se_cells + " \\\\")

    # Model-level statistics
    lines.append("\\hline")
    lines.append("R$^2$" + "".join(f" & {model.rsquared:.4f}" for model in models) + " \\\\")
    lines.append("Adj. R$^2$" + "".join(f" & {model.rsquared_adj:.4f}" for model in models) + " \\\\")
    lines.append("Observations" + "".join(f" & {int(model.nobs)}" for model in models) + " \\\\")

    # NOTE(review): tablenotes is provided by the threeparttable package;
    # confirm the thesis preamble loads it.
    lines += [
        "\\hline\\hline",
        "\\end{tabular}",
        "\\begin{tablenotes}",
        "\\small",
        "\\item Note: Standard errors in parentheses. "
        "Reference category: VFI Battery. "
        "Significance levels: *** p$<$0.001, ** p$<$0.01, * p$<$0.05",
        "\\end{tablenotes}",
        "\\end{table}",
    ]

    return "\n".join(lines) + "\n"

# Build the AMM LaTeX table, echo it for a visual check, and write it to disk.
amm_latex_table = create_amm_latex_regression_table(amm_regression_results)

print("LaTeX AMM Regression Table:", "=" * 50, sep="\n")
print(amm_latex_table)

# Written to the current working directory
with open('amm_regression_results.tex', 'w') as f:
    f.write(amm_latex_table)

print("\nAMM LaTeX table saved to 'amm_regression_results.tex'")

# Reminder of how to read the sign of the treatment coefficient
print(
    "\n" + "=" * 60,
    "KEY FINDINGS SUMMARY:",
    "=" * 60,
    "The 'informed' coefficient shows the effect of using Informed Trader",
    "vs VFI Battery (reference category) in AMM systems:",
    "- Positive coefficient: Informed Trader performs better",
    "- Negative coefficient: VFI Battery performs better",
    "=" * 60,
    sep="\n",
)

In [None]:
# Print the full statsmodels summary of every fitted model in
# `amm_regression_results`, one outcome at a time, for detailed inspection.
print("Detailed AMM Regression Summaries:", "=" * 60, sep="\n")

for outcome, model in amm_regression_results.items():
    # Heading: the outcome name with underscores turned into spaces, title-cased.
    print(f"\n{outcome.replace('_', ' ').title()} Regression:", "-" * 50, sep="\n")
    print(model.summary())
    print("\n" + "=" * 60)

## Double Auction (DA) vs. AMM + Informed Trader Battery

In [None]:
# Keep only the Double Auction and AMM + Informed Trader arms; every other row
# of combined_df_filtered (i.e. AMM + VFI Battery) is dropped. The .copy()
# makes the subset an independent frame.
combined_df_da_informed = combined_df_filtered.loc[
    combined_df_filtered['treatment'].isin(['Double Auction', 'AMM + Informed Trader'])
].copy()

print(
    f"Original filtered dataframe shape: {combined_df_filtered.shape}",
    f"DA + Informed Trader only dataframe shape: {combined_df_da_informed.shape}",
    f"Removed {len(combined_df_filtered) - len(combined_df_da_informed)} VFI Battery rows",
    sep="\n",
)

# --- Sanity checks on the subset -------------------------------------------

# Row counts per treatment arm
print("\nTreatment distribution in DA + Informed Trader dataframe:")
print(combined_df_da_informed['treatment'].value_counts())

# Battery types present in the subset
print("\nBattery type distribution in DA + Informed Trader dataframe:")
print(combined_df_da_informed['battery_type'].value_counts())

# AMM flag counts
print("\nAMM distribution in DA + Informed Trader dataframe:")
print(combined_df_da_informed['AMM'].value_counts())

# Each treatment label should map to exactly one value of the AMM flag
print("\nCross-tabulation of treatment vs AMM:")
crosstab = pd.crosstab(
    combined_df_da_informed['treatment'],
    combined_df_da_informed['AMM'],
    margins=True,
)
print(crosstab)

## Boxplots

In [None]:
# Boxplots and descriptive statistics of each surplus measure, split by
# treatment (Double Auction vs AMM + Informed Trader).
import plotly.express as px

# Fixed colour per treatment so the arms look the same in every figure
da_informed_colors = {
    'Double Auction': '#1f77b4',
    'AMM + Informed Trader': '#2ca02c'
}

# Surplus measures to plot and summarise
surplus_vars = [
    'total_surplus_battery',
    'total_surplus_solar',
    'total_surplus_utility',
    'total_surplus_demand',
    'total_surplus_all',
]

print("DA vs AMM + Informed Trader Comparison - Boxplots:", "=" * 60, sep="\n")

# One figure per surplus measure; only outlier points are drawn individually.
for var in surplus_vars:
    # update_layout returns the figure it was called on, so the calls chain.
    fig = px.box(
        combined_df_da_informed,
        x='treatment',
        y=var,
        color='treatment',
        color_discrete_map=da_informed_colors,
        points='outliers',
        title=f'{var.replace("_", " ").title()} - DA vs AMM + Informed Trader',
    ).update_layout(
        width=700,
        height=500,
        xaxis_title='Treatment',
        yaxis_title=var.replace('_', ' ').title(),
        title_x=0.5,
    )
    fig.show()

# describe() output per treatment for every surplus measure, rounded to 2 dp
print("\nSummary Statistics by Treatment:", "=" * 60, sep="\n")

for var in surplus_vars:
    print(f"\n{var.replace('_', ' ').title()}:")
    summary = combined_df_da_informed.groupby('treatment')[var].describe()
    print(summary.round(2))

## Regression Analysis

In [None]:
# OLS regressions on the DA + Informed Trader sample: each surplus outcome is
# regressed on the 'AMM' indicator plus the simulation parameters.
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Dependent variables, one regression each
outcome_vars = [
    'total_surplus_all',
    'total_surplus_demand',
    'total_surplus_battery',
    'total_surplus_solar',
    'total_surplus_utility',
]

# Right-hand side of every formula; 'AMM' separates the two mechanisms
da_informed_predictors = 'AMM + C_max + C_init + q_b_max + mean_pmax + std_pmax'

# outcome name -> fitted model; read by the summary / LaTeX cells below
da_informed_regression_results = {}

print(
    "DA vs AMM + Informed Trader Regression Analysis",
    "=" * 70,
    "Note: 'AMM' = True for AMM + Informed Trader, False for Double Auction",
    "=" * 70,
    sep="\n",
)

for outcome in outcome_vars:
    formula = f"{outcome} ~ {da_informed_predictors}"

    model = smf.ols(formula, data=combined_df_da_informed).fit()
    da_informed_regression_results[outcome] = model

    # Headline fit statistics
    print(f"\n{outcome.replace('_', ' ').title()}")
    print("-" * 40)
    print(
        f"R-squared: {model.rsquared:.4f}",
        f"Adj. R-squared: {model.rsquared_adj:.4f}",
        f"F-statistic: {model.fvalue:.4f}",
        f"Prob (F-statistic): {model.f_pvalue:.4e}",
        f"Number of observations: {int(model.nobs)}",
        sep="\n",
    )

    print("\nCoefficients:")
    for param, coef in model.params.items():
        pval = model.pvalues[param]

        # First threshold the p-value falls under decides the marker;
        # no marker when p >= 0.1.
        stars = next(
            (
                mark
                for cutoff, mark in ((0.001, "***"), (0.01, "**"), (0.05, "*"), (0.1, "."))
                if pval < cutoff
            ),
            "",
        )

        # Only the mechanism dummy gets a plain-language reading of its sign.
        if param != 'AMM[T.True]':
            interpretation = ""
        elif coef > 0:
            interpretation = " (AMM + Informed Trader > Double Auction)"
        else:
            interpretation = " (Double Auction > AMM + Informed Trader)"

        print(f"  {param:15s}: {coef:8.4f}{stars:3s} (p={pval:.4f}){interpretation}")

print("\nSignificance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")

## Results Summary Table

In [None]:
# Long-format table of every non-intercept coefficient from the DA vs Informed
# Trader regressions, then a Variable x Outcome pivot and a fit-statistics table.
da_informed_results_summary = []

for outcome, model in da_informed_regression_results.items():
    for param in model.params.index:
        if param == 'Intercept':
            continue  # intercept is left out for a cleaner table

        coef, se = model.params[param], model.bse[param]
        tval, pval = model.tvalues[param], model.pvalues[param]

        # First threshold the p-value falls under decides the marker
        stars = next(
            (
                mark
                for cutoff, mark in ((0.001, "***"), (0.01, "**"), (0.05, "*"), (0.1, "."))
                if pval < cutoff
            ),
            "",
        )

        da_informed_results_summary.append(dict(
            Outcome=outcome,
            Variable=param,
            Coefficient=coef,
            Std_Error=se,
            T_Value=tval,
            P_Value=pval,
            Significance=stars,
            Coef_with_Stars=f"{coef:.4f}{stars}",
        ))

# One row per (outcome, variable) pair
da_informed_results_df = pd.DataFrame(da_informed_results_summary)

# Wide view: variables as rows, outcomes as columns, starred coefficients as cells
da_informed_pivot_results = da_informed_results_df.pivot(
    index='Variable',
    columns='Outcome',
    values='Coef_with_Stars',
)

print("\nDA vs AMM + Informed Trader Regression Results Summary:")
print("=" * 80)
print(da_informed_pivot_results.to_string())

# Fit statistics, pre-formatted as strings so they print with fixed precision
print("\n\nDA vs Informed Trader Model Fit Statistics:")
print("=" * 50)
da_informed_fit_stats = [
    {
        'Outcome': fit_outcome,
        'R_squared': f"{fit_model.rsquared:.4f}",
        'Adj_R_squared': f"{fit_model.rsquared_adj:.4f}",
        'F_statistic': f"{fit_model.fvalue:.4f}",
        'F_p_value': f"{fit_model.f_pvalue:.4e}",
        'N_obs': int(fit_model.nobs),
    }
    for fit_outcome, fit_model in da_informed_regression_results.items()
]

da_informed_fit_df = pd.DataFrame(da_informed_fit_stats)
print(da_informed_fit_df.to_string(index=False))

## LaTeX Table Generation

In [None]:
# Create LaTeX table for DA vs AMM + Informed Trader analysis
def create_da_informed_latex_regression_table(regression_results):
    """Render the DA vs AMM + Informed Trader regression results as a LaTeX table.

    The table has one column per outcome and two rows per regressor: the
    coefficient with significance stars and, beneath it, the standard error in
    parentheses. R-squared, adjusted R-squared and the number of observations
    follow below the coefficients. The intercept is omitted and the row order
    follows the parameter index of the first model.

    Parameters
    ----------
    regression_results : dict
        Maps an outcome name to a fitted model exposing ``params``,
        ``pvalues``, ``bse``, ``rsquared``, ``rsquared_adj`` and ``nobs``.
        Every model must contain the regressors of the first model.

    Returns
    -------
    str
        The complete ``table`` environment, terminated by a newline.

    Raises
    ------
    ValueError
        If ``regression_results`` is empty.
    """
    if not regression_results:
        raise ValueError("regression_results must contain at least one fitted model")

    models = list(regression_results.values())

    # Regressors to report (everything except the intercept)
    variables = [var for var in models[0].params.index if var != 'Intercept']

    def significance_stars(pval):
        """Return the star superscript for ``pval``; '' when p >= 0.05."""
        # The superscript is wrapped in $...$ because a bare ^ is only legal
        # in LaTeX math mode; emitting "^{***}" directly inside a tabular cell
        # stops compilation with "Missing $ inserted".
        if pval < 0.001:
            return "$^{***}$"
        if pval < 0.01:
            return "$^{**}$"
        if pval < 0.05:
            return "$^{*}$"
        return ""

    lines = [
        "\\begin{table}[htbp]",
        "\\centering",
        "\\caption{DA vs AMM + Informed Trader Analysis}",
        "\\label{tab:da_informed_regression_results}",
        # One left-aligned label column plus one centred column per outcome
        "\\begin{tabular}{l" + "c" * len(models) + "}",
        "\\hline\\hline",
        # Header row: underscores escaped so outcome names are valid LaTeX text
        " & " + " & ".join(outcome.replace('_', '\\_') for outcome in regression_results) + " \\\\",
        "\\hline",
    ]

    for var in variables:
        # Row label: the mechanism dummy gets a readable name, every other
        # regressor is shown with escaped underscores.
        if 'AMM' in var:
            var_display = 'AMM + Informed Trader'
        else:
            var_display = var.replace('_', '\\_').replace('[T.True]', '')

        coef_cells = "".join(
            f" & {model.params[var]:.4f}{significance_stars(model.pvalues[var])}"
            for model in models
        )
        se_cells = "".join(f" & ({model.bse[var]:.4f})" for model in models)

        lines.append(var_display + coef_cells + " \\\\")
        # Standard errors sit on their own row with an empty label cell
        lines.append(se_cells + " \\\\")

    # Model-level statistics
    lines.append("\\hline")
    lines.append("R$^2$" + "".join(f" & {model.rsquared:.4f}" for model in models) + " \\\\")
    lines.append("Adj. R$^2$" + "".join(f" & {model.rsquared_adj:.4f}" for model in models) + " \\\\")
    lines.append("Observations" + "".join(f" & {int(model.nobs)}" for model in models) + " \\\\")

    # NOTE(review): tablenotes is provided by the threeparttable package;
    # confirm the thesis preamble loads it.
    lines += [
        "\\hline\\hline",
        "\\end{tabular}",
        "\\begin{tablenotes}",
        "\\small",
        "\\item Note: Standard errors in parentheses. "
        "Reference category: Double Auction. "
        "Significance levels: *** p$<$0.001, ** p$<$0.01, * p$<$0.05",
        "\\end{tablenotes}",
        "\\end{table}",
    ]

    return "\n".join(lines) + "\n"

# Build the DA vs Informed Trader LaTeX table, echo it for a visual check, and
# write it to disk.
da_informed_latex_table = create_da_informed_latex_regression_table(da_informed_regression_results)

print("LaTeX DA vs AMM + Informed Trader Regression Table:", "=" * 60, sep="\n")
print(da_informed_latex_table)

# Written to the current working directory
with open('da_informed_regression_results.tex', 'w') as f:
    f.write(da_informed_latex_table)

print("\nDA vs Informed Trader LaTeX table saved to 'da_informed_regression_results.tex'")

# Reminder of how to read the sign of the mechanism coefficient
print(
    "\n" + "=" * 60,
    "KEY FINDINGS SUMMARY:",
    "=" * 60,
    "The 'AMM' coefficient shows the effect of using AMM + Informed Trader",
    "vs Double Auction (reference category):",
    "- Positive coefficient: AMM + Informed Trader performs better",
    "- Negative coefficient: Double Auction performs better",
    "=" * 60,
    sep="\n",
)