# Data Analysis Notebook

In [1]:
import numpy as np 
import pandas as pd 
import plotly.graph_objects as go

# Data Cleaning and Merging

## Import Data

#### Import Double Auction Model Data 

In [2]:
da_sim_df = pd.read_csv("/Users/nalin/Desktop/UChicago/Thesis/simulation_summary.csv")

In [3]:
da_sim_df.head()

Unnamed: 0,v_max,q_max,c_u,q_u_max,beta,C_max,C_init,q_b_max,mean_pmax,std_pmax,...,surplus_battery_ts,surplus_utility_ts,surplus_demand_ts,surplus_solar_ts,surplus_total_ts,total_surplus_battery,total_surplus_solar,total_surplus_utility,total_surplus_demand,total_surplus_all
0,10,10,5,10,1,5,1,1,5,0.0,...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.5308084989341...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,1553.167958,0.0,2112.5,3665.667958
1,10,10,5,10,1,5,1,1,15,0.0,...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0,...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.5924254968025...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",175.0,233.646981,0.0,5000.0,5408.646981
2,10,10,5,10,1,5,1,1,10,0.0,...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0616169978683...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,459.483271,0.0,4475.0,4934.483271
3,10,10,5,10,1,5,1,1,10,5.0,...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0,...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.7620433216404...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 17.500000...",90.0,375.544039,0.0,4700.0,5165.544039
4,10,10,5,10,1,5,1,1,5,2.5,...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.8810216608202...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,874.040781,0.0,3350.0,4224.040781


In [4]:
da_sim_df.columns

Index(['v_max', 'q_max', 'c_u', 'q_u_max', 'beta', 'C_max', 'C_init',
       'q_b_max', 'mean_pmax', 'std_pmax', 'prices', 'socs', 'q_s', 'q_b',
       'q_u', 'q_d', 's_t', 'surplus_battery_ts', 'surplus_utility_ts',
       'surplus_demand_ts', 'surplus_solar_ts', 'surplus_total_ts',
       'total_surplus_battery', 'total_surplus_solar', 'total_surplus_utility',
       'total_surplus_demand', 'total_surplus_all'],
      dtype='object')

In [5]:
da_sim_df["AMM"] = False #set the treatment of the AMM to false 
da_sim_df['battery_type'] = 'optimal'

## Import AMM ABM model data 
Data includes simulation runs from both Informed Trader and VFI Optimized Battery treatments

In [6]:
amm_sim_df = pd.read_csv("/Users/nalin/Desktop/UChicago/Thesis/abm_sim_summary.csv")

In [7]:
amm_sim_df.head()


Unnamed: 0,total_periods,total_trades,total_electricity_traded,total_money_exchanged,avg_price_overall,price_volatility_overall,total_agent_surplus,avg_trades_per_period,successful_trade_rate,max_amm_reserve_x,...,sunset,v_max,q_max,c_u,q_u_max,beta,trades_per_period,battery_type,reserve_x_init,reserve_y_init
0,168,3216,1885.511047,5604.321803,2.972309,1.370302,4726.401258,19.142857,100.0,17.269322,...,20,10,10,5,10,1,20,informed,4.472136,22.36068
1,168,2678,2198.880373,4451.011624,2.024217,1.605744,5493.973135,15.940476,100.0,66.63421,...,20,10,10,5,10,1,20,informed,4.472136,22.36068
2,168,4410,1617.653964,6520.311701,4.030721,1.197414,3891.030704,26.25,100.0,15.067473,...,20,10,10,5,10,1,20,informed,4.472136,22.36068
3,168,5319,1462.76502,6961.862769,4.759386,1.21607,3443.500078,31.660714,100.0,4.640506,...,20,10,10,5,10,1,20,informed,4.472136,22.36068
4,168,3059,1862.17957,5367.360006,2.8823,1.467049,4708.38794,18.208333,100.0,17.789581,...,20,10,10,5,10,1,20,optimal,4.472136,22.36068


In [8]:
amm_sim_df["AMM"] = True 
amm_inf_df = amm_sim_df[(amm_sim_df["battery_type"] == "informed")].copy()
amm_vfi_df = amm_sim_df[(amm_sim_df["battery_type"] == "optimal")].copy()

In [9]:
amm_inf_df = amm_inf_df.drop(columns=['q_b_vfi', 'soc_vfi', 'surplus_battery_vfi_ts', 'total_surplus_battery_vfi'])
amm_inf_df.head()

Unnamed: 0,total_periods,total_trades,total_electricity_traded,total_money_exchanged,avg_price_overall,price_volatility_overall,total_agent_surplus,avg_trades_per_period,successful_trade_rate,max_amm_reserve_x,...,v_max,q_max,c_u,q_u_max,beta,trades_per_period,battery_type,reserve_x_init,reserve_y_init,AMM
0,168,3216,1885.511047,5604.321803,2.972309,1.370302,4726.401258,19.142857,100.0,17.269322,...,10,10,5,10,1,20,informed,4.472136,22.36068,True
1,168,2678,2198.880373,4451.011624,2.024217,1.605744,5493.973135,15.940476,100.0,66.63421,...,10,10,5,10,1,20,informed,4.472136,22.36068,True
2,168,4410,1617.653964,6520.311701,4.030721,1.197414,3891.030704,26.25,100.0,15.067473,...,10,10,5,10,1,20,informed,4.472136,22.36068,True
3,168,5319,1462.76502,6961.862769,4.759386,1.21607,3443.500078,31.660714,100.0,4.640506,...,10,10,5,10,1,20,informed,4.472136,22.36068,True
8,168,1454,2819.106813,2301.911055,0.816539,2.167029,6979.594249,8.654762,100.0,142.004765,...,10,10,5,10,1,20,informed,4.472136,22.36068,True


In [10]:
amm_inf_df.columns

Index(['total_periods', 'total_trades', 'total_electricity_traded',
       'total_money_exchanged', 'avg_price_overall',
       'price_volatility_overall', 'total_agent_surplus',
       'avg_trades_per_period', 'successful_trade_rate', 'max_amm_reserve_x',
       'max_amm_reserve_y', 'final_amm_reserve_y', 'final_amm_reserve_x',
       'prices', 's_t', 'q_d', 'q_s', 'q_u', 'q_b_inf', 'soc_inf',
       'surplus_solar_ts', 'surplus_utility_ts', 'surplus_demand_ts',
       'surplus_battery_inf_ts', 'surplus_total_ts',
       'total_surplus_battery_inf', 'total_surplus_utility',
       'total_surplus_demand', 'total_surplus_solar', 'total_surplus_all',
       'C_max', 'C_init', 'q_b_max', 'mean_pmax', 'std_pmax', 'days', 'T',
       'sunrise', 'sunset', 'v_max', 'q_max', 'c_u', 'q_u_max', 'beta',
       'trades_per_period', 'battery_type', 'reserve_x_init', 'reserve_y_init',
       'AMM'],
      dtype='object')

In [11]:
amm_inf_df.rename(columns={
    'soc_inf': 'socs',
    'q_b_inf': 'q_b',
    'surplus_battery_inf_ts': 'surplus_battery_ts', 
    'total_surplus_battery_inf': 'total_surplus_battery'
}, inplace=True)
amm_inf_df.columns


Index(['total_periods', 'total_trades', 'total_electricity_traded',
       'total_money_exchanged', 'avg_price_overall',
       'price_volatility_overall', 'total_agent_surplus',
       'avg_trades_per_period', 'successful_trade_rate', 'max_amm_reserve_x',
       'max_amm_reserve_y', 'final_amm_reserve_y', 'final_amm_reserve_x',
       'prices', 's_t', 'q_d', 'q_s', 'q_u', 'q_b', 'socs', 'surplus_solar_ts',
       'surplus_utility_ts', 'surplus_demand_ts', 'surplus_battery_ts',
       'surplus_total_ts', 'total_surplus_battery', 'total_surplus_utility',
       'total_surplus_demand', 'total_surplus_solar', 'total_surplus_all',
       'C_max', 'C_init', 'q_b_max', 'mean_pmax', 'std_pmax', 'days', 'T',
       'sunrise', 'sunset', 'v_max', 'q_max', 'c_u', 'q_u_max', 'beta',
       'trades_per_period', 'battery_type', 'reserve_x_init', 'reserve_y_init',
       'AMM'],
      dtype='object')

In [12]:
amm_vfi_df 
amm_vfi_df = amm_vfi_df.drop(columns=['q_b_inf', 'soc_inf', 'surplus_battery_inf_ts', 'total_surplus_battery_inf'])

In [13]:
amm_vfi_df.rename(columns={
    'soc_vfi': 'socs',
    'q_b_vfi': 'q_b',
    'surplus_battery_vfi_ts': 'surplus_battery_ts', 
    'total_surplus_battery_vfi': 'total_surplus_battery'
}, inplace=True)

amm_vfi_df.columns

Index(['total_periods', 'total_trades', 'total_electricity_traded',
       'total_money_exchanged', 'avg_price_overall',
       'price_volatility_overall', 'total_agent_surplus',
       'avg_trades_per_period', 'successful_trade_rate', 'max_amm_reserve_x',
       'max_amm_reserve_y', 'final_amm_reserve_y', 'final_amm_reserve_x',
       'prices', 's_t', 'q_d', 'q_s', 'q_u', 'q_b', 'socs', 'surplus_solar_ts',
       'surplus_utility_ts', 'surplus_demand_ts', 'surplus_battery_ts',
       'surplus_total_ts', 'total_surplus_battery', 'total_surplus_utility',
       'total_surplus_demand', 'total_surplus_solar', 'total_surplus_all',
       'C_max', 'C_init', 'q_b_max', 'mean_pmax', 'std_pmax', 'days', 'T',
       'sunrise', 'sunset', 'v_max', 'q_max', 'c_u', 'q_u_max', 'beta',
       'trades_per_period', 'battery_type', 'reserve_x_init', 'reserve_y_init',
       'AMM'],
      dtype='object')

In [14]:
combined_df = pd.concat([da_sim_df, amm_inf_df, amm_vfi_df], ignore_index=True)

In [15]:
combined_df.columns

Index(['v_max', 'q_max', 'c_u', 'q_u_max', 'beta', 'C_max', 'C_init',
       'q_b_max', 'mean_pmax', 'std_pmax', 'prices', 'socs', 'q_s', 'q_b',
       'q_u', 'q_d', 's_t', 'surplus_battery_ts', 'surplus_utility_ts',
       'surplus_demand_ts', 'surplus_solar_ts', 'surplus_total_ts',
       'total_surplus_battery', 'total_surplus_solar', 'total_surplus_utility',
       'total_surplus_demand', 'total_surplus_all', 'AMM', 'battery_type',
       'total_periods', 'total_trades', 'total_electricity_traded',
       'total_money_exchanged', 'avg_price_overall',
       'price_volatility_overall', 'total_agent_surplus',
       'avg_trades_per_period', 'successful_trade_rate', 'max_amm_reserve_x',
       'max_amm_reserve_y', 'final_amm_reserve_y', 'final_amm_reserve_x',
       'days', 'T', 'sunrise', 'sunset', 'trades_per_period', 'reserve_x_init',
       'reserve_y_init'],
      dtype='object')

In [16]:
pd.set_option('display.max_columns', None)
combined_df.head()

Unnamed: 0,v_max,q_max,c_u,q_u_max,beta,C_max,C_init,q_b_max,mean_pmax,std_pmax,prices,socs,q_s,q_b,q_u,q_d,s_t,surplus_battery_ts,surplus_utility_ts,surplus_demand_ts,surplus_solar_ts,surplus_total_ts,total_surplus_battery,total_surplus_solar,total_surplus_utility,total_surplus_demand,total_surplus_all,AMM,battery_type,total_periods,total_trades,total_electricity_traded,total_money_exchanged,avg_price_overall,price_volatility_overall,total_agent_surplus,avg_trades_per_period,successful_trade_rate,max_amm_reserve_x,max_amm_reserve_y,final_amm_reserve_y,final_amm_reserve_x,days,T,sunrise,sunset,trades_per_period,reserve_x_init,reserve_y_init
0,10,10,5,10,1,5,1,1,5,0.0,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0616169978683...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 3.88739533...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0616169978683...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.5308084989341...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,1553.167958,0.0,2112.5,3665.667958,False,optimal,,,,,,,,,,,,,,,,,,,,
1,10,10,5,10,1,5,1,1,15,0.0,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 0.0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 2, 3, 4, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.1848509936051...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.9999999999999...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 10.0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.1848509936051...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0,...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.5924254968025...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",175.0,233.646981,0.0,5000.0,5408.646981,False,optimal,,,,,,,,,,,,,,,,,,,,
2,10,10,5,10,1,5,1,1,10,0.0,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.1232339957367...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.9999999999999...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.1232339957367...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0616169978683...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,459.483271,0.0,4475.0,4934.483271,False,optimal,,,,,,,,,,,,,,,,,,,,
3,10,10,5,10,1,5,1,1,10,5.0,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 0.0, ...","[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 2, 3, 4, 5, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1524086643280...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 3.9999999999999...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 10.0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1524086643280...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0,...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.7620433216404...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 17.500000...",90.0,375.544039,0.0,4700.0,5165.544039,False,optimal,,,,,,,,,,,,,,,,,,,,
4,10,10,5,10,1,5,1,1,5,2.5,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.7620433216404...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.9999999999999...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.7620433216404...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.8810216608202...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,874.040781,0.0,3350.0,4224.040781,False,optimal,,,,,,,,,,,,,,,,,,,,


In [17]:
combined_df.to_csv("da_amm_combined.csv")

In [20]:
combined_df = pd.read_csv("da_amm_combined.csv")

In [21]:
combined_df.head()

Unnamed: 0.1,Unnamed: 0,v_max,q_max,c_u,q_u_max,beta,C_max,C_init,q_b_max,mean_pmax,std_pmax,prices,socs,q_s,q_b,q_u,q_d,s_t,surplus_battery_ts,surplus_utility_ts,surplus_demand_ts,surplus_solar_ts,surplus_total_ts,total_surplus_battery,total_surplus_solar,total_surplus_utility,total_surplus_demand,total_surplus_all,AMM,battery_type,total_periods,total_trades,total_electricity_traded,total_money_exchanged,avg_price_overall,price_volatility_overall,total_agent_surplus,avg_trades_per_period,successful_trade_rate,max_amm_reserve_x,max_amm_reserve_y,final_amm_reserve_y,final_amm_reserve_x,days,T,sunrise,sunset,trades_per_period,reserve_x_init,reserve_y_init
0,0,10,10,5,10,1,5,1,1,5,0.0,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0616169978683...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 3.88739533...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0616169978683...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.5308084989341...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,1553.167958,0.0,2112.5,3665.667958,False,optimal,,,,,,,,,,,,,,,,,,,,
1,1,10,10,5,10,1,5,1,1,15,0.0,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 0.0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 2, 3, 4, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.1848509936051...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.9999999999999...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 10.0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.1848509936051...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0,...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.5924254968025...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",175.0,233.646981,0.0,5000.0,5408.646981,False,optimal,,,,,,,,,,,,,,,,,,,,
2,2,10,10,5,10,1,5,1,1,10,0.0,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.1232339957367...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.9999999999999...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.1232339957367...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0616169978683...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,459.483271,0.0,4475.0,4934.483271,False,optimal,,,,,,,,,,,,,,,,,,,,
3,3,10,10,5,10,1,5,1,1,10,5.0,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 0.0, ...","[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 2, 3, 4, 5, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1524086643280...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 3.9999999999999...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 10.0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1524086643280...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0,...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.7620433216404...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 17.500000...",90.0,375.544039,0.0,4700.0,5165.544039,False,optimal,,,,,,,,,,,,,,,,,,,,
4,4,10,10,5,10,1,5,1,1,5,2.5,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.7620433216404...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.9999999999999...","[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.7620433216404...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.8810216608202...","[12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.500000...",0.0,874.040781,0.0,3350.0,4224.040781,False,optimal,,,,,,,,,,,,,,,,,,,,


# Data Analysis

## Simulation Parameter Grid

| Parameter | Variable Name | Values | Description |
|-----------|---------------|--------|-------------|
| **Battery Capacity (Max)** | `C_max` | [5, 10, 15, 20] | Maximum battery storage capacity |
| **Initial Battery Charge** | `C_init` | [20%, 50%, 80%] of C_max | Starting state of charge |
| **Battery Trading Limit** | `q_b_max` | [1, 2, 3, 4, 5] | Maximum energy battery can trade per period |
| **Mean Solar Peak Power** | `mean_pmax` | [5, 10, 15, 20] | Average maximum daily solar generation |
| **Solar Variability** | `std_pmax` | [0, mean_pmax/2] | Standard deviation of solar peak power |
| **Battery Strategy** | `battery_type` | ['informed', 'optimal'] | Informed trader vs VFI optimized battery |
| **Mechanism** | `AMM` | bool | We have two exchange mechanism the DA and AMM |

### Fixed Parameters
| Parameter | Value | Description |
|-----------|-------|-------------|
| Simulation Days | 7 | Length of simulation |
| Trades per Period | 20 | Number of trades executed each hour |
| Sunrise/Sunset | 6:00/20:00 | Solar generation window |
| Demand Parameters | v_max=10, q_max=10 | Maximum willingness to pay and quantity |
| Utility Cost | c_u=5 | Marginal cost of utility generation |
| Utility Capacity | q_u_max=10 | Maximum utility generation capacity |
| Discount Factor | β=1 | No discounting in optimization |
| AMM Initial Price | 5.0 | Starting M_token/E_token exchange rate |
| AMM Reserves | x≈4.47, y≈22.36 | Initial liquidity pool reserves |

**Total Combinations:** 4 × 3 × 5 × 4 × 2 × 2  + 480= **1440 simulations** per run 

This setup allows you to analyze how battery capacity, charging strategy, solar variability, and trading behavior interact across different market conditions in your energy trading ABM.

## Summary Statistics and Data Visualization 

In [22]:

# Create treatment variable for the three experimental conditions
def assign_treatment(row):
    if row['AMM'] == False and row['battery_type'] == 'optimal':
        return 'Double Auction'
    elif row['AMM'] == True and row['battery_type'] == 'optimal':
        return 'AMM + VFI Battery'
    elif row['AMM'] == True and row['battery_type'] == 'informed':
        return 'AMM + Informed Trader'
    else:
        return 'Unknown'

combined_df['treatment'] = combined_df.apply(assign_treatment, axis=1)

# Check the treatment distribution
print("Treatment counts:")
print(combined_df['treatment'].value_counts())
print("\nSample of data with treatments:")
print(combined_df[['AMM', 'battery_type', 'treatment']].head(10))

Treatment counts:
treatment
Double Auction           480
AMM + Informed Trader    480
AMM + VFI Battery        480
Name: count, dtype: int64

Sample of data with treatments:
     AMM battery_type       treatment
0  False      optimal  Double Auction
1  False      optimal  Double Auction
2  False      optimal  Double Auction
3  False      optimal  Double Auction
4  False      optimal  Double Auction
5  False      optimal  Double Auction
6  False      optimal  Double Auction
7  False      optimal  Double Auction
8  False      optimal  Double Auction
9  False      optimal  Double Auction


In [23]:
import plotly.express as px
from plotly.subplots import make_subplots

# List of surplus variables to plot
surplus_vars = ['total_surplus_battery', 'total_surplus_solar', 'total_surplus_utility', 
                'total_surplus_demand', 'total_surplus_all']

# Define colors for each treatment
colors = {
    'Double Auction': '#1f77b4',
    'AMM + VFI Battery': '#ff7f0e', 
    'AMM + Informed Trader': '#2ca02c'
}

# Create individual boxplots for each variable (easier to read)
for var in surplus_vars:
    fig = px.box(
        combined_df[combined_df['treatment'] != 'Unknown'], 
        x='treatment', 
        y=var,
        title=f'{var.replace("_", " ").title()} by Treatment',
        color='treatment',
        color_discrete_map=colors,
        points='outliers'
    )
    
    fig.update_layout(
        width=800,
        height=500,
        xaxis_title='Treatment',
        yaxis_title=var.replace('_', ' ').title(),
        title_x=0.5
    )
    
    fig.show()

In [62]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# List of surplus variables to plot in specific order for 2x3 layout
surplus_vars = ['total_surplus_battery', 'total_surplus_solar', 'total_surplus_utility', 
                'total_surplus_demand', 'total_surplus_all']

# Define colors for each treatment
colors = {
    'Double Auction': '#1f77b4',
    'AMM + VFI Battery': '#ff7f0e', 
    'AMM + Informed Trader': '#2ca02c'
}

# Create 2x3 subplot layout
# Row 1: Battery, Solar, Utility
# Row 2: Demand, Total, (empty)
subplot_titles = [
    'Battery Surplus', 'Solar Surplus', 'Utility Surplus',
    'Demand Surplus', 'Total Surplus', ''
]

fig = make_subplots(
    rows=2, cols=3,
    subplot_titles=subplot_titles,
    vertical_spacing=0.12,
    horizontal_spacing=0.08
)

# Filter data to exclude unknown treatments
plot_data = combined_df[combined_df['treatment'] != 'Unknown']

# Plot positions for 2x3 grid
positions = [(1,1), (1,2), (1,3), (2,1), (2,2)]

for i, var in enumerate(surplus_vars):
    row, col = positions[i]
    
    # Create boxplot for each treatment
    for treatment in ['Double Auction', 'AMM + VFI Battery', 'AMM + Informed Trader']:
        treatment_data = plot_data[plot_data['treatment'] == treatment][var]
        
        fig.add_trace(
            go.Box(
                y=treatment_data,
                name=treatment,
                marker_color=colors[treatment],
                legendgroup=treatment,
                showlegend=(i == 0),  # Only show legend for first subplot
                boxpoints='outliers'
            ),
            row=row, col=col
        )

# Update layout
fig.update_layout(
    title_text="Surplus Analysis by Treatment",
    title_x=0.5,
    height=700,
    width=1200,
    boxmode='group',
    
)

# Update y-axis labels for each subplot
y_labels = ['Battery Surplus', 'Solar Surplus', 'Utility Surplus', 
           'Demand Surplus', 'Total Surplus']

for i, label in enumerate(y_labels):
    row, col = positions[i]
    fig.update_yaxes(title_text=label, row=row, col=col)

# Update x-axis labels (remove for cleaner look since we have legend)
for i in range(len(surplus_vars)):
    row, col = positions[i]
    fig.update_xaxes(showticklabels=False, row=row, col=col)

fig.show()

In [24]:

# Summary statistics by treatment
print("Summary Statistics by Treatment:\n")

for var in surplus_vars:
    print(f"\n{var.replace('_', ' ').title()}:")
    summary = combined_df[combined_df['treatment'] != 'Unknown'].groupby('treatment')[var].describe()
    print(summary.round(2))

Summary Statistics by Treatment:


Total Surplus Battery:
                       count        mean          std           min     25%  \
treatment                                                                     
AMM + Informed Trader  480.0      262.33       137.95  6.150000e+00  183.44   
AMM + VFI Battery      480.0 -7083592.68  32820369.64 -2.999998e+08    0.00   
Double Auction         480.0      212.76       214.55  0.000000e+00    0.00   

                          50%     75%     max  
treatment                                      
AMM + Informed Trader  244.03  358.49  632.99  
AMM + VFI Battery        0.00   78.75  307.77  
Double Auction         175.00  350.00  700.00  

Total Surplus Solar:
                       count        mean          std     min     25%     50%  \
treatment                                                                       
AMM + Informed Trader  480.0      603.49       305.56   34.61  353.04  614.64   
AMM + VFI Battery      480.0  5000966.91 

## Period Visualizations

In [56]:
import plotly.graph_objects as go
import ast

def find_row_index(df, C_max, q_b_max, mean_pmax, std_pmax, C_init, battery_type=None, AMM=None):
    """
    Find row index(es) in the dataframe based on parameter values.
    
    Parameters:
    - df: DataFrame to search in
    - C_max, q_b_max, mean_pmax, std_pmax, C_init: Parameter values to match
    - battery_type: Optional filter for battery type ('informed', 'optimal')
    - AMM: Optional filter for AMM (True/False)
    
    Returns:
    - List of matching row indices
    """
    mask = (
        (df['C_max'] == C_max) &
        (df['q_b_max'] == q_b_max) &
        (df['mean_pmax'] == mean_pmax) &
        (df['std_pmax'] == std_pmax) &
        (df['C_init'] == C_init)
    )
    
    if battery_type is not None:
        mask = mask & (df['battery_type'] == battery_type)
    
    if AMM is not None:
        mask = mask & (df['AMM'] == AMM)
    
    matching_indices = df[mask].index.tolist()
    
    if len(matching_indices) == 0:
        print(f"No rows found with parameters: C_max={C_max}, q_b_max={q_b_max}, mean_pmax={mean_pmax}, std_pmax={std_pmax}, C_init={C_init}")
        if battery_type: print(f"  battery_type={battery_type}")
        if AMM is not None: print(f"  AMM={AMM}")
    elif len(matching_indices) == 1:
        print(f"Found 1 row at index {matching_indices[0]}")
    else:
        print(f"Found {len(matching_indices)} rows at indices: {matching_indices}")
        print("Consider adding battery_type or AMM filters to narrow down results")
    
    return matching_indices

def plot_summary_row_combined(df, row_index, title_suffix=""):
    """
    Plot summary for a specific row in combined_df.
    
    Parameters:
    - df: The combined dataframe 
    - row_index: Index of the row to plot
    - title_suffix: Additional text for plot titles
    """
    if row_index not in df.index:
        print(f"Row index {row_index} not found in dataframe")
        return None
    
    row = df.loc[row_index]
    
    # Helper to parse the stringified lists
    def parse(col):
        if isinstance(col, str):
            return ast.literal_eval(col)
        return col  # already a list
    
    battery_type = row["battery_type"]
    treatment = row.get("treatment", "Unknown")
    
    # Get battery-specific data based on the combined_df structure
    if battery_type == "informed":
        q_b_list = parse(row['q_b'])  # After renaming, informed trader data is in 'q_b'
        socs = parse(row['socs'])     # After renaming, informed trader data is in 'socs'
    elif battery_type == "optimal":
        q_b_list = parse(row['q_b'])  # After renaming, VFI data is in 'q_b'
        socs = parse(row['socs'])     # After renaming, VFI data is in 'socs'
    else:
        q_b_list, socs = [], []

    # Parse other time series data
    prices = parse(row['prices'])
    q_s_list = parse(row['q_s'])
    q_u_list = parse(row['q_u'])
    q_d_list = parse(row['q_d'])
    s_t = parse(row['s_t'])
    
    time_index = list(range(len(prices)))
    
    # Extract parameters for title
    params_text = f"C_max={row['C_max']}, C_init={row['C_init']}, q_b_max={row['q_b_max']}, mean_pmax={row['mean_pmax']}, std_pmax={row['std_pmax']}"
    full_title_suffix = f"{title_suffix} - {treatment} - {params_text}"

    # 1. Battery Dispatch (q_b)
    fig_battery = go.Figure()
    fig_battery.add_trace(go.Scatter(
        x=time_index, y=q_b_list, 
        mode='lines+markers', 
        name=f'Battery Dispatch ({battery_type})', 
        line=dict(color='red')
    ))
    fig_battery.add_hline(y=0, line_dash="dash", line_color="gray")
    fig_battery.update_layout(
        title=f"Battery Dispatch - {full_title_suffix}", 
        xaxis_title="Time (hours)", 
        yaxis_title="Battery Dispatch (q_b)", 
        xaxis=dict(range=[0, len(time_index)]),
        height=400
    )

    # 2. Battery State of Charge (SOC)
    fig_soc = go.Figure()
    fig_soc.add_trace(go.Scatter(
        x=time_index, y=socs, 
        mode='lines+markers', 
        name='SOC', 
        line=dict(color='green')
    ))
    fig_soc.add_hline(
        y=socs[0], line_dash="dot", line_color="orange", 
        annotation_text=f"Initial SOC: {socs[0]:.2f}", 
        annotation_position="top right"
    )
    fig_soc.add_hline(
        y=row['C_max'], line_dash="dash", line_color="red", 
        annotation_text=f"Max Capacity: {row['C_max']}", 
        annotation_position="bottom right"
    )
    fig_soc.update_layout(
        title=f"Battery State-of-Charge - {full_title_suffix}", 
        xaxis_title="Time (hours)", 
        yaxis_title="SOC",
        yaxis=dict(range=[0, row['C_max']*1.1]),
        height=400
    )

    # 3. Market Clearing Price
    fig_price = go.Figure()
    fig_price.add_trace(go.Scatter(
        x=time_index, y=prices, 
        mode='lines+markers', 
        name='Market Price', 
        line=dict(color='red')
    ))
    fig_price.update_layout(
        title=f"Market Clearing Price {full_title_suffix}", 
        xaxis_title="Time (hours)", 
        yaxis_title="Price",
        height=400
    )

    # 4. Dispatch Quantities
    fig_dispatch = go.Figure()
    fig_dispatch.add_trace(go.Scatter(
        x=time_index, y=q_s_list, 
        mode='lines', 
        name='Solar Dispatch', 
        line=dict(color='green')
    ))
    fig_dispatch.add_trace(go.Scatter(
        x=time_index, y=q_b_list, 
        mode='lines', 
        name=f'Battery Dispatch ({battery_type})', 
        line=dict(color='red')
    ))
    fig_dispatch.add_trace(go.Scatter(
        x=time_index, y=q_u_list, 
        mode='lines', 
        name='Utility Dispatch', 
        line=dict(color='purple')
    ))
    fig_dispatch.add_trace(go.Scatter(
        x=time_index, y=q_d_list, 
        mode='lines', 
        name='Quantity Demanded', 
        line=dict(color='blue')
    ))

    fig_dispatch.add_trace(go.Scatter(
        x=time_index, y=s_t, 
        mode='lines', 
        name='Solar Generation', 
        line=dict(color='orange', dash='dash') 
    ))
    
    fig_dispatch.update_layout(
        title=f"Dispatch and Market Quantities {full_title_suffix}",
        xaxis_title="Time (hours)", 
        yaxis_title="Quantity",
        height=500
    )

    # Show all plots
    fig_battery.show()
    fig_soc.show() 
    fig_price.show()
    fig_dispatch.show()
    
    # Return figures for further manipulation if needed
    return {
        "battery_dispatch": fig_battery,
        "state_of_charge": fig_soc,
        "market_price": fig_price,
        "dispatch_quantities": fig_dispatch,
        "row_info": {
            "index": row_index,
            "treatment": treatment,
            "battery_type": battery_type,
            "parameters": params_text
        }
    }


In [74]:

# Example usage:
indices = find_row_index(combined_df, C_max=10, q_b_max=1, mean_pmax=15, std_pmax=0, C_init=5, battery_type='optimal', AMM=True)
if indices:
    plot_summary_row_combined(combined_df, indices[0])

Found 1 row at index 1124


In [73]:
# Example usage:
indices = find_row_index(combined_df, C_max=10, q_b_max=1, mean_pmax=15, std_pmax=0, C_init=5, battery_type='informed', AMM=True)
if indices:
    plot_summary_row_combined(combined_df, indices[0])

Found 1 row at index 645


### Removing Outliers

In [25]:
# Identify outliers for total_surplus_battery in 'AMM + VFI Battery' treatment
import numpy as np

# Filter data for AMM + VFI Battery treatment
amm_vfi_data = combined_df[combined_df['treatment'] == 'AMM + VFI Battery'].copy()

# Calculate outliers using IQR method
Q1 = amm_vfi_data['total_surplus_battery'].quantile(0.25)
Q3 = amm_vfi_data['total_surplus_battery'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

print(f"AMM + VFI Battery - total_surplus_battery statistics:")
print(f"Q1: {Q1:.2f}")
print(f"Q3: {Q3:.2f}")
print(f"IQR: {IQR:.2f}")
print(f"Lower bound (Q1 - 1.5*IQR): {lower_bound:.2f}")
print(f"Upper bound (Q3 + 1.5*IQR): {upper_bound:.2f}")

# Identify outlier rows
outliers = amm_vfi_data[
    (amm_vfi_data['total_surplus_battery'] < lower_bound) | 
    (amm_vfi_data['total_surplus_battery'] > upper_bound)
]

print(f"\nNumber of outliers: {len(outliers)}")
print(f"Total observations in AMM + VFI Battery: {len(amm_vfi_data)}")
print(f"Percentage of outliers: {len(outliers)/len(amm_vfi_data)*100:.1f}%")

if len(outliers) > 0:
    print(f"\nOutlier values:")
    print(outliers['total_surplus_battery'].sort_values().values)

AMM + VFI Battery - total_surplus_battery statistics:
Q1: 0.00
Q3: 78.75
IQR: 78.75
Lower bound (Q1 - 1.5*IQR): -118.12
Upper bound (Q3 + 1.5*IQR): 196.87

Number of outliers: 76
Total observations in AMM + VFI Battery: 480
Percentage of outliers: 15.8%

Outlier values:
[-2.99999793e+08 -2.00066019e+08 -2.00065953e+08 -1.99999928e+08
 -1.99999891e+08 -1.99999847e+08 -1.99999749e+08 -1.99999732e+08
 -1.00001537e+08 -1.00000548e+08 -1.00000344e+08 -1.00000113e+08
 -1.00000027e+08 -1.00000001e+08 -9.99999665e+07 -9.99999652e+07
 -9.99999610e+07 -9.99999602e+07 -9.99999565e+07 -9.99999535e+07
 -9.99999498e+07 -9.99999484e+07 -9.99999278e+07 -9.99999004e+07
 -9.99998713e+07 -2.64264302e+03 -2.46740430e+03 -1.72461984e+03
 -1.34740412e+03 -7.62127627e+02 -5.13065964e+02 -5.11269124e+02
 -5.02478207e+02 -4.33554539e+02 -4.07193937e+02 -3.28374066e+02
 -2.94738634e+02 -2.58286909e+02 -2.10256913e+02 -1.91647092e+02
 -1.71397291e+02 -1.63866283e+02 -1.51586006e+02 -1.45852335e+02
 -1.36668196e+

In [26]:
# Show parameter combinations for outlier rows
if len(outliers) > 0:
    print("\nParameter combinations for outlier rows:")
    print("="*60)
    
    # Key parameters to examine
    key_params = ['v_max', 'q_max', 'c_u', 'q_u_max', 'beta', 'C_max', 'C_init', 'q_b_max', 
                  'total_surplus_battery', 'total_surplus_all', 'avg_price_overall']
    
    # Show outliers with key parameters
    outlier_params = outliers[key_params].copy()
    outlier_params = outlier_params.sort_values('total_surplus_battery')
    
    print(outlier_params.to_string(index=False))
    
    # Show the row indices of outliers in the original dataframe
    print(f"\nRow indices of outliers in combined_df:")
    print(outliers.index.tolist())


Parameter combinations for outlier rows:
 v_max  q_max  c_u  q_u_max  beta  C_max  C_init  q_b_max  total_surplus_battery  total_surplus_all  avg_price_overall
    10     10    5       10     1     20      10        5          -2.999998e+08        7077.867307      205230.585814
    10     10    5       10     1     20      16        5          -2.000660e+08        7072.818410      139129.655701
    10     10    5       10     1     15       3        5          -2.000660e+08        6982.113062      141331.451300
    10     10    5       10     1     20      10        5          -1.999999e+08        7023.618346      138723.570890
    10     10    5       10     1     10       2        5          -1.999999e+08        6913.175319      143231.677935
    10     10    5       10     1     20       4        5          -1.999998e+08        7042.719820      136394.622237
    10     10    5       10     1     20       4        5          -1.999997e+08        7007.044242      138758.247015
    10

In [27]:
# Show parameter combinations for outlier rows
if len(outliers) > 0:
    print("\nParameter combinations for outlier rows:")
    print("="*60)
    
    # Key parameters to examine
    key_params = ['v_max', 'q_max', 'c_u', 'q_u_max', 'beta', 'C_max', 'C_init', 'q_b_max', 
                  'total_surplus_battery', 'total_surplus_all', 'avg_price_overall']
    
    # Show outliers with key parameters
    outlier_params = outliers[key_params].copy()
    outlier_params = outlier_params.sort_values('total_surplus_battery')
    
    print(outlier_params.to_string(index=False))
    
    # Show the row indices of outliers in the original dataframe
    print(f"\nRow indices of outliers in combined_df:")
    print(outliers.index.tolist())


Parameter combinations for outlier rows:
 v_max  q_max  c_u  q_u_max  beta  C_max  C_init  q_b_max  total_surplus_battery  total_surplus_all  avg_price_overall
    10     10    5       10     1     20      10        5          -2.999998e+08        7077.867307      205230.585814
    10     10    5       10     1     20      16        5          -2.000660e+08        7072.818410      139129.655701
    10     10    5       10     1     15       3        5          -2.000660e+08        6982.113062      141331.451300
    10     10    5       10     1     20      10        5          -1.999999e+08        7023.618346      138723.570890
    10     10    5       10     1     10       2        5          -1.999999e+08        6913.175319      143231.677935
    10     10    5       10     1     20       4        5          -1.999998e+08        7042.719820      136394.622237
    10     10    5       10     1     20       4        5          -1.999997e+08        7007.044242      138758.247015
    10

In [28]:
# Visualize the outliers
fig = px.box(
    amm_vfi_data, 
    y='total_surplus_battery',
    title='Total Surplus Battery - AMM + VFI Battery Treatment (with outliers highlighted)',
    points='all'  # Show all points
)

# Add horizontal lines for outlier bounds
fig.add_hline(y=lower_bound, line_dash="dash", line_color="red", 
              annotation_text=f"Lower bound: {lower_bound:.2f}")
fig.add_hline(y=upper_bound, line_dash="dash", line_color="red", 
              annotation_text=f"Upper bound: {upper_bound:.2f}")

fig.update_layout(width=600, height=500)
fig.show()

# Create cleaned dataset without outliers
print(f"\nCreating cleaned dataset...")
print(f"Original combined_df shape: {combined_df.shape}")

# Mark outlier rows in the combined dataframe
combined_df['is_outlier'] = False
combined_df.loc[outliers.index, 'is_outlier'] = True

# Create cleaned dataset
combined_df_clean = combined_df[~combined_df['is_outlier']].copy()
print(f"Cleaned combined_df shape: {combined_df_clean.shape}")
print(f"Removed {len(outliers)} outlier rows")

# Show distribution after cleaning
print(f"\nTreatment distribution after cleaning:")
print(combined_df_clean['treatment'].value_counts())


Creating cleaned dataset...
Original combined_df shape: (1440, 51)
Cleaned combined_df shape: (1364, 52)
Removed 76 outlier rows

Treatment distribution after cleaning:
treatment
Double Auction           480
AMM + Informed Trader    480
AMM + VFI Battery        404
Name: count, dtype: int64


In [29]:
# Compare boxplots before and after outlier removal
from plotly.subplots import make_subplots

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=['With Outliers', 'Outliers Removed'],
    horizontal_spacing=0.1
)

# Before cleaning
for treatment in ['Double Auction', 'AMM + VFI Battery', 'AMM + Informed Trader']:
    data = combined_df[combined_df['treatment'] == treatment]['total_surplus_battery']
    fig.add_trace(
        go.Box(y=data, name=treatment, marker_color=colors[treatment], 
               showlegend=True, legendgroup=treatment),
        row=1, col=1
    )

# After cleaning  
for treatment in ['Double Auction', 'AMM + VFI Battery', 'AMM + Informed Trader']:
    data = combined_df_clean[combined_df_clean['treatment'] == treatment]['total_surplus_battery']
    fig.add_trace(
        go.Box(y=data, name=treatment, marker_color=colors[treatment],
               showlegend=False, legendgroup=treatment),
        row=1, col=2
    )

fig.update_layout(
    title_text="Total Surplus Battery: Before vs After Outlier Removal",
    height=500,
    width=1000
)

fig.update_yaxes(title_text="Total Surplus Battery", row=1, col=1)
fig.update_yaxes(title_text="Total Surplus Battery", row=1, col=2)

fig.show()

In [30]:
# First, identify the outlier parameter combinations for the specific columns
outlier_combinations = outliers[['C_max', 'C_init', 'q_b_max', 'mean_pmax', 'std_pmax']].drop_duplicates()

print("Outlier parameter combinations to remove:")
print(outlier_combinations)
print(f"\nNumber of unique outlier combinations: {len(outlier_combinations)}")

# Create a function to check if a row matches any outlier combination
def matches_outlier_combination(row):
    for _, outlier_combo in outlier_combinations.iterrows():
        if (row['C_max'] == outlier_combo['C_max'] and
            row['C_init'] == outlier_combo['C_init'] and
            row['q_b_max'] == outlier_combo['q_b_max'] and
            row['mean_pmax'] == outlier_combo['mean_pmax'] and
            row['std_pmax'] == outlier_combo['std_pmax']):
            return True
    return False

# Apply the function to mark rows that match outlier combinations
combined_df['matches_outlier_combo'] = combined_df.apply(matches_outlier_combination, axis=1)

# Create new dataframe without any rows that have these parameter combinations
combined_df_filtered = combined_df[~combined_df['matches_outlier_combo']].copy()

print(f"\nOriginal dataframe shape: {combined_df.shape}")
print(f"Filtered dataframe shape: {combined_df_filtered.shape}")
print(f"Removed {combined_df.shape[0] - combined_df_filtered.shape[0]} rows")

# Check treatment distribution after filtering
print(f"\nTreatment distribution after filtering:")
print(combined_df_filtered['treatment'].value_counts())

# Check how many rows were removed from each treatment
print(f"\nRows removed by treatment:")
removed_by_treatment = combined_df[combined_df['matches_outlier_combo']]['treatment'].value_counts()
print(removed_by_treatment)

Outlier parameter combinations to remove:
      C_max  C_init  q_b_max  mean_pmax  std_pmax
988       5       1        4         15       7.5
995       5       1        5         10       5.0
1036      5       2        5         15       7.5
1076      5       4        5         15       7.5
1093     10       2        2         15       0.0
...     ...     ...      ...        ...       ...
1429     20      16        4         15       0.0
1430     20      16        4         20       0.0
1437     20      16        5         15       7.5
1438     20      16        5         20       0.0
1439     20      16        5         20      10.0

[76 rows x 5 columns]

Number of unique outlier combinations: 76

Original dataframe shape: (1440, 53)
Filtered dataframe shape: (1212, 53)
Removed 228 rows

Treatment distribution after filtering:
treatment
Double Auction           404
AMM + Informed Trader    404
AMM + VFI Battery        404
Name: count, dtype: int64

Rows removed by treatment:
treatmen

In [31]:
# Create a new dataframe with only DA and AMM + VFI Battery treatments
# (excluding AMM + Informed Trader)
combined_df_da_vfi = combined_df_filtered[
    combined_df_filtered['treatment'].isin(['Double Auction', 'AMM + VFI Battery'])
].copy()

print(f"Original filtered dataframe shape: {combined_df_filtered.shape}")
print(f"DA + VFI only dataframe shape: {combined_df_da_vfi.shape}")
print(f"Removed {combined_df_filtered.shape[0] - combined_df_da_vfi.shape[0]} informed trader rows")

# Check the treatment distribution in the new dataframe
print(f"\nTreatment distribution in DA + VFI dataframe:")
print(combined_df_da_vfi['treatment'].value_counts())

# Verify battery_type distribution
print(f"\nBattery type distribution in DA + VFI dataframe:")
print(combined_df_da_vfi['battery_type'].value_counts())

# Show AMM distribution
print(f"\nAMM distribution in DA + VFI dataframe:")
print(combined_df_da_vfi['AMM'].value_counts())

Original filtered dataframe shape: (1212, 53)
DA + VFI only dataframe shape: (808, 53)
Removed 404 informed trader rows

Treatment distribution in DA + VFI dataframe:
treatment
Double Auction       404
AMM + VFI Battery    404
Name: count, dtype: int64

Battery type distribution in DA + VFI dataframe:
battery_type
optimal    808
Name: count, dtype: int64

AMM distribution in DA + VFI dataframe:
AMM
False    404
True     404
Name: count, dtype: int64


## Comparing Double Auction with AMM + VFI optimized battery 

In [32]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd

# List of dependent variables (outcomes)
outcome_vars = ['total_surplus_all', 'total_surplus_demand', 'total_surplus_battery', 
                'total_surplus_solar', 'total_surplus_utility']

# Independent variables (predictors)
predictors = 'AMM + C_max + C_init + q_b_max + mean_pmax + std_pmax'

# Dictionary to store regression results
regression_results = {}

print("Running Linear Regressions:")
print("="*60)

# Run regressions for each outcome variable
for outcome in outcome_vars:
    formula = f"{outcome} ~ {predictors}"
    
    # Fit the regression model
    model = smf.ols(formula, data=combined_df_da_vfi).fit()
    regression_results[outcome] = model
    
    print(f"\n{outcome.replace('_', ' ').title()}")
    print("-" * 40)
    print(f"R-squared: {model.rsquared:.4f}")
    print(f"Adj. R-squared: {model.rsquared_adj:.4f}")
    print(f"F-statistic: {model.fvalue:.4f}")
    print(f"Prob (F-statistic): {model.f_pvalue:.4e}")
    print(f"Number of observations: {int(model.nobs)}")
    
    # Show coefficients with significance
    print("\nCoefficients:")
    for param, coef in model.params.items():
        pval = model.pvalues[param]
        stars = ""
        if pval < 0.001:
            stars = "***"
        elif pval < 0.01:
            stars = "**"
        elif pval < 0.05:
            stars = "*"
        elif pval < 0.1:
            stars = "."
        
        print(f"  {param:12s}: {coef:8.4f}{stars:3s} (p={pval:.4f})")

print(f"\nSignificance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")

Running Linear Regressions:

Total Surplus All
----------------------------------------
R-squared: 0.8382
Adj. R-squared: 0.8370
F-statistic: 691.5923
Prob (F-statistic): 8.9229e-313
Number of observations: 808

Coefficients:
  Intercept   : 2666.0082*** (p=0.0000)
  AMM[T.True] : 505.1680*** (p=0.0000)
  C_max       :   6.4627    (p=0.1274)
  C_init      :  -0.2591    (p=0.9621)
  q_b_max     :   8.1525    (p=0.5207)
  mean_pmax   : 168.1487*** (p=0.0000)
  std_pmax    :  83.7215*** (p=0.0000)

Total Surplus Demand
----------------------------------------
R-squared: 0.8263
Adj. R-squared: 0.8250
F-statistic: 635.1504
Prob (F-statistic): 1.8324e-300
Number of observations: 808

Coefficients:
  Intercept   : 1442.4620*** (p=0.0000)
  AMM[T.True] : 276.1376*** (p=0.0000)
  C_max       :  -0.1232    (p=0.9823)
  C_init      :  -0.1964    (p=0.9781)
  q_b_max     :   1.6143    (p=0.9227)
  mean_pmax   : 210.4532*** (p=0.0000)
  std_pmax    : 117.2187*** (p=0.0000)

Total Surplus Battery
--

In [None]:
# Create a comprehensive results table
results_summary = []

for outcome, model in regression_results.items():
    for param in model.params.index:
        if param != 'Intercept':  # Skip intercept for cleaner table
            coef = model.params[param]
            se = model.bse[param]
            tval = model.tvalues[param]
            pval = model.pvalues[param]
            
            # Add significance stars
            stars = ""
            if pval < 0.001:
                stars = "***"
            elif pval < 0.01:
                stars = "**"
            elif pval < 0.05:
                stars = "*"
            elif pval < 0.1:
                stars = "."
            
            results_summary.append({
                'Outcome': outcome,
                'Variable': param,
                'Coefficient': coef,
                'Std_Error': se,
                'T_Value': tval,
                'P_Value': pval,
                'Significance': stars,
                'Coef_with_Stars': f"{coef:.4f}{stars}"
            })

# Convert to DataFrame for easier viewing
results_df = pd.DataFrame(results_summary)

# Create a pivot table for better visualization
pivot_results = results_df.pivot(index='Variable', columns='Outcome', values='Coef_with_Stars')

print("\nRegression Results Summary (Coefficients with Significance Stars):")
print("="*80)
print(pivot_results.to_string())

# Model fit statistics
print(f"\n\nModel Fit Statistics:")
print("="*50)
fit_stats = []
for outcome, model in regression_results.items():
    fit_stats.append({
        'Outcome': outcome,
        'R_squared': f"{model.rsquared:.4f}",
        'Adj_R_squared': f"{model.rsquared_adj:.4f}",
        'F_statistic': f"{model.fvalue:.4f}",
        'F_p_value': f"{model.f_pvalue:.4e}",
        'N_obs': int(model.nobs)
    })

fit_df = pd.DataFrame(fit_stats)
print(fit_df.to_string(index=False))



Regression Results Summary (Coefficients with Significance Stars):
Outcome     total_surplus_all total_surplus_battery total_surplus_demand total_surplus_solar total_surplus_utility
Variable                                                                                                          
AMM[T.True]       505.1680***          -129.9076***          276.1376***         126.6954***           232.2425***
C_init                -0.2591               -0.5594              -0.1964              0.5063               -0.0096
C_max                  6.4627             6.3892***              -0.1232              0.5327               -0.3360
mean_pmax         168.1487***            13.7470***          210.4532***         -47.1742***            -8.8773***
q_b_max                8.1525                0.0336               1.6143              4.9237                1.5809
std_pmax           83.7215***              3.4718**          117.2187***         -31.4852***            -5.4839***


Model Fit 

In [34]:
# Create LaTeX table manually
def create_latex_regression_table(regression_results):
    """Create a LaTeX table from regression results with significance stars."""
    
    # Get variable names (excluding intercept)
    variables = [var for var in regression_results[list(regression_results.keys())[0]].params.index 
                if var != 'Intercept']
    
    # Start LaTeX table
    latex_code = "\\begin{table}[htbp]\n"
    latex_code += "\\centering\n"
    latex_code += "\\caption{Linear Regression Results}\n"
    latex_code += "\\label{tab:regression_results}\n"
    
    # Table structure
    n_cols = len(regression_results) + 1
    latex_code += f"\\begin{{tabular}}{{l{'c' * (n_cols-1)}}}\n"
    latex_code += "\\hline\\hline\n"
    
    # Header row
    header = " & " + " & ".join([outcome.replace('_', '\\_') for outcome in regression_results.keys()]) + " \\\\\n"
    latex_code += header
    latex_code += "\\hline\n"
    
    # Coefficient rows
    for var in variables:
        row = var.replace('_', '\\_')
        
        for outcome, model in regression_results.items():
            coef = model.params[var]
            pval = model.pvalues[var]
            
            # Add significance stars
            stars = ""
            if pval < 0.001:
                stars = "^{***}"
            elif pval < 0.01:
                stars = "^{**}"
            elif pval < 0.05:
                stars = "^{*}"
            
            row += f" & {coef:.4f}{stars}"
        
        row += " \\\\\n"
        latex_code += row
        
        # Add standard errors in parentheses
        se_row = ""
        for outcome, model in regression_results.items():
            se = model.bse[var]
            se_row += f" & ({se:.4f})"
        se_row += " \\\\\n"
        latex_code += se_row
    
    # Add model statistics
    latex_code += "\\hline\n"
    
    # R-squared row
    r2_row = "R$^2$"
    for outcome, model in regression_results.items():
        r2_row += f" & {model.rsquared:.4f}"
    r2_row += " \\\\\n"
    latex_code += r2_row
    
    # Adjusted R-squared row
    adj_r2_row = "Adj. R$^2$"
    for outcome, model in regression_results.items():
        adj_r2_row += f" & {model.rsquared_adj:.4f}"
    adj_r2_row += " \\\\\n"
    latex_code += adj_r2_row
    
    # Number of observations
    n_row = "Observations"
    for outcome, model in regression_results.items():
        n_row += f" & {int(model.nobs)}"
    n_row += " \\\\\n"
    latex_code += n_row
    
    # End table
    latex_code += "\\hline\\hline\n"
    latex_code += "\\end{tabular}\n"
    latex_code += "\\begin{tablenotes}\n"
    latex_code += "\\small\n"
    latex_code += "\\item Note: Standard errors in parentheses. "
    latex_code += "Significance levels: *** p$<$0.001, ** p$<$0.01, * p$<$0.05\n"
    latex_code += "\\end{tablenotes}\n"
    latex_code += "\\end{table}\n"
    
    return latex_code

# Generate LaTeX table
latex_table = create_latex_regression_table(regression_results)

print("LaTeX Regression Table:")
print("="*50)
print(latex_table)

# Save to file
with open('regression_results.tex', 'w') as f:
    f.write(latex_table)
    
print("\nLaTeX table saved to 'regression_results.tex'")

LaTeX Regression Table:
\begin{table}[htbp]
\centering
\caption{Linear Regression Results}
\label{tab:regression_results}
\begin{tabular}{lccccc}
\hline\hline
 & total\_surplus\_all & total\_surplus\_demand & total\_surplus\_battery & total\_surplus\_solar & total\_surplus\_utility \\
\hline
AMM[T.True] & 505.1680^{***} & 276.1376^{***} & -129.9076^{***} & 126.6954^{***} & 232.2425^{***} \\
 & (35.4162) & (46.4444) & (7.5714) & (15.0123) & (4.4827) \\
C\_max & 6.4627 & -0.1232 & 6.3892^{***} & 0.5327 & -0.3360 \\
 & (4.2356) & (5.5545) & (0.9055) & (1.7954) & (0.5361) \\
C\_init & -0.2591 & -0.1964 & -0.5594 & 0.5063 & -0.0096 \\
 & (5.4516) & (7.1492) & (1.1655) & (2.3109) & (0.6900) \\
q\_b\_max & 8.1525 & 1.6143 & 0.0336 & 4.9237 & 1.5809 \\
 & (12.6885) & (16.6396) & (2.7126) & (5.3784) & (1.6060) \\
mean\_pmax & 168.1487^{***} & 210.4532^{***} & 13.7470^{***} & -47.1742^{***} & -8.8773^{***} \\
 & (3.6459) & (4.7812) & (0.7794) & (1.5454) & (0.4615) \\
std\_pmax & 83.7215^{***} & 

In [35]:
# Display individual regression summaries for detailed inspection
print("Detailed Regression Summaries:")
print("="*60)

for outcome, model in regression_results.items():
    print(f"\n{outcome.replace('_', ' ').title()} Regression:")
    print("-" * 50)
    print(model.summary())
    print("\n" + "="*60)

Detailed Regression Summaries:

Total Surplus All Regression:
--------------------------------------------------
                            OLS Regression Results                            
Dep. Variable:      total_surplus_all   R-squared:                       0.838
Model:                            OLS   Adj. R-squared:                  0.837
Method:                 Least Squares   F-statistic:                     691.6
Date:                Thu, 07 Aug 2025   Prob (F-statistic):          8.92e-313
Time:                        19:19:01   Log-Likelihood:                -6169.8
No. Observations:                 808   AIC:                         1.235e+04
Df Residuals:                     801   BIC:                         1.239e+04
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-----------------

## Comparing the ATE for Informed Trader battery over VFI battery 

In [36]:
# Create a new dataframe with only AMM treatments (VFI Battery vs Informed Trader)
# (excluding Double Auction)
combined_df_amm_only = combined_df_filtered[
    combined_df_filtered['treatment'].isin(['AMM + VFI Battery', 'AMM + Informed Trader'])
].copy()

print(f"Original filtered dataframe shape: {combined_df_filtered.shape}")
print(f"AMM only dataframe shape: {combined_df_amm_only.shape}")
print(f"Removed {combined_df_filtered.shape[0] - combined_df_amm_only.shape[0]} Double Auction rows")

# Create new 'informed' binary variable
# True for 'AMM + Informed Trader', False for 'AMM + VFI Battery'
combined_df_amm_only['informed'] = combined_df_amm_only['treatment'] == 'AMM + Informed Trader'

# Check the treatment distribution in the new dataframe
print(f"\nTreatment distribution in AMM-only dataframe:")
print(combined_df_amm_only['treatment'].value_counts())

# Verify informed variable distribution
print(f"\nInformed variable distribution:")
print(combined_df_amm_only['informed'].value_counts())

# Cross-tabulation to verify mapping
print(f"\nCross-tabulation of treatment vs informed:")
crosstab = pd.crosstab(combined_df_amm_only['treatment'], combined_df_amm_only['informed'], margins=True)
print(crosstab)

# Verify battery_type distribution
print(f"\nBattery type distribution in AMM-only dataframe:")
print(combined_df_amm_only['battery_type'].value_counts())

# Show AMM distribution (should all be True)
print(f"\nAMM distribution in AMM-only dataframe:")
print(combined_df_amm_only['AMM'].value_counts())

Original filtered dataframe shape: (1212, 53)
AMM only dataframe shape: (808, 53)
Removed 404 Double Auction rows

Treatment distribution in AMM-only dataframe:
treatment
AMM + Informed Trader    404
AMM + VFI Battery        404
Name: count, dtype: int64

Informed variable distribution:
informed
True     404
False    404
Name: count, dtype: int64

Cross-tabulation of treatment vs informed:
informed               False  True  All
treatment                              
AMM + Informed Trader      0   404  404
AMM + VFI Battery        404     0  404
All                      404   404  808

Battery type distribution in AMM-only dataframe:
battery_type
informed    404
optimal     404
Name: count, dtype: int64

AMM distribution in AMM-only dataframe:
AMM
True    808
Name: count, dtype: int64


In [37]:
# Create boxplots comparing AMM treatments
import plotly.express as px

# Define colors for AMM treatments
amm_colors = {
    'AMM + VFI Battery': '#ff7f0e', 
    'AMM + Informed Trader': '#2ca02c'
}

# List of surplus variables to plot
surplus_vars = ['total_surplus_battery', 'total_surplus_solar', 'total_surplus_utility', 
                'total_surplus_demand', 'total_surplus_all']

print("AMM Treatment Comparison - Boxplots:")
print("="*50)

# Create individual boxplots for each variable
for var in surplus_vars:
    fig = px.box(
        combined_df_amm_only, 
        x='treatment', 
        y=var,
        title=f'{var.replace("_", " ").title()} - AMM: VFI vs Informed Trader',
        color='treatment',
        color_discrete_map=amm_colors,
        points='outliers'
    )
    
    fig.update_layout(
        width=700,
        height=500,
        xaxis_title='AMM Treatment',
        yaxis_title=var.replace('_', ' ').title(),
        title_x=0.5
    )
    
    fig.show()

# Summary statistics by AMM treatment
print(f"\nSummary Statistics by AMM Treatment:")
print("="*60)

for var in surplus_vars:
    print(f"\n{var.replace('_', ' ').title()}:")
    summary = combined_df_amm_only.groupby('treatment')[var].describe()
    print(summary.round(2))

AMM Treatment Comparison - Boxplots:



Summary Statistics by AMM Treatment:

Total Surplus Battery:
                       count    mean     std     min    25%     50%     75%  \
treatment                                                                     
AMM + Informed Trader  404.0  244.34  125.72    6.15  174.7  226.38  333.36   
AMM + VFI Battery      404.0   34.09   61.63 -112.79    0.0    0.00   60.85   

                         max  
treatment                     
AMM + Informed Trader  609.5  
AMM + VFI Battery      195.9  

Total Surplus Solar:
                       count    mean     std    min     25%     50%     75%  \
treatment                                                                     
AMM + Informed Trader  404.0  640.95  309.79  34.61  392.22  701.23  919.05   
AMM + VFI Battery      404.0  683.96  292.79  42.12  524.30  773.23  923.19   

                           max  
treatment                       
AMM + Informed Trader  1028.23  
AMM + VFI Battery      1060.61  

Total Surplus Utility:
  

In [38]:
# AMM Regression Analysis: Informed Trader vs VFI Battery
import statsmodels.api as sm
import statsmodels.formula.api as smf

# List of dependent variables (outcomes)
outcome_vars = ['total_surplus_all', 'total_surplus_demand', 'total_surplus_battery', 
                'total_surplus_solar', 'total_surplus_utility']

# Independent variables (predictors) - using 'informed' instead of 'AMM'
amm_predictors = 'informed + C_max + C_init + q_b_max + mean_pmax + std_pmax'

# Dictionary to store AMM regression results
amm_regression_results = {}

print("AMM Regression Analysis: Informed Trader vs VFI Battery")
print("="*70)
print("Note: 'informed' = True for Informed Trader, False for VFI Battery")
print("="*70)

# Run regressions for each outcome variable
for outcome in outcome_vars:
    formula = f"{outcome} ~ {amm_predictors}"
    
    # Fit the regression model
    model = smf.ols(formula, data=combined_df_amm_only).fit()
    amm_regression_results[outcome] = model
    
    print(f"\n{outcome.replace('_', ' ').title()}")
    print("-" * 40)
    print(f"R-squared: {model.rsquared:.4f}")
    print(f"Adj. R-squared: {model.rsquared_adj:.4f}")
    print(f"F-statistic: {model.fvalue:.4f}")
    print(f"Prob (F-statistic): {model.f_pvalue:.4e}")
    print(f"Number of observations: {int(model.nobs)}")
    
    # Show coefficients with significance
    print("\nCoefficients:")
    for param, coef in model.params.items():
        pval = model.pvalues[param]
        stars = ""
        if pval < 0.001:
            stars = "***"
        elif pval < 0.01:
            stars = "**"
        elif pval < 0.05:
            stars = "*"
        elif pval < 0.1:
            stars = "."
        
        # Special interpretation for 'informed' coefficient
        interpretation = ""
        if param == 'informed[T.True]':
            if coef > 0:
                interpretation = " (Informed Trader > VFI Battery)"
            else:
                interpretation = " (VFI Battery > Informed Trader)"
        
        print(f"  {param:15s}: {coef:8.4f}{stars:3s} (p={pval:.4f}){interpretation}")

print(f"\nSignificance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")

AMM Regression Analysis: Informed Trader vs VFI Battery
Note: 'informed' = True for Informed Trader, False for VFI Battery

Total Surplus All
----------------------------------------
R-squared: 0.9946
Adj. R-squared: 0.9946
F-statistic: 24693.0677
Prob (F-statistic): 0.0000e+00
Number of observations: 808

Coefficients:
  Intercept      : 2434.7531*** (p=0.0000)
  informed[T.True]:  14.6342.   (p=0.0692) (Informed Trader > VFI Battery)
  C_max          :   0.8930    (p=0.3535)
  C_init         :   2.3426.   (p=0.0588)
  q_b_max        :   0.2258    (p=0.9376)
  mean_pmax      : 222.4988*** (p=0.0000)
  std_pmax       : 139.5908*** (p=0.0000)

Total Surplus Demand
----------------------------------------
R-squared: 0.9977
Adj. R-squared: 0.9976
F-statistic: 56741.4572
Prob (F-statistic): 0.0000e+00
Number of observations: 808

Coefficients:
  Intercept      : 794.9295*** (p=0.0000)
  informed[T.True]: -136.9288*** (p=0.0000) (VFI Battery > Informed Trader)
  C_max          :  -3.3098***

In [39]:
# Create comprehensive results table for AMM analysis
amm_results_summary = []

for outcome, model in amm_regression_results.items():
    for param in model.params.index:
        if param != 'Intercept':  # Skip intercept for cleaner table
            coef = model.params[param]
            se = model.bse[param]
            tval = model.tvalues[param]
            pval = model.pvalues[param]
            
            # Add significance stars
            stars = ""
            if pval < 0.001:
                stars = "***"
            elif pval < 0.01:
                stars = "**"
            elif pval < 0.05:
                stars = "*"
            elif pval < 0.1:
                stars = "."
            
            amm_results_summary.append({
                'Outcome': outcome,
                'Variable': param,
                'Coefficient': coef,
                'Std_Error': se,
                'T_Value': tval,
                'P_Value': pval,
                'Significance': stars,
                'Coef_with_Stars': f"{coef:.4f}{stars}"
            })

# Convert to DataFrame for easier viewing
amm_results_df = pd.DataFrame(amm_results_summary)

# Create a pivot table for better visualization
amm_pivot_results = amm_results_df.pivot(index='Variable', columns='Outcome', values='Coef_with_Stars')

print("\nAMM Regression Results Summary (Coefficients with Significance Stars):")
print("="*80)
print(amm_pivot_results.to_string())

# Model fit statistics
print(f"\n\nAMM Model Fit Statistics:")
print("="*50)
amm_fit_stats = []
for outcome, model in amm_regression_results.items():
    amm_fit_stats.append({
        'Outcome': outcome,
        'R_squared': f"{model.rsquared:.4f}",
        'Adj_R_squared': f"{model.rsquared_adj:.4f}",
        'F_statistic': f"{model.fvalue:.4f}",
        'F_p_value': f"{model.f_pvalue:.4e}",
        'N_obs': int(model.nobs)
    })

amm_fit_df = pd.DataFrame(amm_fit_stats)
print(amm_fit_df.to_string(index=False))


AMM Regression Results Summary (Coefficients with Significance Stars):
Outcome          total_surplus_all total_surplus_battery total_surplus_demand total_surplus_solar total_surplus_utility
Variable                                                                                                               
C_init                     2.3426.               2.0604*               0.4950              0.2403              -0.4532*
C_max                       0.8930             4.0751***           -3.3098***              0.6930            -0.5653***
informed[T.True]          14.6342.           210.2525***         -136.9288***         -43.0079***           -15.6817***
mean_pmax              222.4988***                0.5556          275.9766***         -36.7468***           -17.2866***
q_b_max                     0.2258             9.4693***          -17.5621***           7.2600***               1.0587*
std_pmax               139.5908***            -9.3897***          195.9624***         -3

## Latex Table creation

In [47]:
# Create LaTeX table for AMM analysis
def create_amm_latex_regression_table(regression_results):
    """Create a LaTeX table for AMM regression results with significance stars."""
    
    # Get variable names (excluding intercept)
    variables = [var for var in regression_results[list(regression_results.keys())[0]].params.index 
                if var != 'Intercept']
    
    # Start LaTeX table
    latex_code = "\\begin{table}[htbp]\n"
    latex_code += "\\centering\n"
    latex_code += "\\caption{AMM Analysis: Informed Trader vs VFI Battery}\n"
    latex_code += "\\label{tab:amm_regression_results}\n"
    
    # Table structure
    n_cols = len(regression_results) + 1
    latex_code += f"\\begin{{tabular}}{{l{'c' * (n_cols-1)}}}\n"
    latex_code += "\\hline\\hline\n"
    
    # Header row
    header = " & " + " & ".join([outcome.replace('_', '\\_') for outcome in regression_results.keys()]) + " \\\\\n"
    latex_code += header
    latex_code += "\\hline\n"
    
    # Coefficient rows
    for var in variables:
        # Clean up variable names for LaTeX
        var_display = var.replace('_', '\\_').replace('[T.True]', '')
        if 'informed' in var:
            var_display = 'Informed Trader'
        
        row = var_display
        
        for outcome, model in regression_results.items():
            coef = model.params[var]
            pval = model.pvalues[var]
            
            # Add significance stars
            stars = ""
            if pval < 0.001:
                stars = "^{***}"
            elif pval < 0.01:
                stars = "^{**}"
            elif pval < 0.05:
                stars = "^{*}"
            
            row += f" & {coef:.4f}{stars}"
        
        row += " \\\\\n"
        latex_code += row
        
        # Add standard errors in parentheses
        se_row = ""
        for outcome, model in regression_results.items():
            se = model.bse[var]
            se_row += f" & ({se:.4f})"
        se_row += " \\\\\n"
        latex_code += se_row
    
    # Add model statistics
    latex_code += "\\hline\n"
    
    # R-squared row
    r2_row = "R$^2$"
    for outcome, model in regression_results.items():
        r2_row += f" & {model.rsquared:.4f}"
    r2_row += " \\\\\n"
    latex_code += r2_row
    
    # Adjusted R-squared row
    adj_r2_row = "Adj. R$^2$"
    for outcome, model in regression_results.items():
        adj_r2_row += f" & {model.rsquared_adj:.4f}"
    adj_r2_row += " \\\\\n"
    latex_code += adj_r2_row
    
    # Number of observations
    n_row = "Observations"
    for outcome, model in regression_results.items():
        n_row += f" & {int(model.nobs)}"
    n_row += " \\\\\n"
    latex_code += n_row
    
    # End table
    latex_code += "\\hline\\hline\n"
    latex_code += "\\end{tabular}\n"
    latex_code += "\\begin{tablenotes}\n"
    latex_code += "\\small\n"
    latex_code += "\\item Note: Standard errors in parentheses. "
    latex_code += "Reference category: VFI Battery. "
    latex_code += "Significance levels: *** p$<$0.001, ** p$<$0.01, * p$<$0.05\n"
    latex_code += "\\end{tablenotes}\n"
    latex_code += "\\end{table}\n"
    
    return latex_code

# Generate LaTeX table for AMM analysis
amm_latex_table = create_amm_latex_regression_table(amm_regression_results)

print("LaTeX AMM Regression Table:")
print("="*50)
print(amm_latex_table)

# Save to file
with open('amm_regression_results.tex', 'w') as f:
    f.write(amm_latex_table)
    
print("\nAMM LaTeX table saved to 'amm_regression_results.tex'")

# Key findings summary
print(f"\n" + "="*60)
print("KEY FINDINGS SUMMARY:")
print("="*60)
print("The 'informed' coefficient shows the effect of using Informed Trader")
print("vs VFI Battery (reference category) in AMM systems:")
print("- Positive coefficient: Informed Trader performs better")
print("- Negative coefficient: VFI Battery performs better")
print("="*60)

LaTeX AMM Regression Table:
\begin{table}[htbp]
\centering
\caption{AMM Analysis: Informed Trader vs VFI Battery}
\label{tab:amm_regression_results}
\begin{tabular}{lccccc}
\hline\hline
 & total\_surplus\_all & total\_surplus\_demand & total\_surplus\_battery & total\_surplus\_solar & total\_surplus\_utility \\
\hline
Informed Trader & 14.6342 & -136.9288^{***} & 210.2525^{***} & -43.0079^{***} & -15.6817^{***} \\
 & (8.0429) & (6.8067) & (6.2036) & (4.6118) & (1.2538) \\
C\_max & 0.8930 & -3.3098^{***} & 4.0751^{***} & 0.6930 & -0.5653^{***} \\
 & (0.9619) & (0.8140) & (0.7419) & (0.5515) & (0.1499) \\
C\_init & 2.3426 & 0.4950 & 2.0604^{*} & 0.2403 & -0.4532^{*} \\
 & (1.2381) & (1.0478) & (0.9549) & (0.7099) & (0.1930) \\
q\_b\_max & 0.2258 & -17.5621^{***} & 9.4693^{***} & 7.2600^{***} & 1.0587^{*} \\
 & (2.8815) & (2.4386) & (2.2226) & (1.6523) & (0.4492) \\
mean\_pmax & 222.4988^{***} & 275.9766^{***} & 0.5556 & -36.7468^{***} & -17.2866^{***} \\
 & (0.8280) & (0.7007) & (0.6386)

In [41]:
# Display individual regression summaries for detailed inspection
print("Detailed AMM Regression Summaries:")
print("="*60)

for outcome, model in amm_regression_results.items():
    print(f"\n{outcome.replace('_', ' ').title()} Regression:")
    print("-" * 50)
    print(model.summary())
    print("\n" + "="*60)

Detailed AMM Regression Summaries:

Total Surplus All Regression:
--------------------------------------------------
                            OLS Regression Results                            
Dep. Variable:      total_surplus_all   R-squared:                       0.995
Model:                            OLS   Adj. R-squared:                  0.995
Method:                 Least Squares   F-statistic:                 2.469e+04
Date:                Thu, 07 Aug 2025   Prob (F-statistic):               0.00
Time:                        19:31:21   Log-Likelihood:                -4972.0
No. Observations:                 808   AIC:                             9958.
Df Residuals:                     801   BIC:                             9991.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
--------

## DA vs AMM Informed Trader Battery 

In [42]:
# Create a new dataframe with only DA and AMM + Informed Trader treatments
# (excluding AMM + VFI Battery)
combined_df_da_informed = combined_df_filtered[
    combined_df_filtered['treatment'].isin(['Double Auction', 'AMM + Informed Trader'])
].copy()

print(f"Original filtered dataframe shape: {combined_df_filtered.shape}")
print(f"DA + Informed Trader only dataframe shape: {combined_df_da_informed.shape}")
print(f"Removed {combined_df_filtered.shape[0] - combined_df_da_informed.shape[0]} VFI Battery rows")

# Check the treatment distribution in the new dataframe
print(f"\nTreatment distribution in DA + Informed Trader dataframe:")
print(combined_df_da_informed['treatment'].value_counts())

# Verify battery_type distribution
print(f"\nBattery type distribution in DA + Informed Trader dataframe:")
print(combined_df_da_informed['battery_type'].value_counts())

# Show AMM distribution
print(f"\nAMM distribution in DA + Informed Trader dataframe:")
print(combined_df_da_informed['AMM'].value_counts())

# Cross-tabulation to verify treatment mapping
print(f"\nCross-tabulation of treatment vs AMM:")
crosstab = pd.crosstab(combined_df_da_informed['treatment'], combined_df_da_informed['AMM'], margins=True)
print(crosstab)

Original filtered dataframe shape: (1212, 53)
DA + Informed Trader only dataframe shape: (808, 53)
Removed 404 VFI Battery rows

Treatment distribution in DA + Informed Trader dataframe:
treatment
Double Auction           404
AMM + Informed Trader    404
Name: count, dtype: int64

Battery type distribution in DA + Informed Trader dataframe:
battery_type
optimal     404
informed    404
Name: count, dtype: int64

AMM distribution in DA + Informed Trader dataframe:
AMM
False    404
True     404
Name: count, dtype: int64

Cross-tabulation of treatment vs AMM:
AMM                    False  True  All
treatment                              
AMM + Informed Trader      0   404  404
Double Auction           404     0  404
All                      404   404  808


## Boxplots

In [43]:
# Create boxplots comparing DA vs AMM + Informed Trader treatments
import plotly.express as px

# Define colors for DA vs Informed Trader comparison
da_informed_colors = {
    'Double Auction': '#1f77b4',
    'AMM + Informed Trader': '#2ca02c'
}

# List of surplus variables to plot
surplus_vars = ['total_surplus_battery', 'total_surplus_solar', 'total_surplus_utility', 
                'total_surplus_demand', 'total_surplus_all']

print("DA vs AMM + Informed Trader Comparison - Boxplots:")
print("="*60)

# Create individual boxplots for each variable
for var in surplus_vars:
    fig = px.box(
        combined_df_da_informed, 
        x='treatment', 
        y=var,
        title=f'{var.replace("_", " ").title()} - DA vs AMM + Informed Trader',
        color='treatment',
        color_discrete_map=da_informed_colors,
        points='outliers'
    )
    
    fig.update_layout(
        width=700,
        height=500,
        xaxis_title='Treatment',
        yaxis_title=var.replace('_', ' ').title(),
        title_x=0.5
    )
    
    fig.show()

# Summary statistics by treatment
print(f"\nSummary Statistics by Treatment:")
print("="*60)

for var in surplus_vars:
    print(f"\n{var.replace('_', ' ').title()}:")
    summary = combined_df_da_informed.groupby('treatment')[var].describe()
    print(summary.round(2))

DA vs AMM + Informed Trader Comparison - Boxplots:



Summary Statistics by Treatment:

Total Surplus Battery:
                       count    mean     std   min    25%     50%     75%  \
treatment                                                                   
AMM + Informed Trader  404.0  244.34  125.72  6.15  174.7  226.38  333.36   
Double Auction         404.0  164.00  185.88  0.00    0.0  152.50  305.00   

                         max  
treatment                     
AMM + Informed Trader  609.5  
Double Auction         700.0  

Total Surplus Solar:
                       count    mean     std     min     25%     50%     75%  \
treatment                                                                      
AMM + Informed Trader  404.0  640.95  309.79   34.61  392.22  701.23  919.05   
Double Auction         404.0  557.27  478.46  118.43  233.65  375.54  874.04   

                           max  
treatment                       
AMM + Informed Trader  1028.23  
Double Auction         1553.17  

Total Surplus Utility:
          

## Regression Analysis

In [44]:
# DA vs AMM + Informed Trader Regression Analysis
import statsmodels.api as sm
import statsmodels.formula.api as smf

# List of dependent variables (outcomes)
outcome_vars = ['total_surplus_all', 'total_surplus_demand', 'total_surplus_battery', 
                'total_surplus_solar', 'total_surplus_utility']

# Independent variables (predictors) - using AMM to compare mechanisms
da_informed_predictors = 'AMM + C_max + C_init + q_b_max + mean_pmax + std_pmax'

# Dictionary to store DA vs Informed Trader regression results
da_informed_regression_results = {}

print("DA vs AMM + Informed Trader Regression Analysis")
print("="*70)
print("Note: 'AMM' = True for AMM + Informed Trader, False for Double Auction")
print("="*70)

# Run regressions for each outcome variable
for outcome in outcome_vars:
    formula = f"{outcome} ~ {da_informed_predictors}"
    
    # Fit the regression model
    model = smf.ols(formula, data=combined_df_da_informed).fit()
    da_informed_regression_results[outcome] = model
    
    print(f"\n{outcome.replace('_', ' ').title()}")
    print("-" * 40)
    print(f"R-squared: {model.rsquared:.4f}")
    print(f"Adj. R-squared: {model.rsquared_adj:.4f}")
    print(f"F-statistic: {model.fvalue:.4f}")
    print(f"Prob (F-statistic): {model.f_pvalue:.4e}")
    print(f"Number of observations: {int(model.nobs)}")
    
    # Show coefficients with significance
    print("\nCoefficients:")
    for param, coef in model.params.items():
        pval = model.pvalues[param]
        stars = ""
        if pval < 0.001:
            stars = "***"
        elif pval < 0.01:
            stars = "**"
        elif pval < 0.05:
            stars = "*"
        elif pval < 0.1:
            stars = "."
        
        # Special interpretation for 'AMM' coefficient
        interpretation = ""
        if param == 'AMM[T.True]':
            if coef > 0:
                interpretation = " (AMM + Informed Trader > Double Auction)"
            else:
                interpretation = " (Double Auction > AMM + Informed Trader)"
        
        print(f"  {param:15s}: {coef:8.4f}{stars:3s} (p={pval:.4f}){interpretation}")

print(f"\nSignificance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")

DA vs AMM + Informed Trader Regression Analysis
Note: 'AMM' = True for AMM + Informed Trader, False for Double Auction

Total Surplus All
----------------------------------------
R-squared: 0.8390
Adj. R-squared: 0.8378
F-statistic: 695.4908
Prob (F-statistic): 1.3534e-313
Number of observations: 808

Coefficients:
  Intercept      : 2669.5367*** (p=0.0000)
  AMM[T.True]    : 519.8021*** (p=0.0000) (AMM + Informed Trader > Double Auction)
  C_max          :   5.6931    (p=0.1785)
  C_init         :   1.7202    (p=0.7520)
  q_b_max        :   7.3720    (p=0.5607)
  mean_pmax      : 167.5127*** (p=0.0000)
  std_pmax       :  84.8602*** (p=0.0000)

Total Surplus Demand
----------------------------------------
R-squared: 0.8179
Adj. R-squared: 0.8165
F-statistic: 599.5409
Prob (F-statistic): 3.1966e-292
Number of observations: 808

Coefficients:
  Intercept      : 1426.1469*** (p=0.0000)
  AMM[T.True]    : 139.2089**  (p=0.0048) (AMM + Informed Trader > Double Auction)
  C_max          :  

## Results Summary Table

In [45]:
# Create comprehensive results table for DA vs Informed Trader analysis
da_informed_results_summary = []

for outcome, model in da_informed_regression_results.items():
    for param in model.params.index:
        if param != 'Intercept':  # Skip intercept for cleaner table
            coef = model.params[param]
            se = model.bse[param]
            tval = model.tvalues[param]
            pval = model.pvalues[param]
            
            # Add significance stars
            stars = ""
            if pval < 0.001:
                stars = "***"
            elif pval < 0.01:
                stars = "**"
            elif pval < 0.05:
                stars = "*"
            elif pval < 0.1:
                stars = "."
            
            da_informed_results_summary.append({
                'Outcome': outcome,
                'Variable': param,
                'Coefficient': coef,
                'Std_Error': se,
                'T_Value': tval,
                'P_Value': pval,
                'Significance': stars,
                'Coef_with_Stars': f"{coef:.4f}{stars}"
            })

# Convert to DataFrame for easier viewing
da_informed_results_df = pd.DataFrame(da_informed_results_summary)

# Create a pivot table for better visualization
da_informed_pivot_results = da_informed_results_df.pivot(index='Variable', columns='Outcome', values='Coef_with_Stars')

print("\nDA vs AMM + Informed Trader Regression Results Summary:")
print("="*80)
print(da_informed_pivot_results.to_string())

# Model fit statistics
print(f"\n\nDA vs Informed Trader Model Fit Statistics:")
print("="*50)
da_informed_fit_stats = []
for outcome, model in da_informed_regression_results.items():
    da_informed_fit_stats.append({
        'Outcome': outcome,
        'R_squared': f"{model.rsquared:.4f}",
        'Adj_R_squared': f"{model.rsquared_adj:.4f}",
        'F_statistic': f"{model.fvalue:.4f}",
        'F_p_value': f"{model.f_pvalue:.4e}",
        'N_obs': int(model.nobs)
    })

da_informed_fit_df = pd.DataFrame(da_informed_fit_stats)
print(da_informed_fit_df.to_string(index=False))


DA vs AMM + Informed Trader Regression Results Summary:
Outcome     total_surplus_all total_surplus_battery total_surplus_demand total_surplus_solar total_surplus_utility
Variable                                                                                                          
AMM[T.True]       519.8021***            80.3450***           139.2089**          83.6875***           216.5608***
C_init                 1.7202                1.8430               0.2819              0.0389               -0.4436
C_max                  5.6931             8.3205***              -2.0606             -0.3376               -0.2293
mean_pmax         167.5127***            10.3396***          216.6787***         -51.0963***            -8.4093***
q_b_max                7.3720            23.8893***             -16.1229              0.1279               -0.5222
std_pmax           84.8602***              -2.9583.          121.8398***         -28.9464***            -5.0748***


DA vs Informed Trader

## Latex Table Generation

In [46]:
# Create LaTeX table for DA vs AMM + Informed Trader analysis
def create_da_informed_latex_regression_table(regression_results):
    """Create a LaTeX table for DA vs Informed Trader regression results with significance stars."""
    
    # Get variable names (excluding intercept)
    variables = [var for var in regression_results[list(regression_results.keys())[0]].params.index 
                if var != 'Intercept']
    
    # Start LaTeX table
    latex_code = "\\begin{table}[htbp]\n"
    latex_code += "\\centering\n"
    latex_code += "\\caption{DA vs AMM + Informed Trader Analysis}\n"
    latex_code += "\\label{tab:da_informed_regression_results}\n"
    
    # Table structure
    n_cols = len(regression_results) + 1
    latex_code += f"\\begin{{tabular}}{{l{'c' * (n_cols-1)}}}\n"
    latex_code += "\\hline\\hline\n"
    
    # Header row
    header = " & " + " & ".join([outcome.replace('_', '\\_') for outcome in regression_results.keys()]) + " \\\\\n"
    latex_code += header
    latex_code += "\\hline\n"
    
    # Coefficient rows
    for var in variables:
        # Clean up variable names for LaTeX
        var_display = var.replace('_', '\\_').replace('[T.True]', '')
        if 'AMM' in var:
            var_display = 'AMM + Informed Trader'
        
        row = var_display
        
        for outcome, model in regression_results.items():
            coef = model.params[var]
            pval = model.pvalues[var]
            
            # Add significance stars
            stars = ""
            if pval < 0.001:
                stars = "^{***}"
            elif pval < 0.01:
                stars = "^{**}"
            elif pval < 0.05:
                stars = "^{*}"
            
            row += f" & {coef:.4f}{stars}"
        
        row += " \\\\\n"
        latex_code += row
        
        # Add standard errors in parentheses
        se_row = ""
        for outcome, model in regression_results.items():
            se = model.bse[var]
            se_row += f" & ({se:.4f})"
        se_row += " \\\\\n"
        latex_code += se_row
    
    # Add model statistics
    latex_code += "\\hline\n"
    
    # R-squared row
    r2_row = "R$^2$"
    for outcome, model in regression_results.items():
        r2_row += f" & {model.rsquared:.4f}"
    r2_row += " \\\\\n"
    latex_code += r2_row
    
    # Adjusted R-squared row
    adj_r2_row = "Adj. R$^2$"
    for outcome, model in regression_results.items():
        adj_r2_row += f" & {model.rsquared_adj:.4f}"
    adj_r2_row += " \\\\\n"
    latex_code += adj_r2_row
    
    # Number of observations
    n_row = "Observations"
    for outcome, model in regression_results.items():
        n_row += f" & {int(model.nobs)}"
    n_row += " \\\\\n"
    latex_code += n_row
    
    # End table
    latex_code += "\\hline\\hline\n"
    latex_code += "\\end{tabular}\n"
    latex_code += "\\begin{tablenotes}\n"
    latex_code += "\\small\n"
    latex_code += "\\item Note: Standard errors in parentheses. "
    latex_code += "Reference category: Double Auction. "
    latex_code += "Significance levels: *** p$<$0.001, ** p$<$0.01, * p$<$0.05\n"
    latex_code += "\\end{tablenotes}\n"
    latex_code += "\\end{table}\n"
    
    return latex_code

# Generate LaTeX table for DA vs Informed Trader analysis
da_informed_latex_table = create_da_informed_latex_regression_table(da_informed_regression_results)

print("LaTeX DA vs AMM + Informed Trader Regression Table:")
print("="*60)
print(da_informed_latex_table)

# Save to file
with open('da_informed_regression_results.tex', 'w') as f:
    f.write(da_informed_latex_table)
    
print("\nDA vs Informed Trader LaTeX table saved to 'da_informed_regression_results.tex'")

# Key findings summary
print(f"\n" + "="*60)
print("KEY FINDINGS SUMMARY:")
print("="*60)
print("The 'AMM' coefficient shows the effect of using AMM + Informed Trader")
print("vs Double Auction (reference category):")
print("- Positive coefficient: AMM + Informed Trader performs better")
print("- Negative coefficient: Double Auction performs better")
print("="*60)

LaTeX DA vs AMM + Informed Trader Regression Table:
\begin{table}[htbp]
\centering
\caption{DA vs AMM + Informed Trader Analysis}
\label{tab:da_informed_regression_results}
\begin{tabular}{lccccc}
\hline\hline
 & total\_surplus\_all & total\_surplus\_demand & total\_surplus\_battery & total\_surplus\_solar & total\_surplus\_utility \\
\hline
AMM + Informed Trader & 519.8021^{***} & 139.2089^{**} & 80.3450^{***} & 83.6875^{***} & 216.5608^{***} \\
 & (35.3491) & (49.2519) & (9.8681) & (14.3468) & (4.1332) \\
C\_max & 5.6931 & -2.0606 & 8.3205^{***} & -0.3376 & -0.2293 \\
 & (4.2275) & (5.8902) & (1.1802) & (1.7158) & (0.4943) \\
C\_init & 1.7202 & 0.2819 & 1.8430 & 0.0389 & -0.4436 \\
 & (5.4413) & (7.5814) & (1.5190) & (2.2084) & (0.6362) \\
q\_b\_max & 7.3720 & -16.1229 & 23.8893^{***} & 0.1279 & -0.5222 \\
 & (12.6644) & (17.6454) & (3.5354) & (5.1400) & (1.4808) \\
mean\_pmax & 167.5127^{***} & 216.6787^{***} & 10.3396^{***} & -51.0963^{***} & -8.4093^{***} \\
 & (3.6390) & (5.0702)