In [175]:
import os
import sys

sys.path.append(os.path.abspath(".."))

import polars as pl
from pathlib import Path

from src.utils.utils import has_converged_to_price
from src.utils.utils_latex import inject_latex_table_note
from src.plotting.final_figures import plot_monopoly_experiment_svg

OUPUT_PATH = Path("../latex/imgs/res/")
OUPUT_PATH.mkdir(parents=True, exist_ok=True)
OUPUT_TABLES_PATH = Path("../latex/tables/")
OUPUT_TABLES_PATH.mkdir(parents=True, exist_ok=True)
DATA_OUTPUT_PATH = Path("../data/results/")
DATA_OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

MONOPOLY_OUPUT_PATH = Path(OUPUT_PATH) / "monopoly"
MONOPOLY_OUPUT_PATH.mkdir(parents=True, exist_ok=True)

In [178]:
df = pl.read_parquet(DATA_OUTPUT_PATH / "all_experiments.parquet")
df = df.filter(pl.col("num_agents") == 2).sort(
    ["experiment_timestamp", "agent"]
)
df = df.to_pandas()

In [181]:
df_filtered = df[df['round'] > 100].copy()

df_p1 = df_filtered[df_filtered['experiment_name'].str.contains("P1")]

df_p2 = df_filtered[df_filtered['experiment_name'].str.contains("P2")]

df_p1['run_id'] = df_p1['experiment_timestamp'] + "_" + df_p1['alpha'].astype(str)

df_pivot = df_p1.pivot_table(
    index=['run_id', 'round'],
    columns='agent',
    values='chosen_price'
).reset_index()

df_pivot = df_pivot.sort_values(by=['run_id', 'round'])

# Shift prices within each run
df_pivot['round_prev'] = df_pivot['round'] - 1

# Merge lagged prices
df_lagged = df_pivot.merge(
    df_pivot[['run_id', 'round', 'Firm A', 'Firm B']],
    left_on=['run_id', 'round_prev'],
    right_on=['run_id', 'round'],
    suffixes=('', '_lag')
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_p1['run_id'] = df_p1['experiment_timestamp'] + "_" + df_p1['alpha'].astype(str)


In [182]:
# Only keep disjoint rounds
df_lagged = df_lagged[df_lagged['round'] % 2 == 1].copy()

# Alternate which agent is the dependent one
df_lagged['use_FirmA'] = (df_lagged['round'] // 2) % 2 == 0

# Select dependent and independent variables
df_lagged['price'] = df_lagged.apply(lambda x: x['Firm A'] if x['use_FirmA'] else x['Firm B'], axis=1)
df_lagged['price_lag_self'] = df_lagged.apply(lambda x: x['Firm A' + '_lag'] if x['use_FirmA'] else x['Firm B' + '_lag'], axis=1)
df_lagged['price_lag_comp'] = df_lagged.apply(lambda x: x['Firm B' + '_lag'] if x['use_FirmA'] else x['Firm A' + '_lag'], axis=1)

# Set run-specific fixed effects
df_lagged['firm_run'] = df_lagged['run_id'] + "_" + df_lagged['use_FirmA'].astype(str)


In [183]:
df_lagged

agent,run_id,round,Firm A,Firm B,round_prev,round_lag,Firm A_lag,Firm B_lag,use_FirmA,price,price_lag_self,price_lag_comp,firm_run
1,1750988131_3.2,103,5.65,5.660,102,102,5.66,5.670,False,5.660,5.670,5.66,1750988131_3.2_False
3,1750988131_3.2,105,5.65,5.630,104,104,5.64,5.640,True,5.650,5.640,5.64,1750988131_3.2_True
5,1750988131_3.2,107,5.65,5.620,106,106,5.64,5.640,False,5.620,5.640,5.64,1750988131_3.2_False
7,1750988131_3.2,109,5.65,5.600,108,108,5.64,5.610,True,5.650,5.640,5.61,1750988131_3.2_True
9,1750988131_3.2,111,5.58,5.580,110,110,5.66,5.590,False,5.580,5.590,5.66,1750988131_3.2_False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8348,1751222073_10.0,291,19.62,13.570,290,290,19.61,13.570,False,13.570,13.570,19.61,1751222073_10.0_False
8350,1751222073_10.0,293,19.61,13.570,292,292,19.61,13.570,True,19.610,19.610,13.57,1751222073_10.0_True
8352,1751222073_10.0,295,19.60,13.570,294,294,19.61,13.570,False,13.570,13.570,19.61,1751222073_10.0_False
8354,1751222073_10.0,297,19.60,13.565,296,296,19.60,13.570,True,19.600,19.600,13.57,1751222073_10.0_True


In [184]:
import statsmodels.formula.api as smf

model = smf.ols(
    formula='price ~ price_lag_self + price_lag_comp + C(firm_run)',
    data=df_lagged
).fit(cov_type='HC1')  # robust SEs
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 1.048e+08
Date:                Tue, 01 Jul 2025   Prob (F-statistic):               0.00
Time:                        17:32:18   Log-Likelihood:                 7847.7
No. Observations:                4158   AIC:                        -1.552e+04
Df Residuals:                    4072   BIC:                        -1.498e+04
Df Model:                          85                                         
Covariance Type:                  HC1                                         
                                           coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------


# Triopoly Analysis

----

In [225]:
df = pl.read_parquet(DATA_OUTPUT_PATH / "all_experiments.parquet")
df = df.filter(pl.col("num_agents") == 4).sort(
    ["experiment_timestamp", "agent"]
)
df = df.to_pandas()
df['price'] = df['chosen_price']/df['alpha']

0. Filter the needed columns: experiment_timestamp, experiment_name, agent, round, chosen_price, profit, market_data
1. Keep the last 100 periods for each experiment
2. Calculate the mean of price by each firm in each experiment by experiment_name
3. 

In [226]:
# Step 0: Make sure the correct columns are selected
needed_columns = [
    'experiment_timestamp', 'experiment_name', 'agent', 
    'round', 'price', 'profit'
]

# Filter to only needed columns
df = df[needed_columns]

# Filter rows after round 200
df = df[df['round'] > 200]

# Group by timestamp, experiment, and agent, then calculate mean price
grouped_df = df.groupby(['experiment_timestamp', 'experiment_name', 'agent'])['price'].mean().reset_index()

# Optional: Rename column for clarity
grouped_df = grouped_df.rename(columns={'price': 'mean_price'})


In [227]:
grouped_df

Unnamed: 0,experiment_timestamp,experiment_name,agent,mean_price


In [224]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Step 1: Pivot so each agent is a column
pivot_df = grouped_df.pivot_table(
    index=['experiment_timestamp', 'experiment_name'],
    columns='agent',
    values='mean_price'
).reset_index()

# Step 2: Separate price data and add experiment_name for hue
price_data = pivot_df.drop(columns=['experiment_timestamp'])  # keep experiment_name

# Step 3: Plot with hue
sns.pairplot(price_data, hue='experiment_name')
plt.suptitle("Pair Plot of Mean Prices by Agent (Colored by Prefix P1 vs P2)", y=1.02)
plt.show()


ValueError: No variables found for grid columns.

1. Dividir el dataset entre p1 y p2
2. distribucion de los precios hue = 