In [490]:
import os
import sys

sys.path.append(os.path.abspath(".."))

import polars as pl
from pathlib import Path

from linearmodels.panel import PanelOLS

OUPUT_PATH = Path("../latex/imgs/res/")
OUPUT_PATH.mkdir(parents=True, exist_ok=True)
OUPUT_TABLES_PATH = Path("../latex/tables/")
OUPUT_TABLES_PATH.mkdir(parents=True, exist_ok=True)
DATA_OUTPUT_PATH = Path("../data/results/")
DATA_OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

DUOPOLY_OUPUT_PATH = Path(OUPUT_PATH) / "duopoly"
DUOPOLY_OUPUT_PATH.mkdir(parents=True, exist_ok=True)

In [491]:
df = (
    pl.read_parquet(DATA_OUTPUT_PATH / "all_experiments.parquet")
    .filter((pl.col("num_agents") == 2) & (pl.col("is_symmetric")))
    .with_columns(
        (pl.col("experiment_timestamp").rank("dense")).alias("run_id"),
        (pl.col("agent").rank("dense")).alias("firm_id"),
        pl.col("chosen_price").truediv(pl.col("alpha")).round(4).alias("price"),
    )
    .rename(
        {
            "round": "period",
            "price": "price",
            "agent_prefix_type": "prompt_prefix",
        }
    )
    .with_columns(
        # concat run_id and firm_id to create a unique identifier
        pl.concat_str(["run_id", "firm_id"], separator="_").alias("run_firm_id")
    )
    .select(["run_firm_id", "run_id", "firm_id", "period", "price", "prompt_prefix"])
    .sort(["run_id", "firm_id", "period"])
)
df

run_firm_id,run_id,firm_id,period,price,prompt_prefix
str,u32,u32,i64,f64,str
"""1_1""",1,1,1,4.51,"""P1"""
"""1_1""",1,1,2,2.1875,"""P1"""
"""1_1""",1,1,3,2.0312,"""P1"""
"""1_1""",1,1,4,1.875,"""P1"""
"""1_1""",1,1,5,1.7188,"""P1"""
…,…,…,…,…,…
"""42_2""",42,2,296,1.5605,"""P2"""
"""42_2""",42,2,297,1.5605,"""P2"""
"""42_2""",42,2,298,1.5605,"""P2"""
"""42_2""",42,2,299,1.5605,"""P2"""


# Stationarity check
---

- https://www.statsmodels.org/stable/generated/statsmodels.tsa.stattools.adfuller.html

The null hypothesis of the Augmented Dickey-Fuller is that there is a unit root, with the alternative that there is no unit root. If the pvalue is above a critical size, then we cannot reject that there is a unit root.

The p-values are obtained through regression surface approximation from MacKinnon 1994, but using the updated 2010 tables. If the p-value is close to significant, then the critical values should be used to judge whether to reject the null.

The autolag option and maxlag for it are described in Greene.

In [492]:
# run an ADF test on the price series for each firm
from statsmodels.tsa.stattools import adfuller

series = df.filter(pl.col("run_firm_id") == "1_1").select("price").to_series().to_list()
result = adfuller(series, maxlag=22)
# interpret the result
result = {
    "test_statistic": result[0],
    "p_value": result[1],
    "used_lag": result[2],
    "n_obs": result[3],
    "critical_values": result[4],
    "icbest": result[5],
}
print(f"Test statistic: {result['test_statistic']}")
print(f"P-value: {result['p_value']}")
print(f"Used lag: {result['used_lag']}")
print(f"Number of observations: {result['n_obs']}")
print("Critical values:")
for key, value in result["critical_values"].items():
    print(f"  {key}: {value}")

if result["p_value"] < 0.05:
    print("The series is stationary (reject the null hypothesis of a unit root).")
else:
    print(
        "The series is non-stationary (fail to reject the null hypothesis of a unit root)."
    )

Test statistic: -2.1509734348584275
P-value: 0.22451767002607304
Used lag: 5
Number of observations: 294
Critical values:
  1%: -3.452789844280995
  5%: -2.871421512222641
  10%: -2.5720351510944512
The series is non-stationary (fail to reject the null hypothesis of a unit root).


In [493]:
from statsmodels.tsa.stattools import adfuller
import pandas as pd

# Initialize counters
stationary_count = 0
non_stationary_count = 0

# Initialize a list to collect results
results = []

# Iterate through each firm ID
for firm_id in df["run_firm_id"].unique():
    # Filter series for the current firm_id
    series = (
        df.filter(pl.col("run_firm_id") == firm_id)
        .select("price")
        .to_series()
        .to_list()
    )

    # Run ADF test
    result = adfuller(series, maxlag=22)

    # Interpret the result
    test_statistic = result[0]
    p_value = result[1]

    # Determine stationarity
    if p_value < 0.05:
        stationary = True
        stationary_count += 1
    else:
        stationary = False
        non_stationary_count += 1

    # Append result to list
    results.append(
        {
            "run_firm_id": firm_id,
            "test_statistic": test_statistic,
            "p_value": p_value,
            "stationary": stationary,
        }
    )

# Create a DataFrame from results
results_df = pd.DataFrame(results)

# Print summary table
print("Summary of ADF Test Results by run_firm_id:")
print(results_df[["run_firm_id", "stationary"]].groupby("stationary").count())

# Optionally, print additional details
print("\nDetailed Results:")
print(results_df)

# Optionally, you can also print counts
print(f"\nNumber of stationary series: {stationary_count}")
print(f"Number of non-stationary series: {non_stationary_count}")

Summary of ADF Test Results by run_firm_id:
            run_firm_id
stationary             
False                47
True                 37

Detailed Results:
   run_firm_id  test_statistic       p_value  stationary
0         33_1       -7.460490  5.370102e-11        True
1         25_1       -3.441754  9.615206e-03        True
2         11_1       -7.333189  1.113929e-10        True
3         32_1       -1.744814  4.081831e-01       False
4         13_1        0.259087  9.753876e-01       False
..         ...             ...           ...         ...
79         8_2       -0.956470  7.688081e-01       False
80        13_2       -2.041308  2.687595e-01       False
81         1_1       -2.150973  2.245177e-01       False
82         5_1       -0.916635  7.824448e-01       False
83        38_1       -8.779544  2.397373e-14        True

[84 rows x 4 columns]

Number of stationary series: 37
Number of non-stationary series: 47


In [494]:
from statsmodels.tsa.stattools import adfuller
import pandas as pd

# Initialize counters
stationary_count = 0
non_stationary_count = 0

# Initialize a list to collect results
results = []

# Iterate through each firm ID
for firm_id in df["run_firm_id"].unique():
    # Filter series for the current firm_id
    series = (
        df.filter(pl.col("run_firm_id") == firm_id)
        .with_columns(
            #   pl.col("price").diff(1).fill_null(0).alias("price_diff")
            (pl.col("price").log())
            .diff(1)
            .fill_null(0)
            .alias("price_diff")  # NOTE! This uses log differences
        )
        .select("price_diff")
        .to_series()
        .to_list()
    )

    # Run ADF test
    result = adfuller(series, maxlag=22)

    # Interpret the result
    test_statistic = result[0]
    p_value = result[1]

    # Determine stationarity
    if p_value < 0.05:
        stationary = True
        stationary_count += 1
    else:
        stationary = False
        non_stationary_count += 1

    # Append result to list
    results.append(
        {
            "run_firm_id": firm_id,
            "test_statistic": test_statistic,
            "p_value": p_value,
            "stationary": stationary,
        }
    )

# Create a DataFrame from results
results_df = pd.DataFrame(results)

# Print summary table
print("Summary of ADF Test Results by run_firm_id:")
print(results_df[["run_firm_id", "stationary"]].groupby("stationary").count())

# Optionally, print additional details
print("\nDetailed Results:")
print(results_df)

# Optionally, you can also print counts
print(f"\nNumber of stationary series: {stationary_count}")
print(f"Number of non-stationary series: {non_stationary_count}")

Summary of ADF Test Results by run_firm_id:
            run_firm_id
stationary             
False                 5
True                 79

Detailed Results:
   run_firm_id  test_statistic       p_value  stationary
0         41_1       -6.826983  1.936618e-09        True
1          4_1       -7.246624  1.825140e-10        True
2         22_2       -4.916286  3.241614e-05        True
3         13_2       -5.493622  2.147682e-06        True
4         31_2       -2.140602  2.285047e-01       False
..         ...             ...           ...         ...
79        23_1       -8.345288  3.097298e-13        True
80        21_2       -5.526985  1.823339e-06        True
81         2_2       -8.042168  1.834620e-12        True
82        10_1       -2.975416  3.725491e-02        True
83        29_2       -6.950856  9.701338e-10        True

[84 rows x 4 columns]

Number of stationary series: 79
Number of non-stationary series: 5


We need to work with price differences.

In [495]:
df = df.with_columns(
    (pl.col("price").log())
    .diff(1)
    .over("run_firm_id")
    .fill_null(0)
    .alias("price_log_diff")  # NOTE! This uses log differences
)
df.head()

run_firm_id,run_id,firm_id,period,price,prompt_prefix,price_log_diff
str,u32,u32,i64,f64,str,f64
"""1_1""",1,1,1,4.51,"""P1""",0.0
"""1_1""",1,1,2,2.1875,"""P1""",-0.723538
"""1_1""",1,1,3,2.0312,"""P1""",-0.074133
"""1_1""",1,1,4,1.875,"""P1""",-0.080018
"""1_1""",1,1,5,1.7188,"""P1""",-0.086982


# Fixed effects regression (trigger strategy)
---

We are interested in the responsiveness of agents to each other since it is a feature of a reward-punishment strategy. We are interested in stickiness since it measures the persistence of such rewards and punishments.

To measure responsiveness and stickiness, we perform a linear regression with the following model:
$$p_{i,r}^t = \alpha_{i,r} + \gamma p_{i,r}^{t-1} + \delta p_{-i,r}^{t-1}+\epsilon_{i,r}^{t}$$

$$\Delta \log(p_{i,r}^t) =   \gamma \Delta \log(p_{i,r}^{t-1}) + \delta \Delta \log(p_{-i,r}^{t-1})+ \Delta \epsilon_{i,r}^{t}$$


where $p_{i,r}^t$ is the price set by the agent $i$ at period $t$ of run $r$ of the experiment, $p_{i,r}^t$ is the price set by competitors at period $t$ of run $r$ and nd $α_{i,r}$ is a firm-run fixed effect.

In [496]:
df

run_firm_id,run_id,firm_id,period,price,prompt_prefix,price_log_diff
str,u32,u32,i64,f64,str,f64
"""1_1""",1,1,1,4.51,"""P1""",0.0
"""1_1""",1,1,2,2.1875,"""P1""",-0.723538
"""1_1""",1,1,3,2.0312,"""P1""",-0.074133
"""1_1""",1,1,4,1.875,"""P1""",-0.080018
"""1_1""",1,1,5,1.7188,"""P1""",-0.086982
…,…,…,…,…,…,…
"""42_2""",42,2,296,1.5605,"""P2""",0.0
"""42_2""",42,2,297,1.5605,"""P2""",0.0
"""42_2""",42,2,298,1.5605,"""P2""",0.0
"""42_2""",42,2,299,1.5605,"""P2""",0.0


In [497]:
# Get competitor's price by reshaping the table (pivot)
df_fe = (
    df.pivot(
        values="price_log_diff",
        index=["run_id", "period", "prompt_prefix"],
        on="firm_id",
    )
    .rename(
        {
            "1": "1_log_diff",
            "2": "2_log_diff",
        }
    )
    .with_columns(
        [
            pl.col("1_log_diff").shift(1).alias("1_log_diff_lag"),
            pl.col("2_log_diff").shift(1).alias("2_log_diff_lag"),
        ]
    )
    # Filter first period
    .filter(pl.col("period") > 1)
    # Filter and keep only disjoint periods
    .filter(pl.col("period") % 2 == 0)
    # Alternate between the firms. Keep only
    .with_columns(
        [
            pl.when(pl.col("period") % 4 == 2)
            .then(pl.col("1_log_diff"))
            .otherwise(pl.col("2_log_diff"))
            .alias("price_log_diff"),
            pl.when(pl.col("period") % 4 == 2)
            .then(pl.col("1_log_diff_lag"))
            .otherwise(pl.col("2_log_diff_lag"))
            .alias("price_log_diff_lag_own"),
            pl.when(pl.col("period") % 4 == 2)
            .then(pl.col("2_log_diff_lag"))
            .otherwise(pl.col("1_log_diff_lag"))
            .alias("price_log_diff_lag_comp"),
        ]
    )
    .sort(["run_id", "period", "prompt_prefix"])
)
df_fe

run_id,period,prompt_prefix,1_log_diff,2_log_diff,1_log_diff_lag,2_log_diff_lag,price_log_diff,price_log_diff_lag_own,price_log_diff_lag_comp
u32,i64,str,f64,f64,f64,f64,f64,f64,f64
1,2,"""P1""",-0.723538,0.258771,0.0,0.0,-0.723538,0.0,0.0
1,4,"""P1""",-0.080018,0.064517,-0.074133,-0.287661,0.064517,-0.287661,-0.074133
1,6,"""P1""",0.044437,-0.074133,-0.086982,-0.133531,0.044437,-0.086982,-0.133531
1,8,"""P1""",-0.057589,-0.042546,0.083355,-0.080018,-0.042546,-0.080018,0.083355
1,10,"""P1""",0.016287,-0.016104,0.033283,0.083355,0.016287,0.033283,0.083355
…,…,…,…,…,…,…,…,…,…
42,292,"""P2""",0.00031,0.0,-0.00031,0.0,0.0,0.0,-0.00031
42,294,"""P2""",0.00031,0.0,0.0,0.0,0.00031,0.0,0.0
42,296,"""P2""",-0.00031,0.0,-0.00031,0.0,0.0,0.0,-0.00031
42,298,"""P2""",0.00031,0.0,0.0,0.0,0.00031,0.0,0.0


In [498]:
df_fe = df_fe.select(
    [
        "run_id",
        "period",
        "prompt_prefix",
        "price_log_diff",
        "price_log_diff_lag_own",
        "price_log_diff_lag_comp",
    ]
)
df_fe

run_id,period,prompt_prefix,price_log_diff,price_log_diff_lag_own,price_log_diff_lag_comp
u32,i64,str,f64,f64,f64
1,2,"""P1""",-0.723538,0.0,0.0
1,4,"""P1""",0.064517,-0.287661,-0.074133
1,6,"""P1""",0.044437,-0.086982,-0.133531
1,8,"""P1""",-0.042546,-0.080018,0.083355
1,10,"""P1""",0.016287,0.033283,0.083355
…,…,…,…,…,…
42,292,"""P2""",0.0,0.0,-0.00031
42,294,"""P2""",0.00031,0.0,0.0
42,296,"""P2""",0.0,0.0,-0.00031
42,298,"""P2""",0.00031,0.0,0.0


In [499]:
df_fe["prompt_prefix"].value_counts()

prompt_prefix,count
str,u32
"""P2""",3150
"""P1""",3150


## P1vsP1

In [500]:
df_fe_p1 = df_fe.filter(pl.col("prompt_prefix") == "P1").sort(["run_id", "period"])
df_fe_p1

run_id,period,prompt_prefix,price_log_diff,price_log_diff_lag_own,price_log_diff_lag_comp
u32,i64,str,f64,f64,f64
1,2,"""P1""",-0.723538,0.0,0.0
1,4,"""P1""",0.064517,-0.287661,-0.074133
1,6,"""P1""",0.044437,-0.086982,-0.133531
1,8,"""P1""",-0.042546,-0.080018,0.083355
1,10,"""P1""",0.016287,0.033283,0.083355
…,…,…,…,…,…
40,292,"""P1""",0.0,0.0,0.0
40,294,"""P1""",0.0,0.0,0.0
40,296,"""P1""",0.0,0.0,0.0
40,298,"""P1""",0.0,0.0,0.0


In [501]:
df_fe_p1 = df_fe_p1.to_pandas()
df_fe_p1.set_index(["run_id", "period"], inplace=True)
df_fe_p1.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,prompt_prefix,price_log_diff,price_log_diff_lag_own,price_log_diff_lag_comp
run_id,period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2,P1,-0.723538,0.0,0.0
1,4,P1,0.064517,-0.287661,-0.074133
1,6,P1,0.044437,-0.086982,-0.133531
1,8,P1,-0.042546,-0.080018,0.083355
1,10,P1,0.016287,0.033283,0.083355


In [502]:
# Run PanelOLS with entity effects (fixed effects)
model = PanelOLS.from_formula(
    "price_log_diff ~ price_log_diff_lag_own + price_log_diff_lag_comp + EntityEffects",
    data=df_fe_p1,
).fit(cov_type="robust")
print(model.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:         price_log_diff   R-squared:                        0.1225
Estimator:                   PanelOLS   R-squared (Between):              0.0937
No. Observations:                3150   R-squared (Within):               0.1225
Date:                Wed, Jul 02 2025   R-squared (Overall):              0.1223
Time:                        16:50:50   Log-likelihood                    5957.3
Cov. Estimator:                Robust                                           
                                        F-statistic:                      218.31
Entities:                          21   P-value                           0.0000
Avg Obs:                       150.00   Distribution:                  F(2,3127)
Min Obs:                       150.00                                           
Max Obs:                       150.00   F-statistic (robust):             7.7787
                            

## P2vsP2

In [456]:
df_fe_p2 = df_fe.filter(pl.col("prompt_prefix") == "P2").sort(["run_id", "period"])
df_fe_p2

run_id,period,prompt_prefix,price_diff,price_diff_lag_own,price_diff_lag_comp
u32,i64,str,f64,f64,f64
2,2,"""P2""",-1.897013,0.0,0.0
2,4,"""P2""",0.223144,0.287682,0.510655
2,6,"""P2""",0.117747,0.287682,0.182322
2,8,"""P2""",0.117783,0.133531,0.105396
2,10,"""P2""",-0.042546,0.086982,-0.057177
…,…,…,…,…,…
42,292,"""P2""",0.0,0.0,-0.00031
42,294,"""P2""",0.00031,0.0,0.0
42,296,"""P2""",0.0,0.0,-0.00031
42,298,"""P2""",0.00031,0.0,0.0


In [457]:
df_fe_p2 = df_fe_p2.to_pandas()
df_fe_p2.set_index(["run_id", "period"], inplace=True)
df_fe_p2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,prompt_prefix,price_diff,price_diff_lag_own,price_diff_lag_comp
run_id,period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,2,P2,-1.897013,0.0,0.0
2,4,P2,0.223144,0.287682,0.510655
2,6,P2,0.117747,0.287682,0.182322
2,8,P2,0.117783,0.133531,0.105396
2,10,P2,-0.042546,0.086982,-0.057177


In [458]:
# Run PanelOLS with entity effects (fixed effects)
model = PanelOLS.from_formula(
    "price_diff ~ price_diff_lag_own + price_diff_lag_comp + EntityEffects",
    data=df_fe_p2,
).fit(cov_type="robust")
print(model.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:             price_diff   R-squared:                        0.0169
Estimator:                   PanelOLS   R-squared (Between):              0.0199
No. Observations:                2850   R-squared (Within):               0.0169
Date:                Wed, Jul 02 2025   R-squared (Overall):              0.0169
Time:                        16:46:51   Log-likelihood                    3320.1
Cov. Estimator:                Robust                                           
                                        F-statistic:                      24.314
Entities:                          19   P-value                           0.0000
Avg Obs:                       150.00   Distribution:                  F(2,2829)
Min Obs:                       150.00                                           
Max Obs:                       150.00   F-statistic (robust):             5.7221
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
