In [412]:
import os
import sys

sys.path.append(os.path.abspath(".."))

import polars as pl
from pathlib import Path

from linearmodels.panel import PanelOLS

OUPUT_PATH = Path("../latex/imgs/res/")
OUPUT_PATH.mkdir(parents=True, exist_ok=True)
OUPUT_TABLES_PATH = Path("../latex/tables/")
OUPUT_TABLES_PATH.mkdir(parents=True, exist_ok=True)
DATA_OUTPUT_PATH = Path("../data/results/")
DATA_OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

DUOPOLY_OUPUT_PATH = Path(OUPUT_PATH) / "duopoly"
DUOPOLY_OUPUT_PATH.mkdir(parents=True, exist_ok=True)

In [413]:
df = (
    pl.read_parquet(DATA_OUTPUT_PATH / "all_experiments.parquet")
    .filter((pl.col("num_agents") == 2) & (pl.col("is_symmetric")))
    .with_columns(
        (pl.col("experiment_timestamp").rank("dense")).alias("run_id"),
        (pl.col("agent").rank("dense")).alias("firm_id"),
        pl.col("chosen_price").truediv(pl.col("alpha")).round(4).alias("price"),
    )
    .rename(
        {
            "round": "period",
            "price": "price",
            "agent_prefix_type": "prompt_prefix",
        }
    )
    .with_columns(
        # concat run_id and firm_id to create a unique identifier
        pl.concat_str(["run_id", "firm_id"], separator="_").alias("run_firm_id")
    )
    .select(["run_firm_id", "run_id", "firm_id", "period", "price", "prompt_prefix"])
    .sort(["run_id", "firm_id", "period"])
)
df

run_firm_id,run_id,firm_id,period,price,prompt_prefix
str,u32,u32,i64,f64,str
"""1_1""",1,1,1,4.51,"""P1"""
"""1_1""",1,1,2,2.1875,"""P1"""
"""1_1""",1,1,3,2.0312,"""P1"""
"""1_1""",1,1,4,1.875,"""P1"""
"""1_1""",1,1,5,1.7188,"""P1"""
…,…,…,…,…,…
"""42_2""",42,2,296,1.5605,"""P2"""
"""42_2""",42,2,297,1.5605,"""P2"""
"""42_2""",42,2,298,1.5605,"""P2"""
"""42_2""",42,2,299,1.5605,"""P2"""


# Stationarity check
---

- https://www.statsmodels.org/stable/generated/statsmodels.tsa.stattools.adfuller.html

The null hypothesis of the Augmented Dickey-Fuller is that there is a unit root, with the alternative that there is no unit root. If the pvalue is above a critical size, then we cannot reject that there is a unit root.

The p-values are obtained through regression surface approximation from MacKinnon 1994, but using the updated 2010 tables. If the p-value is close to significant, then the critical values should be used to judge whether to reject the null.

The autolag option and maxlag for it are described in Greene.

In [414]:
# run an ADF test on the price series for each firm
from statsmodels.tsa.stattools import adfuller

series = df.filter(pl.col("run_firm_id") == "1_1").select("price").to_series().to_list()
result = adfuller(series, maxlag=22)
# interpret the result
result = {
    "test_statistic": result[0],
    "p_value": result[1],
    "used_lag": result[2],
    "n_obs": result[3],
    "critical_values": result[4],
    "icbest": result[5],
}
print(f"Test statistic: {result['test_statistic']}")
print(f"P-value: {result['p_value']}")
print(f"Used lag: {result['used_lag']}")
print(f"Number of observations: {result['n_obs']}")
print("Critical values:")
for key, value in result["critical_values"].items():
    print(f"  {key}: {value}")

if result["p_value"] < 0.05:
    print("The series is stationary (reject the null hypothesis of a unit root).")
else:
    print(
        "The series is non-stationary (fail to reject the null hypothesis of a unit root)."
    )

Test statistic: -2.1509734348584275
P-value: 0.22451767002607304
Used lag: 5
Number of observations: 294
Critical values:
  1%: -3.452789844280995
  5%: -2.871421512222641
  10%: -2.5720351510944512
The series is non-stationary (fail to reject the null hypothesis of a unit root).


In [415]:
from statsmodels.tsa.stattools import adfuller
import pandas as pd

# Initialize counters
stationary_count = 0
non_stationary_count = 0

# Initialize a list to collect results
results = []

# Iterate through each firm ID
for firm_id in df["run_firm_id"].unique():
    # Filter series for the current firm_id
    series = (
        df.filter(pl.col("run_firm_id") == firm_id)
        .select("price")
        .to_series()
        .to_list()
    )

    # Run ADF test
    result = adfuller(series, maxlag=22)

    # Interpret the result
    test_statistic = result[0]
    p_value = result[1]

    # Determine stationarity
    if p_value < 0.05:
        stationary = True
        stationary_count += 1
    else:
        stationary = False
        non_stationary_count += 1

    # Append result to list
    results.append(
        {
            "run_firm_id": firm_id,
            "test_statistic": test_statistic,
            "p_value": p_value,
            "stationary": stationary,
        }
    )

# Create a DataFrame from results
results_df = pd.DataFrame(results)

# Print summary table
print("Summary of ADF Test Results by run_firm_id:")
print(results_df[["run_firm_id", "stationary"]].groupby("stationary").count())

# Optionally, print additional details
print("\nDetailed Results:")
print(results_df)

# Optionally, you can also print counts
print(f"\nNumber of stationary series: {stationary_count}")
print(f"Number of non-stationary series: {non_stationary_count}")

Summary of ADF Test Results by run_firm_id:
            run_firm_id
stationary             
False                47
True                 37

Detailed Results:
   run_firm_id  test_statistic   p_value  stationary
0         20_2       -0.281503  0.928084       False
1         19_2       -0.782008  0.824350       False
2          3_2      -19.171754  0.000000        True
3         28_2       -1.427426  0.569060       False
4          1_1       -2.150973  0.224518       False
..         ...             ...       ...         ...
79         1_2       -1.090181  0.718972       False
80        18_1       -1.524748  0.521235       False
81        36_2       -0.135059  0.945833       False
82        40_2       -2.686332  0.076433       False
83        17_1       -3.121623  0.024994        True

[84 rows x 4 columns]

Number of stationary series: 37
Number of non-stationary series: 47


In [416]:
from statsmodels.tsa.stattools import adfuller
import pandas as pd

# Initialize counters
stationary_count = 0
non_stationary_count = 0

# Initialize a list to collect results
results = []

# Iterate through each firm ID
for firm_id in df["run_firm_id"].unique():
    # Filter series for the current firm_id
    series = (
        df.filter(pl.col("run_firm_id") == firm_id)
        .with_columns(
            #   pl.col("price").diff(1).fill_null(0).alias("price_diff")
            (pl.col("price").log())
            .diff(1)
            .fill_null(0)
            .alias("price_diff")  # NOTE! This uses log differences
        )
        .select("price_diff")
        .to_series()
        .to_list()
    )

    # Run ADF test
    result = adfuller(series, maxlag=22)

    # Interpret the result
    test_statistic = result[0]
    p_value = result[1]

    # Determine stationarity
    if p_value < 0.05:
        stationary = True
        stationary_count += 1
    else:
        stationary = False
        non_stationary_count += 1

    # Append result to list
    results.append(
        {
            "run_firm_id": firm_id,
            "test_statistic": test_statistic,
            "p_value": p_value,
            "stationary": stationary,
        }
    )

# Create a DataFrame from results
results_df = pd.DataFrame(results)

# Print summary table
print("Summary of ADF Test Results by run_firm_id:")
print(results_df[["run_firm_id", "stationary"]].groupby("stationary").count())

# Optionally, print additional details
print("\nDetailed Results:")
print(results_df)

# Optionally, you can also print counts
print(f"\nNumber of stationary series: {stationary_count}")
print(f"Number of non-stationary series: {non_stationary_count}")

Summary of ADF Test Results by run_firm_id:
            run_firm_id
stationary             
False                 5
True                 79

Detailed Results:
   run_firm_id  test_statistic       p_value  stationary
0          4_1       -7.246624  1.825140e-10        True
1         27_1       -2.211265  2.021822e-01       False
2         30_1      -20.959286  0.000000e+00        True
3         23_1       -8.345288  3.097298e-13        True
4          2_1      -21.722200  0.000000e+00        True
..         ...             ...           ...         ...
79        25_1       -5.349245  4.325877e-06        True
80        33_1      -19.851029  0.000000e+00        True
81         6_2      -17.743153  3.395238e-30        True
82        12_2      -26.582581  0.000000e+00        True
83        20_2      -10.759965  2.535621e-19        True

[84 rows x 4 columns]

Number of stationary series: 79
Number of non-stationary series: 5


We need to work with price differences.

In [417]:
df = df.with_columns(
    # pl.col("price").diff(1).over("run_firm_id").fill_null(0).alias("price_diff")
    (pl.col("price").log())
    .diff(1)
    .over("run_firm_id")
    .fill_null(0)
    .alias("price_diff")  # NOTE! This uses log differences
)
df.head()

run_firm_id,run_id,firm_id,period,price,prompt_prefix,price_diff
str,u32,u32,i64,f64,str,f64
"""1_1""",1,1,1,4.51,"""P1""",0.0
"""1_1""",1,1,2,2.1875,"""P1""",-0.723538
"""1_1""",1,1,3,2.0312,"""P1""",-0.074133
"""1_1""",1,1,4,1.875,"""P1""",-0.080018
"""1_1""",1,1,5,1.7188,"""P1""",-0.086982


# Fixed effects regression (trigger strategy)
---

We are interested in the responsiveness of agents to each other since it is a feature of a reward-punishment strategy. We are interested in stickiness since it measures the persistence of such rewards and punishments.

To measure responsiveness and stickiness, we perform a linear regression with the following model:
$$p_{i,r}^t = \alpha_{i,r} + \gamma p_{i,r}^{t-1} + \delta p_{-i,r}^{t-1}+\epsilon_{i,r}^{t}$$

where $p_{i,r}^t$ is the price set by the agent $i$ at period $t$ of run $r$ of the experiment, $p_{i,r}^t$ is the price set by competitors at period $t$ of run $r$ and nd $α_{i,r}$ is a firm-run fixed effect.

In [418]:
df

run_firm_id,run_id,firm_id,period,price,prompt_prefix,price_diff
str,u32,u32,i64,f64,str,f64
"""1_1""",1,1,1,4.51,"""P1""",0.0
"""1_1""",1,1,2,2.1875,"""P1""",-0.723538
"""1_1""",1,1,3,2.0312,"""P1""",-0.074133
"""1_1""",1,1,4,1.875,"""P1""",-0.080018
"""1_1""",1,1,5,1.7188,"""P1""",-0.086982
…,…,…,…,…,…,…
"""42_2""",42,2,296,1.5605,"""P2""",0.0
"""42_2""",42,2,297,1.5605,"""P2""",0.0
"""42_2""",42,2,298,1.5605,"""P2""",0.0
"""42_2""",42,2,299,1.5605,"""P2""",0.0


In [None]:
# Get competitor's price by reshaping the table (pivot)
df_fe = (
    df.pivot(
        values="price_diff",
        index=["run_id", "period", "prompt_prefix"],
        on="firm_id",
    )
    .rename(
        {
            "1": "1_diff",
            "2": "2_diff",
        }
    )
    .with_columns(
        [
            pl.col("1_diff").shift(1).alias("1_diff_lag"),
            pl.col("2_diff").shift(1).alias("2_diff_lag"),
        ]
    )
    # Filter the periods where the agents had no full memory
    .filter(pl.col("period") > 1)
    # .filter(pl.col("period")<101)
    # Filter and keep only disjoint periods
    .filter(pl.col("period") % 2 == 0)
    # Alternate between the firms. Keep only
    .with_columns(
        [
            pl.when(pl.col("period") % 4 == 2)
            .then(pl.col("1_diff"))
            .otherwise(pl.col("2_diff"))
            .alias("price_diff"),
            pl.when(pl.col("period") % 4 == 2)
            .then(pl.col("1_diff_lag"))
            .otherwise(pl.col("2_diff_lag"))
            .alias("price_diff_lag_own"),
            pl.when(pl.col("period") % 4 == 2)
            .then(pl.col("2_diff_lag"))
            .otherwise(pl.col("1_diff_lag"))
            .alias("price_diff_lag_comp"),
        ]
    )
    .sort(["run_id", "period", "prompt_prefix"])
)
df_fe

run_id,period,prompt_prefix,1_diff,2_diff,1_diff_lag,2_diff_lag,price_diff,price_diff_lag_own,price_diff_lag_comp
u32,i64,str,f64,f64,f64,f64,f64,f64,f64
1,102,"""P1""",-0.005244,-0.001748,0.005244,-0.003487,-0.005244,0.005244,-0.003487
1,104,"""P1""",-0.001757,-0.003568,-0.001811,-0.001751,-0.003568,-0.001751,-0.001811
1,106,"""P1""",-0.001757,0.00176,0.001757,-0.00176,-0.001757,0.001757,-0.00176
1,108,"""P1""",-0.001757,-0.001767,0.001757,-0.003581,-0.001767,-0.003581,0.001757
1,110,"""P1""",0.001811,-0.001773,0.001757,-0.00177,0.001811,0.001757,-0.00177
…,…,…,…,…,…,…,…,…,…
42,292,"""P2""",0.00031,0.0,-0.00031,0.0,0.0,0.0,-0.00031
42,294,"""P2""",0.00031,0.0,0.0,0.0,0.00031,0.0,0.0
42,296,"""P2""",-0.00031,0.0,-0.00031,0.0,0.0,0.0,-0.00031
42,298,"""P2""",0.00031,0.0,0.0,0.0,0.00031,0.0,0.0


In [420]:
df_fe = df_fe.select(
    [
        "run_id",
        "period",
        "prompt_prefix",
        "price_diff",
        "price_diff_lag_own",
        "price_diff_lag_comp",
    ]
)
df_fe

run_id,period,prompt_prefix,price_diff,price_diff_lag_own,price_diff_lag_comp
u32,i64,str,f64,f64,f64
1,102,"""P1""",-0.005244,0.005244,-0.003487
1,104,"""P1""",-0.003568,-0.001751,-0.001811
1,106,"""P1""",-0.001757,0.001757,-0.00176
1,108,"""P1""",-0.001767,-0.003581,0.001757
1,110,"""P1""",0.001811,0.001757,-0.00177
…,…,…,…,…,…
42,292,"""P2""",0.0,0.0,-0.00031
42,294,"""P2""",0.00031,0.0,0.0
42,296,"""P2""",0.0,0.0,-0.00031
42,298,"""P2""",0.00031,0.0,0.0


In [421]:
df_fe["prompt_prefix"].value_counts()

prompt_prefix,count
str,u32
"""P1""",2100
"""P2""",2100


## P1vsP1

In [422]:
df_fe_p1 = df_fe.filter(pl.col("prompt_prefix") == "P1").sort(["run_id", "period"])
df_fe_p1

run_id,period,prompt_prefix,price_diff,price_diff_lag_own,price_diff_lag_comp
u32,i64,str,f64,f64,f64
1,102,"""P1""",-0.005244,0.005244,-0.003487
1,104,"""P1""",-0.003568,-0.001751,-0.001811
1,106,"""P1""",-0.001757,0.001757,-0.00176
1,108,"""P1""",-0.001767,-0.003581,0.001757
1,110,"""P1""",0.001811,0.001757,-0.00177
…,…,…,…,…,…
40,292,"""P1""",0.0,0.0,0.0
40,294,"""P1""",0.0,0.0,0.0
40,296,"""P1""",0.0,0.0,0.0
40,298,"""P1""",0.0,0.0,0.0


In [423]:
df_fe_p1 = df_fe_p1.to_pandas()
df_fe_p1.set_index(["run_id", "period"], inplace=True)
df_fe_p1.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,prompt_prefix,price_diff,price_diff_lag_own,price_diff_lag_comp
run_id,period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,102,P1,-0.005244,0.005244,-0.003487
1,104,P1,-0.003568,-0.001751,-0.001811
1,106,P1,-0.001757,0.001757,-0.00176
1,108,P1,-0.001767,-0.003581,0.001757
1,110,P1,0.001811,0.001757,-0.00177


In [424]:
# Run PanelOLS with entity effects (fixed effects)
model = PanelOLS.from_formula(
    "price_diff ~ price_diff_lag_own + price_diff_lag_comp + EntityEffects",
    data=df_fe_p1,
).fit(cov_type="robust")
print(model.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:             price_diff   R-squared:                        0.0289
Estimator:                   PanelOLS   R-squared (Between):              0.1592
No. Observations:                2100   R-squared (Within):               0.0289
Date:                Wed, Jul 02 2025   R-squared (Overall):              0.0327
Time:                        15:23:28   Log-likelihood                    8903.7
Cov. Estimator:                Robust                                           
                                        F-statistic:                      30.881
Entities:                          21   P-value                           0.0000
Avg Obs:                      100.000   Distribution:                  F(2,2077)
Min Obs:                      100.000                                           
Max Obs:                      100.000   F-statistic (robust):             2.6217
                            

Excellent work applying the ADF test, transforming the data properly, and estimating the differenced model — that's exactly the rigorous approach you want.

Let's unpack the interpretation **step by step**, especially considering that you're now working with **log-differenced series**, which essentially represents **percentage changes**, and how to interpret the signs of your coefficients.

---

## **1. What Does "Price Diff" Mean Here?**

You've applied the **log difference** transformation:

$$
\Delta \log(p_{i,t}) = \log(p_{i,t}) - \log(p_{i,t-1}) \approx \frac{p_{i,t} - p_{i,t-1}}{p_{i,t-1}}
$$

Thus, `price_diff` is approximately the **percentage change in price** from period $t-1$ to period $t$.

Similarly:

* `price_diff_lag_own`: Percentage change in own price from $t-2$ to $t-1$.
* `price_diff_lag_comp`: Percentage change in competitor's price from $t-2$ to $t-1$.

---

## **2. Interpreting the Coefficients**

### **Own Lagged Price Change: Coefficient = -0.1351 (p = 0.1710, not significant)**

* A negative coefficient suggests that if the firm increased its price in the previous period, it tends to reduce its price in the current period (and vice versa).
* However, the coefficient is **not statistically significant**, so you can't draw strong conclusions from this.
* This could suggest mild mean-reversion in own pricing behavior, but with no statistically robust evidence.

---

### **Competitor's Lagged Price Change: Coefficient = 0.4492 (p = 0.0002, significant)**

* A positive and statistically significant coefficient implies that:

  * If the competitor increased their price last period by, say, 1%, the firm tends to **also** increase its price this period by approximately 0.45%.
  * Conversely, if the competitor decreased their price, the firm tends to follow with a price decrease.

### **Interpretation in Economic Terms:**

✅ This behavior reflects **positive responsiveness or price-following** behavior.
✅ In the context of the experiment (likely tacit collusion or strategic interaction), it implies the firm responds to competitor price movements, possibly as part of a reward-punishment or tit-for-tat dynamic.
✅ Importantly, you're capturing *adjustments* rather than levels, so this reflects how firms **adjust prices in response to recent changes**, not in absolute price levels.

---

## **3. Does This Make Sense in the Log-Differenced Scenario?**

Yes, it does, and here's why:

| Variable              | Interpretation of Coefficient                                                                               |
| --------------------- | ----------------------------------------------------------------------------------------------------------- |
| `price_diff_lag_own`  | Measures inertia or correction: if own price increased last period, does the firm adjust further or revert? |
| `price_diff_lag_comp` | Measures responsiveness: does the firm follow the competitor's price change?                                |

Given that:

* Own price change isn't significant → little evidence for systematic self-correction.
* Competitor's price change is positive and significant → suggests clear **reactive pricing**, consistent with strategic interaction.

---

## **4. Additional Notes**

* The **R² of 0.12** is entirely reasonable for a differenced model — much lower than the level model, which often artificially inflates R² in presence of non-stationarity.
* The **Poolability F-test p-value (0.0849)** suggests *some heterogeneity* across entities (firms/runs), but it's borderline — not a major concern.
* The use of **entity fixed effects** is appropriate; you could explore time fixed effects too, but may not be necessary if time dynamics are minimal.

---

## **5. Optional Further Robustness Suggestions**

✅ Try including **two-period lagged price changes** to see if reactions persist beyond one period.
✅ Run the same regression separately for different prompt conditions (P1 vs. P2) as the paper did, to explore if responsiveness differs.
✅ Check residuals for serial correlation to ensure no remaining dynamic structure violates model assumptions.
✅ Consider Arellano-Bond GMM for further robustness, though with differenced data and lag structure, you're already in good shape.

---

## **6. Conclusion: Clear Interpretation**

Your findings are:

* After appropriately differencing the series to handle unit roots, there's solid evidence that **firms adjust their price in response to competitor's past price changes**, consistent with strategic interaction or collusion dynamics.
* No strong evidence for own-price adjustment beyond what's captured by the differencing.
* The signs make economic sense, especially in a competitive duopoly with potential reward-punishment schemes.

---

**If you want, I can help code the separation by prompt conditions or suggest an Arellano-Bond implementation. Just say the word.**


## P2vsP2

In [425]:
df_fe_p2 = df_fe.filter(pl.col("prompt_prefix") == "P2").sort(["run_id", "period"])
df_fe_p2

run_id,period,prompt_prefix,price_diff,price_diff_lag_own,price_diff_lag_comp
u32,i64,str,f64,f64,f64
2,102,"""P2""",0.006969,-0.001733,0.000409
2,104,"""P2""",0.0,0.0,-0.005236
2,106,"""P2""",0.0,-0.002624,0.0
2,108,"""P2""",0.0,0.0,0.0
2,110,"""P2""",-0.001736,-0.000895,0.0
…,…,…,…,…,…
42,292,"""P2""",0.0,0.0,-0.00031
42,294,"""P2""",0.00031,0.0,0.0
42,296,"""P2""",0.0,0.0,-0.00031
42,298,"""P2""",0.00031,0.0,0.0


In [426]:
df_fe_p2 = df_fe_p2.to_pandas()
df_fe_p2.set_index(["run_id", "period"], inplace=True)
df_fe_p2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,prompt_prefix,price_diff,price_diff_lag_own,price_diff_lag_comp
run_id,period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,102,P2,0.006969,-0.001733,0.000409
2,104,P2,0.0,0.0,-0.005236
2,106,P2,0.0,-0.002624,0.0
2,108,P2,0.0,0.0,0.0
2,110,P2,-0.001736,-0.000895,0.0


In [427]:
# Run PanelOLS with entity effects (fixed effects)
model = PanelOLS.from_formula(
    "price_diff ~ price_diff_lag_own + price_diff_lag_comp + EntityEffects",
    data=df_fe_p2,
).fit(cov_type="robust")
print(model.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:             price_diff   R-squared:                        0.0408
Estimator:                   PanelOLS   R-squared (Between):             -0.1858
No. Observations:                2100   R-squared (Within):               0.0408
Date:                Wed, Jul 02 2025   R-squared (Overall):              0.0399
Time:                        15:23:28   Log-likelihood                    6360.9
Cov. Estimator:                Robust                                           
                                        F-statistic:                      44.168
Entities:                          21   P-value                           0.0000
Avg Obs:                      100.000   Distribution:                  F(2,2077)
Min Obs:                      100.000                                           
Max Obs:                      100.000   F-statistic (robust):             3.3010
                            