In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# === Load Data ===
# Each day's price file is a semicolon-separated CSV; the paths are relative,
# so they are resolved against the current working directory.
prices_neg1, prices_0, prices_1 = (
    pd.read_csv(csv_name, sep=";")
    for csv_name in (
        "prices_round_2_day_-1.csv",
        "prices_round_2_day_0.csv",
        "prices_round_2_day_1.csv",
    )
)

# === Combine All Days ===
# Stack the three frames row-wise and renumber the rows 0..N-1.
all_prices = pd.concat((prices_neg1, prices_0, prices_1), ignore_index=True)

# === Filter Relevant Products ===
# Keep only the two baskets and the three components they are built from.
relevant_products = ["CROISSANTS", "JAMS", "DJEMBES", "PICNIC_BASKET1", "PICNIC_BASKET2"]
filtered_df = all_prices[all_prices["product"].isin(relevant_products)]

# === Build a Time Key That Is Unique Across Days ===
# `all_prices` stacks three separate day files. If every file restarts its
# timestamps, grouping on "timestamp" alone would average prices that belong
# to different days into a single row. When that collision is detected, shift
# each day onto one continuous timeline instead.
# NOTE(review): assumes the CSVs carry a numeric `day` column identifying the
# source day -- confirm against the data. Without it the original behaviour
# (group on the raw timestamp) is kept.
time_key = filtered_df["timestamp"]
if "day" in filtered_df.columns:
    days_per_tick = filtered_df.groupby(["timestamp", "product"])["day"].nunique()
    if (days_per_tick > 1).any():
        ticks = np.sort(filtered_df["timestamp"].unique())
        # Smallest gap between distinct timestamps = one tick of the clock.
        step = np.diff(ticks).min() if len(ticks) > 1 else 1
        # Length of one day on the timeline, so consecutive days do not overlap.
        day_length = ticks[-1] - ticks[0] + step
        day_offset = (filtered_df["day"] - filtered_df["day"].min()) * day_length
        # Keep the name "timestamp" so downstream column/index names are unchanged.
        time_key = (filtered_df["timestamp"] + day_offset).rename("timestamp")

# === Average Mid Prices (in case of duplicates) ===
# Any rows that still share the same (time, product) pair are collapsed to
# their mean so the pivot below has a unique index/column combination.
filtered_avg = (
    filtered_df.groupby([time_key, "product"])["mid_price"]
    .mean()
    .reset_index()
)

# === Pivot Table ===
# One row per timestamp, one column per product, values are mid prices.
pivot_df = filtered_avg.pivot(index="timestamp", columns="product", values="mid_price").sort_index()

# === Compute Synthetic Basket Prices ===
# Synthetic value = weighted sum of the component mid prices.
# Basket 1: 6 x CROISSANTS + 3 x JAMS + 1 x DJEMBES
pivot_df["BASKET1_SYNTH"] = (
    pivot_df["CROISSANTS"].mul(6)
    .add(pivot_df["JAMS"].mul(3))
    .add(pivot_df["DJEMBES"].mul(1))
)
# Basket 2: 4 x CROISSANTS + 2 x JAMS
pivot_df["BASKET2_SYNTH"] = (
    pivot_df["CROISSANTS"].mul(4)
    .add(pivot_df["JAMS"].mul(2))
)

# === Calculate Spread ===
# Spread = quoted basket mid price minus its synthetic value.
pivot_df["SPREAD1"] = pivot_df["PICNIC_BASKET1"].sub(pivot_df["BASKET1_SYNTH"])
pivot_df["SPREAD2"] = pivot_df["PICNIC_BASKET2"].sub(pivot_df["BASKET2_SYNTH"])

# === Z-Score Calculation ===
# For each spread, add its rolling mean, rolling standard deviation and the
# resulting z-score ((value - mean) / std) as new columns. The first
# `window - 1` rows have no full window and therefore come out as NaN.
window = 50  # number of consecutive rows in each rolling window
for i in (1, 2):
    spread = f"SPREAD{i}"
    rolling_spread = pivot_df[spread].rolling(window=window)
    pivot_df[f"{spread}_MEAN"] = rolling_spread.mean()
    pivot_df[f"{spread}_STD"] = rolling_spread.std()
    deviation = pivot_df[spread].sub(pivot_df[f"{spread}_MEAN"])
    pivot_df[f"{spread}_ZSCORE"] = deviation.div(pivot_df[f"{spread}_STD"])

# === Display Key Columns ===
# Preview the first ten rows in which every spread statistic is available
# (rows with any NaN in these columns are dropped first).
print(
    pivot_df.loc[
        :,
        [
            "SPREAD1", "SPREAD1_MEAN", "SPREAD1_STD", "SPREAD1_ZSCORE",
            "SPREAD2", "SPREAD2_MEAN", "SPREAD2_STD", "SPREAD2_ZSCORE",
        ],
    ]
    .dropna()
    .iloc[:10]
)

# === Plotting ===
# Four figures: for each basket, one chart of the spread with its rolling
# mean and one chart of the spread's z-score. The figures are created in the
# same order as before (fig1..fig4) so their names and numbers are unchanged.
fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()
fig3, ax3 = plt.subplots()
fig4, ax4 = plt.subplots()

for basket, spread_ax, zscore_ax in ((1, ax1, ax2), (2, ax3, ax4)):
    # Spread and its rolling mean, with a zero reference line.
    pivot_df[[f"SPREAD{basket}", f"SPREAD{basket}_MEAN"]].plot(ax=spread_ax)
    spread_ax.set_title(f"Basket {basket} Spread vs Synthetic Value")
    spread_ax.set_xlabel("Timestamp")
    spread_ax.set_ylabel("Spread")
    spread_ax.axhline(0, color="gray", linestyle="--")

    # Z-score with +1 / -1 reference lines.
    pivot_df[f"SPREAD{basket}_ZSCORE"].plot(ax=zscore_ax)
    zscore_ax.set_title(f"Z-score of Basket {basket} Spread")
    zscore_ax.set_xlabel("Timestamp")
    # Label the y-axis here too, for consistency with the spread charts.
    zscore_ax.set_ylabel("Z-score")
    zscore_ax.axhline(1, color="red", linestyle="--")
    zscore_ax.axhline(-1, color="green", linestyle="--")

# plt.tight_layout() only adjusts the *current* figure (the last one created),
# so lay out every figure explicitly.
for fig in (fig1, fig2, fig3, fig4):
    fig.tight_layout()

plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'prices_round_2_day_-1.csv'