In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

def read_market_data(file_path):
    """Read a single prices CSV file into a dataframe."""
    df = pd.read_csv(file_path, sep=';')
    # Convert timestamp to int if needed
    if 'timestamp' in df.columns:
        df['timestamp'] = df['timestamp'].astype(int)

    # add weighted mid price column
    df['weighted_mid_price'] = (df['bid_price_1'] * df['bid_volume_1'] + df['bid_price_2'].fillna(0) * df['bid_volume_2'].fillna(0) + df['ask_price_1'] * df['ask_volume_1'] + df['ask_price_2'].fillna(0) * df['ask_volume_2'].fillna(0)) / (df['bid_volume_1'] + df['bid_volume_2'].fillna(0) + df['ask_volume_1'] + df['ask_volume_2'].fillna(0))
    return df

def read_all_prices_data(round_num, base_dir="round-{}-island-data-bottle"):
    """Read all prices data for a given round and concatenate into a single dataframe."""
    round_dir = base_dir.format(round_num)
    pattern = os.path.join(round_dir, f"prices_round_{round_num}_day_*.csv")
    files = glob.glob(pattern)
    
    dfs = []
    for file in files:
        df = read_market_data(file)
        # Extract day from filename
        day = int(file.split("_day_")[1].split(".csv")[0])
        df['day'] = day
        dfs.append(df)
    
    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()

def read_trade_data(file_path):
    """Read a single trades CSV file into a dataframe."""
    df = pd.read_csv(file_path, sep=';')
    # Convert numeric columns
    if 'price' in df.columns:
        df['price'] = pd.to_numeric(df['price'])
    if 'quantity' in df.columns:
        df['quantity'] = pd.to_numeric(df['quantity'])
    if 'timestamp' in df.columns:
        df['timestamp'] = df['timestamp'].astype(int)
    return df

def read_all_trades_data(round_num, base_dir="round-{}-island-data-bottle"):
    """Read all trades data for a given round and concatenate into a single dataframe."""
    round_dir = base_dir.format(round_num)
    pattern = os.path.join(round_dir, f"trades_round_{round_num}_day_*.csv")
    files = glob.glob(pattern)
    
    dfs = []
    for file in files:
        df = read_trade_data(file)
        # Extract day from filename
        day = int(file.split("_day_")[1].split(".csv")[0])
        df['day'] = day
        dfs.append(df)
    
    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()

# Example usage:
prices_df = read_all_prices_data(1)
trades_df = read_all_trades_data(1)

In [21]:
prices_df

Unnamed: 0,day,timestamp,product,bid_price_1,bid_volume_1,bid_price_2,bid_volume_2,bid_price_3,bid_volume_3,ask_price_1,ask_volume_1,ask_price_2,ask_volume_2,ask_price_3,ask_volume_3,mid_price,profit_and_loss,weighted_mid_price
0,-1,0,SQUID_INK,2005,1,2002.0,31.0,,,2006,31,,,,,2005.5,0.0,2004.015873
1,-1,0,RAINFOREST_RESIN,10002,1,9996.0,2.0,9995.0,29.0,10004,2,10005.0,29.0,,,10003.0,0.0,10004.323529
2,-1,0,KELP,2028,1,2026.0,2.0,2025.0,29.0,2029,31,,,,,2028.5,0.0,2028.794118
3,-1,100,KELP,2025,24,,,,,2028,2,2029.0,22.0,,,2026.5,0.0,2026.958333
4,-1,100,RAINFOREST_RESIN,9996,2,9995.0,22.0,,,10004,2,10005.0,22.0,,,10000.0,0.0,10000.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89995,0,999800,KELP,2032,27,,,,,2034,1,2035.0,27.0,,,2033.0,0.0,2033.509091
89996,0,999800,RAINFOREST_RESIN,9995,27,,,,,10005,27,,,,,10000.0,0.0,10000.000000
89997,0,999900,RAINFOREST_RESIN,9996,2,9995.0,21.0,,,10004,2,10005.0,21.0,,,10000.0,0.0,10000.000000
89998,0,999900,SQUID_INK,1838,23,,,,,1841,23,,,,,1839.5,0.0,1839.500000


In [22]:
trades_df

Unnamed: 0,timestamp,buyer,seller,symbol,currency,price,quantity,day
0,0,,,KELP,SEASHELLS,2029.0,13,-1
1,0,,,KELP,SEASHELLS,2029.0,1,-1
2,0,,,RAINFOREST_RESIN,SEASHELLS,10004.0,1,-1
3,0,,,SQUID_INK,SEASHELLS,2006.0,13,-1
4,0,,,SQUID_INK,SEASHELLS,2006.0,1,-1
...,...,...,...,...,...,...,...,...
24881,999500,,,KELP,SEASHELLS,2032.0,2,0
24882,999500,,,RAINFOREST_RESIN,SEASHELLS,9995.0,2,0
24883,999500,,,SQUID_INK,SEASHELLS,1830.0,2,0
24884,999600,,,KELP,SEASHELLS,2032.0,1,0


In [None]:
def partition_prices(prices_df, products):
    partitioned_dfs = []
    
    for product in products:
        product_df = prices_df[prices_df['product'] == product].copy()
        partitioned_dfs.append(product_df)
    
    return tuple(partitioned_dfs)


squid, resin, kelp = partition_prices(prices_df, ["SQUID_INK", "RAINFOREST_RESIN", "KELP"])

for product in prices_df['product'].unique():
    plt.figure(figsize=(24, 10))
    plt.title(product)

    product_log = prices_df[prices_df['product'] == product]

    
    plt.plot(product_log['timestamp'], product_log['bid_price_1'], label='bid price 1', color='blue', alpha=0.5, linewidth=0.5)
    plt.plot(product_log['timestamp'], product_log['ask_price_1'], label='ask price 1', color='red', alpha=0.5, linewidth=0.5)

    for trade in trades_df[trades_df['symbol'] == product].itertuples():
        if trade.buyer == 'SUBMISSION':
            # plot a red dot for a new order
            plt.plot(trade.timestamp, trade.price, 'ro')
        elif trade.seller == 'SUBMISSION':
            # plot a green dot for a new order
            plt.plot(trade.timestamp, trade.price, 'go')
        else:
            # plot a blue line for a trade
            plt.plot([trade.timestamp, trade.timestamp], [trade.price, trade.price], 'bo', alpha=0.5)

    plt.plot(product_log['timestamp'], product_log['mid_price'], label='mid price', color='black', linewidth=2)

    plt.legend()
    plt.show()
    