In [None]:
from datetime import datetime
from functools import partial
from pyexpat import features

import humanfriendly
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import cm
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize
from matplotlib.pyplot import violinplot

from measure import get_average_minimum_dwell_time, get_average_price_delta, load_cycles, get_component_df
from meebits_analysis import load_transactions, create_transaction_graph, OMEGAS

In [None]:
# Load the transaction table once and report its size.
transaction_df = load_transactions()
transaction_count = len(transaction_df)
print(f"Loaded {transaction_count} transactions from meebits.csv")

In [None]:
# Tick-label formatters used by the plots in this notebook
def quarter_label(x, pos=None):
    """Format a matplotlib date number as a quarter label such as ``Q3 2022``.

    Args:
        x: Date value in matplotlib's numeric date representation; it is
            converted with ``mdates.num2date``.
        pos: Tick position supplied by matplotlib formatters; unused.

    Returns:
        A string of the form ``"Q<quarter> <year>"``.
    """
    moment = mdates.num2date(x)
    # Months 1-3 -> 0, 4-6 -> 1, ...; shift to the usual 1-based quarter number.
    zero_based_quarter, _ = divmod(moment.month - 1, 3)
    quarter = zero_based_quarter + 1
    return f"Q{quarter} {moment.year}"


def format_with_commas(x, pos=None):
    """Format a number with thousands separators and no decimal places.

    The signature follows matplotlib's ``FuncFormatter`` callback convention.
    ``pos`` defaults to ``None`` (like ``quarter_label``) so the helper can also
    be called directly with just a value.

    Args:
        x: Numeric value to format.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        The value rounded to zero decimals with ``,`` as thousands separator,
        e.g. ``"1,234,567"``.
    """
    return f"{x:,.0f}"


# Weekly aggregation: trade count and summed ETH volume, plus summed USD volume.
# NOTE(review): resample("W") needs a datetime-like index on transaction_df —
# presumably load_transactions() sets it; confirm.
eth_weekly = transaction_df["price_eth"].resample("W").agg(["count", "sum"])
usd_weekly = transaction_df["price_usd"].resample("W").sum()

fig, ax1 = plt.subplots(figsize=(14, 6))
ax1.grid(False)

# Main Y-axis (left): USD volume
ax1.set_yscale("log")
usd_plot, = ax1.plot(usd_weekly.index, usd_weekly, label="USD Volume", color="tab:blue")
ax1.set_ylabel("USD Volume (log scale)")
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(format_with_commas))

# Second Y-axis (right): Trade count
ax2 = ax1.twinx()
count_plot, = ax2.plot(eth_weekly.index, eth_weekly["count"], label="Trade Count", color="tab:orange", linestyle="--")
ax2.set_ylabel("Trade Count")
ax2.yaxis.set_major_formatter(ticker.FuncFormatter(format_with_commas))

# Third Y-axis (left, offset): ETH volume
ax3 = ax1.twinx()
ax3.grid(False)
# Move ax3's own left spine outwards. The previous code aliased
# spines["left"] to the right spine object, so hiding "right" afterwards also
# hid the spine that had just been made visible.
ax3.spines["left"].set_position(("axes", -0.15))
ax3.spines["left"].set_visible(True)
ax3.spines["right"].set_visible(False)  # Would only duplicate the right spine of ax1/ax2
ax3.yaxis.set_label_position("left")
ax3.yaxis.set_ticks_position("left")
ax3.set_yscale("log")
eth_plot, = ax3.plot(eth_weekly.index, eth_weekly["sum"], label="ETH Volume", color="tab:green", linestyle=":")
ax3.set_ylabel("ETH Volume (log scale)")
ax3.yaxis.set_major_formatter(ticker.FuncFormatter(format_with_commas))

# X-axis: one tick per quarter (January, April, July, October)
ax1.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1, 4, 7, 10]))
ax1.xaxis.set_major_formatter(ticker.FuncFormatter(quarter_label))
ax1.tick_params(axis='x', rotation=45)

# Combined legend: the three lines live on different axes, so collect them by hand
lines = [usd_plot, eth_plot, count_plot]
labels = [line.get_label() for line in lines]
ax1.legend(lines, labels, loc="upper left")

# Title
ax1.set_title("Meebits Weekly Trade Data: USD & ETH Volume (log scale) + Trade Count")

plt.tight_layout()
plt.show()

In [None]:
# Build the transaction graph from the loaded transactions and print its
# string representation as a quick sanity check.
transaction_graph = create_transaction_graph(transaction_df)
print(str(transaction_graph))

In [None]:
# Order all edges chronologically by their "timestamp" attribute
sorted_transaction_edges = sorted(
    transaction_graph.edges(data=True),
    key=lambda edge: edge[2]["timestamp"],
)
# Index instead of star-unpacking so that a graph with a single edge also works
first_transaction = sorted_transaction_edges[0]
last_transaction = sorted_transaction_edges[-1]

# Convert UNIX timestamps to datetime (naive, in the machine's local time zone)
transaction_begin = datetime.fromtimestamp(first_transaction[2]["timestamp"])
transaction_end = datetime.fromtimestamp(last_transaction[2]["timestamp"])

total_duration = transaction_end - transaction_begin

summary = {
    "vertices_count": len(transaction_graph.nodes()),
    "begin": transaction_begin,
    "end": transaction_end,
    # Observation span in days
    "duration": total_duration.total_seconds() / (60 * 60 * 24),
    # NOTE(review): the helper's result is divided by 60 but reported as "days"
    # below — confirm the unit returned by get_average_minimum_dwell_time.
    "dwell_days": get_average_minimum_dwell_time(transaction_graph) / 60,
    # Edges per node per second of the observation span
    "transaction_rate": (
            transaction_graph.number_of_edges() / transaction_graph.number_of_nodes() / total_duration.total_seconds()
    ),
}

# Pretty print with humanfriendly duration
print("📊 Transaction Graph Summary")
print("-" * 55)
print(f"⏱️  Begin Time              : {summary['begin'].strftime('%B %d, %Y, %H:%M:%S')}")
print(f"⏳  End Time                : {summary['end'].strftime('%B %d, %Y, %H:%M:%S')}")
# format_timespan expects seconds; summary['duration'] is in days, so passing it
# directly printed a far too short timespan.
print(f"📆  Total Duration          : {humanfriendly.format_timespan(total_duration.total_seconds())}")

print("\n🔁 Activity Metrics")
print("-" * 55)
print(f"🔗  Number of Vertices      : {summary['vertices_count']:,}")
print(f"📉  Avg Min Dwell           : {summary['dwell_days']:.2f} days")
print(f"📈  Transaction Rate        : {summary['transaction_rate']:.2e}")


In [None]:
# Per-omega lookup tables: omega -> cycles frame / components frame
cycle_dfs, component_dfs = {}, {}

for omega in OMEGAS:
    print(f"Analysis for Omega: {omega}")

    cycle_df = load_cycles(omega)
    cycle_dfs[omega] = cycle_df
    print(f"Loaded {len(cycle_df)} cycles.")

    component_df = get_component_df(cycle_df)
    component_dfs[omega] = component_df
    print(f"Loaded {len(component_df)} components.")

# Stack all per-omega component frames into one frame, tagging every row with
# the omega it was computed for.
tagged_component_frames = [
    frame.assign(omega=omega_key) for omega_key, frame in component_dfs.items()
]
components_df = pd.concat(tagged_component_frames, ignore_index=True)

In [None]:
import matplotlib.transforms as mtransforms


def plot_components(
        components_df,
        feature_series, feature_desc="", feature_unit="",
        lower_bound=None, upper_bound=None,
        monthly=False,
        begin=None, end=None,
        sample=None,
        alpha=0.3,
        y_size=10,
):
    """Draw every component as a horizontal bar on a time axis, coloured by a feature.

    Components are grouped into one horizontal band per ``omega`` value and
    jittered vertically inside the band. Rows with ``duration_minutes <= 1`` or a
    missing feature value are not drawn.

    Args:
        components_df: DataFrame providing the columns ``omega``, ``begin``,
            ``end`` and ``duration_minutes``.
        feature_series: Series with the value used to colour each component; it
            is aligned to ``components_df`` by index.
        feature_desc: Text appended to the plot title.
        feature_unit: Label of the colorbar.
        lower_bound: Feature value mapped to one end of the colour scale;
            defaults to ``feature_series.min()``.
        upper_bound: Feature value mapped to the other end of the colour scale;
            defaults to ``feature_series.max()``. Passing
            ``lower_bound > upper_bound`` reverses the colormap.
        monthly: If true, use monthly x-ticks, otherwise quarterly ticks.
        begin: Left x-limit; defaults to the earliest ``begin`` that is drawn.
        end: Right x-limit; defaults to the latest ``end`` that is drawn.
        sample: If given, draw at most this many components per omega.
        alpha: Opacity of the bars.
        y_size: Figure height in inches.

    Returns:
        None. The figure is shown with ``plt.show()`` and then closed.
    """
    minutes_per_day = 24 * 60
    # Fixed internal column name: feature_series.name may be None or could
    # collide with one of the columns selected below.
    feature_col = "_feature_value"

    plot_df = components_df[["omega", "begin", "end", "duration_minutes"]].copy()
    # Local seeded generator: the jitter is reproducible between runs and the
    # global NumPy random state is left untouched.
    rng = np.random.default_rng(0)
    plot_df["y_jitter"] = rng.uniform(-0.4, 0.4, size=len(plot_df))
    plot_df[feature_col] = feature_series

    plot_df = plot_df[
        (plot_df["duration_minutes"] > 1)
        & plot_df[feature_col].notna()
    ]

    fig, ax = plt.subplots(figsize=(10, y_size), dpi=600)

    # Only explicitly passed bounds can request an inverted colour scale.
    inverted = (
        lower_bound is not None
        and upper_bound is not None
        and lower_bound > upper_bound
    )
    if lower_bound is None:
        lower_bound = feature_series.min()
    if upper_bound is None:
        upper_bound = feature_series.max()

    # Diverging colormap for bounds symmetric around zero, sequential otherwise
    if lower_bound + upper_bound == 0:
        cmap = plt.get_cmap("coolwarm")
    else:
        cmap = plt.get_cmap("plasma")

    if inverted:
        cmap = cmap.reversed()

    begin = begin or plot_df["begin"].min()
    end = end or plot_df["end"].max()

    # Normalize raises when called with vmin > vmax, so always hand it ordered
    # limits; an inverted scale is expressed through the reversed colormap only.
    vmin, vmax = sorted((lower_bound, upper_bound))
    norm = Normalize(vmin=vmin, vmax=vmax)
    sm = ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])

    # Values beyond the limits take the end colours, hence the ≤ / ≥ tick labels.
    cbar = fig.colorbar(sm, ax=ax, orientation="vertical")
    cbar.set_label(feature_unit, loc="top")
    cbar.set_ticks([vmin, vmax])
    cbar.set_ticklabels([
        f"≤ {humanfriendly.format_number(vmin, 4)}",
        f"≥ {humanfriendly.format_number(vmax, 4)}",
    ])

    # One horizontal band per omega, ordered by omega value
    category_to_y = {
        omega: i for i, omega
        in enumerate(sorted(plot_df["omega"].unique()))
    }

    for omega, omega_plot_df in plot_df.groupby("omega"):
        if sample is not None:
            # Fixed random_state keeps the drawn subset stable between runs
            omega_plot_df = omega_plot_df.sample(
                n=min(sample, len(omega_plot_df)),
                random_state=0,
            )

        band_y = category_to_y[omega]
        rows = zip(
            omega_plot_df["begin"],
            omega_plot_df["duration_minutes"],
            omega_plot_df["y_jitter"],
            omega_plot_df[feature_col],
        )
        for row_begin, duration_minutes, y_jitter, feature_value in rows:
            ax.broken_barh(
                # The date axis is measured in days, so convert the duration
                # from minutes to days before using it as the bar width.
                [(mdates.date2num(row_begin), duration_minutes / minutes_per_day)],
                (band_y + y_jitter - 0.15, 0.07),
                facecolors=cmap(norm(feature_value)),
                edgecolors='none',
                alpha=alpha
            )

    ax.set_xlim(mdates.date2num(begin), mdates.date2num(end))

    # X-axis formatting
    if monthly:
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%b \'%y'))
    else:
        ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1, 4, 7, 10]))
        ax.xaxis.set_major_formatter(plt.FuncFormatter(quarter_label))
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # Y-axis ticks and labels
    ax.set_yticks(list(category_to_y.values()))
    ax.set_ylabel("Omega ω")

    ax.set_title(
        f"Chained Bundled Cycles from {begin.strftime('%B %Y')} until {end.strftime('%B %Y')} - {feature_desc}")
    ax.set_xlabel("Time")
    ax.grid(True, alpha=0.2)

    # Shift the y tick labels down by 18 points (18/72 inch)
    offset = mtransforms.ScaledTranslation(0, -18 / 72, fig.dpi_scale_trans)
    for label in ax.get_yticklabels():
        label.set_transform(label.get_transform() + offset)

    # Move the ticks up by 0.4 and label each band with its omega; the omega
    # values are passed to format_timespan, i.e. treated as a number of seconds.
    ax.set_yticks([tick + 0.4 for tick in ax.get_yticks()])
    ax.set_yticklabels([humanfriendly.format_timespan(td) for td in category_to_y.keys()])

    plt.tight_layout()
    plt.show()
    # Release the 600-dpi figure; this function is called repeatedly in a loop.
    plt.close(fig)

In [None]:

# Display labels; they double as the keys of features_series.
abs_price_delta = "abs. price-delta"
pos_abs_price_delta = "pos. abs. price-delta"
price_delta_hour = "price-delta per hour"
# NOTE: despite the "_minute" suffix of the variable, this feature is per hour.
pos_price_delta_minute = "pos. price-delta per hour"
transaction_rate = "transaction per minute per actor"
dwell_time = "avg. min. dwell time"

component_price_delta = components_df["price_delta"]
component_duration_hours = components_df["duration_hours"]
# Keep strictly positive deltas only; everything else (including NaN) becomes
# NaN. `.where` returns a new series, so no in-place NaN assignment is needed.
positive_price_delta = component_price_delta.where(component_price_delta > 0)

# Insertion order determines the order in which the features are plotted.
_unnamed_features = {
    abs_price_delta: component_price_delta,
    price_delta_hour: component_price_delta / component_duration_hours,
    pos_abs_price_delta: positive_price_delta,
    pos_price_delta_minute: positive_price_delta / component_duration_hours,
    transaction_rate: components_df["rate"],
    dwell_time: components_df["dwell_minutes"],
}

# `rename` returns a new Series, so components_df is never modified, and every
# feature carries its label as `.name` (the divided series would otherwise be
# unnamed).
features_series = {
    label: series.rename(label) for label, series in _unnamed_features.items()
}



In [None]:
# (feature label, colorbar unit, lower bound, upper bound) per feature.
# A lower bound greater than the upper bound (dwell time) is deliberate.
_feature_scale_table = (
    (abs_price_delta, "$", -10_000, 10_000),
    (pos_abs_price_delta, "$", 0, 100_000),
    (price_delta_hour, "$/hour", -100_000, 100_000),
    (pos_price_delta_minute, "$/hour", 0, 1_000_000),
    (transaction_rate, "tx/min/actor", 0.0, 0.005),
    (dwell_time, "dwell-time in minutes", 4, 0),
)

# Keyword arguments forwarded to plot_components for each feature
feature_metas = {
    label: {
        "feature_unit": unit,
        "lower_bound": lower,
        "upper_bound": upper,
    }
    for label, unit, lower, upper in _feature_scale_table
}

In [None]:
# Maximum number of components drawn per omega; None draws all of them.
sample = None
# sample = 50

# Zoomed-in view: calendar year 2022
zoom_window = {"begin": datetime(2022, 1, 1), "end": datetime(2023, 1, 1)}

for feature_desc in features_series:
    feature_series = features_series[feature_desc]
    feature_meta = feature_metas.get(feature_desc, {})

    # Arguments shared by both views of this feature
    partial_plot_components = partial(
        plot_components,
        components_df=components_df,
        feature_desc=feature_desc,
        feature_series=feature_series,
        sample=sample,
        y_size=10,
        **feature_meta,
    )

    # Full time range with quarterly ticks, then the zoom window with monthly ticks
    partial_plot_components(alpha=0.4, monthly=False)
    partial_plot_components(alpha=0.4, monthly=True, **zoom_window)