In [1]:
import pandas as pd
import numpy as np
import json
import glob
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, r2_score
import plotly.graph_objects as go
import warnings

# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides pandas/statsmodels diagnostics (e.g. index-frequency
# or convergence warnings) — consider narrowing the filter while debugging.
warnings.filterwarnings("ignore")

In [None]:
# Locate the transaction extract. glob returns an empty list when the file is
# absent, so fail with an explicit message instead of an opaque IndexError.
file_paths = glob.glob("dane_1_procent.parquet")
if not file_paths:
    raise FileNotFoundError(
        "dane_1_procent.parquet not found in the current working directory"
    )

df = pd.read_parquet(file_paths[0], engine="fastparquet")

# 'timestamp' is interpreted as seconds since the Unix epoch.
df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')

# Keep only the columns used downstream; .copy() gives an independent frame so
# the new column below is not written to a slice of the full table.
df = df[['block_height', 'fee', 'datetime']].copy()

# Scale the fee by 1e8 (assumes 'fee' is denominated in BTC, giving satoshi — TODO confirm).
df['fee_sat'] = df['fee'] * 100_000_000

In [None]:
# Read the exported market-price chart; the points live under the "market-price" key.
with open("market-price.json", "r") as f:
    data = json.load(f)

# 'x' is parsed as epoch milliseconds and 'y' becomes the USD price column.
df_price = (
    pd.DataFrame(data["market-price"])
    .assign(datetime=lambda points: pd.to_datetime(points['x'], unit='ms'))
    .rename(columns={'y': 'price_usd'})
)

In [None]:
# Both frames are re-bound to datetime-indexed, chronologically sorted versions,
# as merge_asof requires sorted keys. Because the names are reused, re-running
# this cell on the already-indexed frames raises a KeyError for 'datetime'.
df = df.set_index('datetime').sort_index()
df_price = df_price.set_index('datetime').sort_index()

# Attach to each transaction the most recent price at or before its timestamp,
# then restore 'datetime' as a column and derive the fee in USD.
df_merged = (
    pd.merge_asof(
        df,
        df_price,
        left_index=True,
        right_index=True,
        direction='backward',
    )
    .reset_index()
    .assign(fee_usd=lambda merged: merged['fee'] * merged['price_usd'])
)

In [None]:
# Halvings occur at fixed block heights (every 210,000 blocks), so the split
# boundaries are stated directly instead of being looked up from the first
# sampled row on or after a calendar date. A date lookup lands on whatever
# sampled block follows midnight of that day, i.e. before the halving block
# itself (the 4th halving block was mined on 2024-04-20 UTC).
# Assumes 'block_height' holds the absolute chain height — TODO confirm.
block_halving1 = 210_000  # 1st halving (2012-11-28)
block_halving4 = 840_000  # 4th halving (2024-04-20)

# Train on everything from the 1st halving up to (excluding) the 4th halving block;
# test on the 4th halving block onward.
train_df = df_merged[(df_merged['block_height'] >= block_halving1) & (df_merged['block_height'] < block_halving4)]
test_df = df_merged[df_merged['block_height'] >= block_halving4]

In [None]:
# Weekly medians (weeks anchored on Monday) of every column; weeks where any
# aggregated column is missing are dropped.
train_weekly = train_df.resample('W-MON', on='datetime').median().dropna()
test_weekly = test_df.resample('W-MON', on='datetime').median().dropna()

# The block-height split falls inside a calendar week, so that week is
# aggregated into BOTH frames under the same label. A forecast starts one step
# after the last training week, so a test series that begins on the shared
# week would be compared against forecasts shifted by one week. Keep the
# shared week in the training set only; this is a no-op when there is no overlap.
test_weekly = test_weekly[test_weekly.index > train_weekly.index.max()]

# The model is fitted and evaluated on the weekly median fee in satoshi.
train_series = train_weekly['fee_sat']
test_series = test_weekly['fee_sat']

In [None]:
# Seasonal lag, in weekly observations.
# NOTE(review): presumably approximates the ~4-year halving cycle in weeks — confirm.
seasonal_period = 210

# Model specification gathered in one place: ARIMA(1,1,2) with a seasonal
# (0,1,1) component; stationarity/invertibility constraints are switched off.
sarima_spec = dict(
    order=(1, 1, 2),
    seasonal_order=(0, 1, 1, seasonal_period),
    enforce_stationarity=False,
    enforce_invertibility=False,
)

model = SARIMAX(train_series, **sarima_spec)
results = model.fit(disp=False)  # disp=False silences optimizer output
print(results.summary())

In [None]:
# Forecast as many steps ahead as there are weekly test observations.
n_test = test_series.shape[0]
forecast = results.forecast(steps=n_test)

# Alignment with the test data is positional: the forecast's own index is
# replaced by the test index, so step k is paired with the k-th test week.
forecast.index = test_series.index

In [None]:
# Out-of-sample accuracy of the forecast against the weekly test series.
mse = mean_squared_error(test_series, forecast)
rmse = np.sqrt(mse)
r2 = r2_score(test_series, forecast)

print(f"\nTest RMSE: {rmse:.2f}")
print(f"Test R²:   {r2:.4f}")

In [None]:
# Weekly mean of the price table (relies on df_price being indexed by datetime).
df_price_weekly = df_price.resample('W-MON').mean().reset_index()

# Actual and forecast weekly fees side by side, one row per test week.
weekly_fees = pd.DataFrame({
    'datetime': test_series.index,
    'actual_fee_sat': test_series.values,
    'predicted_fee_sat': forecast.values
})

# Attach the latest weekly price at or before each test week, then convert
# both fee columns to USD (divide by 1e8, multiply by price).
df_plot = pd.merge_asof(
    weekly_fees.sort_values('datetime'),
    df_price_weekly.sort_values('datetime'),
    on='datetime',
    direction='backward',
).assign(
    actual_fee_usd=lambda frame: frame['actual_fee_sat'] / 100_000_000 * frame['price_usd'],
    predicted_fee_usd=lambda frame: frame['predicted_fee_sat'] / 100_000_000 * frame['price_usd'],
)


In [None]:
# Actual vs. forecast weekly fee in USD: (column, legend label, line colour) per trace.
usd_traces = [
    ('actual_fee_usd', 'Rzeczywista opłata (USD)', 'orange'),
    ('predicted_fee_usd', 'Prognozowana opłata (USD)', 'blue'),
]

fig_usd = go.Figure(data=[
    go.Scatter(
        x=df_plot['datetime'],
        y=df_plot[column],
        mode='lines',
        name=label,
        line=dict(color=colour),
    )
    for column, label, colour in usd_traces
])

usd_layout = dict(
    title='SARIMA – rzeczywista vs. prognozowana średnia opłata transakcyjna (USD)',
    xaxis_title='Data',
    yaxis_title='Opłata transakcyjna (USD)',
    template='plotly_white',
    width=1200,
    height=600,
)
fig_usd.update_layout(**usd_layout)
fig_usd.show()

In [None]:
# Actual vs. forecast weekly fee in satoshi: (column, legend label, line colour) per trace.
sat_traces = [
    ('actual_fee_sat', 'Rzeczywista opłata (satoshi)', 'orange'),
    ('predicted_fee_sat', 'Prognozowana opłata (satoshi)', 'blue'),
]

fig_sat = go.Figure(data=[
    go.Scatter(
        x=df_plot['datetime'],
        y=df_plot[column],
        mode='lines',
        name=label,
        line=dict(color=colour),
    )
    for column, label, colour in sat_traces
])

sat_layout = dict(
    title='SARIMA – rzeczywista vs. prognozowana średnia opłata transakcyjna (satoshi)',
    xaxis_title='Data',
    yaxis_title='Opłata transakcyjna (satoshi)',
    template='plotly_white',
    width=1200,
    height=600,
)
fig_sat.update_layout(**sat_layout)
fig_sat.show()