In [1]:
import pandas as pd
import glob
import json
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import root_mean_squared_error, r2_score
import plotly.graph_objects as go

In [None]:
# Locate the sampled transaction table. The pattern has no wildcard, so glob
# yields either a single path or an empty list.
file_paths = glob.glob("parquet/dane_1_procent.parquet")
if not file_paths:
    # Fail with an actionable message instead of a bare IndexError on [0].
    raise FileNotFoundError(
        "No file matched 'parquet/dane_1_procent.parquet'; run the notebook "
        "from the directory that contains the 'parquet/' folder."
    )

# Only these three columns are used afterwards, so do not load the rest.
df = pd.read_parquet(
    file_paths[0],
    engine="fastparquet",
    columns=['block_height', 'fee', 'timestamp'],
)
# 'timestamp' is interpreted as seconds since the Unix epoch.
df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')
df = df[['block_height', 'fee', 'datetime']]

In [None]:
# Read the exported price chart; the series is stored under "market-price".
with open("market-price.json", "r") as f:
    data = json.load(f)
price_list = data["market-price"]

# One row per price point: 'y' becomes 'price', and 'x' is parsed as epoch
# milliseconds into a new 'datetime' column (appended last).
df_price = (
    pd.DataFrame(price_list)
    .rename(columns={'y': 'price'})
    .assign(datetime=lambda frame: pd.to_datetime(frame['x'], unit='ms'))
)

In [None]:
# merge_asof needs both sides sorted on the join key, so index each frame by
# 'datetime' and sort it.
# NOTE(review): this consumes the 'datetime' column, so running the cell a
# second time on the same kernel fails on set_index.
df_price = df_price.set_index('datetime')
df_price = df_price.sort_index()

df = df.set_index('datetime')
df = df.sort_index()

# direction='backward': every transaction row takes the most recent price
# point at or before its own timestamp.
df_merged = pd.merge_asof(
    left=df,
    right=df_price,
    left_index=True,
    right_index=True,
    direction='backward',
)

In [None]:
# Bring 'datetime' back as a regular column, label the merged price as USD,
# and keep only the four columns used by the model and the plots.
df = (
    df_merged
    .reset_index()
    .rename(columns={'price': 'price_usd'})
    .loc[:, ['block_height', 'fee', 'price_usd', 'datetime']]
)

In [None]:
# Block height of the first row dated on/after each cut-off date. The frame is
# a sample of transactions, so these are the first *sampled* heights.
# NOTE(review): the dates presumably correspond to the 1st and 4th halvings
# (going by the variable names) - confirm against the exact block heights.
on_or_after_first_cutoff = df['datetime'] >= '2012-11-28'
on_or_after_second_cutoff = df['datetime'] >= '2024-04-19'
block_halving1 = df.loc[on_or_after_first_cutoff, 'block_height'].iloc[0]
block_halving4 = df.loc[on_or_after_second_cutoff, 'block_height'].iloc[0]

# Train on heights in [block_halving1, block_halving4); test on everything
# from block_halving4 onwards.
heights = df['block_height']
in_train_window = (heights >= block_halving1) & (heights < block_halving4)
in_test_window = heights >= block_halving4

train_df = df[in_train_window]
test_df = df[in_test_window]

In [None]:
# Fit the scaler on the training rows only. Fitting on the full `df` would let
# the test period (and rows before the training window) define the scaling
# range, i.e. leak held-out information into preprocessing.
scaler = MinMaxScaler().fit(train_df[['block_height']])

train_inputs = scaler.transform(train_df[['block_height']])
# Test heights lie at or above the training upper bound, so they map to
# values above 1; that is expected, the model extrapolates along the same axis.
test_inputs = scaler.transform(test_df[['block_height']])

# The regression target is the raw 'fee' column (unscaled).
train_targets = train_df['fee']
test_targets = test_df['fee']


In [18]:
# Sanity check: inputs and targets of each split must have matching row counts.
for shape_label, shaped in (
    ('train_inputs:', train_inputs),
    ('train_targets:', train_targets),
    ('test_inputs:', test_inputs),
    ('test_targets:', test_targets),
):
    print(shape_label, shaped.shape)

train_inputs: (9812889, 1)
train_targets: (9812889,)
test_inputs: (1419007, 1)
test_targets: (1419007,)


In [None]:
# Ordinary least squares of fee on the scaled block height.
model = LinearRegression()
model.fit(train_inputs, train_targets)


In [None]:
# Copy of the test rows with the model's fee prediction attached; `test_df`
# itself is left untouched.
test_df_lr = test_df.assign(predicted_fee=model.predict(test_inputs))

In [None]:
# Express the actual and the predicted fee in USD using each row's merged price.
usd_rate = test_df_lr['price_usd']
test_df_lr['fee_usd'] = test_df_lr['fee'].mul(usd_rate)
test_df_lr['predicted_fee_usd'] = test_df_lr['predicted_fee'].mul(usd_rate)

In [None]:
# Weekly ('W') mean of the actual and predicted USD fee, each returned as a
# two-column frame: 'datetime' plus the averaged value.
weekly_usd_bins = test_df_lr.resample('W', on='datetime')
weekly_actual_usd = weekly_usd_bins['fee_usd'].mean().reset_index()
weekly_pred_usd = weekly_usd_bins['predicted_fee_usd'].mean().reset_index()

In [None]:
fig = go.Figure()

# (weekly frame, value column, legend label, line colour) for each curve,
# added in this order: actual first, prediction second.
usd_curves = (
    (weekly_actual_usd, 'fee_usd', 'Rzeczywista opłata (USD)', 'orange'),
    (weekly_pred_usd, 'predicted_fee_usd', 'Prognozowana opłata (USD)', 'blue'),
)
for weekly_frame, value_column, legend_label, line_colour in usd_curves:
    fig.add_trace(go.Scatter(
        x=weekly_frame['datetime'],
        y=weekly_frame[value_column],
        mode='lines',
        name=legend_label,
        line={'color': line_colour},
    ))

# Titles and fixed canvas size for the weekly USD comparison.
usd_layout = {
    'title': 'Linear Regression - rzeczywista vs. prognozowana średnia opłata transakcyjna (USD)',
    'xaxis_title': 'Data',
    'yaxis_title': 'Opłata transakcyjna (USD)',
    'template': 'plotly_white',
    'width': 1200,
    'height': 600,
}
fig.update_layout(**usd_layout)

fig.show()

In [None]:
# Rescale the actual and predicted fee by 100,000,000 for the satoshi view.
sat_multiplier = 100_000_000
test_df_lr['fee_sat'] = test_df_lr['fee'].mul(sat_multiplier)
test_df_lr['predicted_fee_sat'] = test_df_lr['predicted_fee'].mul(sat_multiplier)

# Weekly ('W') means of both series, each as a 'datetime' + value frame.
weekly_sat_bins = test_df_lr.resample('W', on='datetime')
weekly_actual_sat = weekly_sat_bins['fee_sat'].mean().reset_index()
weekly_pred_sat = weekly_sat_bins['predicted_fee_sat'].mean().reset_index()

# Build both curves up front; the actual series is drawn first.
actual_sat_trace = go.Scatter(
    x=weekly_actual_sat['datetime'],
    y=weekly_actual_sat['fee_sat'],
    mode='lines',
    name='Rzeczywista opłata (satoshi)',
    line={'color': 'orange'},
)
predicted_sat_trace = go.Scatter(
    x=weekly_pred_sat['datetime'],
    y=weekly_pred_sat['predicted_fee_sat'],
    mode='lines',
    name='Prognozowana opłata (satoshi)',
    line={'color': 'blue'},
)

fig = go.Figure(data=[actual_sat_trace, predicted_sat_trace])

# Titles and fixed canvas size for the weekly satoshi comparison.
fig.update_layout(
    title='Linear Regression - rzeczywista vs. prognozowana średnia opłata transakcyjna (satoshi)',
    xaxis_title='Data',
    yaxis_title='Opłata transakcyjna (satoshi)',
    template='plotly_white',
    width=1200,
    height=600,
)

fig.show()
