In [2]:
import pandas as pd
import numpy as np
import json

# Read the raw on-chain CSV and, in the same step, turn any empty-string
# cells into NaN so missing values share one representation.
df_onchain = pd.read_csv('../data/raw/onchain_data.csv').replace('', np.nan)

# Names of the columns that are expected to be numeric, grouped by prefix.
numeric_columns = [
    # mempool_*
    'mempool_count', 'mempool_vsize', 'mempool_total_fee_btc',
    # fee_*
    'fee_fastest_satvB', 'fee_30min_satvB', 'fee_60min_satvB',
    'fee_economy_satvB', 'fee_min_satvB',
    # tip_*
    'tip_height', 'tip_timestamp', 'tip_tx_count', 'tip_size', 'tip_weight',
    # bc_*
    'bc_blocks', 'bc_transactions', 'bc_mempool_transactions',
    'bc_circulation', 'bc_market_price_usd', 'bc_hashrate_24h',
    'bc_difficulty', 'bc_average_transaction_fee_24h_usd',
    'bc_average_transaction_value_24h_usd',
    'bc_median_transaction_fee_24h_usd', 'bc_tip_tx_count',
    'bc_tip_size', 'bc_tip_weight', 'bc_tip_difficulty',
]

# Coerce each listed column that actually exists in the frame; columns absent
# from the CSV are skipped, and unparseable values become NaN (errors='coerce').
for col in numeric_columns:
    if col not in df_onchain.columns:
        continue
    df_onchain[col] = pd.to_numeric(df_onchain[col], errors='coerce')

# Parse the 'timestamp' column into datetimes.
df_onchain = df_onchain.assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)

# Heading and an 80-character rule, then the frame itself as the cell's
# last expression so the notebook renders it.
print("Complete OnChain Data:", "=" * 80, sep="\n")
df_onchain


Complete OnChain Data:


Unnamed: 0,timestamp,symbol,mempool_count,mempool_vsize,mempool_total_fee_btc,fee_fastest_satvB,fee_30min_satvB,fee_60min_satvB,fee_economy_satvB,fee_min_satvB,...,bc_difficulty,bc_average_transaction_fee_24h_usd,bc_average_transaction_value_24h_usd,bc_median_transaction_fee_24h_usd,bc_tip_id,bc_tip_time,bc_tip_tx_count,bc_tip_size,bc_tip_weight,bc_tip_difficulty
0,2025-09-25 18:40:53.187531+00:00,BTC/USDT,160960,58767557,19067454,4,3,3,2,1,...,142342600000000.0,,,,,,,,,
1,2025-09-25 18:41:07.923720+00:00,BTC/USDT,161031,58785212,19154993,4,3,3,2,1,...,,,,,,,,,,


In [3]:
# Read the processed feature CSV; empty-string cells are replaced with NaN
# right away so missing values are represented consistently.
df_features = (
    pd.read_csv('../data/processed/onchain_features.csv')
    .replace('', np.nan)
)

# Feature columns expected to be numeric, one family per line.
numeric_columns = [
    'mempool_congestion_score', 'mempool_trend', 'mempool_volatility',
    'fee_pressure_score', 'fee_trend', 'fee_volatility',
    'network_activity_score', 'network_trend', 'network_volatility',
    'block_production_rate', 'block_size_trend', 'block_weight_trend',
    'market_structure_score', 'liquidity_score', 'volatility_score',
]

# Convert only the listed columns that are present in the frame; values that
# cannot be parsed as numbers are turned into NaN rather than raising.
for col in numeric_columns:
    if col not in df_features.columns:
        continue
    df_features[col] = pd.to_numeric(df_features[col], errors='coerce')

# Parse the 'timestamp' column into datetimes.
df_features = df_features.assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)

# Heading, an 80-character rule, then the frame as the cell's displayed value.
print("Complete OnChain Features:", "=" * 80, sep="\n")
df_features


Complete OnChain Features:


Unnamed: 0,timestamp,symbol,mempool_congestion_score,mempool_trend,mempool_volatility,fee_pressure_score,fee_trend,fee_volatility,network_activity_score,network_trend,network_volatility,block_production_rate,block_size_trend,block_weight_trend,market_structure_score,liquidity_score,volatility_score
0,2025-09-25 18:41:07.923720+00:00,BTC/USDT,1.0,71.0,50.204581,0.3,-3.140185e-16,0.0,0.0,0.0,0.0,244.296541,1.102052e-11,-4.336665e-10,0.49,0.0,0.001673


In [4]:
# Load the onchain signals JSON.
# The encoding is passed explicitly: JSON text is UTF-8, whereas open() without
# an `encoding` argument uses the platform's locale encoding, which can
# mis-decode (or fail on) non-ASCII characters in the file.
with open('../data/outputs/onchain_signals.json', 'r', encoding='utf-8') as f:
    signals_data = json.load(f)

# Convert the parsed JSON to a DataFrame
df_signals = pd.DataFrame(signals_data)

# Convert timestamp to datetime
df_signals['timestamp'] = pd.to_datetime(df_signals['timestamp'])

# Convert numeric columns; only columns present in the frame are touched, and
# values that cannot be parsed become NaN (errors='coerce') instead of raising.
numeric_columns = ['signal_strength', 'confidence', 'prediction_hours']
for col in numeric_columns:
    if col in df_signals.columns:
        df_signals[col] = pd.to_numeric(df_signals[col], errors='coerce')

# Display the complete signals dataframe (last expression is rendered by the
# notebook).
print("Complete OnChain Signals:")
print("=" * 80)
df_signals


Complete OnChain Signals:


Unnamed: 0,timestamp,symbol,signal_type,signal_strength,confidence,reasoning,features_used,prediction_hours
0,2025-09-25 18:41:07.923720+00:00,BTC/USDT,NEUTRAL,0.2,0.3,High mempool congestion; Low network activity,"[mempool_congestion_score, network_activity_sc...",1.0


In [None]:
# Mempool-only view of df_onchain: the timestamp plus the three mempool_*
# columns. The frame is the cell's last expression so the notebook renders it.
print("Mempool Data Only:", "=" * 40, sep="\n")
mempool_data = df_onchain.loc[:, [
    'timestamp',
    'mempool_count',
    'mempool_vsize',
    'mempool_total_fee_btc',
]]
mempool_data


In [None]:
# Fee-only view of df_onchain: the timestamp plus the five fee_*_satvB columns.
# The frame is the cell's last expression so the notebook renders it.
print("Fee Data Only:", "=" * 40, sep="\n")
fee_data = df_onchain.loc[:, [
    'timestamp',
    'fee_fastest_satvB',
    'fee_30min_satvB',
    'fee_60min_satvB',
    'fee_economy_satvB',
    'fee_min_satvB',
]]
fee_data


In [None]:
# Network-statistics view of df_onchain: the timestamp plus a selection of the
# bc_* columns. The frame is the cell's last expression so it gets rendered.
print("Network Statistics:", "=" * 40, sep="\n")
network_data = df_onchain.loc[:, [
    'timestamp',
    'bc_blocks',
    'bc_transactions',
    'bc_mempool_transactions',
    'bc_hashrate_24h',
    'bc_difficulty',
]]
network_data


In [None]:
# Score-only view of df_features: the timestamp plus the four *_score columns
# selected below. The frame is the cell's last expression so it gets rendered.
print("Feature Scores:", "=" * 40, sep="\n")
feature_scores = df_features.loc[:, [
    'timestamp',
    'mempool_congestion_score',
    'fee_pressure_score',
    'network_activity_score',
    'market_structure_score',
]]
feature_scores


In [1]:

# Signal summary view: timestamp, type, strength, confidence and reasoning.
# NOTE: df_signals is created by the cell that loads onchain_signals.json; on a
# fresh kernel that cell has to run before this one, otherwise the lookup below
# raises NameError.
print("Recent Signals:", "=" * 40, sep="\n")
recent_signals = df_signals.loc[:, [
    'timestamp',
    'signal_type',
    'signal_strength',
    'confidence',
    'reasoning',
]]
recent_signals


Recent Signals:


NameError: name 'df_signals' is not defined