# Crypto Price Prediction - Data Exploration

This notebook explores the data, features, and preliminary results.


In [None]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yaml

from data_fetcher import BinanceDataFetcher
from feature_engineering import FeatureEngine

# Set style
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (14, 6)

%load_ext autoreload
%autoreload 2


: 

## 1. Load Configuration


In [None]:
# Parse the project's YAML settings (one directory above this notebook) into
# `config`, which the cells below read from.
with open('../config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Echo the settings that drive the rest of the notebook, section by section.
print("Configuration:")

data_cfg = config['data']
print(f"  Train symbol: {data_cfg['train_symbol']}")
print(f"  Test symbol: {data_cfg['test_symbol']}")
print(f"  Days: {data_cfg['days']}")

target_cfg = config['target']
print(f"  Forward prediction: {target_cfg['forward_minutes']} minutes")
# Both thresholds are formatted to three decimals on a single line.
print(f"  Thresholds: {target_cfg['down_threshold']:.3f} to {target_cfg['up_threshold']:.3f}")


: 

## 2. Fetch Raw Data


In [None]:
# Build the fetcher with the cache directory named in the config.
fetcher = BinanceDataFetcher(cache_dir=config['data']['cache_dir'])

# Fetch data for the configured training symbol.
# NOTE(review): presumably fetch_and_cache reuses a cached copy when one exists
# and returns a pandas DataFrame -- confirm against data_fetcher.py.
# The `btc_df` name is kept because later cells reference it; it holds whatever
# symbol `train_symbol` is set to, not necessarily BTC.
btc_df = fetcher.fetch_and_cache(
    symbol=config['data']['train_symbol'],
    days=config['data']['days'],
    interval=config['data']['interval'],
)

# Label the shape with the configured symbol (as the plot title does) so the
# message stays correct when the config points at a different market.
print(f"\n{config['data']['train_symbol']} data shape: {btc_df.shape}")
btc_df.head()


## 3. Visualize Raw Data


In [None]:
# Three stacked panels: close price, traded volume, and period-over-period returns.
fig, axes = plt.subplots(3, 1, figsize=(16, 12))

# Price
axes[0].plot(btc_df.index, btc_df['close'], label='Close Price', linewidth=0.8)
axes[0].set_title(f"{config['data']['train_symbol']} - Close Price", fontsize=14, fontweight='bold')
axes[0].set_ylabel('Price (USDT)')

# Volume
# NOTE(review): width is in x-axis data units; 0.0007 looks tuned for a
# datetime axis with 1-minute bars (~1/1440 of a day) -- confirm it still fits
# if config['data']['interval'] changes.
axes[1].bar(btc_df.index, btc_df['volume'], width=0.0007, label='Volume', alpha=0.7)
axes[1].set_title('Trading Volume', fontsize=14, fontweight='bold')
axes[1].set_ylabel('Volume')

# Returns
# pct_change() yields fractional changes (0.01 == 1%). `returns` is left as a
# fraction for any later use; only the plotted values are scaled by 100 so they
# match the "Return (%)" axis label.
returns = btc_df['close'].pct_change()
axes[2].plot(btc_df.index, returns * 100, label='Returns', linewidth=0.5, alpha=0.7)
axes[2].axhline(y=0, color='black', linestyle='--', linewidth=0.8)
axes[2].set_title('Returns', fontsize=14, fontweight='bold')
axes[2].set_ylabel('Return (%)')
axes[2].set_xlabel('Time')

# Shared cosmetics: every panel gets a legend and a faint grid.
for ax in axes:
    ax.legend()
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()


: 

## Next Steps

The cells above load the configuration, fetch the raw market data, and plot price, volume, and returns. Run them in order, from top to bottom, to explore the data.

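To run the complete research pipeline, execute the following commands in order from the project root (the directory that contains `config.yaml`):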
1. `python data_fetcher.py --symbol BTCUSDT --days 30`
2. `python feature_engineering.py`
3. `python train.py --model transformer`
4. `python evaluate.py --checkpoint checkpoints/best_model.pt`
