# Cryptocurrency Data Exploration

This notebook explores the data collected for our trading bot.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Set plotting style
# NOTE(review): sns.set_theme() runs after plt.style.use('ggplot') and applies
# seaborn's own default rcParams, so it presumably overrides most of the ggplot
# styling set on the previous line — confirm which look is actually intended.
plt.style.use('ggplot')
sns.set_theme()

# Display all dataframe columns (None lifts pandas' limit on how many columns
# are shown when a frame is rendered)
pd.set_option('display.max_columns', None)

## Load Data

Let's load some data from our processed data directory.

In [None]:
# Which dataset to load: exchange, symbol, and timeframe
exchange = 'binance'
symbol = 'BTC_USDT'
timeframe = '1m'

# Read the processed CSV for that selection.
# `df` stays None when the file is missing or cannot be read, and every
# later cell checks `df is not None` before using it.
df = None
try:
    data_path = Path(f"../data/processed/{exchange}/{symbol}/{timeframe}.csv")
    if not data_path.exists():
        print(f"Data file {data_path} not found. Please run data collection first.")
    else:
        # The 'timestamp' column becomes the index and is parsed as dates
        df = pd.read_csv(data_path, index_col='timestamp', parse_dates=True)
        print(f"Loaded {len(df)} rows from {data_path}")
except Exception as e:
    # Best-effort load: report the problem and carry on without data
    print(f"Error loading data: {str(e)}")
    df = None

## Explore Data

If we have loaded data, let's examine it.

In [None]:
if df is not None:
    # Plain-text overview: index span, row count, and column names
    overview_lines = [
        "\nDataset Info:",
        f"Date Range: {df.index.min()} to {df.index.max()}",
        f"Number of Rows: {len(df)}",
        f"Columns: {df.columns.tolist()}",
    ]
    for line in overview_lines:
        print(line)

    # Rich-rendered tables: first rows, then descriptive statistics.
    # Each table is built only when its section is reached.
    table_sections = (
        ("\nSample Data:", df.head),
        ("\nSummary Statistics:", df.describe),
    )
    for heading, build_table in table_sections:
        print(heading)
        display(build_table())

## Price and Volume Analysis

In [None]:
if df is not None:
    # Matplotlib measures bar widths on a datetime axis in days, so a fixed
    # width of 0.6 makes every bar span roughly 14 hours and smears
    # minute-level rows into one solid block. Scale the width to 60% of the
    # typical spacing between consecutive rows instead; keep 0.6 when the
    # index is not a datetime index or the spacing cannot be determined.
    bar_width = 0.6
    if isinstance(df.index, pd.DatetimeIndex) and len(df.index) > 1:
        typical_step = df.index.to_series().diff().median()
        if pd.notna(typical_step) and typical_step > pd.Timedelta(0):
            bar_width = 0.6 * (typical_step / pd.Timedelta(days=1))

    # Two stacked panels sharing the x-axis: close price on top, volume below
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

    # Plot price
    ax1.plot(df.index, df['close'], label='Close Price')
    ax1.set_ylabel('Price')
    ax1.set_title(f'{symbol} Price')
    ax1.legend()
    ax1.grid(True)

    # Plot volume
    ax2.bar(df.index, df['volume'], width=bar_width, alpha=0.5, label='Volume')
    ax2.set_ylabel('Volume')
    ax2.set_title(f'{symbol} Volume')
    ax2.legend()
    ax2.grid(True)

    plt.tight_layout()
    plt.show()

## Technical Indicators

In [None]:
if df is not None and 'RSI_14' in df.columns and 'MACDh_12_26_9' in df.columns:
    # Matplotlib measures bar widths on a datetime axis in days, so a fixed
    # width of 0.6 makes every histogram bar span roughly 14 hours and smears
    # minute-level rows into one solid block. Scale the width to 60% of the
    # typical spacing between consecutive rows instead; keep 0.6 when the
    # index is not a datetime index or the spacing cannot be determined.
    bar_width = 0.6
    if isinstance(df.index, pd.DatetimeIndex) and len(df.index) > 1:
        typical_step = df.index.to_series().diff().median()
        if pd.notna(typical_step) and typical_step > pd.Timedelta(0):
            bar_width = 0.6 * (typical_step / pd.Timedelta(days=1))

    # Three stacked panels sharing the x-axis: price, RSI, MACD histogram
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14, 12), sharex=True)

    # Plot price
    ax1.plot(df.index, df['close'], label='Close Price')
    ax1.set_ylabel('Price')
    ax1.set_title(f'{symbol} Price')
    ax1.legend()
    ax1.grid(True)

    # Plot RSI with dashed reference lines at 70 (red) and 30 (green)
    ax2.plot(df.index, df['RSI_14'], label='RSI')
    ax2.axhline(y=70, color='r', linestyle='--', alpha=0.3)
    ax2.axhline(y=30, color='g', linestyle='--', alpha=0.3)
    ax2.set_ylabel('RSI')
    ax2.set_title('RSI (14)')
    ax2.legend()
    ax2.grid(True)

    # Plot MACD Histogram
    ax3.bar(df.index, df['MACDh_12_26_9'], width=bar_width, alpha=0.5, label='MACD Histogram')
    ax3.set_ylabel('MACD Histogram')
    ax3.set_title('MACD Histogram')
    ax3.legend()
    ax3.grid(True)

    plt.tight_layout()
    plt.show()

## Correlation Analysis

In [None]:
if df is not None:
    # Pairwise correlations across every numeric column of the frame
    corr_matrix = df.select_dtypes(include=[np.number]).corr()

    # Draw the matrix as a heatmap on an explicit figure/axes pair,
    # with the colour scale pinned to the full [-1, 1] correlation range
    fig, ax = plt.subplots(figsize=(14, 12))
    sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
    ax.set_title('Feature Correlation Matrix')
    fig.tight_layout()
    plt.show()

In [None]:
if df is not None:
    # Feature distributions
    feature_cols = [
        'bb_pos', 'RSI_14', 'MACDh_12_26_9', 'trend_strength',
        'volatility', 'volume_ratio', 'range', 'rsi_diff'
    ]

    # Plot whichever of the requested features exist rather than silently
    # drawing nothing when a single column is absent, and say what was skipped.
    available_cols = [col for col in feature_cols if col in df.columns]
    missing_cols = [col for col in feature_cols if col not in df.columns]
    if missing_cols:
        print(f"Skipping features not present in the data: {missing_cols}")

    if available_cols:
        # Two histograms per row; round the row count up so an odd number of
        # features still gets a panel each. squeeze=False keeps `axes` a 2-D
        # array even when there is only one row.
        n_rows = (len(available_cols) + 1) // 2
        fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3 * n_rows), squeeze=False)
        axes = axes.flatten()

        for ax, col in zip(axes, available_cols):
            sns.histplot(df[col], kde=True, ax=ax)
            ax.set_title(f'Distribution of {col}')
            ax.grid(True)

        # Hide the trailing empty panel left over by an odd feature count
        for ax in axes[len(available_cols):]:
            ax.set_visible(False)

        plt.tight_layout()
        plt.show()

## Next Steps

Based on this initial exploration, here are some next steps to consider:

1. Implement feature and target creation for model training
2. Analyze the distribution of potential trades based on our profit target
3. Select the most promising features for our initial model
4. Implement the Bayesian model and train it on this data
5. Analyze model performance through backtesting