# Cryptocurrency Data Exploration

This notebook performs initial data analysis and visualization of cryptocurrency data.

## Contents
1. Data Loading and Initial Inspection
2. Data Quality Assessment
3. Basic Statistical Analysis
4. Price Movement Analysis
5. Volume Analysis
6. Correlation Studies


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Configure DataFrame rendering: show every column, and up to 100 rows
pd.options.display.max_columns = None
pd.options.display.max_rows = 100


## 1. Data Loading and Initial Inspection


In [None]:
# Load data using our data collector
from src.data_collection.data_collector import DataCollector

async def load_data():
    """Return everything a bitcoin-only ``DataCollector`` collects.

    Builds ``DataCollector(coins=['bitcoin'])`` and awaits its
    ``collect_all_data()`` coroutine, handing the result back unchanged.
    """
    return await DataCollector(coins=['bitcoin']).collect_all_data()

# Call the async function and retrieve data
import asyncio
import concurrent.futures

try:
    asyncio.get_running_loop()
except RuntimeError:
    # No event loop is running in this thread (plain script execution), so
    # asyncio.run() can create and own one.
    data = asyncio.run(load_data())
else:
    # A loop is already running in this thread (the normal situation inside a
    # Jupyter kernel), where asyncio.run() raises RuntimeError. Run the
    # coroutine on a fresh loop in a worker thread and block until it is done.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _executor:
        data = _executor.submit(asyncio.run, load_data()).result()

# Display first few rows
data['bitcoin']['binance'].head()


## 2. Data Quality Assessment


In [None]:
def assess_data_quality(df):
    """Build a per-column data quality report.

    Args:
        df: DataFrame to inspect.

    Returns:
        DataFrame with one row per column of ``df`` and one column per
        metric: ``missing_values`` (null count), ``duplicates`` (the
        frame-wide number of duplicated rows, repeated on every line),
        ``data_types``, ``unique_values`` and ``memory_usage`` (deep, bytes).
    """
    quality_report = {
        'missing_values': df.isnull().sum(),
        # A single scalar for the whole frame; pandas broadcasts it to
        # every row of the report.
        'duplicates': df.duplicated().sum(),
        'data_types': df.dtypes,
        'unique_values': df.nunique(),
        # index=False: by default memory_usage() also reports the index under
        # the label 'Index', which would add a spurious row of NaNs to the
        # report and turn the integer counts above into floats.
        'memory_usage': df.memory_usage(deep=True, index=False),
    }
    return pd.DataFrame(quality_report)

# Assess the frame stored under data['bitcoin']['binance']; the bare name on
# the last line makes the notebook cell display the resulting report.
quality_assessment = assess_data_quality(data['bitcoin']['binance'])
quality_assessment


## 3. Basic Statistical Analysis


In [None]:
# Calculate basic statistics
df = data['bitcoin']['binance']
stats = df.describe()

# Calculate additional metrics and append them as extra rows of the summary.
# numeric_only=True keeps these in step with describe(), which skips
# object/string columns by default; without it pandas >= 2.0 raises as soon
# as the frame contains a non-numeric column.
stats.loc['skew'] = df.skew(numeric_only=True)
stats.loc['kurtosis'] = df.kurtosis(numeric_only=True)

stats


## 4. Price Movement Analysis


In [None]:
def plot_price_analysis(df):
    """Plot a candlestick chart with the returns of ``close`` beneath it.

    Args:
        df: DataFrame with ``open``, ``high``, ``low`` and ``close`` columns;
            its index is used as the x-axis of both panels.

    Returns:
        A two-row Plotly figure: candlesticks in row 1 and the
        period-over-period percentage change of ``close`` in row 2.
    """
    # The keyword is ``shared_xaxes``; the misspelling ``shared_xaxis`` is
    # rejected by make_subplots as an unexpected keyword argument.
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True)

    # Candlestick chart
    fig.add_trace(
        go.Candlestick(
            x=df.index,
            open=df['open'],
            high=df['high'],
            low=df['low'],
            close=df['close']
        ),
        row=1, col=1
    )

    # Daily returns (the first value is NaN because it has no predecessor)
    daily_returns = df['close'].pct_change()
    fig.add_trace(
        go.Scatter(x=df.index, y=daily_returns, name='Daily Returns'),
        row=2, col=1
    )

    fig.update_layout(height=800, title='Price Movement Analysis')
    return fig

# Build the price figure from df and render it in the notebook
fig = plot_price_analysis(df)
fig.show()


## 5. Volume Analysis


In [None]:
def analyze_volume(df):
    """Build a two-row figure describing trading volume.

    Row 1 overlays the raw ``volume`` bars with their 20-period rolling mean,
    both plotted against the frame's index. Row 2 is a scatter of ``volume``
    against ``close``.

    Args:
        df: DataFrame with ``volume`` and ``close`` columns.

    Returns:
        The assembled Plotly figure.
    """
    timestamps = df.index
    volume = df['volume']

    figure = make_subplots(rows=2, cols=1)

    # Top panel: volume over time plus its moving average
    top_panel_traces = (
        go.Bar(x=timestamps, y=volume, name='Volume'),
        go.Scatter(
            x=timestamps,
            y=volume.rolling(window=20).mean(),
            name='Volume MA20',
        ),
    )
    for trace in top_panel_traces:
        figure.add_trace(trace, row=1, col=1)

    # Bottom panel: volume plotted against the closing price
    price_vs_volume = go.Scatter(
        x=df['close'],
        y=volume,
        mode='markers',
        name='Volume vs Price',
    )
    figure.add_trace(price_vs_volume, row=2, col=1)

    figure.update_layout(height=800, title='Volume Analysis')
    return figure

# Build the volume figure from df and render it in the notebook
fig = analyze_volume(df)
fig.show()


## 6. Correlation Studies


In [None]:
# Calculate correlation matrix.
# Only numeric columns are correlated: since pandas 2.0, DataFrame.corr() no
# longer drops non-numeric columns on its own and raises when it meets one.
corr_matrix = df.select_dtypes(include='number').corr()

# Plot correlation heatmap (colour scale centred on zero correlation)
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Feature Correlation Matrix')
plt.show()
