# Cryptocurrency Stock Analysis

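This notebook loads a cryptocurrency dataset and a stocks dataset, explores both, and then analyzes cryptocurrency prices, trading volume, and return volatility.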


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Notebook-wide plot styling: matplotlib style sheet plus seaborn colour palette.
# NOTE(review): the 'seaborn-v0_8-*' style names are only registered by newer
# matplotlib releases — confirm the environment's version before relying on it.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
# Render figures inline in the notebook; harmless but usually unnecessary on
# modern Jupyter/IPython, where inline rendering is already the default.
%matplotlib inline


## Load Data


In [None]:
# Load the cryptocurrency and stocks CSVs, report their original shapes, and
# cap each frame at MAX_ROWS randomly sampled rows so later cells stay fast.
MAX_ROWS = 50_000   # per-dataset row cap; raise it to analyse more of the data
SAMPLE_SEED = 42    # fixed seed so the same rows are drawn on every run

df_crypto = pd.read_csv('datasets/crypto stock/cryptocurrency.csv')
df_stocks = pd.read_csv('datasets/crypto stock/stocks.csv')

print("Original dataset shapes:")
print(f"Cryptocurrency: {df_crypto.shape}")
print(f"Stocks: {df_stocks.shape}")

# NOTE(review): sampling keeps a random, non-contiguous subset of rows, so any
# later row-to-row calculation (e.g. pct_change) compares sampled neighbours,
# not necessarily adjacent observations from the original file.
if len(df_crypto) > MAX_ROWS:
    df_crypto = df_crypto.sample(MAX_ROWS, random_state=SAMPLE_SEED)
    print(f"\nSampled cryptocurrency data to {MAX_ROWS:,} rows for performance")

if len(df_stocks) > MAX_ROWS:
    df_stocks = df_stocks.sample(MAX_ROWS, random_state=SAMPLE_SEED)
    print(f"Sampled stocks data to {MAX_ROWS:,} rows for performance")


## Data Exploration


In [None]:
# First look at the cryptocurrency frame: leading rows, column info, summary stats.
print("=== Cryptocurrency Data ===")
crypto_preview = df_crypto.iloc[:10]  # first ten rows, same selection as head(10)
display(crypto_preview)

print("\nColumn info:")
df_crypto.info()  # info() emits its own report, so it is not wrapped in display()

print("\nStatistical Summary:")
crypto_stats = df_crypto.describe()
display(crypto_stats)


In [None]:
# First look at the stocks frame: leading rows, column info, summary stats.
print("=== Stocks Data ===")
stocks_preview = df_stocks.iloc[:10]  # first ten rows, same selection as head(10)
display(stocks_preview)

print("\nColumn info:")
df_stocks.info()  # info() emits its own report, so it is not wrapped in display()

print("\nStatistical Summary:")
stocks_stats = df_stocks.describe()
display(stocks_stats)


## Data Preprocessing and Visualization


In [None]:
# Convert date-like columns to datetime, in place, on both frames.
# A column is a candidate when its name contains 'date' or 'time'
# (case-insensitive). Each candidate ends in exactly one printed outcome:
# converted, skipped (numeric), or failed with the parser's reason.
for df, name in [(df_crypto, 'cryptocurrency'), (df_stocks, 'stocks')]:
    # Numeric columns are left untouched: pd.to_datetime would read plain
    # numbers as nanoseconds since the epoch and silently produce 1970-era
    # timestamps. Epoch columns need an explicit `unit=` conversion instead.
    numeric_names = set(df.select_dtypes(include=[np.number]).columns)
    for col in df.columns:
        col_lower = col.lower()
        if 'date' not in col_lower and 'time' not in col_lower:
            continue
        if col in numeric_names:
            print(f"Skipped numeric column {col} in {name} dataset (not parsed as datetime)")
            continue
        try:
            df[col] = pd.to_datetime(df[col])
        except (ValueError, TypeError, OverflowError) as exc:
            # Best effort: keep the column as loaded, but say why, so a date
            # column that stayed as text is not a surprise in later cells.
            print(f"Could not convert {col} to datetime in {name} dataset: {exc}")
        else:
            print(f"Converted {col} to datetime in {name} dataset")


In [None]:
# Four-panel overview of the cryptocurrency data:
#   [0, 0] price over time      [0, 1] price distribution
#   [1, 0] volume over time     [1, 1] correlation of numeric columns
# A panel is only drawn when the columns it needs are found; otherwise its
# axes are left empty.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Cryptocurrency Market Analysis', fontsize=16, fontweight='bold')

# Columns are located by name (case-insensitive substring match).
# date_col, price_cols and volume_cols are reused by later cells.
date_col = next((col for col in df_crypto.columns if 'date' in col.lower()), None)
# NOTE(review): price_cols[0] is simply the first match in column order, so
# the "Price" panels may show a high/low column rather than a close price.
price_cols = [col for col in df_crypto.columns if any(x in col.lower() for x in ['close', 'price', 'high', 'low'])]
volume_cols = [col for col in df_crypto.columns if 'volume' in col.lower()]

# Down-sample large frames to keep the line plots fast. The sample is seeded
# so the figure is identical on every Restart & Run All.
plot_rows = 10000
if len(df_crypto) > plot_rows:
    df_sample = df_crypto.sample(plot_rows, random_state=42)
else:
    df_sample = df_crypto
if date_col:
    # Line plots need chronological order; sampling shuffles the rows.
    df_sample = df_sample.sort_values(date_col)

if date_col and price_cols:
    ax_price = axes[0, 0]
    ax_price.plot(df_sample[date_col], df_sample[price_cols[0]], linewidth=1, alpha=0.7)
    ax_price.set_title('Price Over Time (Sample)')
    ax_price.set_xlabel('Date')
    ax_price.set_ylabel('Price')
    ax_price.tick_params(axis='x', rotation=45)
    ax_price.grid(True, alpha=0.3)

if price_cols:
    # The histogram uses the full frame, not the plotting sample.
    ax_hist = axes[0, 1]
    ax_hist.hist(df_crypto[price_cols[0]].dropna(), bins=50, edgecolor='black', alpha=0.7)
    ax_hist.set_title('Price Distribution')
    ax_hist.set_xlabel('Price')
    ax_hist.set_ylabel('Frequency')

if date_col and volume_cols:
    # Values that cannot be parsed as numbers become NaN and leave gaps in the line.
    volume_data = pd.to_numeric(df_sample[volume_cols[0]], errors='coerce')
    ax_volume = axes[1, 0]
    ax_volume.plot(df_sample[date_col], volume_data, color='orange', linewidth=1, alpha=0.7)
    ax_volume.set_title('Trading Volume Over Time (Sample)')
    ax_volume.set_xlabel('Date')
    ax_volume.set_ylabel('Volume')
    ax_volume.tick_params(axis='x', rotation=45)
    ax_volume.grid(True, alpha=0.3)

# The annotated heatmap is only drawn for 2 to 10 numeric columns.
numeric_cols = df_crypto.select_dtypes(include=[np.number]).columns
if 1 < len(numeric_cols) <= 10:
    correlation = df_crypto[numeric_cols].corr()
    sns.heatmap(correlation, annot=True, fmt='.2f', cmap='coolwarm', ax=axes[1, 1], center=0)
    axes[1, 1].set_title('Correlation Matrix')

plt.tight_layout()
plt.show()


## Volatility Analysis


In [None]:
# Row-to-row returns of the first price column, plotted over time and as a
# histogram, followed by their mean and standard deviation.
# Requires date_col and price_cols from the visualization cell.
if price_cols and date_col:
    df_crypto_sorted = df_crypto.sort_values(date_col)

    # fill_method=None: do not forward-fill missing prices before differencing
    # (the implicit padding default is deprecated in pandas). A missing price
    # therefore yields a missing return rather than a fabricated 0% return.
    raw_returns = df_crypto_sorted[price_cols[0]].pct_change(fill_method=None)
    # A zero price followed by a non-zero one gives +/-inf, which makes
    # hist() raise and turns the mean/std into inf/NaN; treat it as missing.
    df_crypto_sorted['returns'] = raw_returns.replace([np.inf, -np.inf], np.nan)

    fig, axes = plt.subplots(1, 2, figsize=(16, 5))

    # NOTE(review): returns are between consecutive rows after sorting by date;
    # they are "daily" only if the rows are one day apart for a single asset —
    # confirm against the dataset.
    axes[0].plot(df_crypto_sorted[date_col], df_crypto_sorted['returns'], alpha=0.7)
    axes[0].set_title('Daily Returns Over Time')
    axes[0].set_xlabel('Date')
    axes[0].set_ylabel('Returns')
    axes[0].tick_params(axis='x', rotation=45)
    axes[0].axhline(y=0, color='r', linestyle='--', alpha=0.5)

    axes[1].hist(df_crypto_sorted['returns'].dropna(), bins=50, edgecolor='black', alpha=0.7)
    axes[1].set_title('Distribution of Returns')
    axes[1].set_xlabel('Returns')
    axes[1].set_ylabel('Frequency')
    axes[1].axvline(x=0, color='r', linestyle='--', alpha=0.5)

    plt.tight_layout()
    plt.show()

    mean_return = df_crypto_sorted['returns'].mean()
    std_return = df_crypto_sorted['returns'].std()
    print("Volatility Metrics:")
    print(f"Average Daily Return: {mean_return:.4f} ({mean_return*100:.2f}%)")
    print(f"Std Dev of Returns: {std_return:.4f}")


## Key Insights


In [None]:
# Headline numbers for the cryptocurrency frame: record count, price
# statistics, date range, and trading-volume totals.
# Requires price_cols, date_col and volume_cols from the visualization cell.
if price_cols:
    price_col = price_cols[0]
    prices = df_crypto[price_col]
    print("=== Cryptocurrency Market Summary ===")
    print(f"Total Records: {len(df_crypto)}")
    print(f"Average Price: ${prices.mean():.2f}")
    print(f"Median Price: ${prices.median():.2f}")
    print(f"Price Range: ${prices.min():.2f} - ${prices.max():.2f}")
    print(f"Standard Deviation: ${prices.std():.2f}")

    if date_col:
        print(f"\nDate Range: {df_crypto[date_col].min()} to {df_crypto[date_col].max()}")

    if volume_cols:
        # Coerce exactly as the volume plot does: a volume column loaded as
        # text would otherwise break .mean() or the numeric format spec.
        # Unparseable entries become NaN and are ignored by mean() and sum().
        volume = pd.to_numeric(df_crypto[volume_cols[0]], errors='coerce')
        print(f"\nAverage Trading Volume: {volume.mean():,.0f}")
        print(f"Total Trading Volume: {volume.sum():,.0f}")
