In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.config import get_config
from src.data.data_loader import StockDataLoader
from src.utils.visualization import StockVisualizer

%matplotlib inline
# Notebook-wide display defaults.
# Passing None removes the cap on how many DataFrame columns pandas prints.
pd.set_option('display.max_columns', None)
# Apply the seaborn dark-grid matplotlib style to every figure drawn below.
plt.style.use('seaborn-v0_8-darkgrid')

## 1. Load Configuration and Data

In [None]:
# Fetch the project configuration and hand its project root to the loader.
config = get_config()
data_loader = StockDataLoader(config.project_root)

# Read the combined all-stocks dataset from the path named in the config.
all_stocks_path = config.get_path('data.all_stocks_path')
df_all = data_loader.load_all_stocks(all_stocks_path)

# Sanity report: frame shape, span of the 'date' column, and how many
# distinct values the 'name' column holds.
dataset_report = [
    f"Dataset shape: {df_all.shape}",
    f"Date range: {df_all['date'].min()} to {df_all['date'].max()}",
    f"Number of unique stocks: {df_all['name'].nunique()}",
]
print("\n".join(dataset_report))

In [None]:
# Preview the top ten rows of the combined dataset (rendered as the cell output).
df_all.head(n=10)

## 2. Data Summary and Statistics

In [None]:
# Ask the loader for its summary of the combined dataset, then print every
# entry under its own heading, separated by a blank line.
summary = data_loader.get_data_summary(df_all)
print("\nDataset Summary:")
for section, contents in summary.items():
    print(f"\n{section}:", contents, sep="\n")

In [None]:
# Descriptive statistics for the price and volume columns only.
ohlcv_columns = ['open', 'high', 'low', 'close', 'volume']
df_all[ohlcv_columns].describe()

## 3. Analyze Individual Stock - Apple (AAPL)

In [None]:
# Load the per-ticker file for AAPL from the configured individual-stocks directory.
individual_stocks_dir = config.get_path('data.individual_stocks_dir')
df_aapl = data_loader.load_individual_stock('AAPL', individual_stocks_dir)

# Report row count and date span, then the describe() table for the close price.
aapl_report = [
    f"AAPL records: {len(df_aapl)}",
    f"Date range: {df_aapl['date'].min()} to {df_aapl['date'].max()}",
    "\nPrice statistics:",
]
print("\n".join(aapl_report))
print(df_aapl['close'].describe())

In [None]:
# Visualize AAPL price history.
# StockVisualizer is the project helper imported from src.utils.visualization;
# it is handed the AAPL frame plus the ticker label.
# NOTE(review): what the helper actually draws (panels, columns used) is defined
# in that module, not in this notebook — check there before relying on it.
visualizer = StockVisualizer()
visualizer.plot_price_history(df_aapl, ticker='AAPL')
# Presumably the helper draws on the current pyplot figure, which this call renders.
plt.show()

## 4. Price Distribution Analysis

In [None]:
# 2x2 overview of AAPL: three histograms (close, daily returns, volume) and
# the close price plotted against date.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Day-over-day fractional change of the close; the first entry is NaN and is
# dropped before it is histogrammed.
returns = df_aapl['close'].pct_change()

# (axes, values, title, x-label, extra hist keyword arguments) for each histogram.
histogram_panels = [
    (axes[0, 0], df_aapl['close'], 'AAPL Close Price Distribution', 'Price ($)', {}),
    (axes[0, 1], returns.dropna(), 'Daily Returns Distribution', 'Return', {'color': 'orange'}),
    (axes[1, 0], df_aapl['volume'], 'Volume Distribution', 'Volume', {'color': 'green'}),
]
for panel, values, panel_title, x_label, hist_kwargs in histogram_panels:
    panel.hist(values, bins=50, edgecolor='black', **hist_kwargs)
    panel.set_title(panel_title)
    panel.set_xlabel(x_label)
    panel.set_ylabel('Frequency')

# Bottom-right panel: close price as a line over the date column.
trend_panel = axes[1, 1]
trend_panel.plot(df_aapl['date'], df_aapl['close'])
trend_panel.set_title('Price Over Time')
trend_panel.set_xlabel('Date')
trend_panel.set_ylabel('Price ($)')

plt.tight_layout()
plt.show()

## 5. Compare Multiple Stocks

In [None]:
# Compare several tech stocks on a common scale by rebasing each close-price
# series so that its first valid observation equals 100.
tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'FB']
stock_data = {}

# Load each ticker; a missing file is reported and that ticker is left out.
for ticker in tickers:
    try:
        stock_data[ticker] = data_loader.load_individual_stock(ticker, config.get_path('data.individual_stocks_dir'))
    except FileNotFoundError:
        print(f"Data not found for {ticker}")

# Plot normalized prices
plt.figure(figsize=(15, 8))

for ticker, df in stock_data.items():
    # Use the first non-missing close as the base. Indexing with .iloc[0]
    # directly would raise IndexError on an empty frame and would turn the
    # whole line into NaN whenever the first close is missing.
    valid_closes = df['close'].dropna()
    if valid_closes.empty:
        print(f"No valid close prices for {ticker}; skipping")
        continue
    base_price = valid_closes.iloc[0]

    # Normalize to start at 100
    normalized = (df['close'] / base_price) * 100
    plt.plot(df['date'], normalized, label=ticker, linewidth=2)

plt.title('Normalized Stock Price Comparison (Base = 100)', fontsize=14, fontweight='bold')
plt.xlabel('Date')
plt.ylabel('Normalized Price')
# Only draw the legend when at least one line was plotted; an empty legend
# makes matplotlib emit a "no artists with labels" warning.
if plt.gca().get_legend_handles_labels()[0]:
    plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## 6. Volatility Analysis

In [None]:
# Rolling volatility: standard deviation of daily returns over a 30-row
# window, scaled by sqrt(252) and expressed in percent.
# Both results are stored as new columns on df_aapl.
df_aapl['returns'] = df_aapl['close'].pct_change()
rolling_std = df_aapl['returns'].rolling(window=30).std()
df_aapl['volatility'] = rolling_std * np.sqrt(252) * 100

# Two stacked panels: price on top, volatility underneath.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))

# Top panel: close price.
ax1.plot(df_aapl['date'], df_aapl['close'], linewidth=2)
ax1.set(ylabel='Price ($)')
ax1.set_title('AAPL Price', fontweight='bold')
ax1.grid(True, alpha=0.3)

# Bottom panel: the rolling volatility series, drawn in red.
ax2.plot(df_aapl['date'], df_aapl['volatility'], color='red', linewidth=2)
ax2.set(xlabel='Date', ylabel='Annualized Volatility (%)')
ax2.set_title('30-Day Rolling Volatility', fontweight='bold')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 7. Key Insights

Based on the exploratory analysis:
1. Dataset contains 5 years of daily OHLCV data
2. Stock prices show various trends and patterns
3. Volatility varies significantly over time
4. Volume patterns appear to move with price changes (a visual impression only; no correlation is computed in this notebook)
5. Different stocks show different growth trajectories

These insights will guide feature engineering and model development.