# Data Acquisition for Quantitative Trading System

This notebook demonstrates data acquisition from NSE/Yahoo Finance for Indian markets.

**Objectives:**
1. Fetch historical data for NIFTY and stocks
2. Clean and validate data
3. Save processed data for further analysis
4. Perform initial quality checks

In [None]:
# Import libraries
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

from src.data_acquisition import DataAcquisition, fetch_multiple_symbols
from src.utils import load_config, print_summary_stats

# Display settings: show every column and up to 100 rows when printing frames
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*';
# older releases only know the legacy name and raise OSError for the new one,
# so fall back instead of failing the whole setup cell.
try:
    plt.style.use('seaborn-v0_8-darkgrid')
except OSError:
    plt.style.use('seaborn-darkgrid')

print("Libraries imported successfully!")

## 1. Load Configuration

In [None]:
# Read the project configuration and pull out the data-acquisition settings
config = load_config('../configs/config.yaml')

symbol, start_date, end_date, source = (
    config['data'][field]
    for field in ('symbol', 'start_date', 'end_date', 'source')
)

# Echo the settings so the run is self-documenting
print(
    "Configuration loaded:",
    f"  Symbol: {symbol}",
    f"  Date Range: {start_date} to {end_date}",
    f"  Source: {source}",
    sep="\n",
)

## 2. Fetch NIFTY Data

In [None]:
# Create the acquisition object from the configured symbol, dates and source
da = DataAcquisition(symbol=symbol, start_date=start_date, end_date=end_date, source=source)

# Fetch the data and keep it in `df` for the following cells
print(f"Fetching data for {symbol}...")
df = da.fetch_data()

print(
    "\nData fetched successfully!",
    f"Shape: {df.shape}",
    "\nFirst few rows:",
    sep="\n",
)
# Last expression of the cell so the notebook renders the preview
df.head()

## 3. Data Quality Check

In [None]:
# Project helper prints the summary statistics under the given title
print_summary_stats(df, f"{symbol} Data Summary")

# Per-column count of missing values
print("\nMissing Values:", df.isnull().sum(), sep="\n")

# Per-column dtypes
print("\nData Types:", df.dtypes, sep="\n")

In [None]:
# Descriptive statistics of the fetched data; kept as the cell's final
# expression so the notebook renders the result
df.describe()

## 4. Visualize Raw Data

In [None]:
# Plot the close price (top panel) and, when available, the trading volume
# (bottom panel) over time, then save the figure under ../plots.
import os

fig, axes = plt.subplots(2, 1, figsize=(15, 10))

# Price
axes[0].plot(df.index, df['close'], linewidth=1.5)
axes[0].set_title(f'{symbol} Close Price', fontsize=14, fontweight='bold')
axes[0].set_ylabel('Price', fontsize=12)
axes[0].grid(True, alpha=0.3)

# Volume
if 'volume' in df.columns:
    axes[1].bar(df.index, df['volume'], width=1, alpha=0.7)
    axes[1].set_title(f'{symbol} Trading Volume', fontsize=14, fontweight='bold')
    axes[1].set_xlabel('Date', fontsize=12)
    axes[1].set_ylabel('Volume', fontsize=12)
    axes[1].grid(True, alpha=0.3)
else:
    # No volume column: hide the second panel instead of saving an empty axes
    axes[1].set_visible(False)

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists
os.makedirs('../plots', exist_ok=True)
plt.savefig('../plots/01_raw_data.png', dpi=300, bbox_inches='tight')
plt.show()

print("Plot saved to plots/01_raw_data.png")

## 5. Save Processed Data

In [None]:
# Output locations: a CSV copy, plus a Parquet copy for faster loading
output_path = f'../data/raw/{symbol}_data.csv'
output_path_parquet = f'../data/raw/{symbol}_data.parquet'

# CSV
da.save_data(output_path, format='csv')
print("Data saved to", output_path)

# Parquet
da.save_data(output_path_parquet, format='parquet')
print("Data saved to", output_path_parquet)

## 6. Summary Report

In [None]:
# Ask the acquisition object for its summary and print it as a framed report
summary = da.get_summary()

# Banner
print("", "=" * 60, "DATA ACQUISITION SUMMARY", "=" * 60, sep="\n")

# Symbol and covered period
print(f"\nSymbol: {summary['symbol']}")
print(
    f"Date Range: {summary['start_date'].date()}"
    f" to {summary['end_date'].date()}"
)
print(f"Total Trading Days: {summary['total_days']}")

# Price statistics
print(
    "\nPrice Range:",
    f"  Min: {summary['price_range']['min']:.2f}",
    f"  Max: {summary['price_range']['max']:.2f}",
    f"  Mean: {summary['price_range']['mean']:.2f}",
    sep="\n",
)

# The volume line is printed only when the mean volume is truthy
if summary['volume_stats']['mean']:
    print(f"\nAverage Volume: {summary['volume_stats']['mean']:,.0f}")

print(f"\nOutliers Detected: {summary['outliers']}")
print("=" * 60)

## 7. Next Steps

✓ Data acquired and validated  
→ Proceed to **02_eda_feature_engineering.ipynb** for:
- Exploratory Data Analysis
- EMA and technical indicators
- Feature engineering