# NIFTY Data Analysis Workflow

This notebook demonstrates a comprehensive workflow for:
1. Fetching NIFTY data for 2025 using the Kite Connect API
2. Saving the data as CSV
3. Analyzing candlestick patterns
4. Generating comprehensive reports

The workflow uses Pydantic for data validation and orchestration.

In [None]:
# Import required libraries
import sys
import os
import pandas as pd
from datetime import datetime, timedelta
from typing import Optional, Dict, Any, List
import logging
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

# Put the parent directory at the front of the module search path so the
# project-local imports below can be resolved. The entry is the relative
# path '..' (os.pardir), i.e. it is interpreted relative to the working
# directory the notebook runs in.
sys.path.insert(0, os.pardir)

from data_fetcher import KiteConnectDataFetcher
from candlestick_patterns import CandlestickPatternAnalyzer
from pydantic import BaseModel, Field

# Module-level logger for this notebook; the root logger is configured to
# emit records at INFO level and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

print("✅ Libraries imported successfully")

## Step 1: Initialize Components

In [None]:
# Build the two workflow collaborators: one fetches market data, the other
# analyzes candlestick patterns in it.
data_fetcher = KiteConnectDataFetcher()
pattern_analyzer = CandlestickPatternAnalyzer()

# Output directory two levels above the working directory; create it on
# first use and tolerate it already being there.
data_dir = Path("..") / ".." / "data"
data_dir.mkdir(exist_ok=True)

print("✅ Components initialized successfully")
print(f"📁 Data directory: {data_dir.absolute()}")

## Step 2: Fetch NIFTY Data for 2025

In [None]:
# Requested window: the full 2025 calendar year, as ISO date strings
from_date, to_date = "2025-01-01", "2025-12-31"

print(f"📊 Fetching NIFTY data from {from_date} to {to_date}")

# Ask the fetcher for candles at the 'day' interval over that window
df = data_fetcher.get_historical_data(
    from_date=from_date, to_date=to_date, interval='day'
)

if not df.empty:
    # Report how many candles came back and the span they cover
    print(f"✅ Successfully fetched {len(df)} daily candles")
    print(f"📅 Date range: {df['date'].min()} to {df['date'].max()}")
    print(f"💰 Price range: {df['low'].min():.2f} - {df['high'].max():.2f}")

    # Preview the first rows in the notebook output
    display(df.head())
else:
    print("⚠️  No data fetched for the specified date range")

## Step 3: Save Data as CSV

In [None]:
# Build a file name stamped with the current local time (second resolution)
# inside the data directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"nifty_daily_2025_{timestamp}.csv"
csv_path = data_dir / csv_filename

# When the frame still carries lower-case column names, switch the OHLCV
# columns to their capitalized form (date -> Date, open -> Open, ...).
if 'date' in df.columns:
    df = df.rename(columns={
        field: field.capitalize()
        for field in ('date', 'open', 'high', 'low', 'close', 'volume')
    })

# Write the frame without the index column and report where it went
df.to_csv(csv_path, index=False)
print(f"💾 Data saved to: {csv_path}")
print(f"📁 File size: {csv_path.stat().st_size / 1024:.1f} KB")

## Step 4: Analyze Candlestick Patterns

In [None]:
# Run the pattern analyzer over the price frame, then ask it for the
# per-pattern summary of the annotated result.
print("🔍 Analyzing candlestick patterns...")
df_with_patterns = pattern_analyzer.analyze_patterns(df)
pattern_summary = pattern_analyzer.get_pattern_summary(df_with_patterns)

print(f"✅ Pattern analysis completed for {len(df_with_patterns)} candles")
print("\n📊 Pattern Summary:")
for pattern, count in pattern_summary.items():
    # Only patterns with a positive count are listed
    if not count > 0:
        continue
    print(f"  - {pattern}: {count} occurrences")

# Keep the rows whose 'pattern' cell is not the empty string and preview
# at most ten of them.
pattern_rows = df_with_patterns.loc[df_with_patterns['pattern'] != '']
if not pattern_rows.empty:
    print(f"\n🎯 Found {len(pattern_rows)} candles with patterns:")
    preview_columns = ['Date', 'Open', 'High', 'Low', 'Close', 'pattern']
    display(pattern_rows[preview_columns].head(10))

## Step 5: Generate Pattern Analysis Report

In [None]:
# Look up, for every pattern type in the summary, the dates the analyzer
# reports for it.
pattern_dates = {
    pattern_name: pattern_analyzer.get_pattern_dates(df_with_patterns, pattern_name)
    for pattern_name in pattern_summary
}

# Persist the annotated frame next to the raw export (same timestamp suffix)
output_filename = f"nifty_pattern_analysis_{timestamp}.csv"
output_path = data_dir / output_filename
df_with_patterns.to_csv(output_path, index=False)

print(f"💾 Pattern analysis results saved to: {output_path}")

# Headline numbers; the rate falls back to 0 when there are no candles so
# the division can never fail.
total_patterns = sum(pattern_summary.values())
total_candles = len(df_with_patterns)
pattern_percentage = (total_patterns / total_candles * 100) if total_candles > 0 else 0

print("\n" + "="*60)
print("NIFTY CANDLESTICK PATTERN ANALYSIS SUMMARY")
print("="*60)
print(f"Total candles analyzed: {total_candles}")
print(f"Total patterns found: {total_patterns}")
print(f"Pattern occurrence rate: {pattern_percentage:.2f}%")
print("\nPattern Breakdown:")

for pattern, count in pattern_summary.items():
    if count > 0:
        print(f"- {pattern}: {count} occurrences")
        dates = pattern_dates.get(pattern)
        if dates:
            # str() each entry: str.join raises TypeError on non-string
            # items, and the analyzer may hand back date/Timestamp objects
            # rather than strings (NOTE(review): return type not visible here).
            print(f"  Dates: {', '.join(str(d) for d in dates[:5])}")
            if len(dates) > 5:
                print(f"  ... and {len(dates) - 5} more")

# Price movement analysis: first vs. last close plus the overall extremes
if not df.empty:
    start_price = df['Close'].iloc[0]
    end_price = df['Close'].iloc[-1]
    price_change = end_price - start_price
    price_change_pct = (price_change / start_price) * 100
    print("\nPrice Movement Analysis:")
    print(f"- Start price: {start_price:.2f}")
    print(f"- End price: {end_price:.2f}")
    print(f"- Total change: {price_change:.2f} ({price_change_pct:+.2f}%)")
    print(f"- Highest price: {df['High'].max():.2f}")
    print(f"- Lowest price: {df['Low'].min():.2f}")

print("="*60)

## Step 6: Visualize Results

In [None]:
# Create visualizations
# The 'seaborn-v0_8' style name only exists on matplotlib >= 3.6; older
# releases expose it as 'seaborn'. Pick whichever is installed rather than
# failing on an unknown style, and keep the default style if neither exists.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# 2x2 grid: close price, pattern counts, volume, daily high-low range
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# 1. Price chart
ax1 = axes[0, 0]
ax1.plot(df['Date'], df['Close'], label='Close Price', linewidth=2)
ax1.set_title('NIFTY Close Price (2025)', fontsize=14, fontweight='bold')
ax1.set_xlabel('Date')
ax1.set_ylabel('Price')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Pattern distribution (only patterns that occurred at least once)
ax2 = axes[0, 1]
patterns_with_counts = {k: v for k, v in pattern_summary.items() if v > 0}
# Title and axis labels are set unconditionally so the panel is still
# identifiable when there is nothing to plot.
ax2.set_title('Pattern Distribution', fontsize=14, fontweight='bold')
ax2.set_xlabel('Pattern Type')
ax2.set_ylabel('Count')
if patterns_with_counts:
    # Materialize the dict views as lists before handing them to bar()
    ax2.bar(
        list(patterns_with_counts.keys()),
        list(patterns_with_counts.values()),
        color='skyblue',
    )
    plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)
else:
    # Make the empty case explicit instead of showing a blank panel
    ax2.text(0.5, 0.5, 'No patterns detected',
             ha='center', va='center', transform=ax2.transAxes)

# 3. Volume analysis
ax3 = axes[1, 0]
ax3.bar(df['Date'], df['Volume'], alpha=0.7, color='lightgreen')
ax3.set_title('Trading Volume', fontsize=14, fontweight='bold')
ax3.set_xlabel('Date')
ax3.set_ylabel('Volume')
ax3.grid(True, alpha=0.3)

# 4. Price range (High-Low)
ax4 = axes[1, 1]
price_range = df['High'] - df['Low']
ax4.plot(df['Date'], price_range, color='orange', linewidth=2)
ax4.set_title('Daily Price Range (High - Low)', fontsize=14, fontweight='bold')
ax4.set_xlabel('Date')
ax4.set_ylabel('Price Range')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("📊 Visualizations generated successfully!")

## Step 7: Summary and Next Steps

In [None]:
# Final recap of the run: output files, headline numbers and follow-ups.
# The pattern total is summed once, and the rate falls back to 0 for an
# empty frame so this cell cannot raise ZeroDivisionError.
summary_total = sum(pattern_summary.values())
candle_count = len(df)
occurrence_rate = (summary_total / candle_count * 100) if candle_count > 0 else 0

print("🎉 NIFTY Data Analysis Workflow Completed Successfully!")
print("\n📁 Generated Files:")
print(f"  - Raw data: {csv_path}")
print(f"  - Pattern analysis: {output_path}")
print("\n📊 Key Insights:")
print(f"  - Analyzed {candle_count} daily candles")
print(f"  - Found {summary_total} candlestick patterns")
print(f"  - Pattern occurrence rate: {occurrence_rate:.2f}%")

print("\n🚀 Next Steps:")
print("  - Review specific pattern dates for trading signals")
print("  - Analyze pattern effectiveness in different market conditions")
print("  - Extend analysis to other timeframes (hourly, weekly)")
print("  - Integrate with additional technical indicators")