In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import matplotlib.dates as mdates
from datetime import datetime
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import warnings

# Silence every warning for the rest of the session: no category or module
# filter is passed, so deprecation and parsing warnings are hidden as well.
warnings.filterwarnings("ignore")


class CandlestickChartGenerator:
    def __init__(self, csv_file_path):
        """
        Initialize with CSV file path
        """
        self.df = self.load_csv_data(csv_file_path)

    def load_csv_data(self, csv_file_path):
        """
        Load and process CSV data
        """
        try:
            # Read CSV file
            df = pd.read_csv(csv_file_path)

            # Clean column names (remove extra spaces)
            df.columns = df.columns.str.strip()

            # Convert Date column to datetime with multiple format support
            df["Date"] = self.parse_dates(df["Date"])

            # Clean and convert numeric columns
            numeric_columns = ["Price", "Open", "High", "Low"]
            for col in numeric_columns:
                if col in df.columns:
                    df[col] = df[col].astype(str).str.replace(",", "").astype(float)

            # Process Volume column
            if "Vol." in df.columns:
                df["Volume"] = df["Vol."].apply(self.convert_volume)

            # Process Change % column
            if "Change %" in df.columns:
                df["Change_Percent"] = df["Change %"].str.replace("%", "").astype(float)

            # Rename columns for consistency
            column_mapping = {
                "Price": "Close",
                "Vol.": "Volume_Raw",
                "Change %": "Change_Percent",
            }
            df = df.rename(columns=column_mapping)

            # Sort by date
            df = df.sort_values("Date")
            df.reset_index(drop=True, inplace=True)

            print(f"Successfully loaded {len(df)} records from CSV")
            print(
                f"Date range: {df['Date'].min().strftime('%d/%m/%Y')} to {df['Date'].max().strftime('%d/%m/%Y')}"
            )

            return df

        except Exception as e:
            print(f"Error loading CSV file: {e}")
            return pd.DataFrame()

    def parse_dates(self, date_series):
        """
        Parse dates with multiple format support
        """
        # Common date formats to try
        date_formats = [
            "%d/%m/%y",  # 25/02/25
            "%d-%m-%Y",  # 25-02-2025
            "%d/%m/%Y",  # 25/02/2025
            "%d-%m-%y",  # 25-02-25
            "%Y-%m-%d",  # 2025-02-25
            "%m/%d/%Y",  # 02/25/2025
            "%m-%d-%Y",  # 02-25-2025
            "%d.%m.%Y",  # 25.02.2025
            "%d.%m.%y",  # 25.02.25
        ]

        # Try each format
        for fmt in date_formats:
            try:
                return pd.to_datetime(date_series, format=fmt)
            except ValueError:
                continue

        # If none of the specific formats work, try pandas' automatic parsing
        try:
            return pd.to_datetime(date_series, dayfirst=True)
        except:
            # Last resort - try with infer_datetime_format
            try:
                return pd.to_datetime(date_series, infer_datetime_format=True)
            except:
                print(
                    f"Could not parse dates. Sample dates: {date_series.head().tolist()}"
                )
                raise ValueError("Unable to parse date format")

    def convert_volume(self, volume_str):
        """
        Convert volume string (e.g., '250.28M') to numeric value
        """
        try:
            volume_str = str(volume_str).strip()
            if "M" in volume_str:
                return float(volume_str.replace("M", "")) * 1000000
            elif "K" in volume_str:
                return float(volume_str.replace("K", "")) * 1000
            else:
                return float(volume_str.replace(",", ""))
        except:
            return 0

    def filter_by_month(self, month_name):
        """
        Filter data for a specific month across all years
        """
        month_dict = {
            "january": 1,
            "february": 2,
            "march": 3,
            "april": 4,
            "may": 5,
            "june": 6,
            "july": 7,
            "august": 8,
            "september": 9,
            "october": 10,
            "november": 11,
            "december": 12,
        }

        month_num = month_dict.get(month_name.lower())
        if month_num is None:
            raise ValueError(f"Invalid month name: {month_name}")

        filtered_df = self.df[self.df["Date"].dt.month == month_num].copy()
        return filtered_df

    def detect_trend_reversals(self, df):
        """
        Detect trend reversals in the price data
        """
        if len(df) < 3:
            return []

        reversals = []

        # Calculate moving averages for trend detection
        df_copy = df.copy()
        df_copy["MA_short"] = df_copy["Close"].rolling(window=3, min_periods=1).mean()
        df_copy["MA_long"] = df_copy["Close"].rolling(window=5, min_periods=1).mean()

        # Detect trend changes
        for i in range(2, len(df_copy)):
            prev_trend = (
                df_copy.iloc[i - 1]["MA_short"] > df_copy.iloc[i - 1]["MA_long"]
            )
            curr_trend = df_copy.iloc[i]["MA_short"] > df_copy.iloc[i]["MA_long"]

            # Check for trend reversal
            if prev_trend != curr_trend:
                reversals.append(df_copy.iloc[i]["Date"])

        # Alternative method: Price swing highs and lows
        for i in range(1, len(df_copy) - 1):
            curr_high = df_copy.iloc[i]["High"]
            curr_low = df_copy.iloc[i]["Low"]
            prev_high = df_copy.iloc[i - 1]["High"]
            prev_low = df_copy.iloc[i - 1]["Low"]
            next_high = df_copy.iloc[i + 1]["High"]
            next_low = df_copy.iloc[i + 1]["Low"]

            # Local high (potential reversal from uptrend to downtrend)
            if curr_high > prev_high and curr_high > next_high:
                reversals.append(df_copy.iloc[i]["Date"])

            # Local low (potential reversal from downtrend to uptrend)
            if curr_low < prev_low and curr_low < next_low:
                reversals.append(df_copy.iloc[i]["Date"])

        # Remove duplicates and sort
        reversals = sorted(list(set(reversals)))
        return reversals

    def create_candlestick_chart(self, df, month_name):
        """
        Create a candlestick chart for the given data
        """
        if df.empty:
            print(f"No data found for {month_name}")
            return None

        fig, ax = plt.subplots(figsize=(15, 10))

        # Main candlestick chart
        for i, row in df.iterrows():
            date = row["Date"]
            open_price = row["Open"]
            high = row["High"]
            low = row["Low"]
            close = row["Close"]

            # Determine color
            color = "green" if close >= open_price else "red"

            # Draw the high-low line
            ax.plot([date, date], [low, high], color="black", linewidth=1)

            # Draw the body rectangle
            height = abs(close - open_price)
            bottom = min(open_price, close)

            rect = Rectangle(
                (mdates.date2num(date) - 0.3, bottom),
                0.6,
                height,
                facecolor=color,
                edgecolor="black",
                alpha=0.7,
            )
            ax.add_patch(rect)

        # Detect and draw trend reversals
        reversals = self.detect_trend_reversals(df)

        # Draw vertical lines for trend reversals
        for reversal_date in reversals:
            ax.axvline(
                x=reversal_date,
                color="blue",
                linestyle="--",
                linewidth=2,
                alpha=0.7,
                label="Trend Reversal",
            )

        # Add legend for trend reversals (only once)
        if reversals:
            ax.plot(
                [],
                [],
                color="blue",
                linestyle="--",
                linewidth=2,
                label=f"Trend Reversals ({len(reversals)})",
            )

        # Format the main chart
        ax.set_title(
            f"{month_name.title()} Historical Candlestick Chart with Trend Reversals",
            fontsize=16,
            fontweight="bold",
        )
        ax.set_ylabel("Price", fontsize=12)
        ax.set_xlabel("Date", fontsize=12)
        ax.grid(True, alpha=0.3)

        # Format x-axis
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=max(1, len(df) // 10)))
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

        # Add legend
        ax.legend(loc="upper left")

        plt.tight_layout()
        return fig

    def generate_monthly_report(self, month_name, output_filename=None):
        """
        Generate a comprehensive monthly report with candlestick charts
        """
        if output_filename is None:
            output_filename = f"{month_name.lower()}_candlestick_report.pdf"

        # Filter data for the specific month
        monthly_data = self.filter_by_month(month_name)

        if monthly_data.empty:
            print(f"No data found for {month_name}")
            return

        # Group by year for multiple years of data
        years = monthly_data["Date"].dt.year.unique()

        with PdfPages(output_filename) as pdf:
            # Create overview page
            fig, ax = plt.subplots(figsize=(12, 8))
            ax.axis("off")

            # Title
            ax.text(
                0.5,
                0.9,
                f"{month_name.title()} Historical Data Report",
                ha="center",
                va="center",
                fontsize=20,
                fontweight="bold",
            )

            # Summary statistics
            summary_text = f"""
            Total Trading Days: {len(monthly_data)}
            Date Range: {monthly_data['Date'].min().strftime('%d/%m/%Y')} to {monthly_data['Date'].max().strftime('%d/%m/%Y')}
            
            Price Statistics:
            Highest Price: {monthly_data['High'].max():,.2f}
            Lowest Price: {monthly_data['Low'].min():,.2f}
            Average Close: {monthly_data['Close'].mean():,.2f}
            Average Open: {monthly_data['Open'].mean():,.2f}
            
            Trend Analysis:
            """

            # Add trend reversal count
            reversals = self.detect_trend_reversals(monthly_data)
            summary_text += f"""
            Total Trend Reversals: {len(reversals)}
            """

            summary_text += f"""
            
            Years Covered: {', '.join(map(str, sorted(years)))}
            """

            ax.text(
                0.1,
                0.6,
                summary_text,
                ha="left",
                va="top",
                fontsize=12,
                bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"),
            )

            pdf.savefig(fig, bbox_inches="tight")
            plt.close()

            # Create charts for each year
            for year in sorted(years):
                year_data = monthly_data[monthly_data["Date"].dt.year == year]

                if not year_data.empty:
                    fig = self.create_candlestick_chart(
                        year_data, f"{month_name} {year}"
                    )
                    if fig:
                        pdf.savefig(fig, bbox_inches="tight")
                        plt.close()

            # Create combined chart if multiple years
            if len(years) > 1:
                fig = self.create_candlestick_chart(
                    monthly_data, f"{month_name} (All Years)"
                )
                if fig:
                    pdf.savefig(fig, bbox_inches="tight")
                    plt.close()

        print(f"Report generated successfully: {output_filename}")

    def display_data_info(self):
        """
        Display information about the loaded data
        """
        if self.df.empty:
            print("No data loaded")
            return

        print("\n=== DATA SUMMARY ===")
        print(f"Total records: {len(self.df)}")
        print(
            f"Date range: {self.df['Date'].min().strftime('%d/%m/%Y')} to {self.df['Date'].max().strftime('%d/%m/%Y')}"
        )
        print(f"Columns: {list(self.df.columns)}")

        print("\n=== AVAILABLE MONTHS ===")
        months = [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ]

        for i, month in enumerate(months, 1):
            month_data = self.df[self.df["Date"].dt.month == i]
            if not month_data.empty:
                years = sorted(month_data["Date"].dt.year.unique())
                print(
                    f"{month}: {len(month_data)} records across {len(years)} years ({years})"
                )

        print("\n=== SAMPLE DATA ===")
        print(self.df.head())

In [8]:
# Create chart generator with CSV file
# NOTE: this is an absolute, machine-specific path; adjust it before running
# the cell elsewhere.
generator = CandlestickChartGenerator(
    "/Users/mayurgd/Documents/CodingSpace/market_analysis/graph_generator/Nifty 50 Historical Data.csv"
)

# Display data information
generator.display_data_info()

# Generate the report for June (all years in the data) and write it to
# june_candlestick_report.pdf
generator.generate_monthly_report("June", "june_candlestick_report.pdf")

Successfully loaded 5000 records from CSV
Date range: 03/01/2005 to 25/02/2025

=== DATA SUMMARY ===
Total records: 5000
Date range: 03/01/2005 to 25/02/2025
Columns: ['Date', 'Close', 'Open', 'High', 'Low', 'Volume_Raw', 'Change_Percent', 'Volume', 'Change_Percent']

=== AVAILABLE MONTHS ===
January: 445 records across 21 years ([2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025])
February: 415 records across 21 years ([2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025])
March: 412 records across 20 years ([2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024])
April: 378 records across 20 years ([2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024])
May: 426 records across 20 years ([2005, 2006, 2007, 2008