<a href="https://colab.research.google.com/github/mridul-sahu/advance_tax_calculations/blob/main/Stocks_Calculations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
import pandas as pd
import numpy as np
import requests
import io
from typing import Tuple, Dict, Any, List
from datetime import datetime, timedelta

# ==============================================================================
# --- 1. CONFIGURATION & HELPERS ---
# ==============================================================================

def clean_currency(value: Any) -> float:
    """
    Convert a currency-like value to a float.

    Strings have every '$' and ',' character removed before conversion;
    any other value is passed straight to ``float()``.

    Raises:
        ValueError: If the (cleaned) value cannot be parsed as a float.
    """
    if not isinstance(value, str):
        return float(value)
    # Delete both symbols in a single pass over the string.
    without_symbols = value.translate(str.maketrans('', '', '$,'))
    return float(without_symbols)

def get_inr_conversion_rate(transaction_date: datetime, rates: Dict[str, float], warnings: List) -> Tuple[str, float]:
    """
    Looks up the TTBR to apply to a transaction.

    The required date is the last calendar day of the month before the
    transaction's month. If that exact date has no entry in `rates`, the
    search walks backwards one day at a time (7 candidate days in total,
    including the month-end itself) and uses the first date that has one.

    Args:
        transaction_date: Date of the sale or acquisition.
        rates: Mapping of 'YYYY-MM-DD' date strings to rates.
        warnings: List that receives one dict per fallback, describing the
            required date, the date actually used, the rate and the reason.

    Returns:
        A `(rate_date_key, rate)` tuple for the date that was used.

    Raises:
        ValueError: If none of the candidate dates is present in `rates`.
    """
    # Step back one month, then roll forward to that month's last day.
    month_end = (transaction_date - pd.DateOffset(months=1)) + pd.offsets.MonthEnd(0)
    required_key = month_end.strftime('%Y-%m-%d')

    for days_back in range(7):  # Covers long weekends / holidays
        candidate_key = (month_end - timedelta(days=days_back)).strftime('%Y-%m-%d')
        if candidate_key not in rates:
            continue

        rate = rates[candidate_key]
        if days_back > 0:
            # The exact month-end was unavailable, so record the fallback.
            warnings.append({
                'Required Date': required_key,
                'Fallback Date Used': candidate_key,
                'Rate': rate,
                'Reason': 'Exact month-end rate not available (likely holiday/weekend).'
            })
        return candidate_key, rate

    # Nothing usable in the look-back window: fail loudly.
    raise ValueError(f"CRITICAL: Missing TTBR rate for required date: '{required_key}' or any recent prior day. "
                     f"Please ensure the source CSV contains a valid rate for this period.")

# ==============================================================================
# --- 2. DATA LOADING AND PREPARATION ---
# ==============================================================================

def download_and_load_ttbr_rates(url: str) -> Dict[str, float]:
    """
    Downloads the TTBR rates CSV from the given URL and loads it into a
    dictionary for fast lookups.

    A regular GitHub page URL is rewritten to its raw-content form before
    the request is made. Rows whose 'TT BUY' value is missing, non-numeric
    or not positive are dropped, so such a row can never be picked up as a
    conversion rate; lookups fall through to an earlier date instead.

    Args:
        url: Location of the CSV. Must expose 'DATE' and 'TT BUY' columns.

    Returns:
        Mapping of 'YYYY-MM-DD' date strings to the 'TT BUY' rate. If a date
        occurs more than once, the last row for that date wins.

    Raises:
        ConnectionError: If the download fails or times out.
        KeyError: If the CSV lacks the expected columns.
    """
    print(f"Downloading TTBR rates from {url}...")
    try:
        raw_url = url.replace('github.com', 'raw.githubusercontent.com').replace('/blob/', '/')
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(raw_url, timeout=30)
        response.raise_for_status()

        rates_df = pd.read_csv(io.StringIO(response.text))

        rates_df['DATE'] = pd.to_datetime(rates_df['DATE'])

        # A NaN or non-positive rate would silently zero out or corrupt every
        # INR amount computed from it, so keep only usable rows.
        rates_df['TT BUY'] = pd.to_numeric(rates_df['TT BUY'], errors='coerce')
        rates_df = rates_df[rates_df['TT BUY'] > 0]

        print("✅ TTBR rates downloaded successfully.")
        return pd.Series(rates_df['TT BUY'].values, index=rates_df['DATE'].dt.strftime('%Y-%m-%d')).to_dict()
    except requests.exceptions.RequestException as e:
        raise ConnectionError(f"Failed to download the exchange rate file. Error: {e}") from e
    except KeyError as e:
        # Chain the original error so the traceback shows the missing column.
        raise KeyError(f"The exchange rate CSV does not have the expected columns ('DATE', 'TT BUY'). Error: {e}") from e

def load_and_clean_data(sales_file: str, acq_file: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Reads the sales and acquisition CSVs and returns them cleaned and sorted.

    The sales file is read skipping its first 2 rows and the acquisition file
    skipping its first row; the last row of each is skipped as well. Fully
    empty rows are dropped, the columns are renamed positionally to fixed
    names, currency columns are converted with `clean_currency`, share counts
    are converted to numbers and the date columns are parsed.

    Args:
        sales_file: Path to the sales CSV.
        acq_file: Path to the acquisitions CSV.

    Returns:
        `(sales_df, acq_df)`, sorted by 'Sale_Date' and 'Vest_Date'
        respectively, each with a fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If either CSV cannot be found.
    """
    try:
        sales_df = pd.read_csv(sales_file, skiprows=2, skipfooter=1, engine='python')
        acq_df = pd.read_csv(acq_file, skiprows=1, skipfooter=1, engine='python')
    except FileNotFoundError as e:
        raise FileNotFoundError(f"File not found: {e}. Ensure CSVs are in the correct path.") from e

    for frame in (sales_df, acq_df):
        frame.dropna(how='all', inplace=True)

    # Positional rename: the column count must match the list length.
    sales_df.columns = [
        'Sale_Date', 'Sale_Price', 'Shares_Sold', 'Symbol',
        'Gross_Proceeds', 'Acquisition_Date_in_Report',
    ]
    acq_df.columns = [
        'Vest_Date', 'Order_Number', 'Plan', 'Type', 'Status',
        'Acquisition_Price', 'Quantity', 'Net_Cash_Proceeds',
        'Shares_Acquired', 'Tax_Payment_Method',
    ]

    # Currency columns, grouped by the frame that owns them.
    currency_columns = (
        (sales_df, ('Sale_Price', 'Gross_Proceeds')),
        (acq_df, ('Acquisition_Price', 'Net_Cash_Proceeds')),
    )
    for frame, column_names in currency_columns:
        for column_name in column_names:
            frame[column_name] = frame[column_name].apply(clean_currency)

    sales_df['Shares_Sold'] = pd.to_numeric(sales_df['Shares_Sold'])
    acq_df['Shares_Acquired'] = pd.to_numeric(acq_df['Shares_Acquired'])
    sales_df['Sale_Date'] = pd.to_datetime(sales_df['Sale_Date'])
    acq_df['Vest_Date'] = pd.to_datetime(acq_df['Vest_Date'])

    sorted_sales = sales_df.sort_values(by='Sale_Date').reset_index(drop=True)
    sorted_acquisitions = acq_df.sort_values(by='Vest_Date').reset_index(drop=True)
    return sorted_sales, sorted_acquisitions

# ==============================================================================
# --- 3. CORE CALCULATION LOGIC ---
# ==============================================================================

def perform_fifo_matching(sales_df: pd.DataFrame, acq_df: pd.DataFrame, ttbr_rates: Dict) -> Tuple[pd.DataFrame, pd.DataFrame, Dict, List]:
    """
    Matches sales to acquisitions using FIFO and calculates profit/loss in
    INR using date-specific TTBRs.

    Each sale is consumed against the oldest acquisition lots that still have
    shares and were acquired on or before the sale date. Every (sale, lot)
    pairing produces one row in the summary.

    Args:
        sales_df: Sales with 'Sale_Date', 'Sale_Price' and 'Shares_Sold'.
            Rows are processed in the order given, so pass them sorted by
            date for a true FIFO result.
        acq_df: Acquisitions with 'Vest_Date', 'Acquisition_Price' and
            'Shares_Acquired'. Sorted by date internally.
        ttbr_rates: Mapping of 'YYYY-MM-DD' strings to TTBR rates.

    Returns:
        A tuple `(summary_df, acquisitions_info, used_rates, warnings)`:
        - summary_df: one row per matched (sale, lot) pair. It always carries
          the full set of summary columns, even when nothing was matched.
        - acquisitions_info: per-lot status including 'Remaining_Shares' and
          'Shares_Sold_from_Lot'.
        - used_rates: the rate-date -> rate pairs that were actually applied.
        - warnings: fallback notices collected from the rate lookups (may
          contain duplicates).

    Raises:
        ValueError: Propagated from `get_inr_conversion_rate` when no rate is
            available for a matched transaction.
    """
    # Declared up front so an empty result still has the expected schema;
    # a column-less frame would make every downstream column lookup fail.
    summary_columns = [
        'Sale_Date', 'Shares_Sold',
        'Sale_Price_USD', 'Acquisition_Price_USD',
        'Sale_TTBR', 'Acquisition_TTBR',
        'Sale_Proceeds_INR', 'Cost_of_Acquisition_INR',
        'Profit/Loss (INR)',
        'Holding Duration (Days)',
        'Gain_Type',
    ]

    acquisitions_info = acq_df[['Vest_Date', 'Acquisition_Price', 'Shares_Acquired']].copy()
    acquisitions_info.rename(columns={'Vest_Date': 'Acquisition_Date'}, inplace=True)
    # The loop below addresses lots by integer label and relies on date
    # order, so enforce both here instead of trusting the caller. A stable
    # sort leaves already-sorted input untouched.
    acquisitions_info = acquisitions_info.sort_values('Acquisition_Date', kind='mergesort').reset_index(drop=True)
    # Float so that fractional share counts can be subtracted without an
    # integer-column dtype conflict.
    acquisitions_info['Remaining_Shares'] = acquisitions_info['Shares_Acquired'].astype(float)

    results_list = []
    used_rates = {}
    warnings = []
    # Index of the first lot that may still hold shares; earlier lots are exhausted.
    current_acq_index = 0

    for sale in sales_df.itertuples():
        shares_to_match = sale.Shares_Sold
        for acq_index in range(current_acq_index, len(acquisitions_info)):
            if shares_to_match <= 1e-4:
                break
            # A lot acquired after the sale cannot have funded it.
            if acquisitions_info.loc[acq_index, 'Acquisition_Date'] > sale.Sale_Date:
                continue

            shares_from_lot = min(shares_to_match, acquisitions_info.loc[acq_index, 'Remaining_Shares'])
            if shares_from_lot > 0:
                acq = acquisitions_info.loc[acq_index]

                sale_rate_key, sale_rate = get_inr_conversion_rate(sale.Sale_Date, ttbr_rates, warnings)
                acq_rate_key, acq_rate = get_inr_conversion_rate(acq.Acquisition_Date, ttbr_rates, warnings)
                used_rates[acq_rate_key] = acq_rate
                used_rates[sale_rate_key] = sale_rate

                cost_of_acquisition_inr = (acq.Acquisition_Price * shares_from_lot) * acq_rate
                sale_proceeds_inr = (sale.Sale_Price * shares_from_lot) * sale_rate
                profit_loss_inr = sale_proceeds_inr - cost_of_acquisition_inr

                holding_duration = (sale.Sale_Date - acq.Acquisition_Date).days

                results_list.append({
                    'Sale_Date': sale.Sale_Date, 'Shares_Sold': shares_from_lot,
                    'Sale_Price_USD': sale.Sale_Price, 'Acquisition_Price_USD': acq.Acquisition_Price,
                    'Sale_TTBR': sale_rate, 'Acquisition_TTBR': acq_rate,
                    'Sale_Proceeds_INR': sale_proceeds_inr, 'Cost_of_Acquisition_INR': cost_of_acquisition_inr,
                    'Profit/Loss (INR)': profit_loss_inr,
                    'Holding Duration (Days)': holding_duration,
                    # NOTE(review): 730 days presumably stands in for a
                    # 24-month holding threshold - confirm leap-year handling.
                    'Gain_Type': 'LTCG' if holding_duration > 730 else 'STCG'
                })

                acquisitions_info.loc[acq_index, 'Remaining_Shares'] -= shares_from_lot
                shares_to_match -= shares_from_lot

            # Lot is used up: the next sale can start from the following lot.
            if acquisitions_info.loc[acq_index, 'Remaining_Shares'] <= 1e-4:
                current_acq_index += 1

    summary_df = pd.DataFrame(results_list, columns=summary_columns)
    acquisitions_info['Shares_Sold_from_Lot'] = acquisitions_info['Shares_Acquired'] - acquisitions_info['Remaining_Shares']
    return summary_df, acquisitions_info, used_rates, warnings


def calculate_tax_liability(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Computes capital-gains tax from a matched profit/loss summary.

    Gains and losses are totalled separately for the 'STCG' and 'LTCG' rows.
    Long-term losses reduce long-term gains only; short-term losses reduce
    short-term gains first and any remainder then reduces long-term gains.
    Base tax is 30% of net short-term plus 12.5% of net long-term gains, a
    15% surcharge is applied to the base tax, and a 4% cess is applied to
    base tax plus surcharge.

    Args:
        df: Frame with 'Gain_Type' and 'Profit/Loss (INR)' columns.

    Returns:
        Dict with the gross figures ('stcg', 'stcl', 'ltcg', 'ltcl'), the net
        taxable amounts and the tax components including
        'total_tax_liability'.
    """
    stcg_tax_rate = 0.30
    ltcg_tax_rate = 0.125
    surcharge_rate = 0.15
    cess_rate = 0.04

    gain_type = df['Gain_Type']
    pnl = df['Profit/Loss (INR)']
    short_term = gain_type == 'STCG'
    long_term = gain_type == 'LTCG'
    is_gain = pnl > 0
    is_loss = pnl < 0

    # Gross totals; losses are reported as positive magnitudes.
    stcg = pnl[short_term & is_gain].sum()
    stcl = abs(pnl[short_term & is_loss].sum())
    ltcg = pnl[long_term & is_gain].sum()
    ltcl = abs(pnl[long_term & is_loss].sum())

    # Set-off: like against like first, then leftover short-term loss
    # against whatever long-term gain remains.
    net_taxable_stcg = max(0, stcg - stcl)
    leftover_short_term_loss = max(0, stcl - stcg)
    net_taxable_ltcg = max(0, max(0, ltcg - ltcl) - leftover_short_term_loss)

    total_base_tax = (net_taxable_stcg * stcg_tax_rate) + (net_taxable_ltcg * ltcg_tax_rate)
    total_surcharge = total_base_tax * surcharge_rate
    total_cess = (total_base_tax + total_surcharge) * cess_rate
    total_tax_liability = total_base_tax + total_surcharge + total_cess

    return {
        "stcg": stcg,
        "stcl": stcl,
        "ltcg": ltcg,
        "ltcl": ltcl,
        "net_taxable_stcg": net_taxable_stcg,
        "net_taxable_ltcg": net_taxable_ltcg,
        "total_base_tax": total_base_tax,
        "total_surcharge": total_surcharge,
        "total_cess": total_cess,
        "total_tax_liability": total_tax_liability,
    }

def calculate_advance_tax_schedule(summary_df: pd.DataFrame) -> pd.DataFrame:
    """
    Builds the four-instalment advance tax schedule for the current financial year.

    The financial year is taken from today's date (April to March). For each
    instalment the tax liability on all sales up to that instalment's cut-off
    is computed, and the cumulative share of it that must be paid by then
    (15%, 45%, 75%, 100%) is compared with what earlier instalments already
    covered. An instalment is never negative: if the cumulative liability
    falls during the year, the excess already paid is carried forward and
    reduces later instalments rather than being counted as a refund.

    Args:
        summary_df: Matched profit/loss rows with 'Sale_Date', 'Gain_Type'
            and 'Profit/Loss (INR)' columns.

    Returns:
        DataFrame with 'Installment Due Date' and 'Amount to Pay (INR)'.
    """
    # Sample the clock once so the year and month always agree.
    today = pd.Timestamp.now()
    fy_start_year = today.year if today.month >= 4 else today.year - 1

    # (sales cut-off, payment due date, cumulative share payable by then).
    # The last cut-off is 31 March while its payment is due on 15 March.
    # NOTE(review): sales are filtered only by an upper bound, so rows dated
    # before 1 April of the financial year are included too - confirm the
    # input is limited to the current financial year.
    installments = [
        (pd.Timestamp(year=fy_start_year, month=6, day=15),
         pd.Timestamp(year=fy_start_year, month=6, day=15).date(), 0.15),
        (pd.Timestamp(year=fy_start_year, month=9, day=15),
         pd.Timestamp(year=fy_start_year, month=9, day=15).date(), 0.45),
        (pd.Timestamp(year=fy_start_year, month=12, day=15),
         pd.Timestamp(year=fy_start_year, month=12, day=15).date(), 0.75),
        (pd.Timestamp(year=fy_start_year + 1, month=3, day=31),
         pd.Timestamp(year=fy_start_year + 1, month=3, day=15).date(), 1.00),
    ]

    due_dates = []
    payments = []
    paid_so_far = 0.0
    for cutoff, due_date, cumulative_share in installments:
        sales_to_date = summary_df[summary_df['Sale_Date'] <= cutoff]
        cumulative_tax = calculate_tax_liability(sales_to_date)['total_tax_liability']
        # Clamp before accumulating: money already paid cannot be "un-paid",
        # so a shortfall must not lower the running total.
        payment = max(0.0, (cumulative_tax * cumulative_share) - paid_so_far)
        paid_so_far += payment
        due_dates.append(due_date)
        payments.append(payment)

    schedule = pd.DataFrame({'Installment Due Date': due_dates, 'Amount to Pay (INR)': payments})
    schedule['Amount to Pay (INR)'] = schedule['Amount to Pay (INR)'].clip(lower=0)
    return schedule

# ==============================================================================
# --- 4. VALIDATION ---
# ==============================================================================

def perform_validations(sales_df: pd.DataFrame, acq_df: pd.DataFrame, summary_df: pd.DataFrame, acq_status_df: pd.DataFrame, tax_data: Dict) -> Dict:
    """
    Runs consistency checks on the inputs and results, printing each outcome.

    Checks performed:
    - Sanity: neither input frame is empty and neither share-count column
      contains negative values.
    - Share match: total 'Shares_Sold' in the input equals the total in the summary.
    - Overselling: no lot has 'Remaining_Shares' below -1e-4.
    - Tax integrity: base tax + surcharge + cess equals the total liability.

    Returns:
        Dict mapping 'Sanity Checks', 'Share Match', 'Overselling' and
        'Tax Integrity' to 'Pass' or 'Fail'.
    """
    def verdict(ok) -> str:
        return 'Pass' if ok else 'Fail'

    def badge(ok) -> str:
        return '✅ Pass' if ok else '❌ Fail'

    print("\n--- Running Final Calculation Validations ---")

    # Predicates are wrapped so each is evaluated only when its turn comes.
    sanity_checks = (
        (lambda: sales_df.empty,
         "🟡 Sanity Check: Capital Gains Report is empty."),
        (lambda: acq_df.empty,
         "❌ Sanity Check Fail: Releases Report is empty, cannot process sales."),
        (lambda: (sales_df['Shares_Sold'] < 0).any(),
         "❌ Sanity Check Fail: Negative values found in 'Shares_Sold'."),
        (lambda: (acq_df['Shares_Acquired'] < 0).any(),
         "❌ Sanity Check Fail: Negative values in 'Shares_Acquired'."),
    )
    sanity_errors = False
    for has_problem, message in sanity_checks:
        if has_problem():
            print(message)
            sanity_errors = True

    shares_in_input = sales_df['Shares_Sold'].sum()
    shares_in_summary = summary_df['Shares_Sold'].sum()
    share_match = np.isclose(shares_in_input, shares_in_summary)
    print(f"Share Count Match (Original vs Summary): {badge(share_match)}")

    oversold = (acq_status_df['Remaining_Shares'] < -1e-4).any()
    print(f"Overselling Check (No negative shares): {badge(not oversold)}")

    component_total = tax_data['total_base_tax'] + tax_data['total_surcharge'] + tax_data['total_cess']
    tax_check = np.isclose(component_total, tax_data['total_tax_liability'])
    print(f"Tax Calculation Integrity (Components Sum to Total): {badge(tax_check)}")
    print("-----------------------------------------")

    return {
        "Sanity Checks": verdict(not sanity_errors),
        "Share Match": verdict(share_match),
        "Overselling": verdict(not oversold),
        "Tax Integrity": verdict(tax_check),
    }

# ==============================================================================
# --- 5. EXCEL REPORT GENERATION ---
# ==============================================================================

def generate_excel_report(summary_df, acq_status_df, tax_data, schedule_df, used_rates_df, warnings_df, sales_df, acq_df, validation_results, output_file):
    """
    Writes all the calculated dataframes to a multi-sheet Excel file.

    Sheets are written in this order: 'Profit Loss Summary',
    'Tax Calculation Explained', 'TTBR Warnings' (only when `warnings_df` is
    not empty), 'Tax Calculation', 'TTBR Rates Used', 'Original Sales Report',
    'Original Releases Report' and 'Acquisition Lot Status'.

    The 'Tax Calculation' sheet stacks three titled tables (set-off summary,
    final liability, advance tax schedule) followed by notes and the
    validation results. Prints a confirmation once the file is written.
    """
    explanation_rows = [
        ("How Your Tax Is Calculated: A Step-by-Step Guide", ""),
        ("", ""),
        ("Step 1: Convert all USD Transactions to INR", "The most critical step for accuracy is converting all USD amounts to INR using the official method prescribed by Indian Tax Law."),
        ("   - The Rule (Rule 115):", "The USD amount must be converted using the Telegraphic Transfer Buying Rate (TTBR) as published by the State Bank of India (SBI)."),
        ("   - The Date:", "The specific rate to be used is the one from the *last day of the month immediately preceding the month* of the transaction. For a sale on 10-Jul-2025, the rate for 30-Jun-2025 is used."),
        ("   - In this report:", "The 'Profit Loss Summary' sheet shows the exact TTBR used for every acquisition and sale, ensuring full transparency."),
        ("", ""),
        ("Step 2: Calculate Gross Gains and Losses", "The script calculates the total profit (gains) and loss for both Short-Term and Long-Term transactions based on the accurately converted INR values."),
        ("Step 3: Apply Tax Set-Off Rules", "Losses are used to offset gains in a specific order to determine the final taxable income."),
        ("   - Long-Term Capital Loss (LTCL):", "Can ONLY be set off against Long-Term Capital Gains (LTCG)."),
        ("   - Short-Term Capital Loss (STCL):", "Is first set off against Short-Term Capital Gains (STCG). If any STCL remains, it can then be set off against the remaining LTCG."),
        ("Step 4: Calculate Base Tax", "The final tax is calculated on the 'Net Taxable Gains' after the set-off rules have been applied."),
        ("   - Tax on Net STCG:", "Calculated at 30% (assumed highest income tax slab rate)."),
        ("   - Tax on Net LTCG:", "Calculated at 12.5%."),
        ("Step 5: Add Surcharge & Cess", "A 15% surcharge (assumed) and a 4% cess are applied to the base tax to arrive at the final liability."),
        ("Result:", "This gives the 'TOTAL TAX LIABILITY', which is the amount used to calculate your advance tax installments.")
    ]

    set_off_summary = pd.DataFrame({
        'Category': ['Short-Term', 'Long-Term'],
        'Gross Gains (INR)': [tax_data['stcg'], tax_data['ltcg']],
        'Gross Losses (INR)': [tax_data['stcl'], tax_data['ltcl']],
        'Net Taxable Gains (INR)': [tax_data['net_taxable_stcg'], tax_data['net_taxable_ltcg']],
    })
    tax_summary = pd.DataFrame({
        'Description': [
            'Total Base Tax',
            'Total Surcharge @ 15%',
            'Total Health & Education Cess @ 4%',
            'TOTAL TAX LIABILITY',
        ],
        'Amount (INR)': [
            tax_data['total_base_tax'],
            tax_data['total_surcharge'],
            tax_data['total_cess'],
            tax_data['total_tax_liability'],
        ],
    })
    notes_df = pd.DataFrame([
        ("NOTES:", ""),
        ("1. Exchange Rate:", "Date-specific TTBRs downloaded and used as per Indian Tax Law."),
        ("", "Please ensure the source GitHub file is up-to-date."),
        ("2. Surcharge:", "A 15% rate is assumed. Adjust if your income bracket is different."),
        ("", ""),
        ("VALIDATION SUMMARY:", ""),
        ("Input Sanity Checks:", f"{validation_results['Sanity Checks']}"),
        ("Share Count Match:", f"{validation_results['Share Match']}"),
        ("Overselling Check:", f"{validation_results['Overselling']}"),
        ("Tax Integrity Check:", f"{validation_results['Tax Integrity']}"),
    ])

    tax_sheet = 'Tax Calculation'
    # Title + table pairs stacked top-to-bottom on the tax sheet.
    tax_sheet_sections = [
        ('TAX SET-OFF CALCULATION', set_off_summary),
        ('FINAL TAX LIABILITY', tax_summary),
        ('ADVANCE TAX PAYMENT SCHEDULE (Cumulative Method)', schedule_df),
    ]

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        summary_df.to_excel(writer, sheet_name='Profit Loss Summary', index=False)
        pd.DataFrame(explanation_rows).to_excel(writer, sheet_name='Tax Calculation Explained', index=False, header=False)
        if not warnings_df.empty:
            warnings_df.to_excel(writer, sheet_name='TTBR Warnings', index=False)

        next_row = 0
        for title, table in tax_sheet_sections:
            pd.DataFrame([[title]]).to_excel(writer, sheet_name=tax_sheet, index=False, header=False, startrow=next_row)
            next_row += 1
            table.to_excel(writer, sheet_name=tax_sheet, index=False, startrow=next_row)
            # Advance by the table's row count plus 2 before the next section.
            next_row += len(table) + 2
        notes_df.to_excel(writer, sheet_name=tax_sheet, index=False, header=False, startrow=next_row)

        used_rates_df.to_excel(writer, sheet_name='TTBR Rates Used', index=False)
        sales_df.to_excel(writer, sheet_name='Original Sales Report', index=False)
        acq_df.to_excel(writer, sheet_name='Original Releases Report', index=False)

        lot_status = acq_status_df[['Acquisition_Date', 'Shares_Acquired', 'Shares_Sold_from_Lot', 'Remaining_Shares']]
        lot_status.to_excel(writer, sheet_name='Acquisition Lot Status', index=False)

    print(f"\n✅ Success! The '{output_file}' has been created with all validations.")

# ==============================================================================
# --- 6. MAIN EXECUTION ---
# ==============================================================================

def main():
    """
    Runs the whole pipeline: load inputs, match sales to lots, compute tax
    and the advance tax schedule, validate, and write the Excel report.

    Any exception raised along the way is caught here and reported on
    stdout instead of propagating.
    """
    capital_gains_file = 'Capital Gains Report.csv'
    releases_file = 'Releases Report.csv'
    ttbr_rates_url = 'https://github.com/sahilgupta/sbi-fx-ratekeeper/blob/main/csv_files/SBI_REFERENCE_RATES_USD.csv'
    output_excel_file = 'capital_gains_summary_final.xlsx'

    try:
        # --- Inputs ---
        ttbr_rates = download_and_load_ttbr_rates(ttbr_rates_url)
        sales_df, acq_df = load_and_clean_data(capital_gains_file, releases_file)

        # --- Calculations ---
        summary_df, acq_status_df, used_rates, warnings = perform_fifo_matching(sales_df, acq_df, ttbr_rates)

        rate_rows = list(used_rates.items())
        used_rates_df = pd.DataFrame(rate_rows, columns=['Date', 'TTBR']).sort_values('Date')

        # The same fallback can be logged many times; keep one row each.
        warnings_df = pd.DataFrame(warnings).drop_duplicates().reset_index(drop=True)

        tax_data = calculate_tax_liability(summary_df)
        advance_tax_schedule = calculate_advance_tax_schedule(summary_df)

        # --- Validation (prints its own report) ---
        validation_results = perform_validations(sales_df, acq_df, summary_df, acq_status_df, tax_data)

        # --- Output ---
        generate_excel_report(
            summary_df,
            acq_status_df,
            tax_data,
            advance_tax_schedule,
            used_rates_df,
            warnings_df,
            sales_df,
            acq_df,
            validation_results,
            output_excel_file,
        )
    except Exception as e:
        # Top-level boundary: report the failure rather than raising.
        print(f"\n❌ An unexpected error occurred: {e}")
        print("Please check your input files and the script configuration.")

In [35]:
# Notebook cell: run the full pipeline defined in main(); it prints progress
# and, on success, the name of the Excel file it wrote.
main()

Downloading TTBR rates from https://github.com/sahilgupta/sbi-fx-ratekeeper/blob/main/csv_files/SBI_REFERENCE_RATES_USD.csv...
✅ TTBR rates downloaded successfully.

--- Running Final Calculation Validations ---
Share Count Match (Original vs Summary): ✅ Pass
Overselling Check (No negative shares): ✅ Pass
Tax Calculation Integrity (Components Sum to Total): ✅ Pass
-----------------------------------------

✅ Success! The 'capital_gains_summary_final.xlsx' has been created with all validations.
