In [None]:
import pandas as pd
import xlrd
from datetime import datetime

# Load the broker's capital-gain statement. Row index 3 is used as the
# provisional header; the real column titles sit in the first data row below it.
file_path = r"3621128_CapitalGainLoss_Details_for_Equity.xls"
df = pd.read_excel(file_path, header=3)

# Promote the first data row to column names, then discard that row.
df.columns = df.iloc[0]
df = df.drop(df.index[0]).reset_index(drop=True)

# Parse the sell date BEFORE filtering. Rows whose value is missing or is not
# a date (summary/footer text) become NaT and are dropped here; previously a
# non-null but unparseable value slipped through and was silently classified
# as "On or After" because `NaT < deadline` evaluates to False.
sell_dates = pd.to_datetime(df['SELL TRANSACTION DATE'], errors='coerce')

# `.copy()` makes the filtered frame independent of the original, so the
# column assignments below do not raise SettingWithCopyWarning.
df = df[sell_dates.notna()].copy()
df['SELL TRANSACTION DATE'] = sell_dates  # aligned on index to the kept rows

# Cut-off date separating the two reporting periods.
deadline = datetime(2024, 7, 23)

# Classify each sale by period with a vectorized comparison; every remaining
# date is valid, so the boolean mask has no ambiguous entries.
sold_before_deadline = df['SELL TRANSACTION DATE'] < deadline
df['Period'] = sold_before_deadline.map({
    True: 'Before 23rd July 2024',
    False: 'On or After 23rd July 2024',
})

# Define helper function to compute ITR2 fields
def compute_itr2_fields(sub_df):
    """Return a copy of *sub_df* with the derived ITR2 capital-gain columns added.

    The input frame is left untouched: the function works on an explicit copy,
    which avoids pandas' SettingWithCopyWarning when a filtered slice is passed
    in and keeps derived columns from leaking into the caller's DataFrame.

    Args:
        sub_df: DataFrame containing at least the columns
            'SELL TRANSACTION VALUE' and 'BUY TRANSACTION VALUE'.

    Returns:
        A new DataFrame with all original columns plus:
        'Full Value of Consideration', 'Cost of Acquisition', 'Expenses',
        'Net Gain', 'Deduction under Section 54' and 'Final Gain'.
        Values that cannot be converted to numbers become NaN and propagate
        into 'Net Gain' and 'Final Gain'.
    """
    result = sub_df.copy()

    # Coerce to numeric so stray text cells become NaN instead of raising.
    result['Full Value of Consideration'] = pd.to_numeric(
        result['SELL TRANSACTION VALUE'], errors='coerce'
    )
    result['Cost of Acquisition'] = pd.to_numeric(
        result['BUY TRANSACTION VALUE'], errors='coerce'
    )

    # Placeholder: replace with the actual expenses column if one is available.
    result['Expenses'] = 0
    result['Net Gain'] = (
        result['Full Value of Consideration']
        - result['Cost of Acquisition']
        - result['Expenses']
    )

    # Placeholder: the original author notes this is usually zero for equity.
    result['Deduction under Section 54'] = 0
    result['Final Gain'] = result['Net Gain'] - result['Deduction under Section 54']
    return result

# Split into short-term and long-term transactions: a row belongs to a bucket
# when its gain/loss figure for that bucket is non-zero. `.copy()` detaches
# each subset from `df`, so adding derived columns to it cannot trigger
# SettingWithCopyWarning.
st_df = df[df['CAPITAL GAIN/LOSS SHORT TERM'].astype(float) != 0].copy()
lt_df = df[df['CAPITAL GAIN/LOSS LONG TERM'].astype(float) != 0].copy()

# Add the derived ITR2 columns to each subset.
st_df = compute_itr2_fields(st_df)
lt_df = compute_itr2_fields(lt_df)

# Columns totalled per period; defined once so both summaries stay in sync.
itr2_columns = [
    'Full Value of Consideration',
    'Cost of Acquisition',
    'Expenses',
    'Net Gain',
    'Deduction under Section 54',
    'Final Gain',
]

# Per-period totals for each bucket.
st_summary = st_df.groupby('Period')[itr2_columns].sum().reset_index()
lt_summary = lt_df.groupby('Period')[itr2_columns].sum().reset_index()

# Prefix the totals so the two buckets can sit side by side after the merge.
st_summary.columns = ['Period', 'STCG_Consideration', 'STCG_Cost', 'STCG_Expenses', 'STCG_Net', 'STCG_Deduction', 'STCG_Final']
lt_summary.columns = ['Period', 'LTCG_Consideration', 'LTCG_Cost', 'LTCG_Expenses', 'LTCG_Net', 'LTCG_Deduction', 'LTCG_Final']

# Outer merge keeps a period that appears in only one bucket; the missing
# side's totals are filled with 0.
itr2_summary = pd.merge(st_summary, lt_summary, on='Period', how='outer').fillna(0)

# Display final summary
print("\n📄 ITR2 Summary (Section 111A & 112A):")
print(itr2_summary)

# Optional: Save to Excel
# itr2_summary.to_excel("ITR2_Capital_Gains_Summary.xlsx", index=False)


📄 ITR2 Summary (Section 111A & 112A):
                       Period  STCG_Consideration   STCG_Cost  STCG_Expenses  \
0       Before 23rd July 2024          8004125.18  7917342.03              0   
1  On or After 23rd July 2024          2711814.58  2964927.73              0   

    STCG_Net  STCG_Deduction  STCG_Final  LTCG_Consideration  LTCG_Cost  \
0   86783.15               0    86783.15            43592.67   28677.92   
1 -253113.15               0  -253113.15                0.00       0.00   

   LTCG_Expenses  LTCG_Net  LTCG_Deduction  LTCG_Final  
0            0.0  14914.75             0.0    14914.75  
1            0.0      0.00             0.0        0.00  


  df['SELL TRANSACTION DATE'] = pd.to_datetime(df['SELL TRANSACTION DATE'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['Full Value of Consideration'] = pd.to_numeric(sub_df['SELL TRANSACTION VALUE'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['Cost of Acquisition'] = pd.to_numeric(sub_df['BUY TRANSACTION VALUE'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas