In [4]:
#4Company_combos_Quantity

import pandas as pd
from itertools import combinations

# ==========================================================
#                      USER INPUTS
# ==========================================================

input_file = input("\nEnter input Excel file path: ").strip()
output_file = input("Enter output Excel file path: ").strip()

def get_float(prompt):
    while True:
        try:
            return float(input(prompt).strip())
        except:
            print("Invalid value! Please enter a numeric value.")

def get_int(prompt):
    while True:
        try:
            return int(input(prompt).strip())
        except:
            print("Invalid value! Please enter an integer.")

INVESTMENT = get_float("\nEnter investment amount per company (example: 50000): ")
COMBO_SIZE = get_int("Enter number of companies per combination (example: 4): ")
OBSERVATION_DAYS = get_int("Enter number of observation days (example: 45): ")
MONTH_ROLLING = get_int("Enter number of months to skip before starting: ")

# Rolling type (year/month/week/day)
while True:
    ROLL_TYPE = input("Enter rolling type (year/month/week/day): ").strip().lower()
    if ROLL_TYPE in ["year", "month", "week", "day"]:
        break
    print("Invalid type! Enter year, month, week, or day.")

INCREMENT = get_int("Enter rolling increment (example: 1): ")

# ==========================================================
#                     LOAD DATA
# ==========================================================

print("\nLoading file...")
df = pd.read_excel(input_file)
df.columns = df.columns.str.strip()

# Detect date column
date_col_candidates = [c for c in df.columns if "date" in c.lower()]
if not date_col_candidates:
    raise ValueError("DATE column not found!")

DATE_COL = date_col_candidates[0]
df[DATE_COL] = pd.to_datetime(df[DATE_COL], errors="coerce", dayfirst=True)
df = df.dropna(subset=[DATE_COL]).sort_values(DATE_COL)

# Detect numeric company columns
company_names = [col for col in df.columns if col != DATE_COL and pd.api.types.is_numeric_dtype(df[col])]
if not company_names:
    raise ValueError("No numeric company columns found!")

print(f"\nDetected {len(company_names)} companies")

# ==========================================================
#     START DATE AFTER SKIPPING FIRST X MONTHS
# ==========================================================

start_date = df[DATE_COL].min() + pd.DateOffset(months=MONTH_ROLLING)
current = start_date.replace(day=1)

print(f"\nDataset start: {df[DATE_COL].min().date()}")
print(f"Processing begins after {MONTH_ROLLING} months: {current.date()}")
print(f"Rolling type: {ROLL_TYPE} | Increment: {INCREMENT}")

# ==========================================================
#     FUNCTION: ROLLING STEP CALCULATION
# ==========================================================

def get_next_date(current):
    if ROLL_TYPE == "year":
        return current + pd.DateOffset(years=INCREMENT)
    elif ROLL_TYPE == "month":
        return current + pd.DateOffset(months=INCREMENT)
    elif ROLL_TYPE == "week":
        return current + pd.DateOffset(weeks=INCREMENT)
    elif ROLL_TYPE == "day":
        return current + pd.DateOffset(days=INCREMENT)

# ==========================================================
#             WRITE MULTIPLE SHEETS 
# ==========================================================

with pd.ExcelWriter(output_file, engine='openpyxl') as writer:

    last_date = df[DATE_COL].max()
    sheet_counter = 1  

    while current <= last_date:

        observe_start = current
        observe_end = current + pd.Timedelta(days=OBSERVATION_DAYS)

        data = df[(df[DATE_COL] >= observe_start) & (df[DATE_COL] <= observe_end)]

        if data.empty:
            current = get_next_date(current)
            continue

        selected_date = data[DATE_COL].min()
        row = data[data[DATE_COL] == selected_date].iloc[0]

        print(f"\nSheet: Week{sheet_counter} | Using date: {selected_date.date()}")

        combos_output = [
            [
                ",".join(combo),
                ",".join(
                    "NA" if pd.isna(row[c]) or row[c] == 0 else f"{INVESTMENT / row[c]:.2f}"
                    for c in combo
                ),
                selected_date
            ]
            for combo in combinations(company_names, COMBO_SIZE)
        ]

        out_df = pd.DataFrame(combos_output, columns=["Combo", "Quantities", "Calculation_Date"])

        sheet_name = f"Week{sheet_counter}"
        out_df.to_excel(writer, sheet_name=sheet_name, index=False)

        sheet_counter += 1
        current = get_next_date(current)

print("\nPROCESS COMPLETED SUCCESSFULLY!")
print("Saved at:", output_file)



Enter input Excel file path: C:\Users\Swarupa\Downloads\NIFTY50_CLOSE_MASTER.xlsx
Enter output Excel file path: D:\feb24_to_jan26\Combo_quantity.xlsx

Enter investment amount per company (example: 50000): 50000
Enter number of companies per combination (example: 4): 4
Enter number of observation days (example: 45): 45
Enter number of months to skip before starting: 3
Enter rolling type (year/month/week/day): week
Enter rolling increment (example: 1): 1

Loading file...

Detected 50 companies

Dataset start: 2024-02-14
Processing begins after 3 months: 2024-05-01
Rolling type: week | Increment: 1

Sheet: Week1 | Using date: 2024-05-02


Exception ignored in: <function ZipFile.__del__ at 0x000002A5F1E85D00>
Traceback (most recent call last):
  File "C:\Users\Swarupa\anaconda3\envs\ttp_env1\Lib\zipfile.py", line 1908, in __del__
    self.close()
  File "C:\Users\Swarupa\anaconda3\envs\ttp_env1\Lib\zipfile.py", line 1925, in close
    self.fp.seek(self.start_dir)
ValueError: seek of closed file



Sheet: Week2 | Using date: 2024-05-08

Sheet: Week3 | Using date: 2024-05-15

Sheet: Week4 | Using date: 2024-05-22

Sheet: Week5 | Using date: 2024-05-29

Sheet: Week6 | Using date: 2024-06-05

Sheet: Week7 | Using date: 2024-06-12

Sheet: Week8 | Using date: 2024-06-19

Sheet: Week9 | Using date: 2024-06-26

Sheet: Week10 | Using date: 2024-07-03

Sheet: Week11 | Using date: 2024-07-10

Sheet: Week12 | Using date: 2024-07-18

Sheet: Week13 | Using date: 2024-07-24

Sheet: Week14 | Using date: 2024-07-31

Sheet: Week15 | Using date: 2024-08-07

Sheet: Week16 | Using date: 2024-08-14

Sheet: Week17 | Using date: 2024-08-21

Sheet: Week18 | Using date: 2024-08-28

Sheet: Week19 | Using date: 2024-09-04

Sheet: Week20 | Using date: 2024-09-11

Sheet: Week21 | Using date: 2024-09-18

Sheet: Week22 | Using date: 2024-09-25

Sheet: Week23 | Using date: 2024-10-03

Sheet: Week24 | Using date: 2024-10-09

Sheet: Week25 | Using date: 2024-10-16

Sheet: Week26 | Using date: 2024-10-23

Sheet: 

In [5]:
#4Company_Total_Price

import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import timedelta
import os

# ======================================================
#               USER INPUTS 
# ======================================================

file_prices = input("\nEnter price file path: ").strip()
file_combos = input("Enter combo quantity file path: ").strip()
output_folder = input("Enter output folder path: ").strip()

while True:
    try:
        OBS_DAYS = int(input("\nEnter observation days (example: 45): ").strip())
        break
    except:
        print("Invalid input! Enter a number.")

# ---------------- Rolling Frequency Input ----------------
valid_roll_types = ["day", "week", "month", "year"]

while True:
    ROLL_TYPE = input("\nEnter rolling type (day/week/month/year): ").strip().lower()
    if ROLL_TYPE in valid_roll_types:
        break
    print("Invalid! Choose from day / week / month / year")

while True:
    try:
        ROLL_VALUE = int(input("Enter rolling increment : ").strip())
        break
    except:
        print("Invalid number! Try again.")

# Make output folder
os.makedirs(output_folder, exist_ok=True)

# ======================================================
#                LOAD PRICE DATA
# ======================================================

df_price = pd.read_excel(file_prices)
date_col = df_price.columns[0]
company_cols = df_price.columns[1:]

df_price[date_col] = pd.to_datetime(df_price[date_col], dayfirst=True)
df_price = df_price.sort_values(date_col)

# Skip 3 months
first_date = df_price[date_col].min()
skip_date = first_date + pd.DateOffset(months=3)
df_filtered = df_price[df_price[date_col] > skip_date].copy()

print("\nDataset start:", first_date.date())
print("After skipping first 3 months:", skip_date.date())

# ======================================================
#                LOAD COMBO DATA
# ======================================================

df_combo = pd.read_excel(file_combos)
df_combo.columns = df_combo.columns.str.strip()

df_combo[['C1','C2','C3','C4']] = df_combo['Combo'].str.split(',', expand=True)

def split_qty(q):
    arr = []
    for x in str(q).split(","):
        try:
            arr.append(float(x.strip()))
        except:
            arr.append(0.0)
    # Ensure 4 elements
    while len(arr) < 4:
        arr.append(0.0)
    return arr[:4]

df_combo[['Q1','Q2','Q3','Q4']] = df_combo['Quantities'].apply(split_qty).tolist()

# Pre-map company index
col_index = {col: i for i, col in enumerate(company_cols)}

combo_indices = df_combo[['C1','C2','C3','C4']].applymap(
    lambda x: col_index.get(x, -1)
).values.astype(int)

qty_matrix = df_combo[['Q1','Q2','Q3','Q4']].values.astype(float)

# ======================================================
#         HELPER FUNCTION FOR ROLLING DATE OFFSET
# ======================================================

def shift_date(start, roll_type, value):
    """Return shifted date by selected rolling frequency."""
    if roll_type == "day":
        return start + timedelta(days=value)
    elif roll_type == "week":
        return start + timedelta(weeks=value)
    elif roll_type == "month":
        return start + pd.DateOffset(months=value)
    elif roll_type == "year":
        return start + pd.DateOffset(years=value)

# ======================================================
#               MAIN ROLLING WINDOW LOOP
# ======================================================

current_start = df_filtered[date_col].min()
window_count = 1

while current_start <= df_filtered[date_col].max():

    window_end = current_start + timedelta(days=OBS_DAYS - 1)

    df_window = df_filtered[
        (df_filtered[date_col] >= current_start) &
        (df_filtered[date_col] <= window_end)
    ].copy()

    if df_window.empty:
        break

    print(f"\nProcessing Window {window_count}")
    print("Start:", df_window[date_col].min().date())
    print("End  :", df_window[date_col].max().date())

    # ---------- Fast Calculation Section ----------
    results = []
    df_prices_np = df_window[company_cols].to_numpy(float)

    total_rows = len(df_window) * len(df_combo)
    pbar = tqdm(total=total_rows, desc=f"Window {window_count}", unit="rows")

    for idx, dp_row in enumerate(df_prices_np):
        date_value = df_window[date_col].iloc[idx]

        # Multiply entire combo price matrix at once
        price_matrix = dp_row[combo_indices]  
        price_matrix = np.where(combo_indices == -1, 0, price_matrix)

        total_prices = np.sum(price_matrix * qty_matrix, axis=1)

        # Create output rows fast
        for i in range(len(df_combo)):
            results.append([
                date_value,
                df_combo["Combo"].iloc[i],
                list(price_matrix[i]),
                total_prices[i]
            ])
            pbar.update(1)

    pbar.close()

    df_result = pd.DataFrame(results, columns=[
        "DATE", "Combo", "Price_Combo", "Total_Price"
    ])

    output_csv = os.path.join(
        output_folder,
        f"Window_{window_count}_{OBS_DAYS}Days.csv"
    )
    df_result.to_csv(output_csv, index=False)
    print("Saved:", output_csv)

    # ---------- Shift by User-defined Rolling Type ----------
    current_start = shift_date(current_start, ROLL_TYPE, ROLL_VALUE)
    window_count += 1

print("\nAll rolling CSV files generated successfully!")



Enter price file path: C:\Users\Swarupa\Downloads\NIFTY50_CLOSE_MASTER.xlsx
Enter combo quantity file path: D:\feb24_to_jan26\Combo_quantity.xlsx
Enter output folder path: D:\feb24_to_jan26\4Company_Total_Price

Enter observation days (example: 45): 45

Enter rolling type (day/week/month/year): week
Enter rolling increment : 1

Dataset start: 2024-02-14
After skipping first 3 months: 2024-05-14


  combo_indices = df_combo[['C1','C2','C3','C4']].applymap(



Processing Window 1
Start: 2024-05-15
End  : 2024-06-28


Window 1: 100%|█████████████████████████████████████████████████████████| 7139300/7139300 [01:33<00:00, 76218.93rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_1_45Days.csv

Processing Window 2
Start: 2024-05-22
End  : 2024-07-05


Window 2: 100%|█████████████████████████████████████████████████████████| 7369600/7369600 [01:58<00:00, 62169.71rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_2_45Days.csv

Processing Window 3
Start: 2024-05-29
End  : 2024-07-12


Window 3: 100%|█████████████████████████████████████████████████████████| 7369600/7369600 [01:36<00:00, 76010.07rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_3_45Days.csv

Processing Window 4
Start: 2024-06-05
End  : 2024-07-19


Window 4: 100%|█████████████████████████████████████████████████████████| 7139300/7139300 [01:56<00:00, 61062.70rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_4_45Days.csv

Processing Window 5
Start: 2024-06-12
End  : 2024-07-26


Window 5: 100%|█████████████████████████████████████████████████████████| 7139300/7139300 [01:34<00:00, 75725.08rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_5_45Days.csv

Processing Window 6
Start: 2024-06-19
End  : 2024-08-02


Window 6: 100%|█████████████████████████████████████████████████████████| 7369600/7369600 [01:37<00:00, 75576.74rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_6_45Days.csv

Processing Window 7
Start: 2024-06-26
End  : 2024-08-09


Window 7: 100%|█████████████████████████████████████████████████████████| 7369600/7369600 [01:59<00:00, 61470.01rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_7_45Days.csv

Processing Window 8
Start: 2024-07-03
End  : 2024-08-16


Window 8: 100%|█████████████████████████████████████████████████████████| 7139300/7139300 [01:35<00:00, 74873.74rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_8_45Days.csv

Processing Window 9
Start: 2024-07-10
End  : 2024-08-23


Window 9: 100%|█████████████████████████████████████████████████████████| 7139300/7139300 [01:58<00:00, 60435.83rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_9_45Days.csv

Processing Window 10
Start: 2024-07-18
End  : 2024-08-30


Window 10: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:34<00:00, 75373.71rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_10_45Days.csv

Processing Window 11
Start: 2024-07-24
End  : 2024-09-06


Window 11: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [02:01<00:00, 60744.79rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_11_45Days.csv

Processing Window 12
Start: 2024-07-31
End  : 2024-09-13


Window 12: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:38<00:00, 75143.67rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_12_45Days.csv

Processing Window 13
Start: 2024-08-07
End  : 2024-09-20


Window 13: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [02:02<00:00, 60044.98rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_13_45Days.csv

Processing Window 14
Start: 2024-08-14
End  : 2024-09-27


Window 14: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:37<00:00, 75271.83rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_14_45Days.csv

Processing Window 15
Start: 2024-08-21
End  : 2024-10-04


Window 15: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:38<00:00, 75088.17rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_15_45Days.csv

Processing Window 16
Start: 2024-08-28
End  : 2024-10-11


Window 16: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [02:00<00:00, 61374.64rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_16_45Days.csv

Processing Window 17
Start: 2024-09-04
End  : 2024-10-18


Window 17: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:37<00:00, 75669.27rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_17_45Days.csv

Processing Window 18
Start: 2024-09-11
End  : 2024-10-25


Window 18: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [02:00<00:00, 61304.85rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_18_45Days.csv

Processing Window 19
Start: 2024-09-18
End  : 2024-11-01


Window 19: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:37<00:00, 75374.48rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_19_45Days.csv

Processing Window 20
Start: 2024-09-25
End  : 2024-11-08


Window 20: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [02:01<00:00, 60445.77rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_20_45Days.csv

Processing Window 21
Start: 2024-10-03
End  : 2024-11-14


Window 21: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:34<00:00, 75385.40rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_21_45Days.csv

Processing Window 22
Start: 2024-10-09
End  : 2024-11-22


Window 22: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:34<00:00, 75280.02rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_22_45Days.csv

Processing Window 23
Start: 2024-10-16
End  : 2024-11-29


Window 23: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:55<00:00, 61810.34rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_23_45Days.csv

Processing Window 24
Start: 2024-10-23
End  : 2024-12-06


Window 24: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:34<00:00, 75327.44rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_24_45Days.csv

Processing Window 25
Start: 2024-10-30
End  : 2024-12-13


Window 25: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:57<00:00, 61001.68rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_25_45Days.csv

Processing Window 26
Start: 2024-11-06
End  : 2024-12-20


Window 26: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:34<00:00, 75725.11rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_26_45Days.csv

Processing Window 27
Start: 2024-11-13
End  : 2024-12-27


Window 27: 100%|████████████████████████████████████████████████████████| 6909000/6909000 [01:55<00:00, 59747.05rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_27_45Days.csv

Processing Window 28
Start: 2024-11-21
End  : 2025-01-03


Window 28: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:35<00:00, 75097.26rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_28_45Days.csv

Processing Window 29
Start: 2024-11-27
End  : 2025-01-10


Window 29: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:37<00:00, 75215.95rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_29_45Days.csv

Processing Window 30
Start: 2024-12-04
End  : 2025-01-17


Window 30: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:59<00:00, 61620.93rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_30_45Days.csv

Processing Window 31
Start: 2024-12-11
End  : 2025-01-24


Window 31: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:38<00:00, 75140.32rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_31_45Days.csv

Processing Window 32
Start: 2024-12-18
End  : 2025-01-31


Window 32: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [02:00<00:00, 61150.75rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_32_45Days.csv

Processing Window 33
Start: 2024-12-26
End  : 2025-02-07


Window 33: 100%|████████████████████████████████████████████████████████| 7599900/7599900 [01:40<00:00, 75328.33rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_33_45Days.csv

Processing Window 34
Start: 2025-01-01
End  : 2025-02-14


Window 34: 100%|████████████████████████████████████████████████████████| 7830200/7830200 [02:07<00:00, 61564.40rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_34_45Days.csv

Processing Window 35
Start: 2025-01-08
End  : 2025-02-21


Window 35: 100%|████████████████████████████████████████████████████████| 7830200/7830200 [01:43<00:00, 75313.23rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_35_45Days.csv

Processing Window 36
Start: 2025-01-15
End  : 2025-02-28


Window 36: 100%|████████████████████████████████████████████████████████| 7599900/7599900 [02:05<00:00, 60723.47rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_36_45Days.csv

Processing Window 37
Start: 2025-01-22
End  : 2025-03-07


Window 37: 100%|████████████████████████████████████████████████████████| 7599900/7599900 [01:41<00:00, 75153.74rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_37_45Days.csv

Processing Window 38
Start: 2025-01-29
End  : 2025-03-13


Window 38: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [02:03<00:00, 59862.39rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_38_45Days.csv

Processing Window 39
Start: 2025-02-05
End  : 2025-03-21


Window 39: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:34<00:00, 75349.03rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_39_45Days.csv

Processing Window 40
Start: 2025-02-12
End  : 2025-03-28


Window 40: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:34<00:00, 75361.46rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_40_45Days.csv

Processing Window 41
Start: 2025-02-19
End  : 2025-04-04


Window 41: 100%|████████████████████████████████████████████████████████| 6909000/6909000 [01:54<00:00, 60258.56rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_41_45Days.csv

Processing Window 42
Start: 2025-02-27
End  : 2025-04-11


Window 42: 100%|████████████████████████████████████████████████████████| 6678700/6678700 [01:28<00:00, 75226.69rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_42_45Days.csv

Processing Window 43
Start: 2025-03-05
End  : 2025-04-17


Window 43: 100%|████████████████████████████████████████████████████████| 6448400/6448400 [01:49<00:00, 58738.80rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_43_45Days.csv

Processing Window 44
Start: 2025-03-12
End  : 2025-04-25


Window 44: 100%|████████████████████████████████████████████████████████| 6448400/6448400 [01:25<00:00, 75104.42rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_44_45Days.csv

Processing Window 45
Start: 2025-03-19
End  : 2025-05-02


Window 45: 100%|████████████████████████████████████████████████████████| 6448400/6448400 [01:25<00:00, 75752.35rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_45_45Days.csv

Processing Window 46
Start: 2025-03-26
End  : 2025-05-09


Window 46: 100%|████████████████████████████████████████████████████████| 6448400/6448400 [01:47<00:00, 60151.41rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_46_45Days.csv

Processing Window 47
Start: 2025-04-02
End  : 2025-05-16


Window 47: 100%|████████████████████████████████████████████████████████| 6678700/6678700 [01:28<00:00, 75652.75rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_47_45Days.csv

Processing Window 48
Start: 2025-04-09
End  : 2025-05-23


Window 48: 100%|████████████████████████████████████████████████████████| 6678700/6678700 [01:28<00:00, 75545.78rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_48_45Days.csv

Processing Window 49
Start: 2025-04-16
End  : 2025-05-30


Window 49: 100%|████████████████████████████████████████████████████████| 7139300/7139300 [01:54<00:00, 62309.63rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_49_45Days.csv

Processing Window 50
Start: 2025-04-23
End  : 2025-06-06


Window 50: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:37<00:00, 75525.34rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_50_45Days.csv

Processing Window 51
Start: 2025-04-30
End  : 2025-06-13


Window 51: 100%|████████████████████████████████████████████████████████| 7369600/7369600 [01:58<00:00, 62248.35rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_51_45Days.csv

Processing Window 52
Start: 2025-05-07
End  : 2025-06-20


Window 52: 100%|████████████████████████████████████████████████████████| 7599900/7599900 [01:40<00:00, 75797.88rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_52_45Days.csv

Processing Window 53
Start: 2025-05-14
End  : 2025-06-27


Window 53: 100%|████████████████████████████████████████████████████████| 7599900/7599900 [02:02<00:00, 62254.58rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_53_45Days.csv

Processing Window 54
Start: 2025-05-21
End  : 2025-07-04


Window 54: 100%|████████████████████████████████████████████████████████| 7599900/7599900 [01:40<00:00, 75845.16rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_54_45Days.csv

Processing Window 55
Start: 2025-05-28
End  : 2025-07-11


Window 55: 100%|████████████████████████████████████████████████████████| 7599900/7599900 [02:03<00:00, 61780.14rows/s]


Saved: D:\feb24_to_jan26\4Company_Total_Price\Window_55_45Days.csv

Processing Window 56
Start: 2025-06-04
End  : 2025-07-18


Window 56:  73%|████████████████████████████████████████▉               | 5555782/7599900 [01:13<00:27, 74452.31rows/s]

In [9]:
#4Company_EURINR_Ratio_50DMA

import pandas as pd
import numpy as np
from tqdm import tqdm
import glob
import os
from collections import deque, defaultdict

# ====================== REQUIRED USER INPUTS ======================
INPUT_PRICE_FOLDER = input("Enter input PRICE folder path: ").strip()
INPUT_FX_FILE = input("Enter FX (EURINR) file path: ").strip()
OUTPUT_FOLDER = input("Enter OUTPUT folder path: ").strip()
MOVING_AVG_DAYS = int(input("Enter Moving Average days : "))

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# ====================== LOAD FX DATA ======================
fx = pd.read_excel(INPUT_FX_FILE, usecols=["DATE", "EURINR"])
fx["DATE"] = pd.to_datetime(fx["DATE"])
fx.set_index("DATE", inplace=True)

# ====================== GET ALL INPUT FILES ======================
csv_files = sorted(glob.glob(os.path.join(INPUT_PRICE_FOLDER, "*.csv")))
print(f"Total input CSV files detected: {len(csv_files)}")

# ====================== PROCESS EACH FILE ======================
for file_csv in csv_files:

    print(f"\nProcessing: {os.path.basename(file_csv)}")

    output_file = os.path.join(
        OUTPUT_FOLDER,
        f"{MOVING_AVG_DAYS}DMA_{os.path.basename(file_csv)}"
    )

    # Deque and cumulative sum per Combo
    ratio_queue = defaultdict(lambda: deque(maxlen=MOVING_AVG_DAYS))
    ratio_sum = defaultdict(float)

    header_written = False

    for chunk in tqdm(
        pd.read_csv(
            file_csv,
            chunksize=50000,
            parse_dates=["DATE"],
            usecols=["DATE", "Combo", "Total_Price"]
        ),
        desc="Streaming",
        unit=" rows"
    ):
        # Merge EURINR data
        chunk = chunk.join(fx, on="DATE", how="left")
        chunk.dropna(subset=["EURINR"], inplace=True)

        if chunk.empty:
            continue

        # Compute EURINR Ratio
        eur_ratio = chunk["Total_Price"].values / chunk["EURINR"].values
        dma_values = np.zeros(len(chunk))
        combos = chunk["Combo"].values

        # Rolling moving average calculation
        for i, combo in enumerate(combos):
            q = ratio_queue[combo]

            # Maintain rolling sum
            if len(q) == MOVING_AVG_DAYS:
                ratio_sum[combo] -= q[0]

            q.append(eur_ratio[i])
            ratio_sum[combo] += eur_ratio[i]

            dma_values[i] = ratio_sum[combo] / len(q)

        # Save chunk to output
        out = pd.DataFrame({
            "DATE": chunk["DATE"].values,
            "Combo": combos,
            "EURINR_Ratio": eur_ratio,
            f"{MOVING_AVG_DAYS}DMA_EURINR": dma_values
        })

        out.to_csv(output_file, mode="a", index=False, header=not header_written)
        header_written = True

    print(f"Output created: {output_file}")

print("\n ALL FILES PROCESSED SUCCESSFULLY")


Enter input PRICE folder path: D:\feb24_to_jan26\4Company_Total_Price
Enter FX (EURINR) file path: D:\feb24_to_jan26\EURINR_CLOSE.xlsx
Enter OUTPUT folder path: D:\feb24_to_jan26\4Company_EURINR_Ratio_50DMA
Enter Moving Average days : 50


In [10]:
#4Company_NIFTY_Ratio_50DMA

import pandas as pd
import numpy as np
from tqdm import tqdm
import glob
import os
from collections import deque, defaultdict

# ====================== REQUIRED USER INPUTS ======================
INPUT_PRICE_FOLDER = input("Enter input PRICE folder path: ").strip()
INPUT_NIFTY_FILE = input("Enter NIFTY file path: ").strip()
OUTPUT_FOLDER = input("Enter OUTPUT folder path: ").strip()
MOVING_AVG_DAYS = int(input("Enter Moving Average days: "))

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# ====================== LOAD NIFTY DATA ======================
nifty = pd.read_excel(INPUT_NIFTY_FILE, usecols=["DATE", "NIFTY"])
nifty["DATE"] = pd.to_datetime(nifty["DATE"])
nifty.set_index("DATE", inplace=True)

# ====================== GET ALL INPUT FILES ======================
csv_files = sorted(glob.glob(os.path.join(INPUT_PRICE_FOLDER, "*.csv")))
print(f"Total input CSV files detected: {len(csv_files)}")

# ====================== PROCESS EACH FILE ======================
for file_csv in csv_files:

    print(f"\nProcessing: {os.path.basename(file_csv)}")

    output_file = os.path.join(
        OUTPUT_FOLDER,
        f"{MOVING_AVG_DAYS}DMA_NIFTY_{os.path.basename(file_csv)}"
    )

    # Rolling queue + sum per Combo
    ratio_queue = defaultdict(lambda: deque(maxlen=MOVING_AVG_DAYS))
    ratio_sum = defaultdict(float)

    header_written = False

    # Stream CSV in chunks
    for chunk in tqdm(
        pd.read_csv(
            file_csv,
            chunksize=50000,
            parse_dates=["DATE"],
            usecols=["DATE", "Combo", "Total_Price"]
        ),
        desc="Streaming",
        unit=" rows"
    ):
        # Merge NIFTY data
        chunk = chunk.join(nifty, on="DATE", how="left")
        chunk.dropna(subset=["NIFTY"], inplace=True)

        if chunk.empty:
            continue

        # Compute Ratio
        nifty_ratio = chunk["Total_Price"].values / chunk["NIFTY"].values
        dma_values = np.zeros(len(chunk))

        combos = chunk["Combo"].values

        # Rolling Moving Average Calculation
        for i, combo in enumerate(combos):
            q = ratio_queue[combo]

            # Maintain rolling sum
            if len(q) == MOVING_AVG_DAYS:
                ratio_sum[combo] -= q[0]

            q.append(nifty_ratio[i])
            ratio_sum[combo] += nifty_ratio[i]

            dma_values[i] = ratio_sum[combo] / len(q)

        # Prepare output chunk
        out = pd.DataFrame({
            "DATE": chunk["DATE"].values,
            "Combo": combos,
            "NIFTY_Ratio": nifty_ratio,
            f"{MOVING_AVG_DAYS}DMA_NIFTY": dma_values
        })

        out.to_csv(output_file, mode="a", index=False, header=not header_written)
        header_written = True

    print(f"Output saved: {output_file}")

print("\n ALL FILES PROCESSED SUCCESSFULLY.")


Enter input PRICE folder path: D:\feb24_to_jan26\4Company_Total_Price
Enter NIFTY file path: D:\feb24_to_jan26\NIFTY_CLOSE.xlsx
Enter OUTPUT folder path: D:\feb24_to_jan26\4Company_NIFTY_Ratio_50DMA
Enter Moving Average days: 50


In [1]:
#EURINR_Ratio > 1.01*50DMA_EURINR

import pandas as pd
import glob
import os

# ====================== USER INPUTS ======================
INPUT_FOLDER = input("Enter folder path of EURINR DMA CSV files: ").strip()
OUTPUT_FOLDER = input("Enter output folder path for filtered results: ").strip()

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# ====================== PROCESS FILES ======================
csv_files = sorted(glob.glob(os.path.join(INPUT_FOLDER, "*.csv")))
print(f"Total CSV files detected: {len(csv_files)}")

for file in csv_files:

    file_name = os.path.basename(file)
    print(f"\nProcessing: {file_name}")

    # Load file
    df = pd.read_csv(file)

    # Detect DMA column automatically (example: "50DMA_EURINR")
    dma_col = [col for col in df.columns if "DMA_EURINR" in col][0]

    # ================= APPLY CONDITION =================
    df_filtered = df[df["EURINR_Ratio"] > (1.01 * df[dma_col])].copy()

    # Count filtered rows
    filtered_count = len(df_filtered)
    print(f" Combinations satisfying condition: {filtered_count}")

    if df_filtered.empty:
        print("  No combinations satisfy condition → Skipped saving.")
        continue

    # Save file
    output_file = os.path.join(
        OUTPUT_FOLDER,
        f"Filtered_{file_name}"
    )

    df_filtered.to_csv(output_file, index=False)
    print(f" Saved filtered file: {output_file}")

print("\nALL FILES FILTERED SUCCESSFULLY.")

Enter folder path of EURINR DMA CSV files: D:\feb24_to_jan26\4Company_EURINR_Ratio_50DMA
Enter output folder path for filtered results: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA
Total CSV files detected: 86

Processing: 50DMA_Window_10_45Days.csv
 Combinations satisfying condition: 1687703
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_10_45Days.csv

Processing: 50DMA_Window_11_45Days.csv
 Combinations satisfying condition: 2025542
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_11_45Days.csv

Processing: 50DMA_Window_12_45Days.csv
 Combinations satisfying condition: 2058125
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_12_45Days.csv

Processing: 50DMA_Window_13_45Days.csv
 Combinations satisfying condition: 3281064
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_13_45Days.csv

Processing: 50DMA_Win

 Combinations satisfying condition: 3857516
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_49_45Days.csv

Processing: 50DMA_Window_4_45Days.csv
 Combinations satisfying condition: 4998606
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_4_45Days.csv

Processing: 50DMA_Window_50_45Days.csv
 Combinations satisfying condition: 2795187
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_50_45Days.csv

Processing: 50DMA_Window_51_45Days.csv
 Combinations satisfying condition: 2337463
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_51_45Days.csv

Processing: 50DMA_Window_52_45Days.csv
 Combinations satisfying condition: 1834990
 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_52_45Days.csv

Processing: 50DMA_Window_53_45Days.csv
 Combinations satisfying condition: 548586
 Sav

 Saved filtered file: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA\Filtered_50DMA_Window_9_45Days.csv

ALL FILES FILTERED SUCCESSFULLY.


In [2]:
#NIFTY_Ratio > 1.01*50DMA_NIFTY

import pandas as pd
import glob
import os

# ====================== USER INPUTS ======================
INPUT_FOLDER = input("Enter folder path of NIFTY DMA CSV files: ").strip()
OUTPUT_FOLDER = input("Enter output folder path for filtered results: ").strip()

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# ====================== PROCESS FILES ======================
csv_files = sorted(glob.glob(os.path.join(INPUT_FOLDER, "*.csv")))
print(f"Total CSV files detected: {len(csv_files)}")

for file in csv_files:

    file_name = os.path.basename(file)
    print(f"\nProcessing: {file_name}")

    # Load file
    df = pd.read_csv(file)

    # Detect DMA column automatically (example: "50DMA_NIFTY")
    dma_col = [col for col in df.columns if "DMA_NIFTY" in col][0]

    # ================= APPLY CONDITION =================
    df_filtered = df[df["NIFTY_Ratio"] > (1.01 * df[dma_col])].copy()

    # Count filtered rows
    filtered_count = len(df_filtered)
    print(f" Combinations satisfying condition: {filtered_count}")

    if df_filtered.empty:
        print("  No combinations satisfy condition → Skipped saving.")
        continue

    # Save file
    output_file = os.path.join(
        OUTPUT_FOLDER,
        f"Filtered_{file_name}"
    )

    df_filtered.to_csv(output_file, index=False)
    print(f" Saved filtered file: {output_file}")

print("\nALL FILES FILTERED SUCCESSFULLY.")


Enter folder path of NIFTY DMA CSV files: D:\feb24_to_jan26\4Company_NIFTY_Ratio_50DMA
Enter output folder path for filtered results: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA
Total CSV files detected: 86

Processing: 50DMA_NIFTY_Window_10_45Days.csv
 Combinations satisfying condition: 1832489
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_10_45Days.csv

Processing: 50DMA_NIFTY_Window_11_45Days.csv
 Combinations satisfying condition: 1601007
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_11_45Days.csv

Processing: 50DMA_NIFTY_Window_12_45Days.csv
 Combinations satisfying condition: 1466253
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_12_45Days.csv

Processing: 50DMA_NIFTY_Window_13_45Days.csv
 Combinations satisfying condition: 1506240
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Wi

 Combinations satisfying condition: 1015790
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_47_45Days.csv

Processing: 50DMA_NIFTY_Window_48_45Days.csv
 Combinations satisfying condition: 1061990
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_48_45Days.csv

Processing: 50DMA_NIFTY_Window_49_45Days.csv
 Combinations satisfying condition: 1445612
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_49_45Days.csv

Processing: 50DMA_NIFTY_Window_4_45Days.csv
 Combinations satisfying condition: 1045123
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_4_45Days.csv

Processing: 50DMA_NIFTY_Window_50_45Days.csv
 Combinations satisfying condition: 1422949
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_50_45Days.csv

Processing: 50DMA_NIFTY_Window_51_45D

 Combinations satisfying condition: 212835
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_84_45Days.csv

Processing: 50DMA_NIFTY_Window_85_45Days.csv
 Combinations satisfying condition: 132652
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_85_45Days.csv

Processing: 50DMA_NIFTY_Window_86_45Days.csv
 Combinations satisfying condition: 7708
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_86_45Days.csv

Processing: 50DMA_NIFTY_Window_8_45Days.csv
 Combinations satisfying condition: 2029523
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_8_45Days.csv

Processing: 50DMA_NIFTY_Window_9_45Days.csv
 Combinations satisfying condition: 1814727
 Saved filtered file: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA\Filtered_50DMA_NIFTY_Window_9_45Days.csv

ALL FILES FILTERED SUCCESSFULLY.


In [4]:
#EURINR_Ratio <= 1.03*50DMA

import os
import glob
import pandas as pd

# ================= USER INPUTS =================
INPUT_FOLDER  = input("Enter folder path with CSVs: ").strip()
OUTPUT_FOLDER = input("Enter output folder path: ").strip()

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Helper to create output CSV filename
def get_output_csv_name(input_csv):
    base = os.path.basename(input_csv)
    return os.path.join(OUTPUT_FOLDER, base)

# Load all CSV files
csv_files = sorted(glob.glob(os.path.join(INPUT_FOLDER, "*.csv")))
print(f"\nTotal CSV files detected: {len(csv_files)}")

for csv_file in csv_files:
    output_csv = get_output_csv_name(csv_file)
    print(f"\nProcessing {os.path.basename(csv_file)} ...")

    df = pd.read_csv(csv_file, parse_dates=["DATE"])

    # Required columns (EURINR only)
    required_cols = ["DATE", "Combo", "EURINR_Ratio", "50DMA_EURINR"]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        print(f"  Missing columns: {missing}. Skipping file.")
        continue

    # Apply EURINR condition only
    df_filtered = df[
        df["EURINR_Ratio"] <= 1.03 * df["50DMA_EURINR"]
    ]

    # Skip file if no rows satisfy the condition
    if df_filtered.empty:
        print("  No rows satisfy the EURINR condition. File skipped.")
        continue

    # Save filtered CSV
    df_filtered.to_csv(output_csv, index=False)
    print(f"  Filtered CSV saved: {output_csv} (rows: {len(df_filtered)})")

print("\nDONE! Only EURINR-based filtered combinations are saved.")


Enter folder path with CSVs: D:\feb24_to_jan26\4Company_EURINR_Ratio_GT_50DMA
Enter output folder path: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA

Total CSV files detected: 86

Processing Filtered_50DMA_Window_10_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_10_45Days.csv (rows: 1388798)

Processing Filtered_50DMA_Window_11_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_11_45Days.csv (rows: 1709981)

Processing Filtered_50DMA_Window_12_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_12_45Days.csv (rows: 1466243)

Processing Filtered_50DMA_Window_13_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_13_45Days.csv (rows: 2287445)

Processing Filtered_50DMA_Window_14_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR

  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_52_45Days.csv (rows: 1521870)

Processing Filtered_50DMA_Window_53_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_53_45Days.csv (rows: 543104)

Processing Filtered_50DMA_Window_54_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_54_45Days.csv (rows: 920287)

Processing Filtered_50DMA_Window_55_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_55_45Days.csv (rows: 1034524)

Processing Filtered_50DMA_Window_56_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_56_45Days.csv (rows: 1473011)

Processing Filtered_50DMA_Window_57_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA\Filtered_50DMA_Window_57_45Days.csv (rows: 1

In [5]:
#NIFTY_Ratio <= 1.03*50DMA

import os
import glob
import pandas as pd

# ================= USER INPUTS =================
INPUT_FOLDER  = input("Enter folder path with CSVs: ").strip()
OUTPUT_FOLDER = input("Enter output folder path: ").strip()

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Helper to create output CSV filename
def get_output_csv_name(input_csv):
    base = os.path.basename(input_csv)
    return os.path.join(OUTPUT_FOLDER, base)

# Load all CSV files
csv_files = sorted(glob.glob(os.path.join(INPUT_FOLDER, "*.csv")))
print(f"\nTotal CSV files detected: {len(csv_files)}")

for csv_file in csv_files:
    output_csv = get_output_csv_name(csv_file)
    print(f"\nProcessing {os.path.basename(csv_file)} ...")

    df = pd.read_csv(csv_file, parse_dates=["DATE"])

    # Required columns
    required_cols = ["DATE", "Combo", "NIFTY_Ratio", "50DMA_NIFTY"]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        print(f"  Missing columns: {missing}. Skipping file.")
        continue

    # Apply NIFTY condition only
    df_filtered = df[
        df["NIFTY_Ratio"] <= 1.03 * df["50DMA_NIFTY"]
    ]

    # Skip file if no rows satisfy the condition
    if df_filtered.empty:
        print("  No rows satisfy the NIFTY condition. File skipped.")
        continue

    # Save filtered CSV
    df_filtered.to_csv(output_csv, index=False)
    print(f"  Filtered CSV saved: {output_csv} (rows: {len(df_filtered)})")

print("\nDONE! Only NIFTY-based filtered combinations are saved.")


Enter folder path with CSVs: D:\feb24_to_jan26\4Company_NIFTY_Ratio_GT_50DMA
Enter output folder path: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA

Total CSV files detected: 86

Processing Filtered_50DMA_NIFTY_Window_10_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_10_45Days.csv (rows: 1688583)

Processing Filtered_50DMA_NIFTY_Window_11_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_11_45Days.csv (rows: 1448075)

Processing Filtered_50DMA_NIFTY_Window_12_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_12_45Days.csv (rows: 1283768)

Processing Filtered_50DMA_NIFTY_Window_13_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_13_45Days.csv (rows: 1309897)

Processing Filtered_50DMA_NIFTY_Window_14_45Days.csv ...
  Filte

  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_4_45Days.csv (rows: 941312)

Processing Filtered_50DMA_NIFTY_Window_50_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_50_45Days.csv (rows: 1109536)

Processing Filtered_50DMA_NIFTY_Window_51_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_51_45Days.csv (rows: 1281879)

Processing Filtered_50DMA_NIFTY_Window_52_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_52_45Days.csv (rows: 1399721)

Processing Filtered_50DMA_NIFTY_Window_53_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA\Filtered_50DMA_NIFTY_Window_53_45Days.csv (rows: 990173)

Processing Filtered_50DMA_NIFTY_Window_54_45Days.csv ...
  Filtered CSV saved: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LT

In [6]:
#common_combinations_EURINR_NIFTY

import pandas as pd
import glob
import os

# ====================== USER INPUTS ==========================
EURINR_FOLDER = input("Enter folder path of EURINR files: ").strip()
NIFTY_FOLDER  = input("Enter folder path of NIFTY files: ").strip()
OUTPUT_FOLDER = input("Enter output folder path: ").strip()

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

eur_files = sorted(glob.glob(os.path.join(EURINR_FOLDER, "*.csv")))
nifty_files = sorted(glob.glob(os.path.join(NIFTY_FOLDER, "*.csv")))

print(f"EURINR files found : {len(eur_files)}")
print(f"NIFTY  files found : {len(nifty_files)}")

def extract_month(file_name):
    """
    Example filename:
        50DMA_EURINR_Month_1_45Days.csv
        50DMA_NIFTY_Month_1_45Days.csv
    Function extracts: "Month_1_45Days"
    """
    parts = file_name.split("_")
    return "_".join(parts[-2:]).replace(".csv", "")   # Month_x_45Days

# Loop through all EURINR files
for eur_file in eur_files:

    eur_name = os.path.basename(eur_file)
    eur_month = extract_month(eur_name)

    # Find matching NIFTY file for SAME MONTH
    matching_nifty = [f for f in nifty_files if extract_month(os.path.basename(f)) == eur_month]

    if not matching_nifty:
        print(f"\n No NIFTY file found for: {eur_name}")
        continue

    nifty_file = matching_nifty[0]

    print(f"\n==========================")
    print(f" Processing Month: {eur_month}")
    print(f" EURINR ? {eur_name}")
    print(f" NIFTY  ? {os.path.basename(nifty_file)}")
    print(f"==========================")

    # Load files
    df_eur = pd.read_csv(eur_file)
    df_nifty = pd.read_csv(nifty_file)

    # Convert DATE to datetime
    df_eur["DATE"] = pd.to_datetime(df_eur["DATE"])
    df_nifty["DATE"] = pd.to_datetime(df_nifty["DATE"])

    # Select common combos within SAME MONTH files
    common_combos = sorted(set(df_eur["Combo"]).intersection(df_nifty["Combo"]))

    print(f" EURINR combos: {df_eur['Combo'].nunique()}")
    print(f" NIFTY combos : {df_nifty['Combo'].nunique()}")
    print(f" Common combos: {len(common_combos)}")

    if len(common_combos) == 0:
        print(" No common combinations ? Skipping this month.")
        continue

    # Filter EURINR and NIFTY to only common combos
    df_eur_f = df_eur[df_eur["Combo"].isin(common_combos)]
    df_nifty_f = df_nifty[df_nifty["Combo"].isin(common_combos)]

    # Merge on DATE + Combo ? ONE DATE column only
    df_final = df_eur_f.merge(
        df_nifty_f,
        on=["DATE", "Combo"],
        suffixes=("_EURINR", "_NIFTY")
    )

    # Save monthly output
    output_path = os.path.join(OUTPUT_FOLDER, f"Common_{eur_month}.csv")
    df_final.to_csv(output_path, index=False)

    print(f" Saved: {output_path}")

print("\n ALL MONTHS PROCESSED SUCCESSFULLY!")


Enter folder path of EURINR files: D:\feb24_to_jan26\4Company_EURINR_Ratio_LTET_50DMA
Enter folder path of NIFTY files: D:\feb24_to_jan26\4Company_NIFTY_Ratio_LTET_50DMA
Enter output folder path: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY
EURINR files found : 86
NIFTY  files found : 86

 Processing Month: 10_45Days
 EURINR ? Filtered_50DMA_Window_10_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_10_45Days.csv
 EURINR combos: 224206
 NIFTY combos : 196463
 Common combos: 195238
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_10_45Days.csv

 Processing Month: 11_45Days
 EURINR ? Filtered_50DMA_Window_11_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_11_45Days.csv
 EURINR combos: 228708
 NIFTY combos : 198385
 Common combos: 197659
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_11_45Days.csv

 Processing Month: 12_45Days
 EURINR ? Filtered_50DMA_Window_12_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_12_45Days.csv
 EURINR combos: 215623


 EURINR combos: 195536
 NIFTY combos : 209057
 Common combos: 184924
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_32_45Days.csv

 Processing Month: 33_45Days
 EURINR ? Filtered_50DMA_Window_33_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_33_45Days.csv
 EURINR combos: 210736
 NIFTY combos : 208513
 Common combos: 195928
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_33_45Days.csv

 Processing Month: 34_45Days
 EURINR ? Filtered_50DMA_Window_34_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_34_45Days.csv
 EURINR combos: 157785
 NIFTY combos : 208172
 Common combos: 156563
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_34_45Days.csv

 Processing Month: 35_45Days
 EURINR ? Filtered_50DMA_Window_35_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_35_45Days.csv
 EURINR combos: 195006
 NIFTY combos : 207336
 Common combos: 188594
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_35_45Days.csv

 Processing Month: 

 EURINR combos: 214038
 NIFTY combos : 170223
 Common combos: 166370
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_55_45Days.csv

 Processing Month: 56_45Days
 EURINR ? Filtered_50DMA_Window_56_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_56_45Days.csv
 EURINR combos: 229608
 NIFTY combos : 198235
 Common combos: 197930
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_56_45Days.csv

 Processing Month: 57_45Days
 EURINR ? Filtered_50DMA_Window_57_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_57_45Days.csv
 EURINR combos: 197195
 NIFTY combos : 173425
 Common combos: 162372
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_57_45Days.csv

 Processing Month: 58_45Days
 EURINR ? Filtered_50DMA_Window_58_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_58_45Days.csv
 EURINR combos: 207143
 NIFTY combos : 170493
 Common combos: 163352
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_58_45Days.csv

 Processing Month: 

 EURINR combos: 159945
 NIFTY combos : 117781
 Common combos: 105447
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_78_45Days.csv

 Processing Month: 79_45Days
 EURINR ? Filtered_50DMA_Window_79_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_79_45Days.csv
 EURINR combos: 151384
 NIFTY combos : 117835
 Common combos: 108593
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_79_45Days.csv

 Processing Month: 7_45Days
 EURINR ? Filtered_50DMA_Window_7_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_7_45Days.csv
 EURINR combos: 219935
 NIFTY combos : 187433
 Common combos: 186975
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_7_45Days.csv

 Processing Month: 80_45Days
 EURINR ? Filtered_50DMA_Window_80_45Days.csv
 NIFTY  ? Filtered_50DMA_NIFTY_Window_80_45Days.csv
 EURINR combos: 147389
 NIFTY combos : 152356
 Common combos: 135109
 Saved: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY\Common_80_45Days.csv

 Processing Month: 81_4

In [7]:
#Top 25% combinations by Average_Ratio

import os
import glob
import pandas as pd

# ================= USER INPUTS =================
INPUT_FOLDER  = input("Enter folder path with filtered CSVs: ").strip()
OUTPUT_FILE   = input("Enter output Excel file path: ").strip()
TOP_PERCENT   = float(input("Enter Top Percentage (example: 25 for top 25%): ").strip())

# Ensure valid % value
if TOP_PERCENT <= 0 or TOP_PERCENT > 100:
    print("Invalid percentage! Please enter value between 1 and 100.")
    exit()

# Ensure output folder exists
os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)

# Create safe Excel sheet names
def get_sheet_name(csv_file):
    name = os.path.basename(csv_file).replace(".csv", "")
    return name[:31]  

# Load all filtered CSV files
csv_files = sorted(glob.glob(os.path.join(INPUT_FOLDER, "*.csv")))
print(f"\nTotal filtered CSV files detected: {len(csv_files)}")

with pd.ExcelWriter(OUTPUT_FILE, engine="openpyxl") as writer:

    for csv_file in csv_files:
        print(f"\nProcessing {os.path.basename(csv_file)} ...")
        df = pd.read_csv(csv_file, parse_dates=["DATE"])

        required_cols = ["EURINR_Ratio", "NIFTY_Ratio", "Combo", "DATE"]
        missing = [c for c in required_cols if c not in df.columns]
        if missing:
            print(f" Missing columns {missing}. Skipping file.")
            continue

        # Internal ranking score 
        df["_Score"] = df["EURINR_Ratio"] + df["NIFTY_Ratio"]

        # Calculate count of top X%
        top_count = max(1, int((TOP_PERCENT / 100) * len(df)))

        # Select top rows
        df_top = df.nlargest(top_count, "_Score")

        # Remove internal column before saving
        df_top = df_top.drop(columns=["_Score"])

        # Write sheet
        sheet_name = get_sheet_name(csv_file)
        df_top.to_excel(writer, sheet_name=sheet_name, index=False)

        print(f"  Saved Top {TOP_PERCENT}% → Sheet: {sheet_name} (Rows: {len(df_top)})")

print("\nDONE!")
print(f"All Top {TOP_PERCENT}% combinations saved to:", OUTPUT_FILE)


Enter folder path with filtered CSVs: D:\feb24_to_jan26\common_combinations_EURINR_NIFTY
Enter output Excel file path: D:\feb24_to_jan26\Top_25%_Combinations_by_Average_Ratio.xlsx
Enter Top Percentage (example: 25 for top 25%): 25

Total filtered CSV files detected: 86

Processing Common_10_45Days.csv ...
  Saved Top 25.0% → Sheet: Common_10_45Days (Rows: 150362)

Processing Common_11_45Days.csv ...
  Saved Top 25.0% → Sheet: Common_11_45Days (Rows: 151584)

Processing Common_12_45Days.csv ...
  Saved Top 25.0% → Sheet: Common_12_45Days (Rows: 144572)

Processing Common_13_45Days.csv ...
  Saved Top 25.0% → Sheet: Common_13_45Days (Rows: 151802)

Processing Common_14_45Days.csv ...
  Saved Top 25.0% → Sheet: Common_14_45Days (Rows: 160995)

Processing Common_15_45Days.csv ...
  Saved Top 25.0% → Sheet: Common_15_45Days (Rows: 155948)

Processing Common_16_45Days.csv ...
  Saved Top 25.0% → Sheet: Common_16_45Days (Rows: 171457)

Processing Common_17_45Days.csv ...
  Saved Top 25.0% → S

  Saved Top 25.0% → Sheet: Common_8_45Days (Rows: 157897)

Processing Common_9_45Days.csv ...
  Saved Top 25.0% → Sheet: Common_9_45Days (Rows: 113401)

DONE!
All Top 25.0% combinations saved to: D:\feb24_to_jan26\Top_25%_Combinations_by_Average_Ratio.xlsx


In [8]:
#Lowest_Average_Realized_Correlation(4Stock_Combinations_Correlation)

import pandas as pd
import numpy as np
import itertools
from tqdm import tqdm

# ================= USER INPUT =================
input_file = r"D:\feb24_to_jan26\NIFTY50_CLOSE_MASTER.xlsx"
output_file = r"D:\feb24_to_jan26\4Stock_Combinations_Correlation.xlsx"


print("Loading data...")
df = pd.read_excel(input_file)

# Convert DATE column
df['DATE'] = pd.to_datetime(df['DATE'])

# Set DATE as index
df.set_index('DATE', inplace=True)

# Calculate daily returns
returns = df.pct_change().dropna()

stocks = returns.columns.tolist()
print(f"Total Stocks: {len(stocks)}")

results = []

# Generate all 4-stock combinations
combinations = list(itertools.combinations(stocks, 4))
print(f"Total 4-stock combinations: {len(combinations)}")

for combo in tqdm(combinations, desc="Calculating correlations"):

    data = returns[list(combo)]
    corr_matrix = data.corr()

    # All 6 pairwise correlations
    pairs = list(itertools.combinations(combo, 2))
    pair_corrs = [corr_matrix.loc[a, b] for a, b in pairs]

    #  YOUR EXACT FORMULA
    avg_realized_corr = (0.25 * sum(pair_corrs)) / 1.5

    results.append({
        "Combo": ", ".join(combo),
        "Average_Realized_Correlation": avg_realized_corr
    })

# Create result dataframe
result_df = pd.DataFrame(results)

# Rank by least correlation
result_df = result_df.sort_values(by="Average_Realized_Correlation")

# Save output
result_df.to_excel(output_file, index=False)

print(" DONE")
print(" The TOP row in Excel is your LEAST CORRELATED 4-stock portfolio")


Loading data...


  returns = df.pct_change().dropna()


Total Stocks: 50
Total 4-stock combinations: 230300


Calculating correlations: 100%|██████████████████████████████████████████████| 230300/230300 [02:44<00:00, 1402.24it/s]


 DONE
 The TOP row in Excel is your LEAST CORRELATED 4-stock portfolio


In [1]:
#Top_25%_Lowest_Average_Correlation

import pandas as pd
import os

# ---------------- USER INPUT ----------------
file_corr = input("Enter the path of the Correlation Excel file: ").strip()
file_data = input("Enter the path of the data Excel file (multiple sheets): ").strip()
output_file = input("Enter the path for the output Excel file: ").strip()

# Ensure output directory exists
os.makedirs(os.path.dirname(output_file), exist_ok=True)

def normalize_combo(combo):
    if pd.isna(combo):
        return combo
    stocks = [s.strip() for s in combo.split(",")]
    stocks_sorted = sorted(stocks)
    return ", ".join(stocks_sorted)

print("Loading correlation file...")
df_corr = pd.read_excel(file_corr)
df_corr["Combo_Normalized"] = df_corr["Combo"].apply(normalize_combo)

print("Loading data file with multiple sheets...")
xls_data = pd.ExcelFile(file_data)
sheet_names = xls_data.sheet_names

output_sheets = {}

for sheet in sheet_names:
    print(f"Processing sheet: {sheet} ...")
    df_data = pd.read_excel(xls_data, sheet_name=sheet)
    
    # Normalize combo
    df_data["Combo_Normalized"] = df_data["Combo"].apply(normalize_combo)
    
    # Merge correlation
    df_merged = df_data.merge(
        df_corr[["Combo_Normalized", "Average_Realized_Correlation"]],
        on="Combo_Normalized",
        how="left"
    )
    
    # Drop rows without correlation if needed
    df_merged = df_merged.dropna(subset=["Average_Realized_Correlation"])
    
    # Remove helper column
    df_merged.drop(columns=["Combo_Normalized"], inplace=True)
    
    output_sheets[sheet] = df_merged

print("Saving output file with multiple sheets...")
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
    for sheet, df_out in output_sheets.items():
        df_out.to_excel(writer, sheet_name=sheet, index=False)

print("DONE: Correlation values applied and output saved with multiple sheets.")

Enter the path of the Correlation Excel file: D:\feb24_to_jan26\4Stock_Combinations_Correlation.xlsx
Enter the path of the data Excel file (multiple sheets): D:\feb24_to_jan26\Top_25%_Combinations_by_Average_Ratio.xlsx
Enter the path for the output Excel file: D:\feb24_to_jan26\Top_25%_Lowest_Average_Correlation.xlsx
Loading correlation file...
Loading data file with multiple sheets...
Processing sheet: Common_10_45Days ...
Processing sheet: Common_11_45Days ...
Processing sheet: Common_12_45Days ...
Processing sheet: Common_13_45Days ...
Processing sheet: Common_14_45Days ...
Processing sheet: Common_15_45Days ...
Processing sheet: Common_16_45Days ...
Processing sheet: Common_17_45Days ...
Processing sheet: Common_18_45Days ...
Processing sheet: Common_19_45Days ...
Processing sheet: Common_1_45Days ...
Processing sheet: Common_20_45Days ...
Processing sheet: Common_21_45Days ...
Processing sheet: Common_22_45Days ...
Processing sheet: Common_23_45Days ...
Processing sheet: Common_24

In [4]:
#Correlation_with_Japan_Bond_Yield( for each Combo)

import pandas as pd

# ---------------- USER INPUT ----------------
file_prices = input("Enter the path of the Prices CSV file: ").strip()
file_japan  = input("Enter the path of the Japan Yield Excel file: ").strip()
output_file = input("Enter the path for the output CSV file: ").strip()

# ---------------- LOAD REQUIRED COLUMNS ----------------
df_prices = pd.read_csv(
    file_prices,
    usecols=['DATE', 'Combo', 'Total_Price']
)

df_japan = pd.read_excel(
    file_japan,
    usecols=['DATE', 'Japan_Yield']
)

# ---------------- DATE CONVERSION ----------------
df_prices['DATE'] = pd.to_datetime(df_prices['DATE'], errors='coerce')
df_japan['DATE']  = pd.to_datetime(df_japan['DATE'], errors='coerce')

df_prices.dropna(subset=['DATE'], inplace=True)
df_japan.dropna(subset=['DATE'], inplace=True)

# ---------------- SORT & MERGE ----------------
df_prices.sort_values('DATE', inplace=True)
df_japan.sort_values('DATE', inplace=True)

merged = pd.merge(df_prices, df_japan, on='DATE', how='inner')

# ---------------- CORRELATION PER COMBO ----------------
results = (
    merged
    .groupby('Combo')[['Total_Price', 'Japan_Yield']]
    .corr()
    .iloc[0::2, -1]   
    .reset_index(level=1, drop=True)
    .reset_index(name='Correlation_with_Japan_Yield')
)

# ---------------- SAVE OUTPUT ----------------
results.to_csv(output_file, index=False)

print("\n Correlation calculation completed successfully.")
print(f" Output saved to: {output_file}")


Enter the path of the Prices CSV file: D:\feb24_to_jan26\4Company_Total_Price\Window_1_45Days.csv
Enter the path of the Japan Yield Excel file: D:\feb24_to_jan26\Japan 10-Year Bond Yield Historical  Data.xlsx
Enter the path for the output CSV file: D:\feb24_to_jan26\Correlation_with_Japan_Bond_Yield.csv

 Correlation calculation completed successfully.
 Output saved to: D:\feb24_to_jan26\Correlation_with_Japan_Bond_Yield.csv


In [5]:
#NIFTY50_Top_25%_Highest_Correlation_Japan_Bond_Yield

import pandas as pd
import os

# ---------------- USER INPUT ----------------
file_corr = input("Enter the path of the Correlation CSV file: ").strip()
file_data = input("Enter the path of the data Excel file (multiple sheets): ").strip()
output_file = input("Enter the path for the output Excel file: ").strip()

# Ensure output directory exists
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# -------- Normalize combo sorting -------
def normalize_combo(combo):
    if pd.isna(combo):
        return combo
    stocks = [s.strip() for s in combo.split(",")]
    return ", ".join(sorted(stocks))

# -------- Load correlation CSV --------
print("Loading correlation CSV file...")
df_corr = pd.read_csv(file_corr)
df_corr["Combo_Normalized"] = df_corr["Combo"].apply(normalize_combo)

# -------- Take highest Japan Yield correlation --------
df_corr_grouped = (
    df_corr.groupby("Combo_Normalized", as_index=False)["Correlation_with_Japan_Yield"]
           .max()
)

# -------- Load multi-sheet Excel --------
print("Loading data file with multiple sheets...")
xls_data = pd.ExcelFile(file_data)
sheet_names = xls_data.sheet_names

output_sheets = {}

# -------- Process each sheet --------
for sheet in sheet_names:
    print(f"Processing sheet: {sheet} ...")
    df_data = pd.read_excel(xls_data, sheet_name=sheet)

    # Normalize combo in data
    df_data["Combo_Normalized"] = df_data["Combo"].apply(normalize_combo)

    # Merge highest correlation
    df_merged = df_data.merge(
        df_corr_grouped,
        on="Combo_Normalized",
        how="left"
    )

    # Rename output column
    df_merged.rename(
        columns={"Correlation_with_Japan_Yield": "Highest_Correlation_Japan_Bond_Yield"},
        inplace=True
    )

    # Remove rows where correlation not available
    df_merged = df_merged.dropna(subset=["Highest_Correlation_Japan_Bond_Yield"])

    # Remove helper column
    df_merged.drop(columns=["Combo_Normalized"], inplace=True)

    output_sheets[sheet] = df_merged

# -------- Save Excel with multiple sheets --------
print("Saving output file with multiple sheets...")
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
    for sheet, df_out in output_sheets.items():
        df_out.to_excel(writer, sheet_name=sheet, index=False)

print("DONE: Output Excel generated with all sheets updated.")

Enter the path of the Correlation CSV file: D:\feb24_to_jan26\Correlation_with_Japan_Bond_Yield.csv
Enter the path of the data Excel file (multiple sheets): D:\feb24_to_jan26\Top_25%_Lowest_Average_Correlation.xlsx
Enter the path for the output Excel file: D:\feb24_to_jan26\Top_25%_Highest_Correlation_Japan_Bond_Yield.xlsx
Loading correlation CSV file...
Loading data file with multiple sheets...
Processing sheet: Common_10_45Days ...
Processing sheet: Common_11_45Days ...
Processing sheet: Common_12_45Days ...
Processing sheet: Common_13_45Days ...
Processing sheet: Common_14_45Days ...
Processing sheet: Common_15_45Days ...
Processing sheet: Common_16_45Days ...
Processing sheet: Common_17_45Days ...
Processing sheet: Common_18_45Days ...
Processing sheet: Common_19_45Days ...
Processing sheet: Common_1_45Days ...
Processing sheet: Common_20_45Days ...
Processing sheet: Common_21_45Days ...
Processing sheet: Common_22_45Days ...
Processing sheet: Common_23_45Days ...
Processing sheet:

In [7]:
#Top_10_Combo_Highest_Correlation_Japan_Bond_Yield

import pandas as pd
import os

# ---------------- USER INPUT ----------------
input_excel = input("Enter the input Excel file path (multiple sheets): ").strip()
output_excel = input("Enter the output Excel file path: ").strip()

# Ensure output directory exists
os.makedirs(os.path.dirname(output_excel), exist_ok=True)

REQUIRED_COL = "Highest_Correlation_Japan_Bond_Yield"
COMBO_COL = "Combo"
TOP_N = 10

print("Loading input Excel file...")
xls = pd.ExcelFile(input_excel)

valid_sheet_count = 0

with pd.ExcelWriter(output_excel, engine="openpyxl") as writer:
    for sheet in xls.sheet_names:
        print(f"\nProcessing sheet: {sheet}")

        df = pd.read_excel(xls, sheet_name=sheet)

        # ---- Column validation ----
        if REQUIRED_COL not in df.columns or COMBO_COL not in df.columns:
            print(f" Skipped → Missing required columns")
            continue

        # ---- Sort & deduplicate ----
        df_sorted = (
            df.sort_values(REQUIRED_COL, ascending=False)
              .drop_duplicates(subset=COMBO_COL, keep="first")
        )

        # ---- Check top 10 availability ----
        if len(df_sorted) < TOP_N:
            print(f" Skipped → Only {len(df_sorted)} unique combos (need {TOP_N})")
            continue

        # ---- Take top 10 ----
        df_top = df_sorted.head(TOP_N)

        # ---- Write sheet ----
        df_top.to_excel(writer, sheet_name=sheet, index=False)
        valid_sheet_count += 1
        print(" Sheet written")

# ---- Final status ----
if valid_sheet_count == 0:
    print("\n No sheets met the criteria. Output file may be empty.")
else:
    print(f"\n DONE: {valid_sheet_count} valid sheets written successfully.")

Enter the input Excel file path (multiple sheets): D:\feb24_to_jan26\Top_25%_Highest_Correlation_Japan_Bond_Yield.xlsx
Enter the output Excel file path: D:\feb24_to_jan26\Top_10_Combo_Highest_Correlation_Japan_Bond_Yield.xlsx
Loading input Excel file...

Processing sheet: Common_10_45Days
 Sheet written

Processing sheet: Common_11_45Days
 Sheet written

Processing sheet: Common_12_45Days
 Sheet written

Processing sheet: Common_13_45Days
 Sheet written

Processing sheet: Common_14_45Days
 Sheet written

Processing sheet: Common_15_45Days
 Sheet written

Processing sheet: Common_16_45Days
 Sheet written

Processing sheet: Common_17_45Days
 Sheet written

Processing sheet: Common_18_45Days
 Sheet written

Processing sheet: Common_19_45Days
 Sheet written

Processing sheet: Common_1_45Days
 Sheet written

Processing sheet: Common_20_45Days
 Sheet written

Processing sheet: Common_21_45Days
 Sheet written

Processing sheet: Common_22_45Days
 Sheet written

Processing sheet: Common_23_45Da

In [8]:
#only_select_combos

import pandas as pd
import os
import re

# ================= USER INPUT =================
INPUT_FILE  = r"D:\feb24_to_jan26\Top_10_Combo_Highest_Correlation_Japan_Bond_Yield.xlsx"
OUTPUT_FILE = r"D:\feb24_to_jan26\performance_gain_loss\Top_10\only_select_10_combos.xlsx"

# ================= LOAD ALL SHEETS =================
xls = pd.ExcelFile(INPUT_FILE)

os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)

def extract_number(sheet_name):
    """
    Extracts the numeric part from Common_<number>_45Days
    """
    match = re.search(r"Common_(\d+)_", sheet_name)
    return int(match.group(1)) if match else float("inf")

# Sort sheet names numerically
sorted_sheets = sorted(xls.sheet_names, key=extract_number)

with pd.ExcelWriter(OUTPUT_FILE, engine="xlsxwriter") as writer:
    for sheet_name in sorted_sheets:
        df = pd.read_excel(xls, sheet_name=sheet_name)

        # Keep ONLY Combo column
        out_df = df[["Combo"]] if "Combo" in df.columns else pd.DataFrame({"Combo": []})

        # Write with SAME sheet name
        out_df.to_excel(writer, sheet_name=sheet_name, index=False)

print("Output file created with sheets ordered serially (numeric), names unchanged.")


Output file created with sheets ordered serially (numeric), names unchanged.


In [9]:
#Combo_With_other_columns

import pandas as pd

# ================= FILE PATHS =================
FIRST_FILE  = r"D:\feb24_to_jan26\performance_gain_loss\Input_date_data_for_performance.xlsx"       
SECOND_FILE = r"D:\feb24_to_jan26\performance_gain_loss\Top_10\only_select_10_combos.xlsx"
OUTPUT_FILE = r"D:\feb24_to_jan26\performance_gain_loss\Top_10\10_Combo_With_other_columns.xlsx"

# ================= LOAD FILES =================
first_xls  = pd.ExcelFile(FIRST_FILE)
second_xls = pd.ExcelFile(SECOND_FILE)

with pd.ExcelWriter(OUTPUT_FILE, engine="xlsxwriter") as writer:

    for i, sheet_name in enumerate(second_xls.sheet_names):

        # Read files
        first_df  = pd.read_excel(first_xls, sheet_name=first_xls.sheet_names[i])
        second_df = pd.read_excel(second_xls, sheet_name=sheet_name)

        # Identify column names safely
        col2_name = first_df.columns[1]
        col3_name = first_df.columns[2]

        # Keep only required columns from first file
        lookup_df = first_df[["Combo", col2_name, col3_name]]

        # ================= COMBO-BASED MERGE =================
        final_df = second_df.merge(
            lookup_df,
            on="Combo",
            how="left"
        )

        # Write output
        final_df.to_excel(writer, sheet_name=sheet_name, index=False)

print(" Error fixed: columns added using Combo matching.")


 Error fixed: columns added using Combo matching.


In [10]:
#Combo_Top_10_total_price

import os
import pandas as pd
import re
import numpy as np

# ================= USER INPUT =================
COMBO_FILE  = input("Enter Combo Excel file path: ").strip()
PRICE_FILE  = input("Enter Price Excel file path: ").strip()
OUTPUT_FILE = input("Enter Output Excel file path: ").strip()
INVESTMENT  = float(input("Enter investment per company (e.g. 50000): "))

# ================= CLEAN OLD OUTPUT =================
if os.path.exists(OUTPUT_FILE):
    os.remove(OUTPUT_FILE)

# ================= LOAD PRICE DATA =================
price_df = pd.read_excel(PRICE_FILE)
price_df["DATE"] = pd.to_datetime(price_df["DATE"], format="%d-%b-%y")
price_df.set_index("DATE", inplace=True)
price_index = price_df.index

# ================= HELPER FUNCTIONS =================
def extract_date(col_name):
    match = re.search(r"\d{4}-\d{2}-\d{2}", col_name)
    if not match:
        raise ValueError(f"Date not found in column name: {col_name}")
    return pd.to_datetime(match.group())

def nearest_date(index, target):
    return index[index.get_indexer([target], method="nearest")[0]]

# ================= PROCESS COMBO FILE =================
combo_xls = pd.ExcelFile(COMBO_FILE)

with pd.ExcelWriter(OUTPUT_FILE, engine="xlsxwriter") as writer:

    for sheet_name in combo_xls.sheet_names:

        combo_df = pd.read_excel(combo_xls, sheet_name=sheet_name)

        # ---- Extract dates from column names ----
        start_date = extract_date(combo_df.columns[1])
        end_date   = extract_date(combo_df.columns[2])

        if start_date not in price_index:
            start_date = nearest_date(price_index, start_date)
        if end_date not in price_index:
            end_date = nearest_date(price_index, end_date)

        start_prices = price_df.loc[start_date]
        end_prices   = price_df.loc[end_date]

        output_rows = []

        for combo in combo_df["Combo"]:
            companies = combo.split(",")

            # ---- Quantity calculation ----
            quantities = INVESTMENT / start_prices[companies].values

            start_total = int(round(INVESTMENT * len(companies)))
            end_total   = int(round(np.sum(quantities * end_prices[companies].values)))

            output_rows.append({
                "Combo": combo,
                "start_price": start_total,
                "final_price": end_total
            })

        out_df = pd.DataFrame(output_rows)

        #  OUTPUT SHEET NAME = INPUT SHEET NAME
        out_df.to_excel(writer, sheet_name=sheet_name[:31], index=False)

print("\n Process completed successfully")
print(f" Output file created at: {OUTPUT_FILE}")


Enter Combo Excel file path: D:\feb24_to_jan26\performance_gain_loss\Top_10\10_Combo_With_other_columns.xlsx
Enter Price Excel file path: D:\feb24_to_jan26\NIFTY50_CLOSE_MASTER.xlsx
Enter Output Excel file path: D:\feb24_to_jan26\performance_gain_loss\Combo_Top_10_total_price.xlsx
Enter investment per company (e.g. 50000): 90000

 Process completed successfully
 Output file created at: D:\feb24_to_jan26\performance_gain_loss\Combo_Top_10_total_price.xlsx


In [11]:
# Combo_Top_10_Gain_Loss

import pandas as pd
import numpy as np

# ===================== USER INPUT =====================
input_file = input("Enter input Excel file path: ").strip()
output_file = input("Enter output Excel file path: ").strip()

# ===================== LOAD INPUT =====================
xls = pd.ExcelFile(input_file)

summary_rows = []
detailed_sheets = {}

# ===================== PROCESS EACH INPUT SHEET =====================
for sheet_name in xls.sheet_names:
    df = pd.read_excel(xls, sheet_name=sheet_name)

    # -------- FIND PRICE COLUMNS DYNAMICALLY --------
    start_col = next((c for c in df.columns if c.startswith("start_price")), None)
    final_col = next((c for c in df.columns if c.startswith("final_price")), None)

    if start_col is None or final_col is None:
        raise KeyError(
            f"Sheet '{sheet_name}' missing start_price or final_price column"
        )

    # ----- ADD CALCULATION COLUMNS -----
    df["Gain_Loss_Amt"] = df[final_col] - df[start_col]
    df["Gain_Amt"] = np.where(df["Gain_Loss_Amt"] > 0, df["Gain_Loss_Amt"], 0)
    df["Loss_Amt"] = np.where(df["Gain_Loss_Amt"] < 0, abs(df["Gain_Loss_Amt"]), 0)
    df["No_of_Gains"] = np.where(df["Gain_Loss_Amt"] > 0, 1, 0)
    df["No_of_Loss"] = np.where(df["Gain_Loss_Amt"] < 0, 1, 0)

    # ----- SELECT COLUMNS FOR DETAILED SHEET -----
    detailed_df = df[
        ["Combo", start_col, final_col, "Gain_Loss_Amt", "Gain_Amt", "Loss_Amt"]
    ]

    detailed_sheets[sheet_name] = detailed_df

    # ----- CREATE SHEET-WISE SUMMARY -----
    summary_rows.append({
        "Sheet_Name": sheet_name,
        "No_of_Gains": int(df["No_of_Gains"].sum()),
        "No_of_Loss": int(df["No_of_Loss"].sum()),
        "Total_Gains_Amt": round(df["Gain_Amt"].sum(), 2),
        "Total_Loss_Amt": round(df["Loss_Amt"].sum(), 2),
        "Net_Gain_Loss_Amt": round(df["Gain_Amt"].sum() - df["Loss_Amt"].sum(), 2)
    })

# ===================== WRITE OUTPUT =====================
with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:

    # Summary first
    summary_df = pd.DataFrame(summary_rows)
    summary_df.to_excel(writer, sheet_name="Summary_All_Sheets", index=False)

    # Detailed sheets
    for sheet_name, df in detailed_sheets.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(" Output created successfully")


Enter input Excel file path: D:\feb24_to_jan26\performance_gain_loss\Combo_Top_10_total_price.xlsx
Enter output Excel file path: D:\feb24_to_jan26\performance_gain_loss\Combo_Top_10_Gain_Loss.xlsx
 Output created successfully


In [13]:
#Summary_With_Percentage_Columns

import pandas as pd

# ================= USER INPUT =================
input_file = input("Enter input Excel file path: ").strip()
output_file = input("Enter output Excel file path: ").strip()

# ================= LOAD INPUT  =================
df = pd.read_excel(input_file)

# ================= CONSTANTS =================
CAPITAL = 360000 * 10
TOTAL_COMBOS = 10
TOTAL_WEEKS = len(df)   

# ================= ADD OUTPUT COLUMNS =================
df["%Returns"] = ((df["Net_Gain_Loss_Amt"] / CAPITAL) * 100).round(2)
df["%No_of_Gains"] = ((df["No_of_Gains"] / TOTAL_COMBOS) * 100).round(2)

df["%No_of_Loss"] = ((df["No_of_Loss"] / TOTAL_COMBOS) * 100).round(2)

# ================= OVERALL SUMMARY =================
overall_df = pd.DataFrame({
    "Metric": [
        "%Total_Returns",
        "%Total_No_of_Gains",
        "%Total_No_of_Loss"
    ],
    "Value": [
        round((df["Net_Gain_Loss_Amt"].sum() / (CAPITAL * TOTAL_WEEKS)) * 100,2),
        round((df["No_of_Gains"].sum() / (TOTAL_COMBOS * TOTAL_WEEKS)) * 100,2),
        round((df["No_of_Loss"].sum() / (TOTAL_COMBOS * TOTAL_WEEKS)) * 100,2)
    ]
})

# ================= WRITE OUTPUT =================
with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
    df.to_excel(writer, sheet_name="Weekly_Data", index=False)
    overall_df.to_excel(writer, sheet_name="Overall_Summary", index=False)

print("\n Calculation completed successfully.")
print(" Output file created at:", output_file)


Enter input Excel file path: D:\feb24_to_jan26\performance_gain_loss\Top_10\Only_summary_10.xlsx
Enter output Excel file path: D:\feb24_to_jan26\performance_gain_loss\Top_10\summary_10week.xlsx

 Calculation completed successfully.
 Output file created at: D:\feb24_to_jan26\performance_gain_loss\Top_10\summary_10week.xlsx


In [3]:
#NIFTY_Performance_output_in_single_fie

import pandas as pd
import numpy as np

# ================= USER INPUT =================
nifty_file  = input("Enter NIFTY Excel file path: ").strip()
combo_file  = input("Enter Combo Excel file path (multiple sheets): ").strip()
output_file = input("Enter Output Excel file path: ").strip()

# ================= LOAD NIFTY DATA =================
nifty_df = pd.read_excel(nifty_file)
nifty_df['DATE'] = pd.to_datetime(nifty_df['DATE'])
nifty_df.set_index('DATE', inplace=True)
nifty_df.sort_index(inplace=True)

# ---------- Helper ----------
def get_nearest_nifty_value(target_date):
    if target_date in nifty_df.index:
        return nifty_df.loc[target_date, 'NIFTY']

    nearest_idx = nifty_df.index.get_indexer(
        [target_date], method='nearest'
    )[0]

    return nifty_df.iloc[nearest_idx]['NIFTY']

# ================= PROCESS MULTIPLE SHEETS =================
xls = pd.ExcelFile(combo_file)

summary_rows = []

for sheet_name in xls.sheet_names:

    df = pd.read_excel(xls, sheet_name=sheet_name)

    # Identify start & end date columns
    start_col = [c for c in df.columns if c.startswith("start_date")][0]
    end_col   = [c for c in df.columns if c.startswith("end_date")][0]

    # Extract dates from column headers
    start_date = pd.to_datetime(start_col.replace("start_date_", ""))
    end_date   = pd.to_datetime(end_col.replace("end_date_", ""))

    # Fetch NIFTY values
    start_nifty = get_nearest_nifty_value(start_date)
    end_nifty   = get_nearest_nifty_value(end_date)

    # Calculate %Returns 
    returns = (
        (((end_nifty - start_nifty) / start_nifty) * 100).round(2)
        if start_nifty != 0 else np.nan
    )

    summary_rows.append({
        'Sheet_Name': sheet_name,
        'NIFTY_Start': start_nifty,
        'NIFTY_Final': end_nifty,
        '%Returns': returns
    })

# ================= SAVE SUMMARY FILE =================
summary_df = pd.DataFrame(summary_rows)
summary_df.to_excel(output_file, index=False)

print("\n summary file generated successfully.")


Enter NIFTY Excel file path: D:\feb24_to_jan26\NIFTY_CLOSE.xlsx
Enter Combo Excel file path (multiple sheets): D:\feb24_to_jan26\performance_gain_loss\10_Combo_With_other_columns.xlsx
Enter Output Excel file path: D:\feb24_to_jan26\performance_gain_loss\NIFTY_Performance_Single_file_86.xlsx

 summary file generated successfully.


In [None]:
#returns_comparison_on_our_data_nifty_data

import pandas as pd

# ================= FILE PATHS =================
nifty_file = r"C:\swarupa\performance_gain_loss\Top_10_Combos\NIFTY_Performance_202_Weeks.xlsx"
ourdata_file = r"C:\Users\Swarupa\Downloads\Top_10_summary_202week.xlsx"

# ================= COLUMN NAME =================
return_col = "%Returns"   # change here if needed

# ================= LOAD NIFTY DATA =================
nifty_df = pd.read_excel(nifty_file)

# Ensure numeric
nifty_df[return_col] = pd.to_numeric(nifty_df[return_col], errors="coerce")

nifty_gain_count = (nifty_df[return_col] > 0).sum()
nifty_loss_count = (nifty_df[return_col] <= 0).sum()
nifty_total = nifty_df[return_col].count()

# ================= LOAD OUR DATA (ALL SHEETS) =================
xls = pd.ExcelFile(ourdata_file)

our_dfs = []
for sheet in xls.sheet_names:
    df = pd.read_excel(ourdata_file, sheet_name=sheet)
    if return_col in df.columns:
        df[return_col] = pd.to_numeric(df[return_col], errors="coerce")
        df["Sheet_Name"] = sheet
        our_dfs.append(df)

our_df = pd.concat(our_dfs, ignore_index=True)

our_gain_count = (our_df[return_col] > 0).sum()
our_loss_count = (our_df[return_col] <= 0).sum()
our_total = our_df[return_col].count()

# ================= SUMMARY TABLE =================
summary = pd.DataFrame({
    "Metric": ["Total Observations", "Positive Returns Count", "Negative / Zero Returns Count"],
    "NIFTY": [nifty_total, nifty_gain_count, nifty_loss_count],
    "OUR_DATA": [our_total, our_gain_count, our_loss_count]
})

print("\n===== PERFORMANCE COMPARISON =====\n")
print(summary)

# ================= OPTIONAL: SAVE RESULT =================
output_file = r"C:\Users\Swarupa\Downloads\Return_Comparison_Summary_202.xlsx"
summary.to_excel(output_file, index=False)

print(f"\nSummary saved to: {output_file}")


In [7]:
#NIFTY_Performance_output_in_multiple_sheets

import pandas as pd
import numpy as np

# ================= USER INPUT =================
nifty_file  = input("Enter NIFTY Excel file path: ").strip()
combo_file  = input("Enter Combo Excel file path (multiple sheets): ").strip()
output_file = input("Enter Output Excel file path: ").strip()

# ================= LOAD NIFTY DATA =================
nifty_df = pd.read_excel(nifty_file, usecols=['DATE', 'NIFTY'])
nifty_df['DATE'] = pd.to_datetime(nifty_df['DATE'])
nifty_df = nifty_df.sort_values('DATE').set_index('DATE')

nifty_index  = nifty_df.index
nifty_values = nifty_df['NIFTY'].values

# ---------- FAST NEAREST VALUE FUNCTION ----------
def get_nearest_nifty_value(target_date):
    pos = nifty_index.searchsorted(target_date)

    if pos == 0:
        return nifty_values[0]
    if pos == len(nifty_index):
        return nifty_values[-1]

    before = nifty_index[pos - 1]
    after  = nifty_index[pos]

    return nifty_values[pos - 1] if abs(target_date - before) <= abs(after - target_date) else nifty_values[pos]

# ================= PROCESS MULTIPLE SHEETS =================
xls = pd.ExcelFile(combo_file)

with pd.ExcelWriter(output_file, engine='openpyxl') as writer:

    sheet_counter = 1   

    for sheet_name in xls.sheet_names:

        df = pd.read_excel(xls, sheet_name=sheet_name)

        # Identify start & end date columns
        start_cols = [c for c in df.columns if c.startswith("start_date_")]
        end_cols   = [c for c in df.columns if c.startswith("end_date_")]

        if not start_cols or not end_cols:
            continue

        # Extract dates from column names
        start_date = pd.to_datetime(start_cols[0].replace("start_date_", ""))
        end_date   = pd.to_datetime(end_cols[0].replace("end_date_", ""))

        # Fetch NIFTY values
        start_nifty = get_nearest_nifty_value(start_date)
        end_nifty   = get_nearest_nifty_value(end_date)

        # Calculate %Returns
        returns = round(((end_nifty - start_nifty) / start_nifty) * 100, 2)

        # Create output dataframe
        output_df = pd.DataFrame([{
            'NIFTY_Start': start_nifty,
            'NIFTY_Final': end_nifty,
            '%Returns': returns
        }])

        #  New sheet name format
        output_sheet_name = f"Common_{sheet_counter}_45Days"

        output_df.to_excel(writer, sheet_name=output_sheet_name, index=False)

        sheet_counter += 1

print("\n Output generated with sheet names like: Common_1_45Days, Common_2_45Days, ...")


Enter NIFTY Excel file path: D:\feb24_to_jan26\NIFTY_CLOSE.xlsx
Enter Combo Excel file path (multiple sheets): D:\feb24_to_jan26\performance_gain_loss\Input_date_data_for_performance.xlsx
Enter Output Excel file path: D:\feb24_to_jan26\performance_gain_loss\nifty_performance_86.xlsx

 Output generated with sheet names like: Common_1_45Days, Common_2_45Days, ...


In [10]:
#METRIC_PERFORMANCE

import pandas as pd
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment

# ================= USER INPUT =================
input_file_1 = input("Enter FIRST input Excel file (multi-sheet combos): ").strip()
input_file_2 = input("Enter SECOND input Excel file (METRICS): ").strip()
nifty_file   = input("Enter NIFTY Excel file (multi-sheet): ").strip()
output_file  = input("Enter output Excel file path: ").strip()

INVESTMENT_AMOUNT = 90000

# ================= STYLES =================
thin = Side(style="thin")
border = Border(left=thin, right=thin, top=thin, bottom=thin)

header_font  = Font(bold=True, size=13)
bold_font    = Font(bold=True)
center_align = Alignment(horizontal="center", vertical="center")
total_fill   = PatternFill("solid", fgColor="FFFF99")

# ================= HELPERS =================
def normalize(name):
    return name.strip().lower().replace(" ", "")

def apply_borders(ws, start_row, start_col, rows, cols):
    for r in range(start_row, start_row + rows):
        for c in range(start_col, start_col + cols):
            ws.cell(r, c).border = border

def add_merged_header(ws, row, start_col, col_count, title):
    ws.merge_cells(
        start_row=row,
        start_column=start_col,
        end_row=row,
        end_column=start_col + col_count - 1
    )
    cell = ws.cell(row, start_col, title)
    cell.font = header_font
    cell.alignment = center_align

# ================= CORE LOGIC =================
def create_combo_summary(df):
    companies = df["Combo"].dropna().str.split(",").explode().str.strip()
    summary = companies.value_counts().reset_index()
    summary.columns = ["Company", "Count"]

    summary.insert(0, "Sr.No.", range(1, len(summary) + 1))
    summary["Invest"] = INVESTMENT_AMOUNT
    summary["Value"]  = summary["Count"] * INVESTMENT_AMOUNT

    total_value = summary["Value"].sum()
    summary["%Alloc"] = summary["Value"] / total_value

    total_row = pd.DataFrame({
        "Sr.No.": [""],
        "Company": ["TOTAL_INVESTMENT"],
        "Count": [""],
        "Invest": [""],
        "Value": [total_value],
        "%Alloc": [""]
    })

    return pd.concat([summary, total_row], ignore_index=True)

def get_metrics_table(metrics_df, sheet):
    cols = [
        "No_of_Gains", "No_of_Loss",
        "Total_Gains_Amt", "Total_Loss_Amt",
        "Net_Gain_Loss_Amt", "%Returns"
    ]
    row = metrics_df[metrics_df["Sheet_Name"] == sheet]
    if row.empty:
        return pd.DataFrame(columns=["Metric", "Value"])

    out = row[cols].T.reset_index()
    out.columns = ["Metric", "Value"]
    return out

def get_vertical_nifty_table(nifty_df):
    if nifty_df.empty:
        return pd.DataFrame(columns=["Metric", "Value"])

    first_row = nifty_df.iloc[0]
    out = first_row.reset_index()
    out.columns = ["Metric", "Value"]
    return out

# ================= LOAD FILES =================
combo_xls  = pd.ExcelFile(input_file_1)
metrics_df = pd.read_excel(input_file_2)
nifty_xls  = pd.ExcelFile(nifty_file)

nifty_map = {normalize(s): s for s in nifty_xls.sheet_names}

# ================= WRITE OUTPUT =================
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:

    for sheet in combo_xls.sheet_names:
        df = pd.read_excel(combo_xls, sheet)
        if "Combo" not in df.columns:
            continue

        combo_table   = create_combo_summary(df)
        metrics_table = get_metrics_table(metrics_df, sheet)

        norm = normalize(sheet)
        if norm in nifty_map:
            nifty_df = pd.read_excel(nifty_xls, nifty_map[norm])
            nifty_table = get_vertical_nifty_table(nifty_df)
        else:
            nifty_table = pd.DataFrame(columns=["Metric", "Value"])

        # ========== LAYOUT ==========
        col_combo   = 1
        col_metrics = col_combo + len(combo_table.columns) + 2
        col_nifty   = col_metrics

        row_combo   = 3
        row_metrics = 3
        row_nifty   = row_metrics + len(metrics_table) + 4

        # ========== WRITE TABLES ==========
        combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
        metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
        nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)

        ws = writer.book[sheet]

        # ========== HEADERS ==========
        add_merged_header(ws, 1, col_combo, len(combo_table.columns), sheet)
        add_merged_header(ws, 1, col_metrics, len(metrics_table.columns), "METRICS")
        add_merged_header(ws, row_nifty-1, col_nifty, len(nifty_table.columns), "NIFTY_SUMMARY")

        # ========== BORDERS ==========
        apply_borders(ws, row_combo, col_combo, len(combo_table)+1, len(combo_table.columns))
        apply_borders(ws, row_metrics, col_metrics, len(metrics_table)+1, len(metrics_table.columns))
        apply_borders(ws, row_nifty, col_nifty, len(nifty_table)+1, len(nifty_table.columns))

        # ========== TOTAL HIGHLIGHT ==========
        for r in range(row_combo+1, row_combo + len(combo_table)+1):
            if ws.cell(r, col_combo+1).value == "TOTAL_INVESTMENT":
                for c in range(col_combo, col_combo + len(combo_table.columns)):
                    ws.cell(r, c).fill = total_fill
                    ws.cell(r, c).font = bold_font

print(" Excel created successfully.")


Enter FIRST input Excel file (multi-sheet combos): D:\feb24_to_jan26\performance_gain_loss\Combo_Top_10_Gain_Loss.xlsx
Enter SECOND input Excel file (METRICS): C:\Users\Swarupa\Downloads\Top_10_summary_86Weeks.xlsx
Enter NIFTY Excel file (multi-sheet): D:\feb24_to_jan26\performance_gain_loss\nifty_performance_86.xlsx
Enter output Excel file path: D:\feb24_to_jan26\performance_gain_loss\METRIC_PERFORMACE_86_WEEKS.xlsx


  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer,

  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer,

  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer,

  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer,

  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer,

  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer,

  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer,

  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer, sheet, index=False, startrow=row_metrics-1, startcol=col_metrics-1)
  nifty_table.to_excel(writer, sheet, index=False, startrow=row_nifty-1, startcol=col_nifty-1)
  combo_table.to_excel(writer, sheet, index=False, startrow=row_combo-1, startcol=col_combo-1)
  metrics_table.to_excel(writer,

 Excel created successfully.


In [1]:
#Equity_Curve_dates

import pandas as pd
from dateutil.relativedelta import relativedelta

# ================= USER INPUT =================
input_file  = r"D:\feb24_to_jan26\NIFTY50_CLOSE_MASTER.xlsx"
output_file = r"D:\feb24_to_jan26\performance_gain_loss\Equity Curve\Weekly_Rolling_Dates.xlsx"

# ================= LOAD EXCEL =================
xls = pd.ExcelFile(input_file)

week_ranges = []

# ================= PROCESS ALL SHEETS =================
for sheet_name in xls.sheet_names:
    df = pd.read_excel(input_file, sheet_name=sheet_name)

    if 'DATE' not in df.columns:
        continue

    df['DATE'] = pd.to_datetime(df['DATE'])
    df = df.sort_values('DATE').reset_index(drop=True)

    # ---- Skip first 3 months ----
    first_date = df['DATE'].iloc[0]
    skip_date = first_date + relativedelta(months=3)
    df = df[df['DATE'] >= skip_date]

    if df.empty:
        continue

    start_date = df['DATE'].iloc[0]
    end_date   = df['DATE'].iloc[-1]

    current_start = start_date

    while current_start <= end_date:
        current_end = min(current_start + pd.Timedelta(days=6), end_date)
        week_ranges.append((current_start.date(), current_end.date()))
        current_start += pd.Timedelta(days=7)

# ================= PREPARE DATE ROW =================
date_row = []
for start, end in week_ranges:
    date_row.extend([f"Start_{start}", f"End_{end}"])

date_df = pd.DataFrame([date_row])

# ================= WRITE TO EXCEL =================
with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
    date_df.to_excel(
        writer,
        sheet_name="Weekly_Data",
        index=False,
        header=False,
        startrow=1   
    )

    workbook  = writer.book
    worksheet = writer.sheets["Weekly_Data"]

    # ---- Merge WK headers  ----
    col = 0
    for i in range(len(week_ranges)):
        worksheet.merge_range(0, col, 0, col + 1, f"WK_{i+1}")
        col += 2

    # Optional formatting
    worksheet.set_row(0, None, workbook.add_format({"align": "center", "bold": True}))
    worksheet.set_row(1, None, workbook.add_format({"align": "center"}))
    worksheet.set_column(0, col, 18)

print(" Excel created correctly with WK headers and date values.")


 Excel created correctly with WK headers and date values.


In [2]:
#Equity_Curve_Combo_with_Dates

import pandas as pd
from openpyxl import load_workbook

# ================= USER INPUT =================
file_1 = input("Enter first Excel file path (multiple sheets): ").strip()
file_2 = input("Enter second Excel file path (single-sheet template): ").strip()
output_file = input("Enter output Excel file path: ").strip()

# ================= READ ALL COMBOS FROM FILE 1 =================
all_sheets = pd.read_excel(file_1, sheet_name=None)

rows = []

for sheet_name, df in all_sheets.items():
    if 'Combo' not in df.columns:
        continue

    combos = df['Combo'].dropna().tolist()

    for i, combo in enumerate(combos):
        rows.append([
            sheet_name if i == 0 else '',
            combo
        ])

# ================= LOAD SECOND FILE =================
wb = load_workbook(file_2)
ws = wb.active  

# ================= INSERT TWO COLUMNS BEFORE WK_1 =================
ws.insert_cols(1, 2)

# ================= ADD HEADERS =================
ws['A1'] = 'Sheet_Name'
ws['B1'] = 'Combo'

# ================= WRITE DATA =================
start_row = 3  

for i, row in enumerate(rows):
    ws.cell(row=start_row + i, column=1).value = row[0] 
    ws.cell(row=start_row + i, column=2).value = row[1]  

# ================= SAVE OUTPUT =================
wb.save(output_file)

print(" output created ")


Enter first Excel file path (multiple sheets): D:\feb24_to_jan26\performance_gain_loss\only_select_10_combos.xlsx
Enter second Excel file path (single-sheet template): D:\feb24_to_jan26\performance_gain_loss\Equity Curve\Weekly_Rolling_Dates.xlsx
Enter output Excel file path: D:\feb24_to_jan26\performance_gain_loss\Equity Curve\Weekly_Combos_with_dates.xlsx
 output created 


In [4]:
#final_quantity

import pandas as pd
import re

# ================= USER INPUT =================
combo_file  = r"D:\feb24_to_jan26\performance_gain_loss\Equity Curve\Weekly_Combos_with_dates.xlsx"
price_file  = r"D:\feb24_to_jan26\NIFTY50_CLOSE_MASTER.xlsx"
output_file = r"D:\feb24_to_jan26\performance_gain_loss\Equity Curve\final_quantity_output.xlsx"

INVEST_PER_COMPANY = 20000

# ================= LOAD FILES =================
combo_df = pd.read_excel(combo_file)
price_df = pd.read_excel(price_file)

# --- Prepare price data ---
price_df['DATE'] = pd.to_datetime(price_df['DATE'])
price_df.set_index('DATE', inplace=True)

# ================= FIND START DATE COLUMNS =================
start_cols = [
    col for col in combo_df.columns
    if col.startswith("Start_")
]

# ================= QUANTITY CALCULATION =================
for col in start_cols:

    # Extract date safely from column name
    match = re.search(r'Start_(\d{4}-\d{2}-\d{2})', col)
    if not match:
        continue

    start_date = pd.to_datetime(match.group(1))

    new_values = []

    for _, row in combo_df.iterrows():

        companies = [c.strip() for c in row['Combo'].split(',')]
        quantities = []

        if start_date in price_df.index:
            for company in companies:
                if company in price_df.columns:
                    price = price_df.at[start_date, company]
                    if pd.notna(price) and price > 0:
                        qty = round(INVEST_PER_COMPANY / price, 2)
                        quantities.append(str(qty))
                    else:
                        quantities.append("")
                else:
                    quantities.append("")
        else:
            quantities = [""] * len(companies)

        # Put all 4 quantities in ONE cell 
        new_values.append(",".join(quantities))

    # Write ONLY to Start-date column
    combo_df[col] = new_values

# ================= SAVE OUTPUT =================
combo_df.to_excel(output_file, index=False)

print(" Quantities calculated successfully")
print(" Start columns updated")
print(" End columns unchanged")


 Quantities calculated successfully
 Start columns updated
 End columns unchanged


In [12]:
#Equity_Curve_Excel

import pandas as pd
import numpy as np
import re

# ================= FILES =================
INPUT_FILE  = r"D:\feb24_to_jan26\performance_gain_loss\Equity Curve\final_quantity_output.xlsx"
PRICE_FILE  = r"D:\feb24_to_jan26\NIFTY50_CLOSE_MASTER.xlsx"
OUTPUT_FILE = r"D:\feb24_to_jan26\performance_gain_loss\Equity Curve\equity_curve_excel.xlsx"


START_PER_SUBGROUP = 80000         
SUBGROUPS_PER_WEEK = 10
START_TOTAL = START_PER_SUBGROUP * SUBGROUPS_PER_WEEK  
MAX_ROWS = 6

# ================= LOAD DATA =================
raw_df = pd.read_excel(INPUT_FILE)
price_df = pd.read_excel(PRICE_FILE)

price_df["DATE"] = pd.to_datetime(price_df["DATE"])
price_df = price_df.sort_values("DATE").set_index("DATE")

ALL_COLS = raw_df.columns.tolist() + ["Buffer"]

# ================= PRICE LOOKUP =================
def get_price(company, date):
    if company not in price_df.columns:
        return np.nan
    idx = price_df.index.searchsorted(date, side="right") - 1
    return price_df.iloc[idx][company] if idx >= 0 else np.nan

# ================= DETECT START COLUMNS =================
start_cols = sorted(
    [c for c in raw_df.columns if c.startswith("Start_")],
    key=lambda x: pd.to_datetime(x.split("_")[1])
)

BASE_START = pd.to_datetime(start_cols[0].split("_")[1])
TOTAL_WEEKS = len(start_cols)

# ================= PRECOMPUTE WEEK DATA  =================

week_data = {}

for w in range(1, TOTAL_WEEKS + 1):

    wk_start = BASE_START + pd.Timedelta(days=7 * (w - 1))
    wk_end   = wk_start + pd.Timedelta(days=6)
    sc = f"Start_{wk_start.date()}"

    # ONLY 10 combos of THIS WEEK
    week_df = raw_df[raw_df["Sheet_Name"] == f"Common_{w}_45Days"]

    qty_rows = []
    end_val = 0.0

    for _, r in week_df.iterrows():

        cell = r.get(sc, np.nan)
        if pd.isna(cell):
            continue

        companies = [c.strip() for c in r["Combo"].split(",")]
        qtys = [float(x) for x in str(cell).split(",") if x.strip()]

        qty_rows.append((companies, qtys))

        for c, q in zip(companies, qtys):
            p = get_price(c, wk_end)
            if not np.isnan(p):
                end_val += q * p

    week_data[w] = {
        "qty": qty_rows,           
        "end": round(end_val, 2)
    }

# ================= BUILD OUTPUT =================
output_rows = []

for w in range(1, TOTAL_WEEKS + 1):

    wkN_start = BASE_START + pd.Timedelta(days=7 * (w - 1))
    wkN_end   = wkN_start + pd.Timedelta(days=6)

    sc = f"Start_{wkN_start.date()}"
    ec = f"End_{wkN_end.date()}"

    min_w = max(1, w - MAX_ROWS + 1)

    for idx, src_w in enumerate(range(w, min_w - 1, -1)):

        row = {c: np.nan for c in ALL_COLS}
        row["Sheet_Name"] = f"WK_{w}"
        row["Combo"] = "TOTAL"

        # ----- START VALUE -----
        if idx == 0:
            start_val = START_TOTAL
        else:
            start_val = week_data[src_w]["end"]

        row[sc] = start_val

        # ----- END VALUE -----
        end_val = 0.0
        for companies, qtys in week_data[src_w]["qty"]:
            for c, q in zip(companies, qtys):
                p = get_price(c, wkN_end)
                if not np.isnan(p):
                    end_val += q * p

        end_val = round(end_val, 2)
        row[ec] = end_val

        # ----- BUFFER -----
        row["Buffer"] = round(end_val - start_val, 2)

        output_rows.append(row)

# ================= SAVE =================
final_df = pd.DataFrame(output_rows, columns=ALL_COLS)
final_df.to_excel(OUTPUT_FILE, index=False)

print(" FINAL STRICT-WEEK OUTPUT GENERATED SUCCESSFULLY")


 FINAL STRICT-WEEK OUTPUT GENERATED SUCCESSFULLY


In [4]:
#Equity_Curve_Plotting

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# ================= USER INPUT =================
INPUT_FILE = r"D:\feb24_to_jan26\performance_gain_loss\Equity Curve\equity_curve_excel.xlsx"
OUTPUT_PDF = r"D:\feb24_to_jan26\performance_gain_loss\Equity Curve\Equity_Curve_6Week_Rolling_WITH_BUFFER.pdf"
WINDOW = 6
BUFFER_COL = "Buffer"   

# ================= LOAD DATA =================
df = pd.read_excel(INPUT_FILE)

# ================= DETECT START / END PAIRS =================
cols = list(df.columns)
wk_pairs = []
i = 0
while i < len(cols) - 1:
    if str(cols[i]).startswith("Start_") and str(cols[i + 1]).startswith("End_"):
        wk_pairs.append((cols[i], cols[i + 1]))
        i += 2
    else:
        i += 1

# ================= BUILD WK BLOCKS & BUFFERS =================
wk_blocks = []
wk_buffers = []

for start_col, end_col in wk_pairs:
    block = df[[start_col, end_col]].dropna(how="all").reset_index(drop=True)
    wk_blocks.append(block)

    if BUFFER_COL in df.columns:
        buf_series = df.loc[
            df[start_col].notna() | df[end_col].notna(),
            BUFFER_COL
        ].dropna()
        wk_buffers.append(buf_series.iloc[0] if not buf_series.empty else None)
    else:
        wk_buffers.append(None)

# ================= PLOTTING =================
with PdfPages(OUTPUT_PDF) as pdf:

    for win_start in range(len(wk_blocks) - WINDOW + 1):

        blocks = wk_blocks[win_start: win_start + WINDOW]
        buffers = wk_buffers[win_start: win_start + WINDOW]

        x_labels = [wk_pairs[win_start + i][1] for i in range(WINDOW)]
        x_dates = [pd.to_datetime(c.replace("End_", "")) for c in x_labels]

        fig, (ax_top, ax_bot) = plt.subplots(
            2, 1, figsize=(13, 9),
            gridspec_kw={"height_ratios": [3, 1]},
            sharex=True
        )

        # ================= TOP: EQUITY CURVES =================
        for line in range(WINDOW):

            y_vals = []
            x_vals = []

            # ---- START ----
            if line < len(blocks[line]):
                y_vals.append(blocks[line].iloc[0, 0])
                x_vals.append(x_dates[line])
            else:
                continue

            # ---- END  ----
            for wk in range(line + 1, WINDOW):
                if len(blocks[wk]) == 0:
                    continue
                row_idx = min(line, len(blocks[wk]) - 1)
                y_vals.append(blocks[wk].iloc[row_idx, 1])
                x_vals.append(x_dates[wk])

            ax_top.plot(
                x_vals,
                y_vals,
                marker="o",
                linewidth=2,
                label=f"Line-{line+1} (Start WK-{win_start+line+1})"
            )

        ax_top.set_title(
            f"6-Week Rolling Equity Curve (WK {win_start+1} → WK {win_start+WINDOW})"
        )
        ax_top.set_ylabel("Portfolio Value")
        ax_top.grid(True)
        ax_top.legend(loc="upper left", fontsize=9)

        # ================= BOTTOM: BUFFER =================
        buf_x = []
        buf_y = []

        for i, val in enumerate(buffers):
            if val is not None:
                buf_x.append(x_dates[i])
                buf_y.append(val)

        ax_bot.plot(
            buf_x,
            buf_y,
            marker="o",
            linewidth=2,
            color="black",
            label="Buffer"
        )
        ax_bot.axhline(
            0,
            linestyle="--",
            linewidth=1,
            label="Zero Line"
        )

        ax_bot.set_ylabel("Buffer")
        ax_bot.set_xlabel("Week (Excel End Date Headers)")
        ax_bot.grid(True)
        ax_bot.legend(loc="upper left", fontsize=9)

        ax_bot.set_xticks(x_dates)
        ax_bot.set_xticklabels(x_labels, rotation=45)

        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

print(" Equity curves + Buffer generated successfully")
print(f" Output file: {OUTPUT_PDF}")


 Equity curves + Buffer generated successfully
 Output file: D:\feb24_to_jan26\performance_gain_loss\Equity Curve\Equity_Curve_6Week_Rolling_WITH_BUFFER.pdf
