### Step 1:- It reads an Excel file containing company names and their oldest prices (01-01-2020).
It then:
1) Takes an investment amount as input (e.g., ₹50,000 per company).

2) Generates all possible 4-company combinations from the list.

3) For each combination, it:

* Calculates how many shares (quantity) can be bought for each company using
Quantity = Investment / Oldest Price

* Replaces invalid or zero prices with "NA".

4) Stores each combo as:

* Combo → list of 4 company names

* Quantity → corresponding share quantities

5) Saves the final result to a new Excel file:

* 4company_combo_qty.xlsx

In simple terms: It creates all possible 4-company portfolios and calculates how many shares you could buy for each company using the oldest price and a fixed investment amount.




In [4]:
#4company_combo_oldest_day_qty

import pandas as pd
from itertools import combinations

input_file = r"C:\Users\Swarupa\Desktop\Code Details\50 comp_oldest_price (input file).xlsx"

INVESTMENT = float(input("Enter Investment amount per company (example 50000) : "))

df = pd.read_excel(input_file)

company_col = 'Company Name '
price_col   = '01-01-2020(oldest price)'

df = df[[company_col, price_col]]

rows = []

for combo in combinations(df.index, 4):
    comps = df.loc[list(combo)]
    combo_name = ",".join(comps[company_col])

    qty_values = []
    for _, r in comps.iterrows():
        price = r[price_col]
        if price == 0 or pd.isna(price):
            qty_values.append("NA")
        else:
            qty_values.append(f"{INVESTMENT/price:.2f}")

    qty_text = ",".join(qty_values)   # only quantities

    rows.append([combo_name, qty_text])

out_df = pd.DataFrame(rows, columns=['Combo','Quantity'])

output_file = r"C:\Users\Swarupa\Desktop\Code Details\4company_combo_qty.xlsx"
out_df.to_excel(output_file, index=False)

print("\nDone! File saved at:", output_file)


Enter Investment amount per company (example 50000) : 50000

Done! File saved at: C:\Users\Swarupa\Desktop\Code Details\4company_combo_qty.xlsx


### Step 2:-  It calculates the total price for each 4-company combo on each date based on company prices and quantities. It does the following:
1) Loads price data for 50 companies over different dates and company combos with their quantities.

2) Matches the company names in the combos with their corresponding prices for each date.

3) Calculates total prices for each combo by multiplying the price of each company by its quantity for that date.

4) Saves the results in a CSV file with columns: Date, Combo, Price per Combo, and Total Price.



In [3]:
# 4Combo_TotalPrices

import pandas as pd
import numpy as np
from tqdm import tqdm
import csv

# ----------------------------------------
# Input paths
# ----------------------------------------
file_prices = r"C:\Users\Swarupa\Desktop\Code Details\50 comp data (input file).xlsx"
file_combos = r"C:\Users\Swarupa\Desktop\Code Details\4company_combo_oldest_day_qty.xlsx"
output_file = r"C:\Users\Swarupa\Desktop\Code Details\output_result.csv"

# ----------------------------------------
# Load price data
# ----------------------------------------
df_price = pd.read_excel(file_prices)
date_col = df_price.columns[0]
company_cols = df_price.columns[1:]

dates = df_price[date_col].values
price_matrix = df_price[company_cols].values   

# ----------------------------------------
# Load combos
# ----------------------------------------
df_combo = pd.read_excel(file_combos)
df_combo.columns = df_combo.columns.str.strip()

# Split companies
df_combo[['C1','C2','C3','C4']] = df_combo['Combo'].str.split(',', expand=True)

# Split quantities 
def split_qty(qty):
    out = []
    for x in str(qty).split(","):
        x = x.strip()
        try:
            out.append(float(x))
        except:
            out.append(0.0)
    while len(out) < 4:
        out.append(0.0)
    return out[:4]

df_combo[['Q1','Q2','Q3','Q4']] = df_combo['Quantity'].apply(split_qty).tolist()

# ----------------------------------------
# Prepare index lookup for companies
# ----------------------------------------
col_index = {col: i for i, col in enumerate(company_cols)}

# Convert combos to index rows
combo_indices = df_combo[['C1','C2','C3','C4']].applymap(lambda x: col_index.get(x, -1)).values
qty_matrix = df_combo[['Q1','Q2','Q3','Q4']].values

num_dates = len(dates)
num_combos = len(df_combo)

# ----------------------------------------
# CSV Header
# ----------------------------------------
with open(output_file, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["DATE", "Combo", "Price_Combo", "TotalPrice"])

# ----------------------------------------
# PROCESS WITH PROGRESS BAR
# ----------------------------------------
total_ops = num_dates * num_combos

with open(output_file, "a", newline="") as f:
    writer = csv.writer(f)
    pbar = tqdm(total=total_ops, desc="Processing", unit="rows")

    for d_idx, date_value in enumerate(dates):

        # Extract all prices for this date 
        price_row = price_matrix[d_idx]          

        # Get 4 prices for all combos
        p = price_row[combo_indices]             

        # Compute total price
        total_price = np.sum(p * qty_matrix, axis=1)

        # Write each combo for this date
        for i in range(num_combos):
            writer.writerow([
                date_value,
                df_combo["Combo"].iloc[i],
                list(p[i]),    
                total_price[i]
            ])
            pbar.update(1)

    pbar.close()

print("\nFAST output generated at:")
print(output_file)


  combo_indices = df_combo[['C1','C2','C3','C4']].applymap(lambda x: col_index.get(x, -1)).values
Processing: 100%|█████████████████████████████████████████████████| 322650300/322650300 [1:58:36<00:00, 45335.90rows/s]


FAST output generated at:
C:\Users\Swarupa\Desktop\Code Details\output_result.csv





In [2]:
import pandas as pd

csv_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\4company_combo_total_price.csv"

df = pd.read_csv(csv_file)

print(df.head())          # show first 5 rows

         DATE                                         Combo  \
0  2020-01-01  Adani Port,Apollo Hosp,Asian Paint,Axis bank   
1  2020-01-02  Adani Port,Apollo Hosp,Asian Paint,Axis bank   
2  2020-01-03  Adani Port,Apollo Hosp,Asian Paint,Axis bank   
3  2020-01-06  Adani Port,Apollo Hosp,Asian Paint,Axis bank   
4  2020-01-07  Adani Port,Apollo Hosp,Asian Paint,Axis bank   

                          Price_Combo   TotalPrice  
0    [377.65, 1426.35, 1793.2, 748.7]  199987.0295  
1  [383.15, 1494.65, 1790.65, 756.95]  203588.9855  
2     [382.5, 1486.1, 1751.4, 742.95]  201174.0380  
3    [380.2, 1462.5, 1707.15, 723.25]  197493.0820  
4     [384.8, 1478.7, 1724.4, 725.75]  199317.8120  


### Step 3:- It processes a CSV file of 4-company combos and calculates:
1) EURINR Ratio: For each combo on each date, it computes the ratio of TotalPrice to the EURINR exchange rate.

2) 50-Day Moving Average (DMA): It calculates a rolling 50-day average of the EURINR Ratio for each combo, keeping a continuous history.

3) It processes the data in chunks to handle large files efficiently and appends the results (Date, Combo, EURINR Ratio, 50DMA) to a new CSV file.

The output is saved in a CSV file with the calculated EURINR Ratio and 50DMA for each combo across the dates.

In [7]:
# 4Company_EURINR_Ratio_50DMA

import pandas as pd
import numpy as np
from tqdm import tqdm

# ---------------------------------------------------
# INPUT FILES
# ---------------------------------------------------
file_csv = r"C:\Users\Swarupa\Desktop\Code Details\output_result.csv"
file_eurinr = r"C:\Users\Swarupa\Desktop\Code Details\EURINR Values(input file).xlsx"
final_output = r"C:\Users\Swarupa\Desktop\Code Details\output_with_EURINR_50DMA_EURINR.csv"

# ---------------------------------------------------
# LOAD EURINR VALUES
# ---------------------------------------------------
eur = pd.read_excel(file_eurinr)
eur["DATE"] = pd.to_datetime(eur["DATE"])

# Convert for fast lookup
eur_dict = dict(zip(eur["DATE"], eur["EURINR"]))

# ---------------------------------------------------
# PARAMETERS
# ---------------------------------------------------
chunksize = 50000
state = {}             
header_written = False

# ---------------------------------------------------
# PROCESS CSV IN STREAMING MODE
# ---------------------------------------------------
print("\nProcessing file...\n")

for chunk in tqdm(pd.read_csv(file_csv, chunksize=chunksize, parse_dates=["DATE"]),
                  desc="Processing", unit="rows"):

    # Sort chunk
    chunk.sort_values(["Combo", "DATE"], inplace=True)

    # Correct EURINR ratio
    chunk["EURINR"] = chunk["DATE"].map(eur_dict)
    chunk["EURINR_Ratio"] = chunk["TotalPrice"] / chunk["EURINR"]

    # ------------------------------------------------
    # 50-DAY DMA CALCULATION
    # ------------------------------------------------
    DMA_list = []

    for combo, subdf in chunk.groupby("Combo"):
        past = state.get(combo, [])

        series = np.concatenate([past, subdf["EURINR_Ratio"].to_numpy()])

        roll = (
            pd.Series(series)
            .rolling(window=50, min_periods=50)
            .mean()
            .to_numpy()
        )

        DMA_current = roll[len(past):]

        DMA_list.append(pd.Series(DMA_current, index=subdf.index))

        # keep last 49 values for continuity
        state[combo] = series[-49:].tolist()

    # merge results
    chunk["50DMA_EURINR"] = pd.concat(DMA_list).sort_index()

    # ------------------------------------------------
    # KEEP ONLY REQUIRED COLUMNS
    # ------------------------------------------------
    final_chunk = chunk[["DATE", "Combo", "EURINR_Ratio", "50DMA_EURINR"]]

    # ------------------------------------------------
    # SAVE OUTPUT
    # ------------------------------------------------
    final_chunk.to_csv(
        final_output,
        mode="a",
        index=False,
        header=not header_written
    )
    header_written = True

print("\n DONE — Output saved to:")
print(final_output)



Processing file...



Processing: 1643rows [11:47,  2.32rows/s]


 DONE — Output saved to:
C:\Users\Swarupa\Desktop\Code Details\output_with_EURINR_50DMA_EURINR.csv





In [10]:
import pandas as pd

file_path = r"C:\Users\Swarupa\Desktop\Code Details\output_with_EURINR_50DMA_EURINR.csv"

chunksize = 10000  # read 10k rows at a time
reader = pd.read_csv(file_path, chunksize=chunksize)

for i, chunk in enumerate(reader):
    print(f"Chunk {i+1}: {chunk.shape}")
    display(chunk.head(3))  # show first 3 rows of each chunk
    if i == 2:  # stop after 3 chunks
        break

Chunk 1: (10000, 4)


Unnamed: 0,DATE,Combo,EURINR_Ratio,50DMA_EURINR
0,2020-01-01,"Adani Port,Apollo Hosp,Asian Paint,Axis bank",inf,
1,2020-01-02,"Adani Port,Apollo Hosp,Asian Paint,Axis bank",inf,
2,2020-01-03,"Adani Port,Apollo Hosp,Asian Paint,Axis bank",inf,


Chunk 2: (10000, 4)


Unnamed: 0,DATE,Combo,EURINR_Ratio,50DMA_EURINR
10000,2020-10-08,"Adani Port,Apollo Hosp,Asian Paint,Coal india",2407.323307,2211.843197
10001,2020-10-09,"Adani Port,Apollo Hosp,Asian Paint,Coal india",2407.600137,2217.930659
10002,2020-10-12,"Adani Port,Apollo Hosp,Asian Paint,Coal india",2423.13059,2224.679052


Chunk 3: (10000, 4)


Unnamed: 0,DATE,Combo,EURINR_Ratio,50DMA_EURINR
20000,2021-07-19,"Adani Port,Apollo Hosp,Asian Paint,HUL",4176.82473,4083.451442
20001,2021-07-20,"Adani Port,Apollo Hosp,Asian Paint,HUL",4231.35116,4086.027698
20002,2021-07-22,"Adani Port,Apollo Hosp,Asian Paint,HUL",4266.221725,4089.844562


### Step 4:- It checks which 4-company combos satisfy the condition:  EURINR_Ratio > 50DMA_EURINR for all the latest 3 selected dates.
Steps done by the code:

1) Loads the CSV that contains each combo’s EURINR Ratio and 50-day moving average.

2) Filters the data only for the latest 3 dates you specified.

3) For each combo:

* Ensures all 3 dates exist.

* Checks if EURINR_Ratio > 50DMA_EURINR on all 3 dates.

4) For combos that pass, it creates a pivoted row with the values arranged horizontally.

5) Saves the final shortlisted combos to an Excel file.

In short:
It finds all combos where EURINR_Ratio is greater than 50DMA_EURINR on all selected latest 3 days and exports the result.

In [20]:
#Latest_3days_EURINR_Ratio>50DMA_EURINR

import pandas as pd

# ----------- USER INPUTS -----------
input_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\output_with_EURINR_50DMA_EURINR.csv"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Latest_3days_TotalPrice_GT_50DMA_EURINR.xlsx"

dates_to_check = ["2025-08-19", "2025-08-18", "2025-08-14"]


print("Loading file...")
df = pd.read_csv(input_file, usecols=["DATE", "Combo", "EURINR_Ratio", "50DMA_EURINR"])

# Ensure DATE is string
df["DATE"] = df["DATE"].astype(str)

print("Filtering latest 3 dates...")
df = df[df["DATE"].isin(dates_to_check)]

print("Applying condition EURINR_Ratio > 50DMA_EURINR...")

# Keep only rows satisfying condition
df_valid = df[df["EURINR_Ratio"] > df["50DMA_EURINR"]]

# Count valid rows per combo
combo_counts = df_valid.groupby("Combo")["DATE"].nunique()

# Only keep combos present in ALL 3 dates
valid_combos = combo_counts[combo_counts == 3].index

final_df = df_valid[df_valid["Combo"].isin(valid_combos)]

# Sort output: Combo wise + latest date first
final_df = final_df.sort_values(by=["Combo", "DATE"], ascending=[True, False])

print("Saving optimized output...")
final_df.to_excel(output_file, index=False)

print("\n DONE! file created:")
print(output_file)


Loading file...
Filtering latest 3 dates...
Applying condition EURINR_Ratio > 50DMA_EURINR...
Saving optimized output...

 DONE! file created:
D:\Trading Stratergies\Stratergy 2 All Codes\Latest_3days_TotalPrice_GT_50DMA_EURINR.xlsx


### Step 5:-  It processes a CSV file of 4-company combos and calculates:
1) NIFTY Ratio: For each combo on each date, it computes the ratio of TotalPrice to the NIFTY  exchange rate.

2) 50-Day Moving Average (DMA): It calculates a rolling 50-day average of the NIFTY Ratio for each combo, keeping a continuous history.

3) It processes the data in chunks to handle large files efficiently and appends the results (Date, Combo, NIFTY Ratio, 50DMA) to a new CSV file.

The output is saved in a CSV file with the calculated NIFTY Ratio and 50DMA for each combo across the dates.

In [3]:
# 4Company_NIFTY_Ratio_50DMA

import pandas as pd
import numpy as np
from tqdm import tqdm

# ---------------------------------------------------
# INPUT FILES
# ---------------------------------------------------
file_csv = r"C:\Users\Swarupa\Desktop\Code Details\output_result.csv"
file_nifty = r"C:\Users\Swarupa\Desktop\Code Details\Nifty_Index_values(input file).xlsx"
final_output = r"C:\Users\Swarupa\Desktop\Code Details\output_with_NIFTY_50DMA_Nifty.csv"

# ---------------------------------------------------
# LOAD NIFTY VALUES
# ---------------------------------------------------
nifty = pd.read_excel(file_nifty)
nifty["DATE"] = pd.to_datetime(nifty["DATE"])

# Convert for fast lookup
nifty_dict = dict(zip(nifty["DATE"], nifty["NIFTY"]))

# ---------------------------------------------------
# PARAMETERS
# ---------------------------------------------------
chunksize = 50000
state = {}             
header_written = False

# ---------------------------------------------------
# PROCESS CSV IN STREAMING MODE
# ---------------------------------------------------
print("\nProcessing file...\n")

for chunk in tqdm(pd.read_csv(file_csv, chunksize=chunksize, parse_dates=["DATE"]),
                  desc="Processing", unit="rows"):

    # Sort chunk
    chunk.sort_values(["Combo", "DATE"], inplace=True)

    # Correct NIFTY ratio
    chunk["NIFTY"] = chunk["DATE"].map(nifty_dict)
    chunk["NIFTY_Ratio"] = chunk["TotalPrice"] / chunk["NIFTY"]

    # ------------------------------------------------
    # 50-DAY DMA CALCULATION (for NIFTY_Ratio)
    # ------------------------------------------------
    DMA_list = []

    for combo, subdf in chunk.groupby("Combo"):
        past = state.get(combo, [])

        series = np.concatenate([past, subdf["NIFTY_Ratio"].to_numpy()])

        roll = (
            pd.Series(series)
            .rolling(window=50, min_periods=50)
            .mean()
            .to_numpy()
        )

        DMA_current = roll[len(past):]

        DMA_list.append(pd.Series(DMA_current, index=subdf.index))

        # keep last 49 values for continuity
        state[combo] = series[-49:].tolist()

    # merge results
    chunk["50DMA_NIFTY"] = pd.concat(DMA_list).sort_index()

    # ------------------------------------------------
    # KEEP ONLY REQUIRED COLUMNS
    # ------------------------------------------------
    final_chunk = chunk[["DATE", "Combo", "NIFTY_Ratio", "50DMA_NIFTY"]]

    # ------------------------------------------------
    # SAVE OUTPUT
    # ------------------------------------------------
    final_chunk.to_csv(
        final_output,
        mode="a",
        index=False,
        header=not header_written
    )
    header_written = True

print("\n DONE — Output saved to:")
print(final_output)



Processing file...



Processing: 1643rows [12:21,  2.22rows/s]


 DONE — Output saved to:
C:\Users\Swarupa\Desktop\Code Details\output_with_NIFTY_50DMA_Nifty.csv





### Step 6:- It checks which 4-company combos satisfy the condition: NIFTY_Ratio > 50DMA_NIFTY for all the latest 3 selected dates.
Steps done by the code:

1) Loads the CSV that contains each combo’s NIFTY Ratio and 50-day moving average.

2) Filters the data only for the latest 3 dates you specified.

3) For each combo:

* Ensures all 3 dates exist.

* Checks if NIFTY_Ratio > 50DMA_NIFTY on all 3 dates.

4) For combos that pass, it creates a pivoted row with the values arranged horizontally.

5) Saves the final shortlisted combos to an Excel file.

In short:
It finds all combos where NIFTY_Ratio is greater than 50DMA_NIFTY on all selected latest 3 days and exports the result.

In [21]:
#Latest_3days_NIFTY_Ratio>50DMA_NIFTY

import pandas as pd

# ----------- USER INPUTS -----------
input_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\output_with_NIFTY_50DMA_Nifty.csv"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Latest_3days_NIFTY_Ratio_GT_50DMA_NIFTY.xlsx"

dates_to_check = ["2025-08-19", "2025-08-18", "2025-08-14"]
# -----------------------------------

print("Loading file...")
df = pd.read_csv(input_file, usecols=["DATE", "Combo", "NIFTY_Ratio", "50DMA_NIFTY"])

# Ensure DATE is string
df["DATE"] = df["DATE"].astype(str)

print("Filtering latest 3 dates...")
df = df[df["DATE"].isin(dates_to_check)]

print("Applying condition NIFTY_Ratio > 50DMA_NIFTY...")

# Keep only rows satisfying condition
df_valid = df[df["NIFTY_Ratio"] > df["50DMA_NIFTY"]]

# Count valid rows per combo
combo_counts = df_valid.groupby("Combo")["DATE"].nunique()

# Only keep combos present in ALL 3 dates
valid_combos = combo_counts[combo_counts == 3].index

final_df = df_valid[df_valid["Combo"].isin(valid_combos)]

# Sort neatly: combo-wise + latest date first
final_df = final_df.sort_values(by=["Combo", "DATE"], ascending=[True, False])

print("Saving optimized output...")
final_df.to_excel(output_file, index=False)

print("\n DONE! NIFTY file created:")
print(output_file)


Loading file...
Filtering latest 3 dates...
Applying condition NIFTY_Ratio > 50DMA_NIFTY...
Saving optimized output...

 DONE! NIFTY file created:
D:\Trading Stratergies\Stratergy 2 All Codes\Latest_3days_NIFTY_Ratio_GT_50DMA_NIFTY.xlsx


### Step 7:- It finds common 4-company combinations that appear in both:


* The EURINR filtered file


* The NIFTY filtered file


Steps done:


1) Loads both Excel files containing combos that satisfy their respective conditions.


2) Removes any duplicate combo rows.


3) Performs an INNER JOIN on the “Combo” column → keeping only combos present in both datasets.


4) Saves the matched common combos to a new Excel file.


In short:
It identifies and exports the combos that pass BOTH EURINR and NIFTY conditions.

In [22]:
#Common_Combinations_EURINR_NIFTY

import pandas as pd

# ---------------- USER INPUTS ----------------
eurinr_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Latest_3days_TotalPrice_GT_50DMA_EURINR.xlsx"
nifty_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Latest_3days_NIFTY_Ratio_GT_50DMA_NIFTY.xlsx"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Common_Combinations_EURINR_NIFTY.xlsx"


print("Loading EURINR file...")
df_eur = pd.read_excel(eurinr_file, engine="openpyxl")

print("Loading NIFTY file...")
df_nifty = pd.read_excel(nifty_file, engine="openpyxl")

# Ensure DATE is datetime
df_eur["DATE"] = pd.to_datetime(df_eur["DATE"])
df_nifty["DATE"] = pd.to_datetime(df_nifty["DATE"])

# Drop duplicate combos if any
df_eur = df_eur.drop_duplicates(subset=["Combo", "DATE"])
df_nifty = df_nifty.drop_duplicates(subset=["Combo", "DATE"])

print("Finding common combos...")

# Merge on Combo AND DATE to get side-by-side values
merged_df = pd.merge(
    df_eur, 
    df_nifty, 
    on=["Combo", "DATE"], 
    how="inner", 
    suffixes=("_EURINR", "_NIFTY")
)

# Optional: sort by Combo & latest date first
merged_df = merged_df.sort_values(by=["Combo", "DATE"], ascending=[True, False])

# Convert DATE back to string for Excel readability
merged_df["DATE"] = merged_df["DATE"].dt.strftime("%Y-%m-%d")

print("Saving output...")
merged_df.to_excel(output_file, index=False)

print("\n DONE! Common combos with EURINR + NIFTY values saved at:")
print(output_file)


Loading EURINR file...
Loading NIFTY file...
Finding common combos...
Saving output...

 DONE! Common combos with EURINR + NIFTY values saved at:
D:\Trading Stratergies\Stratergy 2 All Codes\Common_Combinations_EURINR_NIFTY.xlsx


### Step 9:- This code:

1) Loads the Excel file that contains all combinations with their EURINR & NIFTY Ratios and 50DMA values for multiple dates.

2) Extracts all dates from the column names automatically (e.g., 2025-08-19, 2025-08-18, etc.).

3) For each row (each combo) it checks every date to see if:

* EURINR_Ratio ≤ 1.03 × 50DMA_EURINR

* NIFTY_Ratio ≤ 1.03 × 50DMA_NIFTY

4) If the row satisfies the condition for at least one date, that row is kept.

5) A progress bar shows checking status.

6) Finally, all matching rows are saved into a new Excel file.

In [23]:
#Ratio<=1.03*50DMA(latest_day)

import pandas as pd

# ---------------- INPUT / OUTPUT ----------------
input_file  = r"D:\Trading Stratergies\Stratergy 2 All Codes\Common_Combinations_EURINR_NIFTY.xlsx"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Latest_day_Ratio_should_not_be_GT_1.03_times_50DMA.xlsx"


# Load data
df = pd.read_excel(input_file, engine="openpyxl")

# Ensure DATE is datetime
df["DATE"] = pd.to_datetime(df["DATE"])

# Detect latest date automatically
latest_date = df["DATE"].max()
print("Latest date detected:", latest_date.date())

# Filter rows for latest date
df_latest = df[df["DATE"] == latest_date]

# Apply vectorized condition
condition = (
    (df_latest["EURINR_Ratio"] <= 1.03 * df_latest["50DMA_EURINR"]) &
    (df_latest["NIFTY_Ratio"]  <= 1.03 * df_latest["50DMA_NIFTY"])
)

filtered_df = df_latest.loc[condition].reset_index(drop=True)

# Save output
filtered_df.to_excel(output_file, index=False, engine="openpyxl")

print("\nFiltering Completed")
print("Total rows for latest date :", len(df_latest))
print("Rows kept after filter    :", len(filtered_df))
print("Output saved to:", output_file)


Latest date detected: 2025-08-19

Filtering Completed
Total rows for latest date : 11938
Rows kept after filter    : 1942
Output saved to: D:\Trading Stratergies\Stratergy 2 All Codes\Latest_day_Ratio_should_not_be_GT_1.03_times_50DMA.xlsx


In [24]:
#Top10_Highest_Average_Ratio

import pandas as pd

# ---------------- INPUT / OUTPUT ----------------
input_file  = r"D:\Trading Stratergies\Stratergy 2 All Codes\Latest_day_Ratio_should_not_be_GT_1.03_times_50DMA.xlsx"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Top10_Highest_Average_Ratio.xlsx"

# Load data
df = pd.read_excel(input_file, engine="openpyxl")

# Compute average ratio per row (EURINR + NIFTY)
df["Avg_Ratio"] = (df["EURINR_Ratio"] + df["NIFTY_Ratio"]) / 2

# Group by Combo and calculate mean of Avg_Ratio
combo_avg = df.groupby("Combo")["Avg_Ratio"].mean().reset_index()

# Sort descending to get highest average ratios
combo_avg = combo_avg.sort_values(by="Avg_Ratio", ascending=False)

# Keep Top 10 combos
top10_combos = combo_avg.head(10)

# Optional: merge with original data to get all details for these combos
top10_details = df[df["Combo"].isin(top10_combos["Combo"])].reset_index(drop=True)

# Save to Excel
top10_details.to_excel(output_file, index=False, engine="openpyxl")

print("Top 10 highest average ratio combos saved to:", output_file)


Top 10 highest average ratio combos saved to: D:\Trading Stratergies\Stratergy 2 All Codes\Top10_Highest_Average_Ratio.xlsx


In [1]:
#Average_Realized_Correlation(4Stock_Combinations_Correlation)

import pandas as pd
import numpy as np
import itertools
from tqdm import tqdm

# ================= USER INPUT =================
input_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\50 comp data (input file).xlsx"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\4Stock_Combinations_Correlation.xlsx"


print("Loading data...")
df = pd.read_excel(input_file)

# Convert DATE column
df['DATE'] = pd.to_datetime(df['DATE'])

# Set DATE as index
df.set_index('DATE', inplace=True)

# Calculate daily returns
returns = df.pct_change().dropna()

stocks = returns.columns.tolist()
print(f"Total Stocks: {len(stocks)}")

results = []

# Generate all 4-stock combinations
combinations = list(itertools.combinations(stocks, 4))
print(f"Total 4-stock combinations: {len(combinations)}")

for combo in tqdm(combinations, desc="Calculating correlations"):

    data = returns[list(combo)]
    corr_matrix = data.corr()

    # All 6 pairwise correlations
    pairs = list(itertools.combinations(combo, 2))
    pair_corrs = [corr_matrix.loc[a, b] for a, b in pairs]

    #  YOUR EXACT FORMULA
    avg_realized_corr = (0.25 * sum(pair_corrs)) / 1.5

    results.append({
        "Combo": ", ".join(combo),
        "Average_Realized_Correlation": avg_realized_corr
    })

# Create result dataframe
result_df = pd.DataFrame(results)

# Rank by least correlation
result_df = result_df.sort_values(by="Average_Realized_Correlation")

# Save output
result_df.to_excel(output_file, index=False)

print(" DONE")
print(" The TOP row in Excel is your LEAST CORRELATED 4-stock portfolio")


Loading data...


  returns = df.pct_change().dropna()


Total Stocks: 50
Total 4-stock combinations: 230300


Calculating correlations: 100%|██████████████████████████████████████████████| 230300/230300 [02:58<00:00, 1289.87it/s]


 DONE
 The TOP row in Excel is your LEAST CORRELATED 4-stock portfolio


In [25]:
#Average_Correlation_EURINR_NIFTY

import pandas as pd

# ---------------- FILE PATHS ----------------
file_corr = r"D:\Trading Stratergies\Stratergy 2 All Codes\4Stock_Combinations_Correlation.xlsx"    
file_data = r"D:\Trading Stratergies\Stratergy 2 All Codes\Latest_day_Ratio_should_not_be_GT_1.03_times_50DMA.xlsx"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Average_Correlation_EURINR_NIFTY.xlsx"


def normalize_combo(combo):
    if pd.isna(combo):
        return combo
    stocks = [s.strip() for s in combo.split(",")]
    stocks_sorted = sorted(stocks)
    return ", ".join(stocks_sorted)


print("Loading files...")
df_corr = pd.read_excel(file_corr)
df_data = pd.read_excel(file_data)

# Normalize combo column in both files
print("Normalizing Combo format...")
df_corr["Combo_Normalized"] = df_corr["Combo"].apply(normalize_combo)
df_data["Combo_Normalized"] = df_data["Combo"].apply(normalize_combo)

print("Merging datasets...")
final_df = df_data.merge(
    df_corr[["Combo_Normalized", "Average_Realized_Correlation"]],
    on="Combo_Normalized",
    how="left"   # keeps all second file rows
)

# Optional: Drop rows where correlation not found
final_df = final_df.dropna(subset=["Average_Realized_Correlation"])

# Remove helper column
final_df.drop(columns=["Combo_Normalized"], inplace=True)

print("Saving output...")
final_df.to_excel(output_file, index=False)

print(" FIXED: Correlation values correctly applied.")


Loading files...
Normalizing Combo format...
Merging datasets...
Saving output...
 FIXED: Correlation values correctly applied.


In [26]:
#Top10_Lowest_Correlation

import pandas as pd

# -------- USER FILE PATHS --------
input_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Average_Correlation_EURINR_NIFTY.xlsx"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Top10_Lowest_Correlation.xlsx"

print("Loading merged data file...")
df = pd.read_excel(input_file)

print("Sorting by lowest Average_Realized_Correlation...")

# Sort ascending = lowest correlation first
df_sorted = df.sort_values(by="Average_Realized_Correlation", ascending=True)

# Select top 10 lowest
top10_lowest = df_sorted.head(10)

print("Saving Top 10 lowest correlation combinations...")
top10_lowest.to_excel(output_file, index=False)

print(" Done! Top 10 Lowest Correlation file created.")

Loading merged data file...
Sorting by lowest Average_Realized_Correlation...
Saving Top 10 lowest correlation combinations...
 Done! Top 10 Lowest Correlation file created.


In [8]:
#Calculate beta for all data 

import pandas as pd
import numpy as np
from tqdm import tqdm

# ---------- FILE PATHS ----------
file_path = r"D:\Trading Stratergies\ndpl\50 comp data.xlsx"
output_path = r"D:\Trading Stratergies\Stratergy 2 All Codes\expanding_beta_all_data.xlsx"

# Load data
df = pd.read_excel(file_path)

# Sort by date
df = df.sort_values("DATE").reset_index(drop=True)

# Market returns
market_ret = df["Nifty Index"].pct_change()

# Columns to exclude
exclude = ["DATE", "EURINR", "Nifty Index"]

# Output dataframe
beta_result = pd.DataFrame()
beta_result["DATE"] = df["DATE"]

# Calculate expanding beta for each stock
for col in tqdm(df.columns, desc="Calculating Expanding Beta"):
    if col not in exclude:

        stock_ret = df[col].pct_change()

        temp = pd.DataFrame({
            "Stock": stock_ret,
            "Market": market_ret
        })

        # Expanding beta = uses ALL data till that date
        beta_series = (
            temp["Stock"].expanding().cov(temp["Market"]) /
            temp["Market"].expanding().var()
        )

        beta_result[col + "_Beta"] = beta_series

# Save result
beta_result.to_excel(output_path, index=False)

print(" Beta calculated for each stock and each date using all available data.")
print(" Output saved to:", output_path)


Calculating Expanding Beta: 100%|█████████████████████████████████████████████████████| 53/53 [00:00<00:00, 534.18it/s]


 Beta calculated for each stock and each date using all available data.
 Output saved to: D:\Trading Stratergies\Stratergy 2 All Codes\expanding_beta_all_data.xlsx


In [19]:
#4_company_beta_total

import pandas as pd
import numpy as np
from itertools import combinations
from tqdm import tqdm
import csv

# -------- FILE PATHS --------
beta_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\expanding_beta_all_data.xlsx"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\beta_total.csv"

print("Loading data...")
df = pd.read_excel(beta_file)

# ------------------------------
beta_cols = [col for col in df.columns if col.upper() != "DATE"]
print(f"Total Stocks: {len(beta_cols)}")

beta_values = df[beta_cols].to_numpy(dtype=np.float64)
col_indices = np.arange(len(beta_cols))

# Generate combinations 
combos = np.array(list(combinations(col_indices, 4)))
print(f"Total 4-stock combinations: {len(combos)}")

with open(output_file, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Combo", "Beta_Sum"])

    for i in tqdm(range(beta_values.shape[0]), desc="Processing Rows"):
        row = beta_values[i]

        #  Fast batch extraction of values for all combos
        vals = row[combos]  

        # Masks
        all_nan = np.isnan(vals).all(axis=1)
        valid_vals = vals[~all_nan]

        beta_sums = np.nansum(valid_vals, axis=1)
        valid_combos = combos[~all_nan]

        for combo_idx, beta_sum in zip(valid_combos, beta_sums):
            combo_name = ", ".join(beta_cols[j] for j in combo_idx)
            writer.writerow([combo_name, beta_sum])

print("CSV created successfully!")
print("File saved at:", output_file)


Loading data...
Total Stocks: 50
Total 4-stock combinations: 230300


Processing Rows: 100%|█████████████████████████████████████████████████████████████| 1401/1401 [31:37<00:00,  1.35s/it]

CSV created successfully!
File saved at: D:\Trading Stratergies\Stratergy 2 All Codes\beta_total.csv





In [1]:
#Beta_Total_EURINR_NIFTY

import csv

beta_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\beta_total.csv"
data_file = r"C:\Users\Swarupa\Downloads\Latest_day_Ratio_should_not_be_GT_1.03_times_50DMA.csv"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Beta_Sum_EURINR_NIFTY.csv"

def normalize_combo(combo):
    if not combo:
        return None
    combo = combo.replace("_Beta", "")
    parts = [c.strip() for c in combo.split(",")]
    parts.sort()
    return ",".join(parts)

# -------- LOAD BETA INTO DICTIONARY ----------
beta_dict = {}

with open(beta_file, newline='', encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        clean = normalize_combo(row["Combo"])
        beta_dict[clean] = row["Beta_Sum"]

print(" Beta dictionary built")

# -------- STREAM SECOND FILE ----------
with open(data_file, newline='', encoding="utf-8") as f_in, \
     open(output_file, "w", newline='', encoding="utf-8") as f_out:

    reader = csv.DictReader(f_in)
    fieldnames = reader.fieldnames + ["Beta_Sum"]
    writer = csv.DictWriter(f_out, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        clean = normalize_combo(row["Combo"])
        beta = beta_dict.get(clean)

        if beta is not None:
            row["Beta_Sum"] = beta
            writer.writerow(row)

print(" COMPLETED ")


 Beta dictionary built
 COMPLETED — ZERO MEMORY RISK


In [2]:
#Top10_Lowest_Beta_Total

import pandas as pd

# -------- USER FILE PATHS --------
input_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Beta_Sum_EURINR_NIFTY.csv"
output_file = r"D:\Trading Stratergies\Stratergy 2 All Codes\Top10_Lowest_Beta_Sum.xlsx"

print("Loading merged data file...")
df = pd.read_csv(input_file)

print("Sorting by lowest Beta_Sum...")

# Sort ascending = lowest Beta_Sum first
df_sorted = df.sort_values(by="Beta_Sum", ascending=True)

# Select top 10 lowest Beta_Sum combos
top10_lowest = df_sorted.head(10)

print("Saving Top 10 lowest Beta_Sum combinations...")
top10_lowest.to_excel(output_file, index=False)

print(" Done! Top 10 Lowest Beta_Sum file created.")


Loading merged data file...
Sorting by lowest Beta_Sum...
Saving Top 10 lowest Beta_Sum combinations...
 Done! Top 10 Lowest Beta_Sum file created.
