
# Demand Planning Analysis (EA, Batches, RCCP & BOM Mapping)

This notebook reads your **Aug'25 DP file**, **SNP RCCP file**, and **DP Shortage (BOM)** to produce:
- Long-form `Product_Demand` (Month | EA | Batches) for rows 3–78 of `L2Ph1_Detail` (columns S:AM for EA, AN:BI for Batches).
- `RCCP_PlannedProduction` and `RCCP_OutboundProdDemand` from `DP RCCP` (columns J:AD).
- Aggregated component demand per FG using `DP Shortage` (rows 23–542), producing `BOM_OPD_Demand`.
- Final Excel workbook saved to `OUTPUT_FILE`.

> Month headers are normalized (e.g., **OCT 2025** → `10.2025`, **Mar-25** → `3.2025`).

---


In [1]:

# ==== Configuration ====
# File Paths - UPDATE THESE IF FILES ARE IN DIFFERENT LOCATIONS
AUG25_FILE = "/home/supriyo/Downloads/Biocon_nw/Aug'25_L2_DP_Plan_Circulation_V2.xlsx"
SNP_FILE = "/home/supriyo/Downloads/Biocon_nw/ParkourSC_SNP.xlsx"
BOM_FILE = "/home/supriyo/Downloads/Biocon_nw/20251006-DP Material Shortage - Working file.xlsx"
OUTPUT_FILE = "/home/supriyo/Downloads/Biocon_nw/Demand_Planning_Analysis.xlsx"  # will fall back to /mnt/data if not found

# Sheet Names
AUG25_SHEET = "L2Ph1_Detail"
SNP_SHEET = "DP RCCP"
BOM_SHEET = "DP Shortage"

# Products Configuration
PRODUCTS_AVAILABLE = [700001012, 700001123, 700000536, 700001318, 700001301]
PRODUCTS_NOT_AVAILABLE = [700004130]

# DP slicing configuration
DP_ROWS_START = 3
DP_ROWS_END = 78
EA_COL_RANGE = ("S", "AM")  # Oct-2025 .. Jun-2027
BATCH_COL_RANGE = ("AN", "BI")

# RCCP column layout
RCCP_PRODUCT_ID_COL = "C"  # Product ID
RCCP_KEY_FIGURE_COL = "H"  # Key Figure
RCCP_MONTHS_RANGE = ("J", "AD")  # Mar-2025 .. Apr-2027

# Shortage sheet slicing + columns
SHORTAGE_FILTER_ROWS = (23, 542)
SHORTAGE_PRODUCT_ID_COL = "A"  # FG Product ID
SHORTAGE_COMPONENT_COL = "F"   # Component/Material ID

print("✓ Configuration loaded")
print(f"  Input files: 3")
print(f"  Output file: {OUTPUT_FILE}")
print(f"  Products to analyze: {len(PRODUCTS_AVAILABLE)}")


✓ Configuration loaded
  Input files: 3
  Output file: /home/supriyo/Downloads/Biocon_nw/Demand_Planning_Analysis.xlsx
  Products to analyze: 5


In [2]:
import os
import re
from typing import List, Dict, Tuple
import pandas as pd
import numpy as np

# --- Excel column letter to index ---
def excel_col_to_index(col_letter: str) -> int:
    col_letter = col_letter.upper().strip()
    exp = 0
    col_index = 0
    for char in reversed(col_letter):
        col_index += (ord(char) - ord('A') + 1) * (26 ** exp)
        exp += 1
    return col_index - 1  # 0-based

# --- Month normalization ---
MONTH_ABBR_TO_NUM = {
    'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
    'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12
}

def normalize_month_header(name: str) -> str:
    """
    Convert e.g., 'OCT 2025' -> '10.2025'; 'Mar-25' -> '3.2025'; '10/2025' -> '10.2025'
    Returns original string if it doesn't match expected patterns.
    """
    if pd.isna(name):
        return name
    s = str(name).strip()

    m = re.match(r'^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-zA-Z]*\s+(\d{4})$', s, flags=re.IGNORECASE)
    if m:
        mon = m.group(1)[:3].upper()
        yr = int(m.group(2))
        return f"{MONTH_ABBR_TO_NUM[mon]}.{yr}"

    m = re.match(r'^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-zA-Z]*[-_/](\d{2,4})$', s, flags=re.IGNORECASE)
    if m:
        mon = m.group(1)[:3].upper()
        y = m.group(2)
        yr = int(y) + 2000 if len(y) == 2 else int(y)
        return f"{MONTH_ABBR_TO_NUM[mon]}.{yr}"

    m = re.match(r'^(\d{1,2})[\./-](\d{2,4})$', s)
    if m:
        mon = int(m.group(1))
        y = m.group(2)
        yr = int(y) + 2000 if len(y) == 2 else int(y)
        return f"{mon}.{yr}"

    return s

def restrict_rows(df: pd.DataFrame, start_row: int, end_row: int) -> pd.DataFrame:
    """Restrict dataframe to Excel-like inclusive rows (header assumed at row 1)."""
    return df.iloc[start_row-1:end_row]

def convert_numeric(df: pd.DataFrame) -> pd.DataFrame:
    return df.apply(pd.to_numeric, errors='coerce')

def find_col_by_letter(df: pd.DataFrame, letter: str) -> str:
    idx = excel_col_to_index(letter)
    if idx < 0 or idx >= len(df.columns):
        raise IndexError(f"Column {letter} -> idx {idx} is out of bounds for {len(df.columns)} cols.")
    return df.columns[idx]

def melt_months(df: pd.DataFrame, id_vars: List[str], month_cols: List[str], value_name: str) -> pd.DataFrame:
    tmp = df[id_vars + month_cols].copy()
    melted = tmp.melt(id_vars=id_vars, value_vars=month_cols, var_name="MonthRaw", value_name=value_name)
    melted["Month"] = melted["MonthRaw"].apply(normalize_month_header)
    melted.drop(columns=["MonthRaw"], inplace=True)
    melted[value_name] = pd.to_numeric(melted[value_name], errors="coerce").fillna(0)
    return melted

def ensure_output_path(path: str) -> str:
    """Ensure directory exists; if not, fallback to /mnt/data/Demand_Planning_Analysis.xlsx"""
    directory = os.path.dirname(path)
    if directory and not os.path.exists(directory):
        print(f"Output directory not found: {directory}. Falling back to /mnt/data.")
        return "/mnt/data/Demand_Planning_Analysis.xlsx"
    return path


In [4]:

# --- SKU to hierarchy mapping ---
product_mapping: Dict[str, Dict[str,str]] = {}

mCB_skus = ['800004403', '800004402', '800008019', '800008020', '800008034', '800007997', '800007345', '800007516',
            '800002513', '800007608', '800007630', '800002984', '800004986', '800007310', '800007311', '800006648',
            '800007634', '800008073', '800006523', '800002297', '800002872', '800006741', '800007380']

sMCB_skus = ['800006506', '800006505', '800006527', '800006526', '800006525', '800007546', '800007583', '800007839',
             '800006524', '800006627', '800007872']

vial_skus = ['800004400', '800004401', '800006626', '800006740', '800007996']

aspart_dlp_skus = ['800008016', '800002958', '800002948', '800006528', '800002989', '800003528', '800006592', '800006691']

aspart_vial_skus = ['800008017', '800006529']

rhi_skus = ['800001300', '800001298', '800001299']

for sku in mCB_skus:
    product_mapping[sku] = {'assembly': '700003964', 'filling': '700001012', 'root': '700001470', 'family': 'Glargine_mCB_DLP'}

for sku in sMCB_skus:
    product_mapping[sku] = {'assembly': '700004129', 'filling': '700004130', 'root': '700004130', 'family': 'Glargine_sMCB_DLP_EU'}

for sku in vial_skus:
    product_mapping[sku] = {'assembly': '700001123', 'filling': '700001123', 'root': '700001123', 'family': 'Glargine_Vial'}

for sku in aspart_dlp_skus:
    product_mapping[sku] = {'assembly': '700002770', 'filling': '700001301', 'root': '700001301', 'family': 'Aspart_DLP'}

for sku in aspart_vial_skus:
    product_mapping[sku] = {'assembly': '700001318', 'filling': '700001318', 'root': '700001318', 'family': 'Aspart_Vial'}

for sku in rhi_skus:
    product_mapping[sku] = {'assembly': '700000536', 'filling': '700000536', 'root': '700000536', 'family': 'RHI'}

print(f"Hierarchy defined with {len(product_mapping)} SKU mappings")


Hierarchy defined with 52 SKU mappings


In [5]:

# --- Process Aug'25 DP file (EA & Batches) ---
if not os.path.exists(AUG25_FILE):
    print(f"DP file not found: {AUG25_FILE}. The pipeline will continue but Product_Demand will be empty.")
    product_demand_long = pd.DataFrame(columns=["Market_SKU","Batch_Size","Product_Id","Family","Month","EA","Batches"])
else:
    df = pd.read_excel(AUG25_FILE, sheet_name=AUG25_SHEET, header=0)
    df_slice = restrict_rows(df, DP_ROWS_START, DP_ROWS_END)

    # Identify Market SKU & Batch Size columns by letters (E and K)
    market_sku_col = find_col_by_letter(df, "E")
    batch_size_col = find_col_by_letter(df, "K")

    ea_start_idx = excel_col_to_index(EA_COL_RANGE[0])
    ea_end_idx = excel_col_to_index(EA_COL_RANGE[1])
    batch_start_idx = excel_col_to_index(BATCH_COL_RANGE[0])
    batch_end_idx = excel_col_to_index(BATCH_COL_RANGE[1])

    ea_month_cols = list(df.columns[ea_start_idx:ea_end_idx+1])
    batch_month_cols = list(df.columns[batch_start_idx:batch_end_idx+1])

    # Coerce numeric for month columns
    df_slice[ea_month_cols] = convert_numeric(df_slice[ea_month_cols])
    df_slice[batch_month_cols] = convert_numeric(df_slice[batch_month_cols])

    id_vars = [market_sku_col, batch_size_col]

    ea_long = melt_months(df_slice, id_vars=id_vars, month_cols=ea_month_cols, value_name="EA")
    batch_long = melt_months(df_slice, id_vars=id_vars, month_cols=batch_month_cols, value_name="Batches")

    merged = pd.merge(
        ea_long[[market_sku_col, batch_size_col, "Month", "EA"]],
        batch_long[[market_sku_col, batch_size_col, "Month", "Batches"]],
        on=[market_sku_col, batch_size_col, "Month"],
        how="outer"
    ).fillna(0)

    merged.rename(columns={market_sku_col: "Market_SKU", batch_size_col: "Batch_Size"}, inplace=True)

    # Map Market_SKU -> Product_Id (prefer 'filling'), and add Family
    def map_product_id(sku: str):
        sk = str(sku) if not pd.isna(sku) else ""
        info = product_mapping.get(sk, {})
        return pd.Series([info.get("filling") or info.get("assembly") or info.get("root"), info.get("family")])

    merged[["Product_Id","Family"]] = merged["Market_SKU"].apply(map_product_id)
    product_demand_long = merged[~merged["Product_Id"].isna()].copy()

    # Keep only Oct-2025 .. Jun-2027
    def key(mstr):
        try:
            m, y = mstr.split(".")
            return (int(y), int(m))
        except Exception:
            return (0, 0)
    product_demand_long = product_demand_long[product_demand_long["Month"].apply(lambda v: (2025,10) <= key(v) <= (2027,6))]
    product_demand_long.sort_values(by=["Market_SKU","Month"], inplace=True)

print("Product_Demand rows:", len(product_demand_long))


Product_Demand rows: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_slice[ea_month_cols] = convert_numeric(df_slice[ea_month_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_slice[batch_month_cols] = convert_numeric(df_slice[batch_month_cols])
  ).fillna(0)


In [6]:

# --- Process SNP RCCP (Planned Production & Outbound Production Demand) ---
if not os.path.exists(SNP_FILE):
    print(f"RCCP file not found: {SNP_FILE}. RCCP outputs will be empty.")
    rccp_planned = pd.DataFrame(columns=["Product_ID","Key_Figure","Base_UOM","Scenario","Month","Qty"])
    rccp_opd = pd.DataFrame(columns=["Product_ID","Key_Figure","Base_UOM","Scenario","Month","Qty"])
else:
    rccp_df = pd.read_excel(SNP_FILE, sheet_name=SNP_SHEET, header=0)

    # Map letters to columns
    prod_col = find_col_by_letter(rccp_df, RCCP_PRODUCT_ID_COL)
    keyf_col = find_col_by_letter(rccp_df, RCCP_KEY_FIGURE_COL)

    m_start_idx = excel_col_to_index(RCCP_MONTHS_RANGE[0])
    m_end_idx = excel_col_to_index(RCCP_MONTHS_RANGE[1])
    rccp_month_cols = list(rccp_df.columns[m_start_idx:m_end_idx+1])

    # Optional columns
    base_uom_col = "Base UOM" if "Base UOM" in rccp_df.columns else None
    scenario_col = "Scenario" if "Scenario" in rccp_df.columns else None

    rccp_df[rccp_month_cols] = convert_numeric(rccp_df[rccp_month_cols])

    id_vars = [c for c in [prod_col, keyf_col, base_uom_col, scenario_col] if c]
    rccp_long = rccp_df[id_vars + rccp_month_cols].melt(
        id_vars=id_vars, value_vars=rccp_month_cols, var_name="MonthRaw", value_name="Qty"
    )
    rccp_long["Month"] = rccp_long["MonthRaw"].apply(normalize_month_header)
    rccp_long["Qty"] = pd.to_numeric(rccp_long["Qty"], errors="coerce").fillna(0)
    rccp_long.drop(columns=["MonthRaw"], inplace=True)

    # Window Mar-2025 .. Apr-2027
    def in_window(mstr):
        try:
            m, y = mstr.split(".")
            return (2025,3) <= (int(y), int(m)) <= (2027,4)
        except Exception:
            return False

    rccp_long = rccp_long[rccp_long["Month"].apply(in_window)]

    # Split by Key Figure (case-insensitive)
    mask_pp = rccp_long[keyf_col].str.lower().eq("planned production")
    mask_opd = rccp_long[keyf_col].str.lower().eq("outbound production demand")

    def tidy(df):
        out = df.rename(columns={prod_col:"Product_ID", keyf_col:"Key_Figure"}).copy()
        if base_uom_col and base_uom_col in out.columns:
            out.rename(columns={base_uom_col:"Base_UOM"}, inplace=True)
        else:
            out["Base_UOM"] = np.nan
        if scenario_col and scenario_col in out.columns:
            out.rename(columns={scenario_col:"Scenario"}, inplace=True)
        else:
            out["Scenario"] = np.nan
        return out[["Product_ID","Key_Figure","Base_UOM","Scenario","Month","Qty"]]

    rccp_planned = tidy(rccp_long[mask_pp])
    rccp_opd = tidy(rccp_long[mask_opd])

print("RCCP Planned Production rows:", len(rccp_planned))
print("RCCP Outbound Production Demand rows:", len(rccp_opd))


RCCP Planned Production rows: 0
RCCP Outbound Production Demand rows: 0


In [7]:

# --- BOM mapping via DP Shortage (aggregate OPD by components per FG) ---
if not os.path.exists(BOM_FILE):
    print(f"Shortage file not found: {BOM_FILE}. BOM_OPD_Demand will be empty.")
    bom_opd = pd.DataFrame(columns=["FG_Product_ID","Component_ID","Month","Outbound_Prod_Demand"])
else:
    sh_df = pd.read_excel(BOM_FILE, sheet_name=BOM_SHEET, header=0)
    sh_df = sh_df.iloc[SHORTAGE_FILTER_ROWS[0]-1:SHORTAGE_FILTER_ROWS[1]]

    # Infer columns A (FG Product_ID) & F (Component_ID)
    try:
        col_prodA = sh_df.columns[excel_col_to_index(SHORTAGE_PRODUCT_ID_COL)]
    except Exception:
        col_prodA = "Product_ID" if "Product_ID" in sh_df.columns else sh_df.columns[0]

    try:
        col_compF = sh_df.columns[excel_col_to_index(SHORTAGE_COMPONENT_COL)]
    except Exception:
        col_compF = "Component_ID" if "Component_ID" in sh_df.columns else sh_df.columns[5]

    sh_df = sh_df[(sh_df[col_prodA].notna()) & (sh_df[col_prodA] != 0) & (sh_df[col_prodA] != "")]
    sh_df[col_prodA] = sh_df[col_prodA].astype(str)

    # Keep only available FG list
    avail_str = set(map(str, PRODUCTS_AVAILABLE))
    sh_df = sh_df[sh_df[col_prodA].isin(avail_str)]

    # Map FG -> components
    comp_map = (sh_df[[col_prodA, col_compF]]
                .dropna()
                .astype(str)
                .groupby(col_prodA)[col_compF]
                .agg(lambda s: sorted(set(s)))
                .to_dict())

    rows = []
    if not rccp_opd.empty:
        for fg, comps in comp_map.items():
            comp_rows = rccp_opd[rccp_opd["Product_ID"].astype(str).isin(set(comps))]
            if comp_rows.empty:
                continue
            agg = comp_rows.groupby("Month", as_index=False)["Qty"].sum()
            for _, rec in agg.iterrows():
                rows.append({
                    "FG_Product_ID": fg,
                    "Component_ID": ";".join(comps),
                    "Month": rec["Month"],
                    "Outbound_Prod_Demand": rec["Qty"]
                })
    bom_opd = pd.DataFrame(rows)

    if not bom_opd.empty:
        def sortkey(mstr):
            try:
                m, y = mstr.split(".")
                return (int(y), int(m))
            except Exception:
                return (0, 0)
        bom_opd = bom_opd[bom_opd["FG_Product_ID"].isin(list(map(str, PRODUCTS_AVAILABLE)))]
        bom_opd.sort_values(by=["FG_Product_ID","Month"], key=lambda s: s.map(sortkey), inplace=True)

print("BOM_OPD_Demand rows:", len(bom_opd))


BOM_OPD_Demand rows: 0


In [8]:

# --- Save all outputs to Excel ---
final_output = ensure_output_path(OUTPUT_FILE)
with pd.ExcelWriter(final_output, engine="openpyxl") as writer:
    (product_demand_long if 'product_demand_long' in globals() else pd.DataFrame()).to_excel(writer, sheet_name="Product_Demand", index=False)
    (rccp_planned if 'rccp_planned' in globals() else pd.DataFrame()).to_excel(writer, sheet_name="RCCP_PlannedProduction", index=False)
    (rccp_opd if 'rccp_opd' in globals() else pd.DataFrame()).to_excel(writer, sheet_name="RCCP_OutboundProdDemand", index=False)
    (bom_opd if 'bom_opd' in globals() else pd.DataFrame()).to_excel(writer, sheet_name="BOM_OPD_Demand", index=False)

print(f"✓ Wrote Excel output to: {final_output}")


✓ Wrote Excel output to: /home/supriyo/Downloads/Biocon_nw/Demand_Planning_Analysis.xlsx


In [None]:

# --- Optional: Compact Month|EA|Batches preview for the first SKU (for quick QA) ---
def compact_view(df: pd.DataFrame) -> pd.DataFrame:
    if df is None or df.empty:
        return pd.DataFrame(columns=["Month","EA","Batches"])
    sku = df["Market_SKU"].iloc[0]
    sub = df[df["Market_SKU"] == sku][["Month","EA","Batches"]].copy()
    sub = sub.groupby("Month", as_index=False).sum()
    return sub

try:
    from ace_tools import display_dataframe_to_user
    preview = compact_view(product_demand_long if 'product_demand_long' in globals() else pd.DataFrame())
    if not preview.empty:
        display_dataframe_to_user("Compact Month | EA | Batches (first SKU example)", preview)
except Exception:
    pass
