# Material Pegging Map - Per-SKU Pegging with Complete Hierarchy
## Each Market SKU sheet shows: Packing -> Assembly -> Filling hierarchy

**Output:** Material_pegging_SKU_2_BOM.xlsx

**Sheet Structure:** Pegging_{8-series SKU}

**Hierarchy per SKU:**
- Level 1: Market SKU (800004403) + Materials
- Separator: 0
- Level 2: Assembly (700003964) + Materials
- Separator: 0
- Level 3: Filling (700001012) + Materials

In [4]:
import pandas as pd
import numpy as np
import re
import time
import warnings
warnings.filterwarnings('ignore')

print("="*120)
print("MATERIAL PEGGING MAP - PER-SKU PEGGING WITH COMPLETE HIERARCHY")
print("="*120)

# Input workbooks: DP shortage sheet (BOM quantities) and SNP workbook (resources / SKU attributes).
dp_file = "/home/supriyo/Downloads/Biocon_nw/20251006-DP Material Shortage - Working file.xlsx"
snp_file = "/home/supriyo/Downloads/Biocon_nw/ParkourSC_SNP.xlsx"
pegging_file = "/home/supriyo/Downloads/Biocon_nw/Material_pegging_SKU_2_BOM.xlsx"

# The export cell writes to `output_file`, which was previously never assigned
# (NameError on the print below). The notebook's stated output is
# Material_pegging_SKU_2_BOM.xlsx, i.e. `pegging_file`, so bind it to that.
output_file = pegging_file

print(f"Output: {output_file}")
# Wall-clock start; the export cell reports the elapsed time from this value.
start_time = time.time()

ModuleNotFoundError: No module named 'pandas'

In [5]:
# Environment check: show which Python interpreter is executing this cell.
import sys
print(sys.executable)


/home/supriyo/repogen/.venv/bin/python


In [None]:
import pandas as pd
import numpy as np
import re
import time
import warnings

warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'pandas'

In [None]:
def normalize_text(text):
    """Return ``text`` as a trimmed string with whitespace runs collapsed.

    Parameters
    ----------
    text : Any
        Cell value; anything that is not missing is converted with ``str()``.

    Returns
    -------
    str or None
        The cleaned string, or ``None`` when the value is missing
        (``None``/NaN) or contains nothing but whitespace.
    """
    if text is None or pd.isna(text):
        return None
    # r'\s+' matches whitespace runs. The previous pattern r'\\s+' matched a
    # literal backslash followed by "s", so tabs/newlines/double spaces were
    # never collapsed.
    collapsed = re.sub(r'\s+', ' ', str(text)).strip()
    return collapsed or None

def normalize_product_no(value):
    """Reduce a product/material number cell to its alphanumeric characters.

    Parameters
    ----------
    value : Any
        Cell value (string, int, float, NaN, None).

    Returns
    -------
    str or None
        The value with every non ``[a-zA-Z0-9]`` character removed, or
        ``None`` when the value is missing or nothing alphanumeric remains.
    """
    if value is None or pd.isna(value):
        return None
    # A numeric column containing blanks is read as float, so an ID arrives as
    # 700001012.0. Stringifying that and stripping the "." used to yield
    # "7000010120" (and 0.0 became "00", escaping the callers' != '0' filter).
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    cleaned = re.sub(r'[^a-zA-Z0-9]', '', str(value).strip())
    return cleaned or None

def extract_model_components(model_text):
    """Rebuild an underscore-delimited model string without empty segments.

    The text is split on runs of underscores, each piece is trimmed, blank
    pieces are dropped and the remainder is joined with single underscores.
    Missing input (``None``/NaN) yields ``None``; input made only of
    underscores/whitespace yields an empty string.
    """
    if pd.isna(model_text):
        return None
    pieces = []
    for raw_piece in re.split(r'_+', str(model_text).strip()):
        piece = raw_piece.strip()
        if piece:
            pieces.append(piece)
    return '_'.join(pieces)

def is_valid_qty(qty):
    """Tell whether ``qty`` represents a strictly positive number.

    Missing values, blank text, the literals ``'0'``/``'nan'``, anything that
    cannot be parsed by ``float()`` and non-positive numbers all return
    ``False``.
    """
    if pd.isna(qty):
        return False
    text = str(qty).strip()
    if text in ('', '0', 'nan'):
        return False
    try:
        number = float(text)
    except (ValueError, TypeError):
        return False
    return number > 0

print("Functions loaded")

In [None]:
print("Loading data...")

# Four-row header band above the material table, restricted to the product columns (14..134).
df_headers = pd.read_excel(
    dp_file,
    sheet_name="DP Shortage",
    header=None,
    skiprows=18,
    nrows=4,
    usecols=range(14, 135),
)

# Band row 1 holds the product ID, row 3 the description, row 0 the batch size.
product_headers = {}
for col_idx in range(df_headers.shape[1]):
    product_id = normalize_product_no(df_headers.iloc[1, col_idx])
    if not product_id or product_id == '0':
        continue
    product_headers[product_id] = {
        'Product_ID': product_id,
        'Product_Description': normalize_text(df_headers.iloc[3, col_idx]),
        'Batch_Size': df_headers.iloc[0, col_idx],
        'Column_Index': col_idx + 14,  # position within the full sheet
    }

print(f"Product headers: {len(product_headers)}")

# Material attributes: one row per component, taken from selected left-hand columns.
df_materials = pd.read_excel(
    dp_file,
    sheet_name="DP Shortage",
    header=None,
    skiprows=22,
    nrows=520,
    usecols=[0, 1, 2, 3, 4, 5, 10, 13],
)
df_materials.columns = ['Material', 'Material_Description', 'Model', 'Product_Family', 'Section', 'Common_Unique', 'Total_Lead_Time', 'BUoM']
df_materials['Material_Normalized'] = df_materials['Material'].apply(normalize_product_no)

# Drop rows without a usable material number.
has_material = df_materials['Material_Normalized'].notna()
not_zero = df_materials['Material_Normalized'] != '0'
df_materials_filtered = df_materials[has_material & not_zero].copy()
print(f"Materials: {len(df_materials_filtered)}")

# Quantity matrix: same rows as df_materials, one column per product column of the sheet.
df_qty = pd.read_excel(
    dp_file,
    sheet_name="DP Shortage",
    header=None,
    skiprows=22,
    nrows=520,
    usecols=range(14, 135),
)

# Translate each product's sheet position back to its position inside df_qty.
qty_col_map = {
    header_id: header_info['Column_Index'] - 14
    for header_id, header_info in product_headers.items()
}

print("Data loaded successfully")

In [None]:
print("Extracting materials per product...")
product_materials = {}

# Row labels of the materials that survived filtering; shared by every product.
material_rows = df_materials_filtered.index

for product_id, col_idx_in_qty in qty_col_map.items():
    qty_column = df_qty.iloc[:, col_idx_in_qty]
    usable_qty = qty_column.apply(is_valid_qty)

    # Materials that carry a positive quantity for this product.
    kept_rows = material_rows[usable_qty[material_rows]].tolist()
    if not kept_rows:
        continue

    product_materials[product_id] = df_materials_filtered.loc[kept_rows].assign(
        QTY=qty_column[kept_rows].values,
        Product_ID=product_id,
    )

print(f"Extracted for {len(product_materials)} products")

In [None]:
print("Loading Resource and SKU data...")

# Resource per product. Loading is best-effort: a failure is reported and the
# dict stays (partially) empty. A later row for the same product overwrites an
# earlier one.
resource_data = {}
try:
    df_resources = pd.read_excel(snp_file, sheet_name="DP Line Utilization", header=None, skiprows=2, nrows=240, usecols=[1, 2, 4])
    df_resources.columns = ['Resource_ID', 'Resource_Description', 'Product_ID']
    for _, row in df_resources.iterrows():
        prod_id = normalize_product_no(row['Product_ID'])
        if prod_id:
            resource_data[prod_id] = {
                'Resource_ID': normalize_text(row['Resource_ID']),
                'Resource_Description': normalize_text(row['Resource_Description'])
            }
except Exception as e:
    print(f"Note: {e}")

# SKU attributes per product. The first occurrence wins, so the Adv-market
# sheet takes precedence over the EM sheet loaded after it.
sku_data = {}
adv_sheet = "Adv Mkt-Mar'25"
try:
    df_adv = pd.read_excel(snp_file, sheet_name=adv_sheet, header=None, skiprows=2, nrows=363, usecols=[1, 3, 5, 8])
    df_adv.columns = ['Product_ID', 'SKU', 'Country', 'Pack_Size']
    for _, row in df_adv.iterrows():
        prod_id = normalize_product_no(row['Product_ID'])
        if prod_id and prod_id not in sku_data:
            sku_data[prod_id] = {'SKU': normalize_text(row['SKU']), 'Country': normalize_text(row['Country']), 'Pack_Size': row['Pack_Size']}
except Exception as e:
    # Still best-effort, but no longer silent: an unreadable sheet would
    # otherwise surface only as 'N/A' SKU fields in the final workbook.
    print(f"Note: SKU data from sheet {adv_sheet!r} not loaded: {e}")

em_sheet = "EM-Mar'25"
try:
    df_em = pd.read_excel(snp_file, sheet_name=em_sheet, header=None, skiprows=2, nrows=44, usecols=[1, 6, 12, 14])
    df_em.columns = ['Product_ID', 'SKU', 'Country', 'Pack_Size']
    for _, row in df_em.iterrows():
        prod_id = normalize_product_no(row['Product_ID'])
        if prod_id and prod_id not in sku_data:
            sku_data[prod_id] = {'SKU': normalize_text(row['SKU']), 'Country': normalize_text(row['Country']), 'Pack_Size': row['Pack_Size']}
except Exception as e:
    print(f"Note: SKU data from sheet {em_sheet!r} not loaded: {e}")

print(f"Resource data: {len(resource_data)}")
print(f"SKU data: {len(sku_data)}")

In [None]:
print("Defining hierarchy...")

product_mapping = {}

# Market SKUs grouped by product family.
mCB_skus = ['800004403', '800004402', '800008019', '800008020', '800008034', '800007997', '800007345', '800007516',
            '800002513', '800007608', '800007630', '800002984', '800004986', '800007310', '800007311', '800006648',
            '800007634', '800008073', '800006523', '800002297', '800002872', '800006741', '800007380']

sMCB_skus = ['800006506', '800006505', '800006527', '800006526', '800006525', '800007546', '800007583', '800007839',
             '800006524', '800006627', '800007872']

vial_skus = ['800004400', '800004401', '800006626', '800006740', '800007996']

aspart_dlp_skus = ['800008016', '800002958', '800002948', '800006528', '800002989', '800003528', '800006592', '800006691']

aspart_vial_skus = ['800008017', '800006529']

rhi_skus = ['800001300', '800001298', '800001299']

# One entry per family: (market SKUs, assembly ID, filling ID, root ID, family label).
family_table = [
    (mCB_skus, '700003964', '700001012', '700001470', 'Glargine_mCB_DLP'),
    (sMCB_skus, '700004129', '700004130', '700004130', 'Glargine_sMCB_DLP_EU'),
    (vial_skus, '700001123', '700001123', '700001123', 'Glargine_Vial'),
    (aspart_dlp_skus, '700002770', '700001301', '700001301', 'Aspart_DLP'),
    (aspart_vial_skus, '700001318', '700001318', '700001318', 'Aspart_Vial'),
    (rhi_skus, '700000536', '700000536', '700000536', 'RHI'),
]

for family_skus, asm_code, fill_code, root_code, family_label in family_table:
    for sku in family_skus:
        # A fresh dict per SKU so entries never share state.
        product_mapping[sku] = {'assembly': asm_code, 'filling': fill_code, 'root': root_code, 'family': family_label}

print(f"Hierarchy defined with {len(product_mapping)} SKU mappings")

In [None]:
print("Creating per-SKU pegging sheets...")

output_cols = ['BOM_Type', 'BOM_Level', 'Product_ID', 'Product_Description', 'SKU', 'Country', 'Pack_Size',
               'Material', 'Material_Description', 'QTY', 'Section', 'Product_Family', 'Common_Unique',
               'Total_Lead_Time', 'BUoM', 'Model', 'Resource_ID', 'Resource_Description', 'Batch_Size']


def _build_level_rows(product_id, bom_type, bom_level, family_name, sku_info=None):
    """Return one output row per material pegged to ``product_id``.

    Parameters
    ----------
    product_id : str or None
        Product whose materials are listed (market SKU, assembly or filling).
    bom_type, bom_level : str
        Labels written to the ``BOM_Type`` / ``BOM_Level`` columns.
    family_name : str
        Value written to ``Product_Family`` for every row.
    sku_info : dict, optional
        SKU/Country/Pack_Size attributes; when omitted those columns are 'N/A'.

    Returns
    -------
    list of dict
        Rows keyed by ``output_cols``; empty when the product has no materials.
    """
    if product_id not in product_materials:
        return []

    sku_info = sku_info or {}
    product_info = product_headers.get(product_id, {})
    resource_info = resource_data.get(product_id, {})

    rows = []
    for _, mat_row in product_materials[product_id].iterrows():
        rows.append({
            'BOM_Type': bom_type,
            'BOM_Level': bom_level,
            'Product_ID': product_id,
            'Product_Description': product_info.get('Product_Description', 'N/A'),
            'SKU': sku_info.get('SKU', 'N/A'),
            'Country': sku_info.get('Country', 'N/A'),
            'Pack_Size': sku_info.get('Pack_Size', 'N/A'),
            'Material': normalize_text(mat_row['Material_Normalized']),
            'Material_Description': normalize_text(mat_row['Material_Description']),
            'QTY': mat_row['QTY'],
            'Section': normalize_text(mat_row['Section']),
            'Product_Family': family_name,
            'Common_Unique': normalize_text(mat_row['Common_Unique']),
            'Total_Lead_Time': mat_row['Total_Lead_Time'],
            'BUoM': normalize_text(mat_row['BUoM']),
            'Model': extract_model_components(mat_row['Model']),
            'Resource_ID': resource_info.get('Resource_ID', 'N/A'),
            'Resource_Description': resource_info.get('Resource_Description', 'N/A'),
            'Batch_Size': product_info.get('Batch_Size', 'N/A')
        })
    return rows


def _separator_row():
    """Return the level separator: '0' in Product_ID, every other column empty."""
    return {col: '0' if col == 'Product_ID' else None for col in output_cols}


sku_pegging_sheets = {}

for market_sku, mapping_info in sorted(product_mapping.items()):
    assembly_id = mapping_info.get('assembly')
    filling_id = mapping_info.get('filling')
    family_name = mapping_info.get('family', 'Unknown')

    # Level 1: materials of the market SKU itself (the only level with SKU attributes).
    pegging_data = _build_level_rows(market_sku, 'Packing', 'L1_Market_SKU', family_name,
                                     sku_data.get(market_sku, {}))

    # Both separators are always written, even when a level contributes no rows.
    pegging_data.append(_separator_row())

    # Level 2: skipped when the assembly is the market SKU itself.
    if assembly_id != market_sku:
        pegging_data.extend(_build_level_rows(assembly_id, 'Assembly', 'L2_Assembly', family_name))

    pegging_data.append(_separator_row())

    # Level 3: skipped when filling and assembly are the same product, so its
    # materials are not listed twice.
    if filling_id != assembly_id:
        pegging_data.extend(_build_level_rows(filling_id, 'Filling', 'L3_Filling', family_name))

    df_pegging = pd.DataFrame(pegging_data)[output_cols]
    sku_pegging_sheets[market_sku] = df_pegging

print(f"Created pegging sheets for {len(sku_pegging_sheets)} market SKUs")

In [None]:
print("Exporting to Excel...")

sheet_total = len(sku_pegging_sheets)

with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    exported = 0
    for sku in sorted(sku_pegging_sheets):
        exported += 1
        # Sheet titles are cut to 31 characters.
        sku_pegging_sheets[sku].to_excel(writer, sheet_name=f"Pegging_{sku}"[:31], index=False)

        # Report progress every tenth sheet and once more on the last one.
        if exported % 10 == 0 or exported == sheet_total:
            print(f"  Exported {exported}/{len(sku_pegging_sheets)} SKU pegging sheets")

elapsed = time.time() - start_time
print(f"\nExported: {output_file}")
print(f"Total sheets: {len(sku_pegging_sheets)}")
print(f"Time: {elapsed:.2f}s")

In [None]:
# Console recap of what the export produced.
divider = "=" * 120
print("\n" + divider)
print("PER-SKU PEGGING SUMMARY")
print(divider)

print("\nFILE: Material_pegging_SKU_2_BOM.xlsx")
print(f"\nTOTAL SKU PEGGING SHEETS: {len(sku_pegging_sheets)}")

print("\nSHEET STRUCTURE (Per Market SKU):")
for structure_line in (
    "  Level 1 (Packing): Market SKU materials",
    "  Separator: 0 (Product_ID column only)",
    "  Level 2 (Assembly): Assembly materials",
    "  Separator: 0 (Product_ID column only)",
    "  Level 3 (Filling): Filling materials",
):
    print(structure_line)

print("\nCOLUMNS (19 total):")
for position, column in enumerate(output_cols, start=1):
    print(f"  {position:2d}. {column}")

print("\nEach market SKU has complete hierarchical BOM with Packing -> Assembly -> Filling!")


## Material Availability and SS

In [None]:
import pandas as pd
import numpy as np

# ---- File + Sheet configuration ----
dp_file = "/home/supriyo/Downloads/Biocon_nw/20251006-DP Material Shortage - Working file.xlsx"
sheet_name = "DP Shortage"

# ---- Read DP Shortage sheet ----
# Header is on row 22 (Excel) → header=21 in pandas
df = pd.read_excel(dp_file, sheet_name=sheet_name, header=21)

# header=21 already consumes everything up to and including Excel row 22, so
# Excel row 23 is positional row 0. Rows 23–541 are therefore positions 0–518;
# the previous iloc[22:541] silently dropped the first 22 materials.
# NOTE(review): the pegging section reads 520 rows (Excel 23–542); confirm
# whether row 542 should be included here as well.
df = df.iloc[:519].copy()


def _stock_column(frame, column):
    """Return ``frame[column]`` with blanks as 0, or an all-zero Series when the column is absent.

    ``frame.get(column, 0).fillna(0)`` raised AttributeError for a missing
    column because the fallback ``0`` is a plain int.
    """
    if column in frame.columns:
        return frame[column].fillna(0)
    return pd.Series(0, index=frame.index)


# ---- Column Mapping ----
output = pd.DataFrame()
output["material_id"] = df["Material"]
output["material_description"] = df["Material Description"]

# Optional SS_DOS column placeholder
output["SS_DOS"] = np.nan      # Can be filled later if required

# SS_QTY → from Total Safety Stock (col ER)
output["SS_QTY"] = _stock_column(df, "Total Safety Stock")

# UBOM
output["UBOM"] = df.get("BUoM")

# ---- Compute Available_QTY ----
# Unrestricted stock minus safety stock and every listed non-usable bucket.
output["Available_QTY"] = (
    _stock_column(df, "Unrestricted Stock")
    - _stock_column(df, "Total Safety Stock")
    - _stock_column(df, "QI Stock")
    - _stock_column(df, "Rejected RJ1")
    - _stock_column(df, "Expired Material 2001")
    - _stock_column(df, "Expired Material QI")
    - _stock_column(df, "Transfer to Process Order")
)

# Prevent negative available qty
output["Available_QTY"] = output["Available_QTY"].clip(lower=0)

# ---- Save Output ----
save_path = "/home/supriyo/Downloads/Biocon_nw/Product_master_inv.xlsx"
output.to_excel(save_path, index=False)

print(f"✅ Product inventory master saved to: {save_path}")


# **Product_Demand**  **Planned_Production_FG** **BOM_Demand_By_Component**

In [None]:
import pandas as pd
import numpy as np
import re
from pathlib import Path

# ------------------------------
# File paths / sheet names
# ------------------------------
# Demand plan workbook and the sheet read into `dp_raw` below.
dp_file = "/home/supriyo/Downloads/Biocon_nw/Aug'25_L2_DP_Plan_Circulation_V2.xlsx"
dp_sheet = "L2Ph1_Detail"

# RCCP sheet: source of the "Planned Production" / "Outbound Production Demand" key figures.
rccp_file = "/home/supriyo/Downloads/Biocon_nw/ParkourSC_SNP.xlsx"
rccp_sheet = "DP RCCP"

# DP Shortage sheet: used below to derive product → component pairs.
shortage_file = "/home/supriyo/Downloads/Biocon_nw/20251006-DP Material Shortage - Working file.xlsx"
shortage_sheet = "DP Shortage"

# Output workbook for this cell.
# NOTE(review): this is the same path the inventory-master cell saves to.
out_file = "/home/supriyo/Downloads/Biocon_nw/Product_master_inv.xlsx"

# ------------------------------
# Helpers
# ------------------------------
# Month abbreviation → two-digit month number ("SEPT" is accepted as a variant of "SEP").
MONTH_MAP = {
    "JAN": "01", "FEB": "02", "MAR": "03", "APR": "04", "MAY": "05", "JUN": "06",
    "JUL": "07", "AUG": "08", "SEP": "09", "SEPT": "09", "OCT": "10", "NOV": "11", "DEC": "12"
}

def norm_month_col(name: str):
    """
    Normalize headers like 'OCT 2025', 'Nov-2026', 'Dec 25', 'Jun-27', "Mar'25"
    or '2025 Oct' to 'YYYY-MM'.

    Parameters
    ----------
    name : Any
        Column header. Non-string headers are returned untouched.

    Returns
    -------
    Any
        'YYYY-MM' when the header is a recognised month label, otherwise the
        original ``name`` unchanged. Two-digit years are read as 20YY.
    """
    if not isinstance(name, str):
        return name
    # Fold case and typographic punctuation so one pattern covers all variants.
    s = name.strip().upper().replace("’", "'").replace("–", "-").replace("—", "-")
    # Patterns:
    # 1) 'OCT 2025' / 'OCT-25' / "OCT'25"
    #    - The apostrophe is part of the separator class: curly apostrophes are
    #      normalised above, but the old class [\s\-] could never match them.
    #    - The year is exactly 4 or exactly 2 digits; the old \d{2,4} also
    #      accepted 3 digits and produced labels such as '202-10'.
    m = re.match(r"^(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|SEPT|OCT|NOV|DEC)[\s\-']+(\d{4}|\d{2})$", s)
    if m:
        mon, yr = m.group(1), m.group(2)
        yr = ("20" + yr) if len(yr) == 2 else yr
        return f"{yr}-{MONTH_MAP[mon]}"
    # 2) '2025 Oct' (unlikely here but safe)
    m = re.match(r"^(\d{4})[\s\-]+(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|SEPT|OCT|NOV|DEC)$", s)
    if m:
        yr, mon = m.group(1), m.group(2)
        return f"{yr}-{MONTH_MAP[mon]}"
    return name

def to_numeric_safe(series):
    """Convert ``series`` to numbers; values that cannot be parsed or are missing become 0."""
    coerced = pd.to_numeric(series, errors="coerce")
    return coerced.fillna(0)

def excel_cols_to_index(df, first_letter: str, last_letter: str):
    """
    Turn an Excel column-letter range (e.g. 'S' to 'AM') into a positional slice.

    Letters are read as base-26 column labels ('A' → 0, 'Z' → 25, 'AA' → 26)
    and both ends are clamped into ``[0, df.shape[1] - 1]``. The returned
    slice includes the last column.
    """
    last_position = df.shape[1] - 1

    def zero_based(letters):
        # Weight each letter by its base-26 place value, rightmost letter first.
        total = 0
        for place, ch in enumerate(reversed(letters.upper())):
            total += (ord(ch) - ord('A') + 1) * 26 ** place
        return total - 1

    def clamp(position):
        return max(0, min(position, last_position))

    lower = clamp(zero_based(first_letter))
    upper = clamp(zero_based(last_letter))
    return slice(lower, upper + 1)

# ------------------------------
# 1) Read L2Ph1_Detail (rows 3–78)
# ------------------------------
# We don't know the exact header row index in the file, but you said row 1-2 are non-data and row 3 starts data.
# We'll read the whole sheet, then slice rows 2:78 (0-based -> rows 3..78 inclusive).
# header=0: the first sheet row supplies the column names, so positional
# row 0 of dp_raw is the second sheet row.
dp_raw = pd.read_excel(dp_file, sheet_name=dp_sheet, engine="openpyxl", header=0)
# NOTE(review): with header=0, iloc[2:78] selects sheet rows 4–79 (fewer if the
# sheet is shorter). If sheet rows 3–78 are intended this would be iloc[1:77] —
# confirm against the workbook before changing.
dp = dp_raw.iloc[2:78].copy()

# Normalize column names (especially months)
dp.columns = [norm_month_col(c) for c in dp.columns]

# Identify key columns by position per your note:
# E = Market SKU (as per your instruction), K = Batch Size
# We'll also try to pick Product ID from a likely column named 'Product ID' if present.
# Normalized header → position lookup. NOTE(review): not referenced by any code
# visible in this notebook export.
col_letters = {name: idx for idx, name in enumerate(dp.columns)}

def col_by_letter(letter):
    """Return the raw ``dp_raw`` header sitting at Excel column ``letter``.

    The lookup uses the headers as read from the sheet (before month
    normalization). ``None`` is returned when the letter points past the last
    column.
    """
    raw_headers = list(dp_raw.columns)

    # Base-26 decoding of the column label: 'A' → 1 ... 'Z' → 26, 'AA' → 27.
    ordinal = 0
    for ch in letter.upper():
        ordinal = ordinal * 26 + (ord(ch) - ord('A') + 1)
    position = ordinal - 1  # zero-based

    if position < len(raw_headers):
        return raw_headers[position]
    return None

# Raw header names found at the agreed sheet positions (None when the sheet is narrower).
market_sku_col = col_by_letter("E")
batch_size_col = col_by_letter("K")

# Build month column ranges
# NOTE(review): S:AM spans 21 columns while AN:BI spans 22 — confirm both
# ranges are meant to cover the same months.
ea_cols_letters = ("S", "AM")  # EA S:AM
batch_cols_letters = ("AN", "BI")  # Batches AN:BI

ea_first = col_by_letter(ea_cols_letters[0])
ea_last = col_by_letter(ea_cols_letters[1])
batch_first = col_by_letter(batch_cols_letters[0])
batch_last = col_by_letter(batch_cols_letters[1])

# Convert those to normalized names after we normalized headers
# Each boundary header is mapped back to its position in dp_raw; a boundary
# that could not be resolved stays None and triggers the error below.
ea_start_idx = list(dp_raw.columns).index(ea_first) if ea_first in dp_raw.columns else None
ea_end_idx = list(dp_raw.columns).index(ea_last) if ea_last in dp_raw.columns else None
batch_start_idx = list(dp_raw.columns).index(batch_first) if batch_first in dp_raw.columns else None
batch_end_idx = list(dp_raw.columns).index(batch_last) if batch_last in dp_raw.columns else None

if None in (ea_start_idx, ea_end_idx, batch_start_idx, batch_end_idx):
    raise RuntimeError("Could not resolve EA/Batch column spans; please verify the sheet layout.")

# Re-find by normalized column names
# dp keeps dp_raw's column order, so the same positions select the normalized names.
norm_cols = list(dp.columns)
ea_norm_cols = norm_cols[ea_start_idx:ea_end_idx+1]
batch_norm_cols = norm_cols[batch_start_idx:batch_end_idx+1]

# Prepare base ID columns
id_cols = []
if "Product ID" in dp.columns:
    id_cols.append("Product ID")
if market_sku_col and market_sku_col in dp_raw.columns:
    mk_norm = norm_month_col(market_sku_col)
    # If normalization didn't change (most likely), keep original header
    id_cols.append(market_sku_col if market_sku_col in dp.columns else mk_norm)

# Include product description if available
for cand in ["Product Desc", "Product Description"]:
    if cand in dp.columns:
        id_cols.append(cand)

# Batch size
# batch_size_norm_name records whichever header ended up in id_cols so the
# column can be renamed to "Batch_Size" further down.
if batch_size_col in dp_raw.columns:
    bs_norm = norm_month_col(batch_size_col)
    if bs_norm in dp.columns:
        id_cols.append(bs_norm)
        batch_size_norm_name = bs_norm
    else:
        id_cols.append(batch_size_col)
        batch_size_norm_name = batch_size_col
else:
    batch_size_norm_name = None

# Deduplicate while preserving order
# set.add() returns None, so the `or` both tests membership and records the name.
seen = set()
id_cols = [c for c in id_cols if not (c in seen or seen.add(c))]

# Melt EA
# Wide → long: one row per (ID columns, month header) with the EA quantity.
ea_df = dp[id_cols + ea_norm_cols].copy()
ea_long = ea_df.melt(id_vars=id_cols, value_vars=ea_norm_cols, var_name="Month", value_name="Demand_EA")
ea_long["Demand_EA"] = to_numeric_safe(ea_long["Demand_EA"])

# Melt Batches
batch_df = dp[id_cols + batch_norm_cols].copy()
batch_long = batch_df.melt(id_vars=id_cols, value_vars=batch_norm_cols, var_name="Month", value_name="Demand_Batches")
batch_long["Demand_Batches"] = to_numeric_safe(batch_long["Demand_Batches"])

# Merge EA + Batches
# Rows pair up only when the ID values and the Month label are identical in
# both long frames; with how="outer" unmatched rows are kept with NaN on the
# other side.
# NOTE(review): if the batch headers repeat the EA month headers, pandas may
# have de-duplicated them with a suffix (e.g. '.1') that norm_month_col leaves
# unchanged, in which case no Month label would match — verify on real data.
prod_demand = pd.merge(ea_long, batch_long, on=id_cols + ["Month"], how="outer")
prod_demand["Month"] = prod_demand["Month"].apply(norm_month_col)

# Rename key ID columns to canonical names
rename_map = {}
if "Product ID" in prod_demand.columns:
    rename_map["Product ID"] = "Product_ID"
if market_sku_col in prod_demand.columns:
    rename_map[market_sku_col] = "Market_SKU"
if "Product Desc" in prod_demand.columns:
    rename_map["Product Desc"] = "Product_Desc"
if "Product Description" in prod_demand.columns:
    rename_map["Product Description"] = "Product_Desc"
if batch_size_norm_name and batch_size_norm_name in prod_demand.columns:
    rename_map[batch_size_norm_name] = "Batch_Size"

prod_demand.rename(columns=rename_map, inplace=True)

# ------------------------------
# 2) RCCP: Planned Production & Outbound Production Demand
# ------------------------------
rccp = pd.read_excel(rccp_file, sheet_name=rccp_sheet, engine="openpyxl", header=0)

# Bring month headers to 'YYYY-MM'; every other header passes through unchanged.
rccp.columns = list(map(norm_month_col, rccp.columns))

# Product ID: by header name when present, otherwise the 3rd column (col C).
rccp["Product_ID"] = rccp["Product ID"] if "Product ID" in rccp.columns else rccp.iloc[:, 2]

# Key figure: by header name when present, otherwise the 8th column (col H).
if "Key Figure" in rccp.columns:
    key_col = "Key Figure"
else:
    key_col = rccp.columns[7]
rccp["Key_Figure"] = rccp[key_col]

# All normalized month columns; 'YYYY-MM' strings sort chronologically.
month_pattern = re.compile(r"^\d{4}\-\d{2}$")
month_cols = sorted(c for c in rccp.columns if isinstance(c, str) and month_pattern.match(c))

# Restrict to the filled-FG product IDs.
filled_fg_pids = {"700001012","700001123","700000536","700001318","700001301"}
exclude_pids = {"700004130"}

pid_text = rccp["Product_ID"].astype(str)
rccp_filled = rccp[pid_text.isin(filled_fg_pids) & ~pid_text.isin(exclude_pids)].copy()

# Key-figure labels compared without surrounding blanks and case.
key_figure_label = rccp_filled["Key_Figure"].str.strip().str.lower()

# Planned Production → long form
pp = rccp_filled[key_figure_label == "planned production"].copy()
pp_long = pp.melt(
    id_vars=["Product_ID","Key_Figure"],
    value_vars=month_cols,
    var_name="Month",
    value_name="Planned_Production",
)
pp_long["Planned_Production"] = to_numeric_safe(pp_long["Planned_Production"])

# Outbound Production Demand → long form (later exploded to components)
opd = rccp_filled[key_figure_label == "outbound production demand"].copy()
opd_long = opd.melt(
    id_vars=["Product_ID","Key_Figure"],
    value_vars=month_cols,
    var_name="Month",
    value_name="Outbound_Production_Demand",
)
opd_long["Outbound_Production_Demand"] = to_numeric_safe(opd_long["Outbound_Production_Demand"])

# One row per product/month carrying both figures; gaps on either side become 0.
pp_values = pp_long.drop(columns=["Key_Figure"])
opd_values = opd_long.drop(columns=["Key_Figure"])
pp_opd = pd.merge(pp_values, opd_values, on=["Product_ID","Month"], how="outer").fillna(0)

# ------------------------------
# 3) BOM component mapping from DP Shortage (rows 23–542)
#     - "Product_ID" match in that sheet
#     - Component Material in Col F
# ------------------------------
shortage_raw = pd.read_excel(shortage_file, sheet_name=shortage_sheet, engine="openpyxl", header=0)
# header=0 turns sheet row 1 into the column names, so sheet row r sits at
# position r - 2. Sheet rows 23–542 are positions 21–540; the previous
# iloc[22:542] skipped row 23 and picked up row 543 instead.
shortage = shortage_raw.iloc[21:541].copy()  # rows 23–542

# Try to identify columns:
# Product_ID column: often named 'Model' or 'Product ID' in previous contexts. Prefer exact match if present.
# NOTE(review): the names come from sheet row 1 (header=0), whereas the
# inventory-master cell treats row 22 as the header row — confirm that row 1
# really carries these labels, otherwise the positional fallback always runs.
probable_pid_cols = [c for c in shortage.columns if str(c).strip().lower() in {"product id", "product_id", "model", "parent_product_id"}]
if probable_pid_cols:
    shortage["Product_ID"] = shortage[probable_pid_cols[0]]
else:
    # Fallback: take column 'D' (4th) if exists
    if shortage.shape[1] >= 4:
        shortage["Product_ID"] = shortage.iloc[:, 3]
    else:
        raise RuntimeError("Could not find Product_ID column in DP Shortage.")

# Component material in Col F (6th col) per your note
# NOTE(review): the pegging section labels the 1st column of this sheet
# 'Material' and the 6th 'Common_Unique' — confirm column F is the intended
# component column.
comp_col = shortage.columns[5] if shortage.shape[1] >= 6 else None
if comp_col is None:
    raise RuntimeError("Could not resolve Component Material column (expected column F).")

shortage["Component_Material_ID"] = shortage[comp_col]

# Keep non-blank, non-zero Product_ID & Component
bom_map = shortage[
    shortage["Product_ID"].notna() & (shortage["Product_ID"] != 0) &
    shortage["Component_Material_ID"].notna() & (shortage["Component_Material_ID"] != 0)
].copy()

# Filter to the filled FG PIDs only (as per instruction)
bom_map = bom_map[bom_map["Product_ID"].astype(str).isin(filled_fg_pids)]

# Deduplicate product-component pairs
bom_pairs = bom_map[["Product_ID","Component_Material_ID"]].drop_duplicates()

# ------------------------------
# 4) Create Component-level monthly demand by joining OPD with BOM pairs
#     Assumption: BOM factor = 1 (until you provide exact BOM factors)
# ------------------------------
# Left join: every product/component pair is kept and repeated for each
# product/month row of pp_opd.
comp_demand = pd.merge(bom_pairs, pp_opd, on="Product_ID", how="left")

# If you have factors later, multiply here:
# comp_demand["Component_Monthly_Demand"] = comp_demand["Outbound_Production_Demand"] * comp_demand["BOM_Factor"]
comp_demand = comp_demand.assign(
    Component_Monthly_Demand=comp_demand["Outbound_Production_Demand"]
)

# ------------------------------
# 5) Product mapping dictionary (family, assembly, filling, root)
# ------------------------------
product_mapping = {}

# Market SKUs grouped by product family.
mCB_skus = ['800004403', '800004402', '800008019', '800008020', '800008034', '800007997', '800007345', '800007516',
            '800002513', '800007608', '800007630', '800002984', '800004986', '800007310', '800007311', '800006648',
            '800007634', '800008073', '800006523', '800002297', '800002872', '800006741', '800007380']

sMCB_skus = ['800006506', '800006505', '800006527', '800006526', '800006525', '800007546', '800007583', '800007839',
             '800006524', '800006627', '800007872']

vial_skus = ['800004400', '800004401', '800006626', '800006740', '800007996']

aspart_dlp_skus = ['800008016', '800002958', '800002948', '800006528', '800002989', '800003528', '800006592', '800006691']

aspart_vial_skus = ['800008017', '800006529']

rhi_skus = ['800001300', '800001298', '800001299']

# One entry per family: (market SKUs, assembly ID, filling ID, root ID, family label).
family_table = [
    (mCB_skus, '700003964', '700001012', '700001470', 'Glargine_mCB_DLP'),
    (sMCB_skus, '700004129', '700004130', '700004130', 'Glargine_sMCB_DLP_EU'),
    (vial_skus, '700001123', '700001123', '700001123', 'Glargine_Vial'),
    (aspart_dlp_skus, '700002770', '700001301', '700001301', 'Aspart_DLP'),
    (aspart_vial_skus, '700001318', '700001318', '700001318', 'Aspart_Vial'),
    (rhi_skus, '700000536', '700000536', '700000536', 'RHI'),
]

for family_skus, asm_code, fill_code, root_code, family_label in family_table:
    for sku in family_skus:
        # A fresh dict per SKU so entries never share state.
        product_mapping[sku] = {'assembly': asm_code, 'filling': fill_code, 'root': root_code, 'family': family_label}

# Attach mapping to Product_Demand where Market_SKU is available
if "Market_SKU" in prod_demand.columns:
    def _sku_join_key(value):
        """Render a Market_SKU cell as the plain string used for product_mapping keys."""
        if pd.isna(value):
            return None
        # Excel numbers arrive as int/float (800004403 or 800004403.0), while
        # the mapping keys are strings such as '800004403'.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return str(value).strip()

    map_df = pd.DataFrame(product_mapping).T.reset_index().rename(columns={"index": "Market_SKU"})

    # Join on a temporary string key instead of the raw column: merging a
    # numeric Market_SKU column against the string keys either raises
    # (int64 vs object) or matches nothing. The Market_SKU column itself is
    # left exactly as read.
    prod_demand = (
        prod_demand
        .assign(_sku_key=prod_demand["Market_SKU"].map(_sku_join_key))
        .merge(map_df.rename(columns={"Market_SKU": "_sku_key"}), on="_sku_key", how="left")
        .drop(columns="_sku_key")
    )

# ------------------------------
# 6) Write outputs
# ------------------------------
# out_file is also where the inventory master is saved. Opening it with
# mode="w" discarded that sheet, so when the workbook already exists the
# sheets are added to it and same-named sheets from an earlier run are replaced.
writer_options = {"engine": "openpyxl", "mode": "w"}
if Path(out_file).exists():
    writer_options.update(mode="a", if_sheet_exists="replace")

with pd.ExcelWriter(out_file, **writer_options) as writer:
    # Product_Demand (normalized long form with EA + Batches)
    cols_order = [c for c in ["Product_ID","Market_SKU","Product_Desc","Batch_Size","Month","Demand_EA","Demand_Batches",
                              "family","assembly","filling","root"] if c in prod_demand.columns]
    prod_demand[cols_order].to_excel(writer, sheet_name="Product_Demand", index=False)

    # RCCP Planned Production / OPD (filtered to filled FGs)
    pp_opd.sort_values(["Product_ID","Month"]).to_excel(writer, sheet_name="Planned_Production_FG", index=False)

    # Component-level monthly demand (from OPD mapped via BOM)
    # comp_cols used to be defined but never applied, so the sheet also carried
    # every other comp_demand column; restrict it to the listed ones.
    comp_cols = ["Product_ID","Component_Material_ID","Month","Outbound_Production_Demand","Component_Monthly_Demand"]
    comp_cols = [c for c in comp_cols if c in comp_demand.columns]
    (
        comp_demand
        .sort_values(["Product_ID","Component_Material_ID","Month"])[comp_cols]
        .to_excel(writer, sheet_name="BOM_Demand_By_Component", index=False)
    )

print(f"Success: wrote Product_Demand, Planned_Production_FG, BOM_Demand_By_Component to {out_file}")


In [4]:
import pandas as pd
import numpy as np
import re
import time
import warnings

warnings.filterwarnings('ignore')

banner = "=" * 120
print(banner)
print("MATERIAL PEGGING MAP - PER-SKU PEGGING WITH COMPLETE HIERARCHY")
print(banner)

# Inputs, the pegging workbook read back in, and the workbook this section writes.
dp_file = "/home/supriyo/Downloads/Biocon_nw/20251006-DP Material Shortage - Working file.xlsx"
snp_file = "/home/supriyo/Downloads/Biocon_nw/ParkourSC_SNP.xlsx"
pegging_file = "/home/supriyo/Downloads/Biocon_nw/Material_pegging_SKU_2_BOM.xlsx"
output_file = "/home/supriyo/Downloads/Biocon_nw/pegging_output.xlsx"

xls = pd.ExcelFile(pegging_file, engine='openpyxl')

# Keep only the sheets whose name starts with "Pegging_"
pegging_sheets = list(filter(lambda title: title.startswith("Pegging_"), xls.sheet_names))

print(f"Found {len(pegging_sheets)} pegging sheets in {pegging_file}")
for sheet in pegging_sheets:
    print(f"  - {sheet}")


MATERIAL PEGGING MAP - PER-SKU PEGGING WITH COMPLETE HIERARCHY
Found 52 pegging sheets in /home/supriyo/Downloads/Biocon_nw/Material_pegging_SKU_2_BOM.xlsx
  - Pegging_800001298
  - Pegging_800001299
  - Pegging_800001300
  - Pegging_800002297
  - Pegging_800002513
  - Pegging_800002872
  - Pegging_800002948
  - Pegging_800002958
  - Pegging_800002984
  - Pegging_800002989
  - Pegging_800003528
  - Pegging_800004400
  - Pegging_800004401
  - Pegging_800004402
  - Pegging_800004403
  - Pegging_800004986
  - Pegging_800006505
  - Pegging_800006506
  - Pegging_800006523
  - Pegging_800006524
  - Pegging_800006525
  - Pegging_800006526
  - Pegging_800006527
  - Pegging_800006528
  - Pegging_800006529
  - Pegging_800006592
  - Pegging_800006626
  - Pegging_800006627
  - Pegging_800006648
  - Pegging_800006691
  - Pegging_800006740
  - Pegging_800006741
  - Pegging_800007310
  - Pegging_800007311
  - Pegging_800007345
  - Pegging_800007380
  - Pegging_800007516
  - Pegging_800007546
  - Pegg

In [None]:
# NOTE(review): leftover inspection cell. `col_name` is not assigned in any
# cell visible in this export, so this line presumably relies on kernel state
# from a cell that is not shown — confirm it is still needed.
col_name

'g'