In [1]:
import os
import pandas as pd
import numpy as np
import joblib
from xgboost import XGBClassifier, XGBRegressor

# ============================================================
# 0. Locate the project root (launched from root or notebooks/)
# ============================================================
# When the working directory is a folder named "notebooks" (any casing),
# its parent is taken as the project root; otherwise the working
# directory itself is used.
cwd = os.getcwd()
_launched_from_notebooks = os.path.basename(cwd).lower() == "notebooks"
PROJECT_ROOT = (
    os.path.abspath(os.path.join(cwd, os.pardir))
    if _launched_from_notebooks
    else cwd
)

# Data files and serialized model artifacts sit directly under the root.
DATA_PROCESSED, MODEL_DIR = (
    os.path.join(PROJECT_ROOT, folder) for folder in ("data_processed", "models")
)

# ============================================================
# 1. Load FY25 Feature Table
# ============================================================
# Read the pre-built feature table from the processed-data folder.
# The later sections add their prediction columns to this same frame.
input_file = os.path.join(DATA_PROCESSED, "FY25_SDRAM_feature_table.csv")
df = pd.read_csv(filepath_or_buffer=input_file)

# Quick sanity check: table dimensions, then the first rows.
print("Loaded FY25 Feature Table:", df.shape)
print(df.head())

# ============================================================
# 2. Supplier Normalization
# ============================================================
# Maps each supplier spelling found in the source data onto one canonical
# name. Spellings that are not listed are kept unchanged (after trimming
# surrounding whitespace).
supplier_normalization_map = {
    "Micron Technology": "Micron",
    "Micron Technology Inc": "Micron",
    "Micron Technology, Inc.": "Micron",
    "Micron": "Micron",
    "Samsung Electronics": "Samsung",
    "Samsung Electronics Co., Ltd.": "Samsung",
    "Samsung Electronics Co., Ltd. and its subsidiaries": "Samsung",
    "Samsung": "Samsung",
    "SK hynix": "SK Hynix",
    "SK Hynix Inc.": "SK Hynix",
    "Consolidated (K-IFRS)": "SK Hynix",
    "SK Hynix": "SK Hynix",
}

# Trim once with the vectorized string accessor, then look every name up
# in the normalization map, falling back to the trimmed name itself.
_supplier_text = df["supplier"].astype(str).str.strip()
df["supplier"] = _supplier_text.map(
    lambda name: supplier_normalization_map.get(name, name)
)

# Assign supplier_code (same mapping used in Model-1B)
supplier_code_map = {"Micron": 0, "Samsung": 1, "SK Hynix": 2}
df["supplier_code"] = df["supplier"].map(supplier_code_map)

# Fail fast on suppliers that have no code. Without this check the NaN code
# would flow silently into the Model-1B feature matrix. Missing supplier
# values show up here as the string "nan" because of the astype(str) above.
_unmapped_suppliers = sorted(
    df.loc[df["supplier_code"].isna(), "supplier"].unique()
)
if _unmapped_suppliers:
    raise ValueError(
        "No supplier_code for supplier name(s): "
        f"{_unmapped_suppliers}. Add them to supplier_normalization_map "
        "or supplier_code_map."
    )

# ============================================================
# 3. Load Model-1B artifacts (Supervised PSL Classification)
# ============================================================
# The classifier is restored from its JSON dump; the scaler and the
# label encoder are joblib files stored in the same folder.
model1 = XGBClassifier()
model1.load_model(os.path.join(MODEL_DIR, "model1_xgb.json"))

model1_scaler, psl_encoder = (
    joblib.load(os.path.join(MODEL_DIR, artifact))
    for artifact in ("model1_scaler.pkl", "model1_psl_encoder.pkl")
)

# ============================================================
# 4. Prepare Model-1B Features
# ============================================================
# Columns are selected in exactly this order before being scaled.
model1_features = [
    "fiscal_year",
    "supplier_code",
    "gross_margin_pct",
    "cash_flow",
    "debt_equity_ratio",
    "node_parity",
    "DDR_gen_support",
    "geo_risk",
    "tariff_risk",
    "chip_shortage_impact",
]

X1 = df[model1_features]
X1_scaled = model1_scaler.transform(X1)

# ============================================================
# 5. Predict PSL Category for FY25
# ============================================================
# Class probabilities first, then the hard class codes decoded back to
# their labels through the encoder.
psl_probs = model1.predict_proba(X1_scaled)
psl_pred_codes = model1.predict(X1_scaled)
df["predicted_PSL_status"] = psl_encoder.inverse_transform(psl_pred_codes)

# One probability column per encoder class; column i of the probability
# matrix is paired with the i-th entry of psl_encoder.classes_.
for i, cls in enumerate(psl_encoder.classes_):
    df[f"Prob_{cls}"] = psl_probs[:, i]

# ============================================================
# 6. Load Model-2B artifacts (Supervised Spend Allocation)
# ============================================================
# All three artifacts are joblib (pickle-based) files, so MODEL_DIR must
# only ever contain files from a trusted source.
model2, allocation_scaler, psl_encoder_alloc = (
    joblib.load(os.path.join(MODEL_DIR, artifact))
    for artifact in (
        "model2_xgb.pkl",
        "model2_scaler.pkl",
        "model2_psl_encoder.pkl",
    )
)

# ============================================================
# 7. Prepare Model-2B Input
# ============================================================
# Re-encode the predicted PSL labels with Model-2B's own encoder.
_predicted_status = df["predicted_PSL_status"]
df["PSL_status_encoded"] = psl_encoder_alloc.transform(_predicted_status)

def clean_numeric(series):
    """Convert a column of formatted numbers ("15%", "1,200", "-500") to float.

    Parameters
    ----------
    series : pandas.Series
        Column to convert. Columns that already have a numeric dtype are
        cast to float directly. Any other column is converted to text and
        stripped of every character that is not a digit, a decimal point
        or a minus sign.

    Returns
    -------
    pandas.Series
        Float series. Entries with nothing left after stripping become NaN.

    Raises
    ------
    ValueError
        If a stripped entry is still not a valid float (for example "1-2").
    """
    # Numeric columns must bypass the text path: str() renders very small or
    # very large floats in scientific notation ("1e-05"), and stripping the
    # "e" would leave an unparseable "1-05". Booleans keep going through the
    # text path so they still come out as NaN.
    if pd.api.types.is_numeric_dtype(series) and not pd.api.types.is_bool_dtype(
        series
    ):
        return series.astype(float)

    # A single pass is enough: the pattern also removes "," and "%".
    stripped = series.astype(str).str.replace(r"[^\d\.\-]", "", regex=True)
    return stripped.replace("", np.nan).astype(float)

# Model-2B input columns, in the order in which they are handed to the scaler.
model2_features = [
    "PSL_status_encoded",
    "cost_savings",
    "PPV",
    "QP",
    "QR",
    "lead_time_attainment",
    "carbon_emission_intensity",
    "renewable_energy_usage",
    "plastic_recycle",
    "human_rights_compliance_score",
]

# Convert every feature column to float (column-wise) and write the
# cleaned values back into the frame.
_model2_inputs_clean = df[model2_features].apply(clean_numeric)
df[model2_features] = _model2_inputs_clean

X2 = df[model2_features]
X2_scaled = allocation_scaler.transform(X2)

# ============================================================
# 8. Predict Spend Allocation (strict 100% per year)
# ============================================================
# Raw model scores are kept in a temporary column until they have been
# turned into percentages.
df["raw_pred"] = model2.predict(X2_scaled)

def softmax(x):
    """Return the softmax of ``x``: non-negative weights that sum to 1.

    The maximum of ``x`` is subtracted before exponentiating so that large
    scores do not overflow; this shift does not change the result.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / weights.sum()

# Turn the raw scores into percentages that add up to exactly 100 within
# each fiscal year, in steps of 5.
#
# Results are written back through each group's own index labels.
# groupby() yields the groups in sorted key order and skips rows whose key
# is NaN, so collecting the values in a flat list and assigning it
# positionally would attach allocations to the wrong rows whenever the
# frame is not already ordered by fiscal_year, and would fail on a length
# mismatch if any fiscal_year is missing. Rows with a missing fiscal_year
# keep NaN here.
final_allocs = pd.Series(np.nan, index=df.index, dtype=float)

for _, group in df.groupby("fiscal_year"):
    sm = softmax(group["raw_pred"].values)

    # Share of 100, rounded to the nearest multiple of 5.
    alloc = np.round(sm * 100 / 5) * 5
    alloc = np.clip(alloc, 0, 100)

    # Rounding can leave the total above or below 100; the largest share
    # absorbs the difference so that the year sums to exactly 100.
    diff = 100 - alloc.sum()
    if diff != 0:
        alloc[np.argmax(alloc)] += diff

    final_allocs.loc[group.index] = alloc

df["predicted_allocation_percent"] = final_allocs

# The raw scores were only an intermediate; keep them out of the output.
df = df.drop(columns=["raw_pred"])

# ============================================================
# 9. Save Output
# ============================================================
# Write the enriched table next to the input file, without the index column.
output_file = os.path.join(DATA_PROCESSED, "FY25_supplier_strategy_output.csv")
df.to_csv(path_or_buf=output_file, index=False)

print("\n✅ FINAL OUTPUT SAVED:", output_file)


Loaded FY25 Feature Table: (3, 23)
   supplier cost_savings  PPV  QP  QR lead_time_attainment  \
0    Micron          15% -500  95  90                  95%   
1   Samsung           7% -500  85  80                  90%   
2  SK Hynix          10% -500  95  90                  95%   

   carbon_emission_intensity renewable_energy_usage plastic_recycle  \
0                       12.0                    38%             91%   
1                       14.5                    32%             88%   
2                       13.5                    35%             89%   

   human_rights_compliance_score  ...  tariff_risk  chip_shortage_impact  \
0                             97  ...        0.086                  0.08   
1                             95  ...        0.081                  0.08   
2                             96  ...        0.090                  0.08   

        revenue          cogs  gross_margin_pct     cash_flow  \
0  3.737800e+07  2.250500e+07         39.800000  1.752500e+07