# Loading

In [1]:
import os
import joblib
import pandas as pd
import numpy as np
from pprint import pprint


# Paths & feature lists

In [None]:
# Cell 3 - Where the pickled models and CSV datasets live, plus the column
# names each model is expected to consume (edit if your filenames differ).
MODEL_DIR = "pkl_file"
DATA_DIR = "datasets"   # folder containing the three CSV files

# Model key -> path inside MODEL_DIR. Other spellings of these filenames are
# tried as fallbacks when the models are loaded.
MODEL_PATHS = {
    key: os.path.join(MODEL_DIR, filename)
    for key, filename in (
        ("fabric_model", "Frabic_model.pkl"),
        ("sustainability_model", "sustainability_model.pkl"),
        ("supply_model", "supply_chain_model.pkl"),
    )
}

# Dataset key -> path inside DATA_DIR.
DATA_PATHS = {
    key: os.path.join(DATA_DIR, filename)
    for key, filename in (
        ("fabric_dataset", "fabric_full_smart_dataset_5000.csv"),
        ("sustainability_dataset", "fabric_full_sustainability_dataset_5000.csv"),
        ("supply_dataset", "supply_chain_dataset_5000.csv"),
    )
}

# Input columns for the fabric model. Performance_Score is the training
# target, so it is deliberately absent from this list.
FABRIC_FEATURES = [
    "Fabric_Type",
    "Weight_gsm",
    "Breathability",
    "Insulation",
    "Tensile_Strength",
    "Moisture_Absorption",
    "Recyclability",
    "Biodegradability",
    "Cost_Rs_per_meter",
    "Terrain",
]

# Input columns for the sustainability model.
SUSTAINABILITY_FEATURES = [
    "Fabric_Type",
    "Recyclability",
    "Biodegradability",
    "Water_Usage",
    "Carbon_Emissions",
    "Toxicity",
    "Microplastic_Shedding",
    "Durability",
    "Region_Suitability",
]

# Input columns for the supply-chain model.
SUPPLY_FEATURES = [
    "Fabric_Type",
    "Region",
    "Cost_per_Unit(INR)",
    "Transport_Time(days)",
    "Distance_to_Base(km)",
    "Demand_Units",
    "Available_Stock",
    "Supplier_Rating(1-5)",
    "Risk_Factor(1-10)",
]

# Echo the resolved paths so they can be checked before anything is loaded.
print("Check these paths now. Edit them if your files are in different folders.")
pprint(MODEL_PATHS)
pprint(DATA_PATHS)


Check these paths now. Edit them if your files are in different folders.
{'fabric_model': 'pkl_file\\Frabic_model.pkl',
 'supply_model': 'pkl_file\\supply_chain_model.pkl',
 'sustainability_model': 'pkl_file\\sustainability_model.pkl'}
{'fabric_dataset': 'datasets\\fabric_full_smart_dataset_5000.csv',
 'supply_dataset': 'datasets\\supply_chain_dataset_5000.csv',
 'sustainability_dataset': 'datasets\\fabric_full_sustainability_dataset_5000.csv'}


# Safe loader helpers

In [3]:
# Cell 4 - Safe loader helpers
def safe_load(path):
    """Load a joblib file without ever raising.

    Parameters
    ----------
    path : str or None
        Location of the file to load.

    Returns
    -------
    object or None
        The loaded object, or None when ``path`` is None, the file does not
        exist, or loading raised an exception. A one-line status message is
        printed in every case except ``path is None``.

    Notes
    -----
    joblib files are pickle-based, so only load files you trust.
    """
    if path is None:
        return None
    if not os.path.exists(path):
        print(f"Not found: {path}")
        return None
    try:
        loaded = joblib.load(path)
        print(f"Loaded: {path} -> {type(loaded).__name__}")
    except Exception as err:
        # Best-effort by design: report the failure and let the caller decide.
        print(f"Failed loading {path}: {err}")
        return None
    return loaded

# Try alternative filenames if necessary
def try_load(primary, alternatives=()):
    """Load ``primary``; if that fails, try alternative filenames in MODEL_DIR.

    Parameters
    ----------
    primary : str or None
        Full path tried first (passed to ``safe_load`` unchanged).
    alternatives : iterable of str, optional
        Bare filenames, each joined onto ``MODEL_DIR`` and tried in order.
        Defaults to an empty tuple; ``None`` is treated as "no alternatives".
        (An immutable default avoids sharing one list object across calls.)

    Returns
    -------
    object or None
        The first object that loads successfully, or None if nothing loads.
    """
    loaded = safe_load(primary)
    if loaded is not None:
        return loaded
    for alt_name in alternatives or ():
        loaded = safe_load(os.path.join(MODEL_DIR, alt_name))
        if loaded is not None:
            return loaded
    return None


#  Load models

In [4]:
# Cell 5 - Load models (edit the alternatives lists if your filenames differ)
# The primary path from MODEL_PATHS is always tried first, so it is not
# repeated in the alternatives (that would only attempt the same file twice).
fabric_model = try_load(MODEL_PATHS["fabric_model"], alternatives=["Fabric_model.pkl", "fabric_model.pkl"])
sustain_model = try_load(MODEL_PATHS["sustainability_model"], alternatives=["sustainability_model_v1.pkl", "sustain_model.pkl"])
supply_model = try_load(MODEL_PATHS["supply_model"], alternatives=["supply_chain_model_v1.pkl", "supply_model.pkl"])

# try_load returns None on failure. Test for None explicitly: ensemble
# estimators define __len__, so bool(model) reflects their size rather than
# whether loading succeeded.
print("Models summary:")
print(" fabric_model:", "OK" if fabric_model is not None else "MISSING")
print(" sustain_model:", "OK" if sustain_model is not None else "MISSING")
print(" supply_model:", "OK" if supply_model is not None else "MISSING")


Loaded: pkl_file\Frabic_model.pkl -> RandomForestRegressor
Loaded: pkl_file\sustainability_model.pkl -> RandomForestRegressor
Loaded: pkl_file\supply_chain_model.pkl -> RandomForestClassifier
Models summary:
 fabric_model: OK
 sustain_model: OK
 supply_model: OK


# Load datasets

In [7]:
# Cell 6 - Load datasets
# A frame is read only when its CSV exists; otherwise the variable is None so
# later cells can test for a missing dataset.
df_fabric = None
if os.path.exists(DATA_PATHS["fabric_dataset"]):
    df_fabric = pd.read_csv(DATA_PATHS["fabric_dataset"])

df_sust = None
if os.path.exists(DATA_PATHS["sustainability_dataset"]):
    df_sust = pd.read_csv(DATA_PATHS["sustainability_dataset"])

df_supply = None
if os.path.exists(DATA_PATHS["supply_dataset"]):
    df_supply = pd.read_csv(DATA_PATHS["supply_dataset"])

print("Datasets loaded:")
for label, frame in (
    (" fabric:", df_fabric),
    (" sustainability:", df_sust),
    (" supply:", df_supply),
):
    print(label, "MISSING" if frame is None else "OK")

# quick peek at the first rows of whatever was loaded
for frame in (df_fabric, df_sust, df_supply):
    if frame is not None:
        display(frame.head(3))


Datasets loaded:
 fabric: OK
 sustainability: OK
 supply: OK


Unnamed: 0,Fabric_Type,Weight_gsm,Breathability,Insulation,Tensile_Strength,Moisture_Absorption,Recyclability,Biodegradability,Cost_Rs_per_meter,Terrain,Performance_Score
0,Rayon,334.893756,2.650913,8.017219,6.371651,5.012495,1.899774,5.13324,533.877486,Jungle,52.38
1,Spandex,288.284311,1.507704,7.497989,9.446974,1.007009,9.929904,6.557334,936.897083,Cold Desert,83.86
2,Blended Fabric A,173.193325,6.506676,2.255445,3.629302,4.297257,5.10463,8.066584,339.526984,Humid Coast,48.97


Unnamed: 0,Fabric_Type,Recyclability,Biodegradability,Water_Usage,Carbon_Emissions,Toxicity,Microplastic_Shedding,Durability,Region_Suitability
0,Nylon,7.251198,1.659177,1454.12667,12.838951,5.696168,8.785652,8.199509,Desert
1,Nylon,7.623716,0.741226,4552.053956,11.354955,5.950722,6.870238,9.143675,High Altitude
2,Bamboo Fiber,7.212781,8.011012,4784.081969,3.71435,2.33864,0.0,7.214075,Jungle


Unnamed: 0,Fabric_Type,Region,Cost_per_Unit(INR),Transport_Time(days),Distance_to_Base(km),Demand_Units,Available_Stock,Supplier_Rating(1-5),Risk_Factor(1-10),Supply_Priority
0,Kevlar,Hot Desert,1317.2,10.48,726.2,8816,11804,2.6,7.5,MEDIUM
1,Nylon,High Altitude,863.47,44.68,1402.7,14854,16190,4.5,6.1,MEDIUM
2,Bamboo Fibre,Coastal,263.06,7.11,277.5,8711,9040,2.0,3.4,LOW


#  Enrich fabric dataset if required

In [8]:
# Cell 7 - Ensure fabric dataset has required attributes (fills from defaults if missing)
if df_fabric is None:
    raise RuntimeError("fabric dataset not found. Place it and re-run Cell 6.")

required = FABRIC_FEATURES.copy()
missing = [c for c in required if c not in df_fabric.columns]
print("Fabric attributes missing:", missing)

# Remember whether any enrichment was needed. Columns recovered by the merge
# below come from a left join and can contain NaN for fabric types that the
# property file does not list, so the NaN-fill pass must run even when the
# merge leaves `missing` empty.
needs_enrichment = bool(missing)

# If external property file exists, try merge
prop_file = os.path.join(DATA_DIR, "fabric_properties.csv")
if missing and os.path.exists(prop_file):
    df_props = pd.read_csv(prop_file)
    # Bring in only columns df_fabric does not already have. Merging
    # overlapping columns would rename both sides with _x/_y suffixes and
    # turn previously-present required columns into "missing" ones.
    new_cols = [c for c in df_props.columns if c not in df_fabric.columns]
    df_fabric = pd.merge(
        df_fabric,
        df_props[["Fabric_Type"] + new_cols],
        on="Fabric_Type",
        how="left",
    )
    missing = [c for c in required if c not in df_fabric.columns]
    print("After merging fabric_properties.csv, still missing:", missing)

# Fallback: fill using simple profile defaults by Fabric_Type
if missing:
    print("Filling missing attributes using default FABRIC_PROFILES (best-effort).")
    FABRIC_PROFILES = {
        "Cotton": {"Weight_gsm":160,"Breathability":8.5,"Insulation":3.0,"Tensile_Strength":5.0,"Moisture_Absorption":8.0,"Recyclability":8.0,"Biodegradability":9.0,"Cost_Rs_per_meter":180},
        "Polyester": {"Weight_gsm":140,"Breathability":5.0,"Insulation":3.5,"Tensile_Strength":7.5,"Moisture_Absorption":2.5,"Recyclability":5.5,"Biodegradability":1.0,"Cost_Rs_per_meter":250},
        "Nylon": {"Weight_gsm":150,"Breathability":4.5,"Insulation":4.0,"Tensile_Strength":8.5,"Moisture_Absorption":3.0,"Recyclability":6.0,"Biodegradability":0.5,"Cost_Rs_per_meter":600},
        "Wool": {"Weight_gsm":300,"Breathability":6.0,"Insulation":9.0,"Tensile_Strength":6.5,"Moisture_Absorption":7.0,"Recyclability":7.0,"Biodegradability":8.0,"Cost_Rs_per_meter":900},
        "Kevlar": {"Weight_gsm":220,"Breathability":2.0,"Insulation":5.0,"Tensile_Strength":10.0,"Moisture_Absorption":1.0,"Recyclability":3.0,"Biodegradability":0.0,"Cost_Rs_per_meter":2500},
        "Bamboo Fiber": {"Weight_gsm":150,"Breathability":7.0,"Insulation":4.0,"Tensile_Strength":5.5,"Moisture_Absorption":7.0,"Recyclability":8.0,"Biodegradability":8.5,"Cost_Rs_per_meter":400}
    }

    for col in missing:
        if col == "Terrain":
            # if Region exists use that, else default to "General Use"
            if "Region" in df_fabric.columns:
                df_fabric["Terrain"] = df_fabric["Region"]
            else:
                df_fabric["Terrain"] = "General Use"
            continue
        # Fabric types without a profile (or without this attribute) get NaN
        # here and are handled by the fill pass below.
        df_fabric[col] = df_fabric.apply(
            lambda r: FABRIC_PROFILES.get(r.get("Fabric_Type"), {}).get(col, np.nan),
            axis=1
        )

# Fill NaNs left behind by either enrichment route (merge or profile
# defaults): numeric columns get the column median (0 if the whole column is
# NaN), everything else gets "UNKNOWN". Skipped when the dataset already had
# every required column, so untouched data is never altered.
if needs_enrichment:
    for col in required:
        if df_fabric[col].isnull().any():
            if df_fabric[col].dtype.kind in "biufc":
                med = df_fabric[col].median()
                df_fabric[col] = df_fabric[col].fillna(0 if np.isnan(med) else med)
            else:
                df_fabric[col] = df_fabric[col].fillna("UNKNOWN")

print("After enrichment - sample:")
display(df_fabric[required].head(5))

# create encoded columns if model expects them (safe to have)
# NOTE(review): pd.factorize numbers categories in order of first appearance;
# these codes only match a model trained with the same scheme - TODO confirm
# against the encoders used at training time.
if "Fabric_Encoded" not in df_fabric.columns:
    df_fabric["Fabric_Encoded"] = pd.factorize(df_fabric["Fabric_Type"])[0]
if "Terrain_Encoded" not in df_fabric.columns:
    df_fabric["Terrain_Encoded"] = pd.factorize(df_fabric["Terrain"])[0]


Fabric attributes missing: []
After enrichment - sample:


Unnamed: 0,Fabric_Type,Weight_gsm,Breathability,Insulation,Tensile_Strength,Moisture_Absorption,Recyclability,Biodegradability,Cost_Rs_per_meter,Terrain
0,Rayon,334.893756,2.650913,8.017219,6.371651,5.012495,1.899774,5.13324,533.877486,Jungle
1,Spandex,288.284311,1.507704,7.497989,9.446974,1.007009,9.929904,6.557334,936.897083,Cold Desert
2,Blended Fabric A,173.193325,6.506676,2.255445,3.629302,4.297257,5.10463,8.066584,339.526984,Humid Coast
3,Wool,269.572662,1.418054,6.467904,2.534717,1.585464,9.53997,9.690688,1222.176155,Cold Desert
4,Blended Fabric B,111.255076,7.158097,4.961372,2.098344,5.456592,1.309497,9.183884,425.230973,Jungle


#  Build feature matrices for prediction 

In [9]:
# Cell 8 - Assemble the per-model input frames, keeping the column order of
# the matching *_FEATURES list (columns absent from a dataset are skipped).
# Note: the fabric selection still contains the string columns listed in
# FABRIC_FEATURES; narrow it further if your model expects numeric inputs only.
X_fabric = df_fabric.copy()
fabric_input_cols = [name for name in FABRIC_FEATURES if name in X_fabric.columns]
X_fabric_inputs = X_fabric[fabric_input_cols]

# Sustainability inputs (left as None when that dataset was not loaded)
X_sust_inputs = None
if df_sust is not None:
    X_sust = df_sust.copy()
    sust_input_cols = [name for name in SUSTAINABILITY_FEATURES if name in X_sust.columns]
    X_sust_inputs = X_sust[sust_input_cols]

# Supply inputs (left as None when that dataset was not loaded)
X_supply_inputs = None
if df_supply is not None:
    X_supply = df_supply.copy()
    supply_input_cols = [name for name in SUPPLY_FEATURES if name in X_supply.columns]
    X_supply_inputs = X_supply[supply_input_cols]

# Report one shape per input frame, or None for a frame that was not built.
shapes = [
    None if part is None else part.shape
    for part in (X_fabric_inputs, X_sust_inputs, X_supply_inputs)
]
print("Feature shapes (fabric, sust, supply):", *shapes)


Feature shapes (fabric, sust, supply): (5000, 10) (5000, 9) (5000, 9)


#  Predict and assemble result

In [None]:
# Cell: prepare_inputs_and_predict.py
import os, joblib, pandas as pd, numpy as np

DATA_DIR = "datasets"
PKL_DIR = "pkl_file"


def _pkl_path(filename):
    """Return the path of ``filename`` inside PKL_DIR."""
    return os.path.join(PKL_DIR, filename)


def _load_pkl_if_present(filename):
    """Load PKL_DIR/filename with joblib, or return None if the file is absent."""
    target = _pkl_path(filename)
    return joblib.load(target) if os.path.exists(target) else None


# Load data (each read raises if its CSV is missing)
df_fabric = pd.read_csv(os.path.join(DATA_DIR, "fabric_full_smart_dataset_5000.csv"))
df_sust = pd.read_csv(os.path.join(DATA_DIR, "fabric_full_sustainability_dataset_5000.csv"))
df_supply = pd.read_csv(os.path.join(DATA_DIR, "supply_chain_dataset_5000.csv"))

# Load models. The fabric model is looked up as "Frabic_model.pkl" first and
# falls back to "fabric_model.pkl" when that file does not exist.
if os.path.exists(_pkl_path("Frabic_model.pkl")):
    fabric_model = joblib.load(_pkl_path("Frabic_model.pkl"))
else:
    fabric_model = joblib.load(_pkl_path("fabric_model.pkl"))
sust_model = joblib.load(_pkl_path("sustainability_model.pkl"))
supply_model = joblib.load(_pkl_path("supply_chain_model.pkl"))

# Load encoders we created earlier: the fabric encoder is required, the
# terrain and region encoders are optional (None when their file is absent).
fabric_le = joblib.load(_pkl_path("fabric_label_encoder.pkl"))
terrain_le = _load_pkl_if_present("terrain_label_encoder.pkl")
region_le = _load_pkl_if_present("region_label_encoder.pkl")

# Encode a label column with a fitted encoder; labels the encoder has never
# seen become -1 (and are reported) instead of raising.
def safe_transform(le, series, name):
    """Map the values of ``series`` to the integer codes of a fitted encoder.

    Parameters
    ----------
    le : fitted label encoder
        Must expose ``classes_``; the code of a label is its position in
        ``le.classes_``, which is what ``le.transform`` returns for it.
    series : pandas.Series
        Values to encode. They are compared to the encoder classes as
        strings; missing values are treated as the string "nan".
    name : str
        Column name, used only in the message printed for unseen labels.

    Returns
    -------
    numpy.ndarray
        Integer array, one code per row; -1 marks a value that is not among
        ``le.classes_``.

    Notes
    -----
    A model that never saw -1 during training may predict poorly on such
    rows, so unseen labels are reported rather than mapped silently.
    """
    vals = series.astype(str).fillna("nan")
    # Build the label -> code table once; the previous version called
    # le.transform separately for every row.
    code_of = {label: idx for idx, label in enumerate(le.classes_)}
    codes = np.array([code_of.get(v, -1) for v in vals], dtype=int)

    unseen = sorted({v for v in vals if v not in code_of})
    if unseen:
        n_rows = int((codes == -1).sum())
        print(
            f"safe_transform: {n_rows} row(s) of '{name}' hold labels unknown "
            f"to the encoder and were mapped to -1 (e.g. {unseen[:5]})"
        )
    return codes

# --- Fabric dataset ---------------------------------------------------------
df_fabric["Fabric_Encoded"] = safe_transform(fabric_le, df_fabric["Fabric_Type"], "Fabric_Type")
if "Terrain" in df_fabric.columns and terrain_le is not None:
    df_fabric["Terrain_Encoded"] = safe_transform(terrain_le, df_fabric["Terrain"], "Terrain")
elif "Terrain" in df_fabric.columns:
    # if no Terrain encoder, make encoded by factorize (only if models were trained this way)
    df_fabric["Terrain_Encoded"] = pd.factorize(df_fabric["Terrain"].fillna("General Use"))[0]
else:
    # No Terrain column at all: previously this fell into the factorize branch
    # and raised KeyError. Treat every row as the single default terrain
    # ("General Use"), which factorizes to code 0.
    df_fabric["Terrain_Encoded"] = 0

# --- Supply dataset ---------------------------------------------------------
df_supply["Fabric_Encoded"] = safe_transform(fabric_le, df_supply["Fabric_Type"], "Fabric_Type")
if region_le is not None and "Region" in df_supply.columns:
    df_supply["Region_Encoded"] = safe_transform(region_le, df_supply["Region"], "Region")
else:
    if "Region" in df_supply.columns:
        df_supply["Region_Encoded"] = pd.factorize(df_supply["Region"].fillna("Unknown"))[0]

# --- Sustainability dataset -------------------------------------------------
# Region_Encoded is only created here when a region encoder is available.
df_sust["Fabric_Encoded"] = safe_transform(fabric_le, df_sust["Fabric_Type"], "Fabric_Type")
if region_le is not None and "Region_Suitability" in df_sust.columns:
    df_sust["Region_Encoded"] = safe_transform(region_le, df_sust["Region_Suitability"], "Region_Suitability")

# -------------------
# Build feature arrays using model.feature_names_in_ if present
# -------------------
def build_X_for_model(model, df, fallback_cols):
    """Select the model's input columns from ``df``.

    Parameters
    ----------
    model : object
        If it has a ``feature_names_in_`` attribute, those names (in that
        order) define the selection.
    df : pandas.DataFrame
        Frame that must contain every selected column.
    fallback_cols : list of str
        Column names used when ``model`` has no ``feature_names_in_``.

    Returns
    -------
    pandas.DataFrame
        ``df`` restricted to the selected columns, in the selected order.

    Raises
    ------
    RuntimeError
        If any selected column is absent from ``df``.
    """
    if hasattr(model, "feature_names_in_"):
        wanted = list(model.feature_names_in_)
        error_prefix = "Missing columns for model: "
    else:
        wanted = fallback_cols
        error_prefix = "Missing fallback columns for model: "

    absent = [name for name in wanted if name not in df.columns]
    if absent:
        raise RuntimeError(f"{error_prefix}{absent}")
    return df[wanted]

# Provide your expected fallback orders (must exactly match training order).
# A fallback list is only used when the model has no feature_names_in_.
# fabric_fallback mirrors the feature_names_in_ that the loaded fabric model
# reports in the model-inspection cell (7 columns, Terrain before Fabric).
fabric_fallback = ['Weight_gsm','Breathability','Insulation','Tensile_Strength',
                   'Moisture_Absorption','Terrain_Encoded','Fabric_Encoded']

sust_fallback = ['Recyclability','Biodegradability','Water_Usage','Carbon_Emissions',
                 'Toxicity','Microplastic_Shedding','Durability']

# NOTE(review): the supply model reports no feature_names_in_ in the
# inspection cell, so this order (and any preprocessing applied at training
# time) cannot be verified from here - TODO confirm against the training code.
supply_fallback = ['Fabric_Encoded','Region_Encoded','Cost_per_Unit(INR)','Transport_Time(days)',
                   'Distance_to_Base(km)','Demand_Units','Available_Stock','Supplier_Rating(1-5)','Risk_Factor(1-10)']

# Build Xs safely (raises RuntimeError naming any column a model needs but the frame lacks)
X_perf = build_X_for_model(fabric_model, df_fabric, fabric_fallback)
# df_sust is always a DataFrame at this point (read_csv above returns one or
# raises), so the former "else df_fabric" branch could never run.
X_sust = build_X_for_model(sust_model, df_sust, sust_fallback)
X_supply = build_X_for_model(supply_model, df_supply, supply_fallback)

# Run predictions
perf_preds = fabric_model.predict(X_perf)
sust_preds = sust_model.predict(X_sust)
supply_preds = supply_model.predict(X_supply)

# Attach to dataframes (an existing Sustainability_Pred column is left untouched)
df_fabric['Performance_Pred'] = perf_preds
if 'Sustainability_Pred' not in df_sust.columns:
    df_sust['Sustainability_Pred'] = sust_preds
df_supply['Supply_Priority_Pred'] = supply_preds

# print("Done predictions. Sample outputs:")
# display(df_fabric[['Fabric_Type','Performance_Pred']].head())
# display(df_sust[['Fabric_Type','Sustainability_Pred']].head())
# TODO: known issue - the supply predictions shown below are all "MEDIUM",
# although the dataset's own Supply_Priority column contains other classes.
# Investigate (encoding of Fabric_Type / Region is a likely place to start).
display(df_supply[['Fabric_Type','Supply_Priority_Pred']].head(55))




Unnamed: 0,Fabric_Type,Supply_Priority_Pred
0,Kevlar,MEDIUM
1,Nylon,MEDIUM
2,Bamboo Fibre,MEDIUM
3,Nylon,MEDIUM
4,Nylon,MEDIUM
5,Bamboo Fibre,MEDIUM
6,Cotton,MEDIUM
7,Polyester,MEDIUM
8,Wool,MEDIUM
9,Nylon,MEDIUM


In [15]:
# Cell: inspect_models.py
# Loads each model (trying known filename spellings) and prints its class,
# recorded input feature names, pipeline steps and a short repr.
import joblib, os
# Import the function, not the module: `import pprint` would rebind the
# `pprint` name that the first cell imports as a function and break any
# earlier cell that is re-run afterwards.
from pprint import pprint
MODEL_DIR = "pkl_file"

def load_if_exists(name):
    """Return the joblib-loaded object at MODEL_DIR/name, or None if the file is absent."""
    p = os.path.join(MODEL_DIR, name)
    if os.path.exists(p):
        m = joblib.load(p)
        return m
    return None

def _first_existing(*names):
    """Return the first model that loads among ``names`` (tried in order), else None.

    Uses an explicit None check instead of chaining with ``or``, which would
    depend on the truthiness of the loaded estimator object.
    """
    for candidate in names:
        loaded = load_if_exists(candidate)
        if loaded is not None:
            return loaded
    return None

fabric_model = _first_existing("Frabic_model.pkl", "Fabric_model.pkl", "fabric_model.pkl")
sust_model   = _first_existing("sustainability_model.pkl")
supply_model = _first_existing("supply_chain_model.pkl", "supply_model.pkl")

for name,model in [("fabric_model",fabric_model),("sustainability_model",sust_model),("supply_model",supply_model)]:
    print("\n---",name,"---")
    if model is None:
        print("MISSING")
        continue
    # try to print sklearn-known attributes
    print("Class:", type(model).__name__)
    if hasattr(model, "feature_names_in_"):
        print("feature_names_in_:")
        pprint(list(model.feature_names_in_))
    # if pipeline:
    if hasattr(model, "named_steps"):
        print("Pipeline named_steps:", list(model.named_steps.keys()))
    # show simple repr (truncated to 400 characters)
    print("Model repr (short):", str(model)[:400])



--- fabric_model ---
Class: RandomForestRegressor
feature_names_in_:
['Weight_gsm',
 'Breathability',
 'Insulation',
 'Tensile_Strength',
 'Moisture_Absorption',
 'Terrain_Encoded',
 'Fabric_Encoded']
Model repr (short): RandomForestRegressor(random_state=42)

--- sustainability_model ---
Class: RandomForestRegressor
feature_names_in_:
['Recyclability',
 'Biodegradability',
 'Water_Usage',
 'Carbon_Emissions',
 'Toxicity',
 'Microplastic_Shedding',
 'Durability']
Model repr (short): RandomForestRegressor(n_estimators=200)

--- supply_model ---
Class: RandomForestClassifier
Model repr (short): RandomForestClassifier(max_depth=18, n_estimators=350, random_state=42)
