In [71]:
import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from xgboost import XGBRegressor


LOAD DATASET

In [72]:
DATA_FILE = "ecopack_dataset_api_80.csv"

df = pd.read_csv(DATA_FILE)

print("Dataset loaded:", df.shape)


Dataset loaded: (20, 8)


CLEANING

In [73]:
df.columns = df.columns.str.strip()
df["material_Name"] = df["material_Name"].str.lower().str.strip()

df['strength_score'] = df['strength_score'].map({
    'Low': 1,
    'Medium': 2,
    'High': 3
})
df.drop_duplicates(inplace=True)
df.fillna(df.median(numeric_only=True), inplace=True)


FEATURE SELECTION

In [74]:
FEATURES = [
    "strength_score",
    "weight_capacity_kg",
    "recyclability_percent",
    "biodegradability_score"
]

TARGET_COST = "cost_per_unit"
TARGET_CO2 = "co2_emission_score"

X = df[FEATURES]
y_cost = df[TARGET_COST]
y_co2 = df[TARGET_CO2]


COST MODEL (Random Forest)

In [75]:
y_cost_log = np.log1p(y_cost)

X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X, y_cost_log, test_size=0.2, random_state=42
)

rf_cost = RandomForestRegressor(
    n_estimators=100,
    random_state=42
)

rf_cost.fit(X_train_c, y_train_c)

y_pred_cost_log = rf_cost.predict(X_test_c)
y_pred_cost = np.expm1(y_pred_cost_log)
y_true_cost = np.expm1(y_test_c)

mae_cost = mean_absolute_error(y_true_cost, y_pred_cost)
rmse_cost = np.sqrt(mean_squared_error(y_true_cost, y_pred_cost))
r2_cost = r2_score(y_true_cost, y_pred_cost)

print("\n COST MODEL (Random Forest) ")
print("MAE :", round(mae_cost, 4))
print("RMSE:", round(rmse_cost, 4))
print("R2  :", round(r2_cost, 4))



 COST MODEL (Random Forest) 
MAE : 0.185
RMSE: 0.2102
R2  : 0.6859


CO2 MODEL (XGBoost)

In [76]:
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X, y_co2, test_size=0.2, random_state=42
)

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_2)
X_test_scaled = scaler.transform(X_test_2)

xgb_co2 = XGBRegressor(
    n_estimators=100,
    random_state=42
)

xgb_co2.fit(X_train_scaled, y_train_2)

y_pred_co2 = xgb_co2.predict(X_test_scaled)

mae_co2 = mean_absolute_error(y_test_2, y_pred_co2)
rmse_co2 = np.sqrt(mean_squared_error(y_test_2, y_pred_co2))
r2_co2 = r2_score(y_test_2, y_pred_co2)

print("\n CO2 MODEL (XGBoost) ")
print("MAE :", round(mae_co2, 4))
print("RMSE:", round(rmse_co2, 4))
print("R2  :", round(r2_co2, 4))



 CO2 MODEL (XGBoost) 
MAE : 1.284
RMSE: 1.527
R2  : 0.3304


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


NORMALIZATION

In [None]:

df['cost_norm'] = 1 - safe_minmax(df['cost_per_unit'])
df['co2_norm'] = 1 - safe_minmax(df['co2_emission_score'])
df['strength_norm'] = safe_minmax(df['strength_score'])
df['recyclability_norm'] = df['recyclability_percent'] / 100
df['biodegradability_norm'] = df['biodegradability_score'] / 10

SUITABILITY_SCORE

In [80]:

df['suitability_score'] = (
    0.4 * df['strength_norm'] +
    0.3 * df['recyclability_norm'] +
    0.3 * df['biodegradability_norm']
)

FINAL SCORE

In [81]:
df['final_score'] = (
    0.4 * df['cost_norm'] +
    0.4 * df['co2_norm'] +
    0.2 * df['suitability_score']
)

ranked_materials = df.sort_values("final_score", ascending=False)

print("\nTop 5 Materials:")
print(ranked_materials[['material_id','material_Name','final_score']].head(10))



Top 5 Materials:
    material_id      material_Name  final_score
18           19        molded pulp     0.947074
14           15     recycled paper     0.919936
8             9        kraft paper     0.872439
16           17  sugarcane bagasse     0.871649
0             1              paper     0.869774
12           13          cardboard     0.868764
9            10            bagasse     0.856342
2             3        molded pulp     0.805845
7             8               jute     0.768284
10           11         hemp fiber     0.688118


In [78]:
joblib.dump(rf_cost, "cost_model.pkl")
joblib.dump(xgb_co2, "co2_model.pkl")
joblib.dump(scaler, "scaler.pkl")

print("\nModels saved successfully:")
print(" - cost_model.pkl")
print(" - co2_model.pkl")
print(" - scaler.pkl")



Models saved successfully:
 - cost_model.pkl
 - co2_model.pkl
 - scaler.pkl
