In [None]:
import pandas as pd

# Read the featured materials table from the processed-data folder.
df = pd.read_csv("data/processed/materials_featured.csv")

# Sanity check: dimensions first, then the column index.
for summary in (df.shape, df.columns):
    print(summary)

In [None]:
# Predictor columns shared by both models, kept in a fixed order.
# NOTE(review): if cost_efficiency_index was derived from cost_inr_per_kg,
# using it as a feature leaks the cost target into the inputs — confirm
# against the feature-engineering step.
feature_columns = [
    "strength_encoded",        # encoded strength level
    "weight_capacity",         # numeric
    "biodegradability_score",  # binary / ordinal
    "recyclability_pct",       # numeric %
    "cost_efficiency_index",   # engineered score
]

X = df[feature_columns]

In [None]:
# Two regression targets, one per model.
y_cost = df["cost_inr_per_kg"]   # cost prediction target
y_co2 = df["co2_impact_index"]   # CO2 impact target (engineered index as per mentor)

# Features and both targets should report the same number of rows.
for part in (X, y_cost, y_co2):
    print(part.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(X, y_cost, test_size=0.2, random_state=42)
X_train, X_test, y_cost_train, y_cost_test = split_parts

# Report the size of each feature partition.
for partition in (X_train, X_test):
    print(partition.shape)

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest for the cost target: 100 trees, seeded for repeatable fits.
rf_settings = {"n_estimators": 100, "random_state": 42}
cost_model = RandomForestRegressor(**rf_settings)

# Left as the final expression so the notebook displays the fitted estimator.
cost_model.fit(X_train, y_cost_train)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the cost model on the held-out rows.
cost_predictions = cost_model.predict(X_test)

# RMSE is taken as the square root of plain MSE. The `squared=False` shortcut
# was deprecated in scikit-learn 1.4 and removed in 1.6 (TypeError there),
# whereas this form works on every version.
cost_rmse = mean_squared_error(y_cost_test, cost_predictions) ** 0.5

print("COST MODEL EVALUATION")
print("MAE:", mean_absolute_error(y_cost_test, cost_predictions))
print("RMSE:", cost_rmse)
print("R2:", r2_score(y_cost_test, cost_predictions))

In [None]:
from xgboost import XGBRegressor

# Gradient-boosted regressor for the CO2 impact target.
co2_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Only the cost target went through train_test_split, so the CO2 target is
# aligned by picking the rows whose labels match the cost training split.
y_co2_train = y_co2.loc[y_cost_train.index]

# Left as the final expression so the notebook displays the fitted estimator.
co2_model.fit(X_train, y_co2_train)

In [None]:
# Score the CO2 model on the held-out rows.
co2_predictions = co2_model.predict(X_test)

# The CO2 target was never split directly, so select its test rows by the
# index labels of the feature rows the predictions were made from.
y_co2_test = y_co2.loc[X_test.index]

# RMSE is taken as the square root of plain MSE. The `squared=False` shortcut
# was deprecated in scikit-learn 1.4 and removed in 1.6 (TypeError there),
# whereas this form works on every version.
co2_rmse = mean_squared_error(y_co2_test, co2_predictions) ** 0.5

print("CO2 MODEL EVALUATION")
print("MAE:", mean_absolute_error(y_co2_test, co2_predictions))
print("RMSE:", co2_rmse)
print("R2:", r2_score(y_co2_test, co2_predictions))

In [None]:
# Attach both models' predictions to the held-out rows.
df_test = df.loc[X_test.index].assign(
    predicted_cost=cost_predictions,
    predicted_co2=co2_predictions,
)

# Rank each prediction within the test set (rank 1 = smallest value), then
# blend the two ranks with equal weight into one combined score.
cost_rank = df_test["predicted_cost"].rank(ascending=True)
co2_rank = df_test["predicted_co2"].rank(ascending=True)
df_test["final_score"] = 0.5 * cost_rank + 0.5 * co2_rank

# Lower score = better material, so the best candidates come first.
df_test_sorted = df_test.sort_values("final_score")

df_test_sorted.head()

In [None]:
import joblib

import os

# joblib.dump opens the target path for writing and does not create missing
# parent folders, so make sure the output directory exists first.
os.makedirs("models", exist_ok=True)

# Persist both fitted estimators.
joblib.dump(cost_model, "models/cost_model.pkl")
joblib.dump(co2_model, "models/co2_model.pkl")

print("Models saved successfully.")