In [None]:
import psycopg2
import pandas as pd

conn = psycopg2.connect(
    host="localhost",
    database="Eco_Pack",
    user="postgres",
    password="password"
)

query = "SELECT * FROM materials;"
df = pd.read_sql(query, conn)

print(df.head())

print(df.shape)
print(df.columns)

   material_id                          material_name  strength_score  \
0            4                  Kraft Paper Variant 4               6   
1            5               Recycled Paper Variant 5               9   
2            6                Bagasse Fiber Variant 6               6   
3            7  Biodegradable Plastic (PLA) Variant 7               6   
4            8                Bagasse Fiber Variant 8              10   

   weight_capacity_kg  biodegradability_score  co2_emission_kg  \
0               10.02                       5             3.01   
1               19.63                       7             4.23   
2               46.70                       6             3.24   
3               41.30                       1             1.11   
4               13.16                       4             4.15   

   recyclability_percent  cost_per_unit_usd moisture_resistance  \
0                     62               2.01                High   
1                     73      

  df = pd.read_sql(query, conn)


In [3]:
X = df[
    [
        "strength_score",
        "weight_capacity_kg",
        "biodegradability_score",
        "recyclability_percent",
        "cost_per_unit_usd",
        "reuse_cycles",
        "eco_score",
        "co2_per_strength",
        "cost_efficiency",
        "reuse_efficiency"
    ]
]

y = df["co2_emission_kg"]

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

In [9]:
print(X.shape)
print(y.shape)

(500, 10)
(500,)


In [17]:
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
import numpy as np

y_pred = model.predict(X_test)

print("COST MODEL EVALUATION")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R2 Score:", r2_score(y_test, y_pred))

COST MODEL EVALUATION
MAE: 0.14117099999999963
RMSE: 0.18707034639407671
R2 Score: 0.9879615572133281


In [19]:
X_cost_pred = df[
    [
        "strength_score",
        "weight_capacity_kg",
        "biodegradability_score",
        "recyclability_percent",
        "reuse_cycles",
        "eco_score",              
        "co2_emission_kg",        
        "reuse_efficiency"
    ]
]
y_cost_pred = df["cost_per_unit_usd"]

In [21]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_cost_pred, y_cost_pred, test_size=0.2, random_state=42
)

In [23]:
from sklearn.ensemble import RandomForestRegressor

cost_model = RandomForestRegressor(random_state=42)
cost_model.fit(X_train, y_train)

In [25]:
print(X_cost_pred.shape)
print(y_cost_pred.shape)

(500, 8)
(500,)


In [27]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

cost_predictions = cost_model.predict(X_test)

print("COST MODEL EVALUATION")
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))

COST MODEL EVALUATION
RMSE: 2.484798187881664
MAE: 2.0199510000000003
R2 Score: -15.143966882630007


In [29]:
import joblib
joblib.dump(model, 'cost_model.pkl')
joblib.dump(cost_model, 'co2_model.pkl')

['co2_model.pkl']

In [31]:
new_material_cost = pd.DataFrame([{
    "strength_score": 85,
    "weight_capacity_kg": 40,
    "biodegradability_score": 70,
    "recyclability_percent": 60,
    "reuse_cycles": 8,
    "eco_score": 65,
    "co2_emission_kg": 4.83,
    "reuse_efficiency": 0.8
}])

predicted_cost = cost_model.predict(new_material_cost)
print("Predicted cost per unit (USD):", predicted_cost[0])

Predicted cost per unit (USD): 2.3994999999999993


In [33]:
new_material = [[
    85,  
    40,   
    70,     
    60,     
    2.5,    
    8,     
    65,     
    0.5,    
    34,  
    0.8     
]]

predicted_co2 = model.predict(new_material)
print("Predicted CO2 emission:", predicted_co2)

Predicted CO2 emission: [4.8269]


