In [3]:
# Step 1: Import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import r2_score, classification_report
import itertools


In [17]:
file_path = "LCA_Input_Dataset_5000.csv"
df = pd.read_csv(file_path)

np.random.seed(42)

df['co2'] = 0.05*df['electricity_kWh_per_FU'] + \
            0.02*df['fuel_MJ_per_FU'] + \
            0.01*df['transport_distance_km'] - \
            0.03*df['recycled_content_pct'] + \
            np.random.normal(0,1,len(df))

df['energy'] = df['electricity_kWh_per_FU']*3.6 + df['fuel_MJ_per_FU'] + \
               np.random.normal(0,5,len(df))

df['water'] = 5 + 0.02*df['transport_distance_km'] + \
              0.05*df['product_lifetime_years'] + \
              np.random.normal(0,1,len(df))

df['is_circular'] = ((df['end_of_life_recycle_rate_pct'] > 50) | 
                     (df['product_lifetime_years'] > 15)).astype(int)

print("Dataset shape:", df.shape)
df.head()


Dataset shape: (5000, 20)


Unnamed: 0,project_name,functional_unit,metal,production_route,recycled_content_pct,electricity_kWh_per_FU,electricity_grid_region,fuel_MJ_per_FU,fuel_type,transport_mode,transport_distance_km,process_yield_pct,end_of_life_recycle_rate_pct,product_lifetime_years,region,data_confidence,co2,energy,water,is_circular
0,Al-proj-0001,1 tonne,Aluminium,mix,30,2078,US-Grid,1400,coal,rail,780,96,23,43,China,predicted,139.296714,8878.681202,22.071505,1
1,Al-proj-0002,1 tonne,Aluminium,mix,25,674,IN-Grid,562,oil,ship,1245,97,93,35,India,user,56.501736,2986.132929,31.344501,1
2,Co-proj-0003,1 tonne,Copper,mix,25,2192,CN-Grid,1595,coal,rail,1881,99,82,22,China,user,160.207689,9477.221784,43.122619,1
3,Co-proj-0004,1 tonne,Copper,secondary,46,5751,IN-Grid,1994,biomass,truck,361,86,57,6,Europe,extracted,331.18303,22695.949549,12.630418,1
4,Co-proj-0005,1 tonne,Copper,secondary,45,6210,CN-Grid,1248,oil,ship,367,99,75,37,Europe,predicted,337.545847,23607.664145,15.387179,1


In [5]:
# def generate_dataset(n=1000, random_state=42):
#     np.random.seed(random_state)

#     process_type = np.random.choice(['primary','recycled'], size=n, p=[0.7,0.3])
#     energy_coal_pct = np.clip(np.random.normal(60, 20, size=n), 0, 100)
#     energy_hydro_pct = 100 - energy_coal_pct
#     transport_mode = np.random.choice(['truck','rail'], size=n, p=[0.7,0.3])
#     transport_distance = np.random.uniform(50, 500, size=n)
#     end_of_life = np.random.choice(['landfill','recycle'], size=n, p=[0.6,0.4])
#     recycled_content_pct = np.where(process_type=='recycled',
#                                     np.random.uniform(40, 90, size=n),
#                                     np.random.uniform(0, 20, size=n))

#     co2 = (
#         0.1 * energy_coal_pct +
#         np.where(process_type == 'primary', 15, 5) +
#         0.01 * transport_distance -
#         0.05 * recycled_content_pct +
#         np.where(end_of_life == 'landfill', 3, -1) +
#         np.random.normal(0, 1, n)
#     )

#     energy = (
#         np.where(process_type == 'primary', 6000, 2000) +
#         20 * energy_coal_pct -
#         15 * recycled_content_pct +
#         np.random.normal(0, 100, n)
#     )

#     water = (
#         np.where(process_type == 'primary', 25, 8) +
#         0.01 * transport_distance +
#         np.random.normal(0, 1, n)
#     )

#     circular = ((recycled_content_pct > 30) & (end_of_life == 'recycle')).astype(int)

#     df = pd.DataFrame({
#         'process_type': process_type,
#         'energy_coal_pct': energy_coal_pct,
#         'energy_hydro_pct': energy_hydro_pct,
#         'transport_mode': transport_mode,
#         'transport_distance': transport_distance,
#         'end_of_life': end_of_life,
#         'recycled_content_pct': recycled_content_pct,
#         'co2': co2,
#         'energy': energy,
#         'water': water,
#         'is_circular': circular
#     })
#     return df

# # Generate dataset
# df = generate_dataset()
# df.head()


In [None]:
# Step 3: Split into features and targets
target_cols = ['co2', 'energy', 'water', 'is_circular']
X = df.drop(columns=target_cols)
y_co2 = df['co2']
y_energy = df['energy']
y_water = df['water']
y_circ = df['is_circular']


numeric_cols = X.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = X.select_dtypes(exclude=[np.number]).columns.tolist()

print("Numeric features:", numeric_cols)
print("Categorical features:", categorical_cols)


Numeric features: ['recycled_content_pct', 'electricity_kWh_per_FU', 'fuel_MJ_per_FU', 'transport_distance_km', 'process_yield_pct', 'end_of_life_recycle_rate_pct', 'product_lifetime_years']
Categorical features: ['project_name', 'functional_unit', 'metal', 'production_route', 'electricity_grid_region', 'fuel_type', 'transport_mode', 'region', 'data_confidence']


In [None]:
# Step 4: Preprocessing pipeline
numeric_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', numeric_pipeline, numeric_cols),
    ('cat', categorical_pipeline, categorical_cols)
])


In [None]:
# Step 5: Train/Test split
X_train, X_test, y_co2_train, y_co2_test, y_energy_train, y_energy_test, y_water_train, y_water_test, y_circ_train, y_circ_test = \
    train_test_split(X, y_co2, y_energy, y_water, y_circ, test_size=0.2, random_state=42)


X_train_proc = preprocessor.fit_transform(X_train)
X_test_proc = preprocessor.transform(X_test)

reg_co2 = RandomForestRegressor(n_estimators=200, random_state=42).fit(X_train_proc, y_co2_train)
reg_energy = RandomForestRegressor(n_estimators=200, random_state=42).fit(X_train_proc, y_energy_train)
reg_water = RandomForestRegressor(n_estimators=200, random_state=42).fit(X_train_proc, y_water_train)
clf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train_proc, y_circ_train)

print("âœ… Model Performance:")
print("CO2 RÂ²:", r2_score(y_co2_test, reg_co2.predict(X_test_proc)))
print("Energy RÂ²:", r2_score(y_energy_test, reg_energy.predict(X_test_proc)))
print("Water RÂ²:", r2_score(y_water_test, reg_water.predict(X_test_proc)))
print("\nCircularity classification report:\n", classification_report(y_circ_test, clf.predict(X_test_proc)))


âœ… Model Performance:
CO2 RÂ²: 0.9996051735257988
Energy RÂ²: 0.9998905834401801
Water RÂ²: 0.9908670775776819

Circularity classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        79
           1       1.00      1.00      1.00       921

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



In [None]:
# Step 6: Prediction for one process (first row in dataset)
sample = X.iloc[0:1]
sample_proc = preprocessor.transform(sample)

pred = {
    "CO2 (t/FU)": round(reg_co2.predict(sample_proc)[0], 2),
    "Energy (MJ/FU)": round(reg_energy.predict(sample_proc)[0], 2),
    "Water (mÂ³/FU)": round(reg_water.predict(sample_proc)[0], 2),
    "Circular?": "Yes" if clf.predict(sample_proc)[0] else "No"
}

print("Predictions for sample process:\n", pred)


ðŸ”® Predictions for sample process:
 {'CO2 (t/FU)': np.float64(138.66), 'Energy (MJ/FU)': np.float64(8873.4), 'Water (mÂ³/FU)': np.float64(22.42), 'Circular?': 'Yes'}


In [None]:

# Step 8: Circularity evaluation & recommendations (Standard)

def evaluate_circularity(row, pred):
    recs = []
    
    if row['end_of_life_recycle_rate_pct'] < 50:
        recs.append("Increase end-of-life recycling rate above 50% to improve circularity.")
    if row['product_lifetime_years'] < 15:
        recs.append("Extend product lifetime (durability, repairability).")
    if row['electricity_grid_region'].lower() in ["coal", "mixed"] or row['electricity_kWh_per_FU'] > 5000:
        recs.append("Shift energy mix towards renewable or low-carbon sources.")
    if row['transport_mode'] == "truck" and row['transport_distance_km'] > 500:
        recs.append("Switch to rail/ship for long-distance transport.")
    if row['production_route'] == "primary" and row['recycled_content_pct'] < 30:
        recs.append("Use more recycled content instead of primary raw material.")
    
    if not recs:
        recs.append("Already highly circular.")
    
    return recs

# Test on a few random processes
samples = X.sample(3, random_state=42)

for i, (_, row) in enumerate(samples.iterrows(), start=1):
    sample_proc = preprocessor.transform(pd.DataFrame([row]))
    pred = {
        "CO2 (t/FU)": round(reg_co2.predict(sample_proc)[0], 2),
        "Energy (MJ/FU)": round(reg_energy.predict(sample_proc)[0], 2),
        "Water (mÂ³/FU)": round(reg_water.predict(sample_proc)[0], 2),
        "Circular?": "Yes" if clf.predict(sample_proc)[0] else "No"
    }
    
    print(f"\nProcess {i}: {row['project_name']} ({row['metal']})")
    print("Functional unit:", row['functional_unit'])
    print("Predictions:", pred)
    print("Recommendations:")
    for rec in evaluate_circularity(row, pred):
        print(" -", rec)



Process 1: Co-proj-1502 (Copper)
Functional unit: 1 tonne
Predictions: {'CO2 (t/FU)': np.float64(144.63), 'Energy (MJ/FU)': np.float64(9419.58), 'Water (mÂ³/FU)': np.float64(22.48), 'Circular?': 'Yes'}
Recommendations:
 - Extend product lifetime (durability, repairability).
 - Switch to rail/ship for long-distance transport.

Process 2: Al-proj-2587 (Aluminium)
Functional unit: 1 tonne
Predictions: {'CO2 (t/FU)': np.float64(404.21), 'Energy (MJ/FU)': np.float64(28682.92), 'Water (mÂ³/FU)': np.float64(7.42), 'Circular?': 'Yes'}
Recommendations:
 - Shift energy mix towards renewable or low-carbon sources.

Process 3: Al-proj-2654 (Aluminium)
Functional unit: 1 tonne
Predictions: {'CO2 (t/FU)': np.float64(535.17), 'Energy (MJ/FU)': np.float64(37707.43), 'Water (mÂ³/FU)': np.float64(32.85), 'Circular?': 'Yes'}
Recommendations:
 - Shift energy mix towards renewable or low-carbon sources.
 - Switch to rail/ship for long-distance transport.


In [None]:
def recommend(current, preprocessor, reg_co2, reg_energy, reg_water):
    
    current = current.to_dict() if not isinstance(current, dict) else current


    coal = current.get("energy_coal_pct", 50)
    hydro = current.get("energy_hydro_pct", 100 - coal)
    rec = current.get("recycled_content_pct", 10)
    eol = current.get("end_of_life", "landfill")

    rec_options = [rec, min(100, rec+20)]
    coal_options = [coal, max(0, coal-20)]
    eol_options = ["landfill", "recycle"]

    scenarios = []
    for r, c, e in itertools.product(rec_options, coal_options, eol_options):
        scenario = current.copy()
        scenario["recycled_content_pct"] = r
        scenario["energy_coal_pct"] = c
        scenario["energy_hydro_pct"] = 100 - c
        scenario["end_of_life"] = e
        scenarios.append(scenario)

    df = pd.DataFrame(scenarios)
    X_proc = preprocessor.transform(df)
    df["pred_co2"] = reg_co2.predict(X_proc)
    df["pred_energy"] = reg_energy.predict(X_proc)
    df["pred_water"] = reg_water.predict(X_proc)

    return df.sort_values(by="pred_co2").reset_index(drop=True)


In [None]:
current = df.iloc[0]  

recommendations = recommend(current, preprocessor, reg_co2, reg_energy, reg_water)



pd.set_option("display.max_columns", None)   
pd.set_option("display.width", 1000)
print(recommendations)


   project_name functional_unit      metal production_route  recycled_content_pct  electricity_kWh_per_FU electricity_grid_region  fuel_MJ_per_FU fuel_type transport_mode  transport_distance_km  process_yield_pct  end_of_life_recycle_rate_pct  product_lifetime_years region data_confidence         co2       energy      water  is_circular  energy_coal_pct  energy_hydro_pct end_of_life    pred_co2  pred_energy  pred_water
0  Al-proj-0001         1 tonne  Aluminium              mix                    50                    2078                 US-Grid            1400      coal           rail                    780                 96                            23                      43  China       predicted  139.296714  8878.681202  22.071505            1               30                70     recycle  138.497203  8870.802178   22.487398
1  Al-proj-0001         1 tonne  Aluminium              mix                    50                    2078                 US-Grid            1400      coa