In [1]:
# Imports
import sys
sys.executable
import numpy as np
import requests # for downloading webpages
from bs4 import BeautifulSoup  # for parsing HTML
import pandas as pd # for storing and handling datasets
import time # for adding delays between requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

SCORE SYSTEM:

In [None]:
# Scoring function: builds the per-product sustainability score table
def calculate_sustainability_score(df_merged: pd.DataFrame) -> pd.DataFrame:
    """Build a per-product sustainability score table on a 0–100 scale.

    The input holds one row per (product, material) pair. Material impacts are
    weighted by ``Percentage_Material`` and summed per ``Id``; every other
    attribute is taken from the first non-null value within each ``Id`` group.

    Scoring steps:
      1. Weighted material impacts and care impacts are min–max normalised
         across products (0 = lowest impact, 1 = highest impact).
      2. ``Origin_*`` columns are copied unchanged; they are treated as
         already being 0–1 impact indices and are not validated here.
      3. ``Score_env_burden`` is the row-wise mean of all normalised columns
         (missing values are skipped by the mean).
      4. ``S_env = 1 - Score_env_burden``; the two certification bonuses
         (missing = 0) are added and the result is clipped to [0, 1].
      5. ``Score_100`` is that value times 100, rounded to 0 decimals.

    Parameters
    ----------
    df_merged : pandas.DataFrame
        Must contain ``Id``, ``Percentage_Material``, the four ``Material_*``
        impact columns, ``Brand``, ``Product_Name``, ``Price``, ``Category``,
        ``Subcategory``, the three ``Care_*`` columns, the three ``Origin_*``
        columns, ``Cert1_Bonus`` and ``Cert2_Bonus``. The frame is not
        modified; all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        One row per ``Id`` with the descriptive columns, ``Score_100``,
        ``S_final``, ``S_env``, ``Score_env_burden``, ``Certification_Total``
        and the ten ``*_norm`` columns.

    Raises
    ------
    KeyError
        If any required column is absent from ``df_merged``.
    """
    material_cols = ["Material_CO2", "Material_Water", "Material_Energy", "Material_Chemical"]
    care_cols = ["Care_CO2", "Care_Water", "Care_Energy"]
    origin_cols = ["Origin_Grid", "Origin_Transport", "Origin_Manufacturing"]
    descriptive_cols = ["Brand", "Product_Name", "Price", "Category", "Subcategory"]
    bonus_cols = ["Cert1_Bonus", "Cert2_Bonus"]

    # Fail early with one message listing everything that is missing, rather
    # than a KeyError on whichever column happens to be touched first.
    required = [
        "Id", "Percentage_Material",
        *material_cols, *descriptive_cols, *care_cols, *origin_cols, *bonus_cols,
    ]
    missing = [col for col in required if col not in df_merged.columns]
    if missing:
        raise KeyError(f"df_merged is missing required column(s): {missing}")

    # Work on a copy so the caller's frame is left untouched.
    df = df_merged.copy()

    # WEIGHTED MATERIAL IMPACTS (row-level)
    share = df["Percentage_Material"] / 100
    weighted_cols = []
    for col in material_cols:
        weighted_name = f"Weighted_{col}"
        df[weighted_name] = df[col] * share
        weighted_cols.append(weighted_name)

    # AGGREGATE MATERIAL IMPACTS PER PRODUCT
    # min_count=1 keeps a product whose material data is entirely missing as
    # NaN. With the default (min_count=0) it would sum to 0, which min-max
    # normalisation would then rank as the lowest-impact product.
    material_agg = df.groupby("Id")[weighted_cols].sum(min_count=1).reset_index()

    # BUILD PRODUCT-LEVEL TABLE and merge the material aggregates into it
    first_cols = descriptive_cols + care_cols + origin_cols + bonus_cols
    prod = df.groupby("Id", as_index=False).agg({col: "first" for col in first_cols})
    prod = prod.merge(material_agg, on="Id", how="left")

    # NORMALIZATION (min–max)
    def minmax(series):
        lowest, highest = series.min(), series.max()
        if highest == lowest:
            # Constant column: no spread to scale by, so every product gets 0.
            return series * 0
        return (series - lowest) / (highest - lowest)

    # MATERIAL and CARE normalization (0 = best, 1 = worst)
    for col in material_cols:
        prod[f"{col}_norm"] = minmax(prod[f"Weighted_{col}"])
    for col in care_cols:
        prod[f"{col}_norm"] = minmax(prod[col])

    # ORIGIN indices are used as-is (treated as 0–1 impact indices).
    for col in origin_cols:
        prod[f"{col}_norm"] = prod[col]

    # ENVIRONMENTAL BURDEN SCORE (0 = best, 1 = worst)
    env_cols = [f"{col}_norm" for col in material_cols + care_cols + origin_cols]
    prod["Score_env_burden"] = prod[env_cols].mean(axis=1)

    # POSITIVE SUSTAINABILITY SCORE (0–1): flip burden -> sustainability
    prod["S_env"] = 1 - prod["Score_env_burden"]

    # Certification bonus (positive); a missing bonus counts as 0.
    prod["Certification_Total"] = prod["Cert1_Bonus"].fillna(0) + prod["Cert2_Bonus"].fillna(0)

    # Final sustainability score, kept inside 0–1 even after the bonus.
    prod["S_final"] = (prod["S_env"] + prod["Certification_Total"]).clip(0, 1)

    # FINAL 0–100 SUSTAINABILITY SCORE (rounded)
    prod["Score_100"] = (prod["S_final"] * 100).round(0)

    # RETURN CLEAN TABLE
    cols_out = [
        "Id", "Brand", "Product_Name", "Price",
        "Category", "Subcategory",
        "Score_100", "S_final", "S_env", "Score_env_burden",
        "Certification_Total",
    ] + env_cols

    return prod[cols_out]
