In [2]:
# Imports
import sys
sys.executable
import numpy as np
import requests # for downloading webpages
from bs4 import BeautifulSoup  # for parsing HTML
import pandas as pd # for storing and handling datasets
import time # for adding delays between requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

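SCORE SYSTEM: compute a 0–100 sustainability score for each product from its material, care, origin and certification data.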

In [None]:
# Load the merged product dataset from a CSV file (path is relative to the
# working directory). df_merged is the input to calculate_sustainability_score.
df_merged = pd.read_csv("Merged_product_dataset.csv")


In [15]:
# function score system
def calculate_sustainability_score(df_merged):
    """
    Compute a 0-100 sustainability score for every product in ``df_merged``.

    The input is expected to hold one row per (product, material) pair,
    identified by ``Id``. The score is built in five steps:

    1. Each material impact (CO2, water, energy, chemical) is weighted by
       ``Percentage_Material / 100`` and summed per ``Id``.
    2. Product-level attributes (brand, price, care, origin, certification
       bonuses) are taken from the first available value per ``Id``.
    3. Material and care impacts are min-max normalised across products
       (0 = best, 1 = worst). Origin indices are used as-is; they are
       expected to already be on a 0-1 scale.
    4. The environmental burden is the mean of the ten normalised
       indicators, skipping indicators that are missing for a product.
    5. ``S_final = clip(1 - burden + Cert1_Bonus + Cert2_Bonus, 0, 1)`` and
       ``Score_100 = round(100 * S_final)``.

    Parameters
    ----------
    df_merged : pandas.DataFrame
        Merged product/material table. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per ``Id`` with the identifying columns, ``Score_100``,
        ``S_final``, ``S_env``, ``Score_env_burden``, ``Certification_Total``
        and the ten ``*_norm`` indicator columns.

    Raises
    ------
    KeyError
        If any required column is absent from ``df_merged``.
    """
    material_cols = ["Material_CO2", "Material_Water", "Material_Energy", "Material_Chemical"]
    care_cols = ["Care_CO2", "Care_Water", "Care_Energy"]
    origin_cols = ["Origin_Grid", "Origin_Transport", "Origin_Manufacturing"]
    product_cols = [
        "Brand", "Product_Name", "Price", "Category", "Subcategory",
        *care_cols, *origin_cols,
        "Cert1_Bonus", "Cert2_Bonus",
    ]

    # Fail early with one readable message instead of a KeyError raised
    # somewhere in the middle of the pipeline.
    required = ["Id", "Percentage_Material", *material_cols, *product_cols]
    missing = [c for c in required if c not in df_merged.columns]
    if missing:
        raise KeyError(f"df_merged is missing required columns: {missing}")

    # Work on a copy so the caller's frame is left untouched
    df = df_merged.copy()

    # WEIGHTED MATERIAL IMPACTS (row-level)
    share = df["Percentage_Material"] / 100
    weighted_cols = []
    for col in material_cols:
        weighted_name = f"Weighted_{col}"
        df[weighted_name] = df[col] * share
        weighted_cols.append(weighted_name)

    # AGGREGATE MATERIAL IMPACTS PER PRODUCT
    # min_count=1 keeps the total NaN when a product has no usable material
    # data at all. A plain sum() would return 0 there, which after min-max
    # normalisation is the best possible score on every material indicator.
    material_agg = df.groupby("Id")[weighted_cols].sum(min_count=1).reset_index()

    # BUILD PRODUCT-LEVEL TABLE and attach the material aggregates
    prod = df.groupby("Id", as_index=False).agg({col: "first" for col in product_cols})
    prod = prod.merge(material_agg, on="Id", how="left")

    # NORMALIZATION (min-max)
    def minmax(series):
        lo, hi = series.min(), series.max()
        if hi == lo:
            # Constant column: no product is worse than another
            return series * 0
        return (series - lo) / (hi - lo)

    # MATERIAL and CARE normalization (0 = best, 1 = worst)
    for col in material_cols:
        prod[f"{col}_norm"] = minmax(prod[f"Weighted_{col}"])
    for col in care_cols:
        prod[f"{col}_norm"] = minmax(prod[col])

    # ORIGIN indices are taken as-is (expected to already be 0-1 impact indices)
    for col in origin_cols:
        prod[f"{col}_norm"] = prod[col]

    env_cols = [f"{col}_norm" for col in material_cols + care_cols + origin_cols]

    # ENVIRONMENTAL BURDEN SCORE (0 = best, 1 = worst)
    # mean(axis=1) skips NaN, so a missing indicator is left out of the
    # average instead of counting as zero impact.
    prod["Score_env_burden"] = prod[env_cols].mean(axis=1)

    # POSITIVE SUSTAINABILITY SCORE (0-1): flip burden -> sustainability
    prod["S_env"] = 1 - prod["Score_env_burden"]

    # Certification bonus (positive); a missing bonus counts as 0
    prod["Certification_Total"] = prod["Cert1_Bonus"].fillna(0) + prod["Cert2_Bonus"].fillna(0)

    # Final sustainability score, capped to the 0-1 range
    prod["S_final"] = (prod["S_env"] + prod["Certification_Total"]).clip(0, 1)

    # FINAL 0-100 SUSTAINABILITY SCORE, rounded to a whole number
    prod["Score_100"] = (prod["S_final"] * 100).round(0)

    # RETURN CLEAN TABLE
    cols_out = [
        "Id", "Brand", "Product_Name", "Price",
        "Category", "Subcategory",
        "Score_100", "S_final", "S_env", "Score_env_burden",
        "Certification_Total"
    ] + env_cols

    return prod[cols_out]


In [9]:
# Score every product in the merged dataset
df_score = calculate_sustainability_score(df_merged)

# Preview the headline columns for the first five products
df_score.loc[:, ["Id", "Product_Name", "Brand", "Price", "Score_100"]].head(5)


Unnamed: 0,Id,Product_Name,Brand,Price,Score_100
0,1,Jacquard-knit merino wool jumper,H&M,€ 79.99,76.0
1,2,Oversize Jumper,H&M,€ 24.99,44.0
2,3,Soft Knit Jumper,Zara,€ 25.95,71.0
3,4,Cashmere Oversize Sweater,Zara,€ 139.00,50.0
4,5,Women's Recycled Wool-Blend Crewneck Sweater,Patagonia,€ 160.00,100.0


In [10]:
# Count missing (NaN) values in the final score column
df_score["Score_100"].isna().sum()


np.int64(0)

In [11]:
# List the column names of the score table
print(df_score.columns.tolist())

['Id', 'Brand', 'Product_Name', 'Price', 'Category', 'Subcategory', 'Score_100', 'S_final', 'S_env', 'Score_env_burden', 'Certification_Total', 'Material_CO2_norm', 'Material_Water_norm', 'Material_Energy_norm', 'Material_Chemical_norm', 'Care_CO2_norm', 'Care_Water_norm', 'Care_Energy_norm', 'Origin_Grid_norm', 'Origin_Transport_norm', 'Origin_Manufacturing_norm']


In [12]:
# Display the score table (one row per product Id)
df_score

Unnamed: 0,Id,Brand,Product_Name,Price,Category,Subcategory,Score_100,S_final,S_env,Score_env_burden,...,Material_CO2_norm,Material_Water_norm,Material_Energy_norm,Material_Chemical_norm,Care_CO2_norm,Care_Water_norm,Care_Energy_norm,Origin_Grid_norm,Origin_Transport_norm,Origin_Manufacturing_norm
0,1,H&M,Jacquard-knit merino wool jumper,€ 79.99,Woman,Jumper,76.0,0.760372,0.610372,0.389628,...,0.442509,0.294938,0.779978,0.728850,0.0,0.0,0.0,0.65,0.40,0.60
1,2,H&M,Oversize Jumper,€ 24.99,Woman,Jumper,44.0,0.437647,0.437647,0.562353,...,0.181185,0.024195,0.377338,0.390817,1.0,1.0,1.0,0.65,0.40,0.60
2,3,Zara,Soft Knit Jumper,€ 25.95,Woman,Jumper,71.0,0.711008,0.461008,0.538992,...,0.133275,0.025011,0.264026,0.317607,1.0,1.0,1.0,0.65,0.40,0.60
3,4,Zara,Cashmere Oversize Sweater,€ 139.00,Woman,Jumper,50.0,0.499217,0.499217,0.500783,...,1.000000,0.257830,1.000000,1.000000,0.0,0.0,0.0,0.55,0.45,0.75
4,5,Patagonia,Women's Recycled Wool-Blend Crewneck Sweater,€ 160.00,Woman,Sweater,100.0,1.000000,0.865000,0.135000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.45,0.35,0.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,1002,Zara,Jacket Slim Fit,€ 83.95,Man,Jacket,46.0,0.459131,0.459131,0.540869,...,0.146341,0.005492,0.229923,0.276934,1.0,1.0,1.0,0.55,0.45,0.75
1002,1003,Patagonia,Jumper Textured,€ 113.09,Man,Jumper,100.0,1.000000,0.610372,0.389628,...,0.442509,0.294938,0.779978,0.728850,0.0,0.0,0.0,0.65,0.40,0.60
1003,1004,Patagonia,Jumper Asymmetric,€ 108.94,Man,Jumper,87.0,0.866471,0.466471,0.533529,...,0.163763,0.540270,0.342794,0.638467,1.0,1.0,1.0,0.25,0.10,0.30
1004,1005,Patagonia,Jumper Ribbed,€ 224.17,Woman,Jumper,97.0,0.969217,0.569217,0.430783,...,1.000000,0.257830,1.000000,1.000000,0.0,0.0,0.0,0.40,0.20,0.45


In [13]:
# Save the score table as CSV, leaving out the DataFrame index
df_score.to_csv("sustainability_scores.csv", index=False)


In [None]:
# Also save the score table as a pickle file
df_score.to_pickle("sustainability_scores.pkl")