In [1]:
import requests
from bs4 import BeautifulSoup
import unicodedata
import re
import time 
import pandas as pd
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
# Selected brands come from the Lyst Index, updated quarterly

def get_brand_name(url_brands='https://www.lyst.com/the-lyst-index/the-lyst-index-q2-25/', timeout=30):
    """Scrape the brand names listed on a Lyst Index report page.

    Parameters
    ----------
    url_brands : str, optional
        URL of the Lyst Index page to scrape. The default points at the
        ``q2-25`` edition; pass another edition's URL to refresh the list.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``requests`` gives up.

    Returns
    -------
    list of str
        Brand names in document order, whitespace-trimmed and title-cased.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    ValueError
        If the ``div.chart-update`` container is missing from the page,
        which most likely means the page layout has changed.
    """
    r = requests.get(url_brands, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    r.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup_lyst = BeautifulSoup(r.content, 'html.parser')

    grid = soup_lyst.find('div', attrs={'class': 'chart-update'})
    if grid is None:
        raise ValueError(
            f"Could not find the 'chart-update' container on {url_brands}"
        )

    brand_elements = grid.find_all('div', attrs={'class': 'mono-type padding-left'})

    # Trim surrounding whitespace before title-casing so stray newlines or
    # indentation in the markup do not leak into the names.
    return [element.get_text().strip().title() for element in brand_elements]

In [3]:
# To use the brand names to build the URLs on Good On You, we need to clean them

def _brand_to_slug(brand):
    """Turn a display brand name into a lowercase, hyphen-separated slug.

    Accents are removed, every character other than ASCII letters, digits,
    whitespace and hyphens is dropped, and each run of whitespace/hyphens is
    collapsed into a single hyphen (so "Dolce & Gabbana" -> "dolce-gabbana"
    rather than "dolce--gabbana").
    """
    # NFD splits accented letters into base letter + combining mark ('Mn');
    # dropping the marks leaves the plain base letter.
    remove_accents = ''.join(
        c for c in unicodedata.normalize('NFD', brand)
        if unicodedata.category(c) != 'Mn'
    )
    cleaned = re.sub(r"[^a-zA-Z0-9\s-]", "", remove_accents)
    # Collapse any whitespace (spaces, tabs, newlines) and repeated hyphens
    # into one hyphen; removing a symbol such as "&" otherwise leaves a
    # double separator behind.
    cleaned = re.sub(r"[\s-]+", "-", cleaned.lower())
    # A separator at either end would produce a slug like "-gucci-".
    return cleaned.strip("-")


brand_id = get_brand_name()

# One slug per scraped brand, in the same order as brand_id.
brand_id_cleaned = [_brand_to_slug(brand) for brand in brand_id]


In [4]:
# Function to get data from Good On You website

def _score_text(scores, position):
    """Return the text of ``scores[position]``, or "Not available" on failure."""
    try:
        return scores[position].text
    except Exception:
        # Best-effort: a missing element (IndexError) or a Selenium error while
        # reading the text must not abort the whole scrape. Unlike a bare
        # ``except``, this still lets KeyboardInterrupt/SystemExit through.
        return "Not available"


def get_data(brand_id_cleaned, page_load_wait=2, request_timeout=30):
    """Collect Good On You ratings for each brand slug.

    For every slug the brand page is fetched twice: once with ``requests`` to
    read the overall rating from the static HTML, and once with Selenium to
    read the three category scores from the rendered page.

    Parameters
    ----------
    brand_id_cleaned : iterable of str
        Brand slugs used to build ``https://directory.goodonyou.eco/brands/<slug>``.
    page_load_wait : float, optional
        Seconds to sleep after navigating so the page can render its scores.
    request_timeout : float, optional
        Seconds to wait for the ``requests`` HTTP response.

    Returns
    -------
    dict
        Maps a 0-based position to a dict with the keys "Brand",
        "Overall sustainability rating", "Planet rating", "People rating"
        and "Animals rating". Any value that cannot be found on the page is
        the string "Not available". Empty input yields ``{}`` without
        starting a browser.
    """
    brands = list(brand_id_cleaned)
    if not brands:
        return {}

    brand_data = {}

    # One browser for the whole run: launching Chrome per brand is slow, and
    # the try/finally guarantees it is closed even if a request fails midway.
    driver = webdriver.Chrome()
    try:
        for index, brand in enumerate(brands):
            url = f"https://directory.goodonyou.eco/brands/{str(brand)}"

            # brand overall rating
            response = requests.get(url, timeout=request_timeout)
            soup = BeautifulSoup(response.content, "html.parser")
            rating_tag = soup.find("p", attrs={"id": "brand-rating"})
            # Brands without a page (or without a rating) have no such tag.
            if rating_tag is not None:
                general_rating = rating_tag.get_text()
            else:
                general_rating = "Not available"

            # scores by class
            driver.get(url)
            time.sleep(page_load_wait)  # wait for the page to load
            scores = driver.find_elements(
                By.CSS_SELECTOR,
                'span.StyledText-sc-1sadyjn-0.LabelMeter__TextScore-sc-6zrovj-2',
            )

            # The code reads the scores positionally: planet, people, animals.
            brand_data[index] = {
                "Brand": brand.title().replace("-", " "),
                "Overall sustainability rating": general_rating,
                "Planet rating": _score_text(scores, 0),
                "People rating": _score_text(scores, 1),
                "Animals rating": _score_text(scores, 2),
            }
    finally:
        driver.quit()

    return brand_data

# Run the scrape for every cleaned brand identifier. get_data issues HTTP
# requests and launches Chrome through Selenium, so this step needs network
# access and is the slow part of the notebook.
brand_data = get_data(brand_id_cleaned)

In [5]:
# Transforming the data into a DataFrame

# brand_data maps a position to one record per brand, so orient='index'
# turns each record into a row.
df_brands = pd.DataFrame.from_dict(brand_data, orient='index')

# Persist the table; index=False leaves the positional row index out of the CSV.
df_brands.to_csv("ratings.csv", index=False)

# Keep the frame as the cell's final expression so the notebook renders it;
# a bare expression in the middle of a cell is evaluated and discarded.
df_brands