# Scraping Sustainability Ratings of Fashion Brands

In [49]:
# imports
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import csv

## Eco-Stylist Org

IN-PROGRESS: scrape brands from the website to create the brands variable; comprehensive brand name normalization

In [57]:
# Eco-Stylist setup: review URL prefix, brands to look up, and an empty results table
url_eco_stylist = "https://www.eco-stylist.com/ethical-brand/"
eco_stylist_brands = ['Patagonia']

# starts empty; the scraping cell appends one row per brand
eco_stylist_df = pd.DataFrame(
    columns=[
        'Brand',
        'Overall',
        'Transparency',
        'Fair Labor',
        'Sustainably Made',
    ],
)

In [58]:
# scrape for brand data
def _word_at(text, position, default="N/A"):
    """Return the space-separated word at `position` in `text`.

    Falls back to `default` when `text` is None (the page did not contain
    the expected label) or has too few words, so one unexpected page layout
    does not abort the whole scrape.
    """
    if text is None:
        return default
    words = text.split(" ")
    return words[position] if position < len(words) else default


for brand in eco_stylist_brands:
    # normalize brand name (the review URL uses the lower-cased name)
    norm_brand = brand.lower()

    # search for brand review; requests has no default timeout, so set one
    # to keep a single stalled connection from hanging the notebook
    review = requests.get(url_eco_stylist + norm_brand + "/", timeout=30)

    # check if brand review exists
    if review.status_code == 200:
        content = BeautifulSoup(review.text, 'html.parser')

        # overall rating: third word of the "Overall Rating: ..." text node
        overall = _word_at(content.find(string=re.compile("Overall Rating:")), 2)

        # transparency, fair labor, sustainably made: second word of the
        # first three "Rated: ..." text nodes, taken in that order
        ratings = content.find_all(string=re.compile("Rated:"))
        category_ratings = [
            _word_at(ratings[i] if i < len(ratings) else None, 1)
            for i in range(3)
        ]

        row = [brand, overall, *category_ratings]
    else:
        # no review page for this brand
        row = [brand, "N/A", "N/A", "N/A", "N/A"]

    # update dataframe (append as the next row)
    eco_stylist_df.loc[len(eco_stylist_df.index)] = row

In [59]:
# preview the first five scraped Eco-Stylist rows
eco_stylist_df.head(n=5)

Unnamed: 0,Brand,Overall,Transparency,Fair Labor,Sustainably Made
0,Patagonia,Gold,Excellent,Excellent,Excellent


In [60]:
# save the Eco-Stylist ratings table as CSV in the data folder
eco_stylist_df.to_csv(path_or_buf='../data/eco_stylist_ratings.csv')

## Sustainable Review Org

IN-PROGRESS: scrape brands from website to create brands variable

In [39]:
# Sustainable Review setup: review URL prefix, brands to look up, and an empty results table
url_sustainable_review = "https://sustainablereview.com/brand-ratings/"
sustainable_review_brands = [
    "Nike",
    "Adidas",
    "The North Face",
    "Victoria's Secret",
    "ZuZu & Co",
    "Becci Boo's Custom Shoes",
]

# starts empty; the scraping cell appends one row per brand
sustainable_review_df = pd.DataFrame(
    columns=[
        'Brand',
        'Rating',
        'Factors',
    ],
)

In [40]:
# scrape for brand data

# Ordered (old, new) substitutions that turn a lower-cased brand name into the
# slug used in the review URL. Order matters: multi-character patterns such as
# ' & ', '...' and '. ' must run before the single-character rules that would
# otherwise consume part of them.
_SLUG_REPLACEMENTS = [
    (' & ', '-'), ('&', ''), (' - ', '-'), (" '", '-'), ("'", ''),
    (' / ', '-'), ('/', '-'),
    ('é', 'e'), ('ä', 'a'), ('â', 'a'), ('ü', 'u'), ('ñ', 'n'),
    ('ø', 'o'), ('ö', 'o'), ('ò', 'o'), ('ó', 'o'), ('ç', 'c'),
    (' + ', '-'), ('(', ''), (')', ''), ('[', ''), (']', ''),
    ('...', '-'), ('. ', '-'), ('.', '-'), (' ', '-'),
]


def _sustainable_review_slug(brand):
    """Return the URL slug for `brand`: lower-cased, then each substitution applied in order."""
    slug = brand.lower()
    for old, new in _SLUG_REPLACEMENTS:
        slug = slug.replace(old, new)
    return slug


def _rating_from_page(content):
    """Return the fourth space-separated token of the InfoBox's first <p>, or "N/A".

    "N/A" is returned when the InfoBox, its paragraph, or the fourth token is
    missing, instead of raising on an unexpected page layout.
    """
    info_box = content.find('div', class_='InfoBox')
    paragraph = info_box.find('p') if info_box is not None else None
    if paragraph is None:
        return "N/A"
    tokens = paragraph.get_text().split(" ")
    return tokens[3] if len(tokens) > 3 else "N/A"


def _clean_factor_headings(heading_texts):
    """Return the factor names found in the given <h3> heading texts.

    Drops the "Similar brands:" label, one trailing ':' and any heading that
    mentions "Conclusion"; headings left empty are skipped.
    """
    cleaned = []
    for text in heading_texts:
        factor = text.replace("Similar brands:", "").strip()

        # drop ':' from factor
        if factor.endswith(":"):
            factor = factor[:-1]

        # exclude empty items and headings with "Conclusion"
        if factor and "Conclusion" not in factor:
            cleaned.append(factor)
    return cleaned


for brand in sustainable_review_brands:
    # normalize brand name
    norm_brand = _sustainable_review_slug(brand)

    # search for brand review; requests has no default timeout, so set one
    # to keep a single stalled connection from hanging the notebook
    review = requests.get(url_sustainable_review + norm_brand + "/", timeout=30)

    # check if brand review exists
    if review.status_code == 200:
        content = BeautifulSoup(review.text, 'html.parser')

        # rating
        rating = _rating_from_page(content)

        # factors: read each <h3>'s text directly. Splitting the stringified
        # list on ", " broke headings that themselves contain ", " and left
        # tag attributes / nested markup in the result.
        body = content.find('div', class_='col-md-12 col-lg-9')
        heading_texts = [h3.get_text() for h3 in body.find_all('h3')] if body is not None else []
        factors = ", ".join(_clean_factor_headings(heading_texts))

        row = [brand, rating, factors]
    else:
        # no review page for this brand
        row = [brand, "N/A", "N/A"]

    # update dataframe (append as the next row)
    sustainable_review_df.loc[len(sustainable_review_df.index)] = row

In [41]:
# preview the first five scraped Sustainable Review rows
sustainable_review_df.head(n=5)

Unnamed: 0,Brand,Rating,Factors
0,Nike,3,"Environmental Progress, Chemical Impact Mitiga..."
1,Adidas,3,Adidas’ Global Recognition for Sustainability ...
2,The North Face,4,Greenhouse Gas Emissions Reduction on the Hori...
3,Victoria's Secret,Not Rated by Sustainable Review,
4,ZuZu & Co,4,Using Eco-Friendly Materials and Reducing Wast...


In [42]:
# save the Sustainable Review ratings table as CSV in the data folder
sustainable_review_df.to_csv(path_or_buf='../data/sustainable_review_ratings.csv')