### Import libraries

In [1]:
import pandas as pd
import numpy as np

import time

import requests
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

In [2]:
# Run Chrome headless (no visible browser window).
chrome_options = Options()
chrome_options.add_argument("--headless")
# Shared WebDriver instance used by the scraping functions and cells below.
driver = webdriver.Chrome(options=chrome_options)

### Define functions

In [3]:
def scroll(driver):
    """Scroll the browser to the bottom of the current page and wait until it is ready.

    Blocks for up to 10 seconds until ``document.readyState`` reports
    ``"complete"``; ``WebDriverWait`` raises if that never happens in time.
    """
    def page_is_ready(browser):
        # True once the browser reports the document as fully loaded.
        state = browser.execute_script("return document.readyState")
        return state == "complete"

    # Jump to the very bottom of the document.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    waiter = WebDriverWait(driver, 10)
    waiter.until(page_is_ready)


In [4]:
def get_all_slogans(url):
    """Download a company page and return the text of every slogan paragraph.

    The page is fetched with ``requests``, decoded as UTF-8 and parsed with
    BeautifulSoup. Slogans are the ``<p>`` elements inside the first
    ``<div class="info_content">``.

    Parameters
    ----------
    url : str
        Address of the company page.

    Returns
    -------
    list of str
        One entry per ``<p>`` found, in document order. An empty list is
        returned when the page has no ``info_content`` div, so one
        unexpected page does not abort a long scraping run.
    """
    response = requests.get(url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    div = soup.find('div', {'class': 'info_content'})
    if div is None:
        # soup.find returns None when nothing matches; calling find_all on it
        # would raise AttributeError.
        return []

    return [paragraph.text for paragraph in div.find_all('p')]

In [5]:
def click_on_each_company(driver, soup):
    """Open every company on the current listing page and collect its slogans.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser already showing the listing page.
    soup : BeautifulSoup
        Parsed HTML of the same listing page; company names are read from it.

    Returns
    -------
    pandas.DataFrame
        Columns ``Company`` and ``Slogans``, one row per slogan.
    """
    company_names = soup.find_all('div', {'class': 'd-flex w-100 justify-content-between font-weight-bolder'})

    # Collect plain records and build the frame once at the end; growing a
    # DataFrame with pd.concat inside the loop re-copies all earlier rows on
    # every append.
    records = []

    for i, name_div in enumerate(company_names):
        company_name = name_div.text

        # The link list is re-queried on every pass, presumably because element
        # handles from before driver.back() can no longer be used.
        company = driver.find_elements(By.XPATH, '//a[@class="list-group-item text-decoration-none text-reset"]')[i]
        driver.execute_script("arguments[0].click();", company)

        # The browser is now on the company page; fetch its slogans by URL.
        slogans = get_all_slogans(driver.current_url)
        records.extend(
            {"Company": company_name, "Slogans": slogan} for slogan in slogans
        )

        # Return to the listing page for the next company.
        driver.back()

    return pd.DataFrame(records, columns=['Company', 'Slogans'])

### Scrape Drinking Slogans from https://sloganlist.com/drinking-slogans/

In [28]:
url = 'https://sloganlist.com/drinking-slogans/'
category = 'Drinking'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('drinking_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'drinking_slogan_{i+1}.csv', index=False)

# 17 minutes, 49 seconds

Page 2 of 29
Page 3 of 29
Page 4 of 29
Page 5 of 29
Page 6 of 29
Page 7 of 29
Page 8 of 29
Page 9 of 29
Page 10 of 29
Page 11 of 29
Page 12 of 29
Page 13 of 29
Page 14 of 29
Page 15 of 29
Page 16 of 29
Page 17 of 29
Page 18 of 29
Page 19 of 29
Page 20 of 29
Page 21 of 29
Page 22 of 29
Page 23 of 29
Page 24 of 29
Page 25 of 29
Page 26 of 29
Page 27 of 29
Page 28 of 29
Page 29 of 29


In [30]:
# The drinking scrape writes one CSV per listing page: drinking_slogan_1.csv
# through drinking_slogan_29.csv (its log ends with "Page 29 of 29"). All 29
# are merged here; stopping at 28 would silently drop the last page.
n_pages = 29
page_frames = [
    pd.read_csv(f'drinking_slogan_{page}.csv', encoding='utf-8')
    for page in range(1, n_pages + 1)
]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)

df.to_csv('drinking_slogan.csv', index=False)

### Scrape Food slogans from https://sloganlist.com/food-slogans/

In [9]:
url = 'https://sloganlist.com/food-slogans/'
# Single source for the label so page 1 and the later pages cannot disagree
# (page 1 used to be written as 'food' while pages 2+ were 'Food').
category = 'Food'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('food_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'food_slogan_{i+1}.csv', index=False)

Page 2 of 17
Page 3 of 17
Page 4 of 17
Page 5 of 17
Page 6 of 17
Page 7 of 17
Page 8 of 17
Page 9 of 17
Page 10 of 17
Page 11 of 17
Page 12 of 17
Page 13 of 17
Page 14 of 17
Page 15 of 17
Page 16 of 17
Page 17 of 17


In [None]:
# Merge the per-page CSVs written by the food scrape (food_slogan_1.csv ...
# food_slogan_17.csv) into a single file.
n_pages = 17
page_frames = [pd.read_csv(f'food_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True) # 34879

df.to_csv('food_slogan.csv', index=False)

### Scrape Restaurant slogans from https://sloganlist.com/restaurant-slogans/

In [10]:
url = 'https://www.sloganlist.com/restaurant-slogans/'
category = 'Restaurant'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('restaurant_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'restaurant_slogan_{i+1}.csv', index=False)

Page 2 of 9
Page 3 of 9
Page 4 of 9
Page 5 of 9
Page 6 of 9
Page 7 of 9
Page 8 of 9
Page 9 of 9


In [11]:
# Merge the per-page CSVs written by the restaurant scrape
# (restaurant_slogan_1.csv ... restaurant_slogan_9.csv) into a single file.
n_pages = 9
page_frames = [pd.read_csv(f'restaurant_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)

df.to_csv('restaurant_slogan.csv', index=False)

### Scrape Automobile slogans from https://www.sloganlist.com/car-slogan/

In [12]:
url = 'https://www.sloganlist.com/car-slogan/'
category = 'Car'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('car_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'car_slogan_{i+1}.csv', index=False)

Page 2 of 8
Page 3 of 8
Page 4 of 8
Page 5 of 8
Page 6 of 8
Page 7 of 8
Page 8 of 8


In [13]:
# Merge the per-page CSVs written by the car scrape (car_slogan_1.csv ...
# car_slogan_8.csv) into a single file.
n_pages = 8
page_frames = [pd.read_csv(f'car_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)

df.to_csv('car_slogan.csv', index=False)

### Scrape Apparel slogans from https://sloganlist.com/apparel-slogans/

In [14]:
url = 'https://www.sloganlist.com/apparel-slogans/'
category = 'Apparel'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('apparel_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'apparel_slogan_{i+1}.csv', index=False)

Page 2 of 8
Page 3 of 8
Page 4 of 8
Page 5 of 8
Page 6 of 8
Page 7 of 8
Page 8 of 8


In [15]:
# Merge the per-page CSVs written by the apparel scrape (apparel_slogan_1.csv
# ... apparel_slogan_8.csv) into a single file.
n_pages = 8
page_frames = [pd.read_csv(f'apparel_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)
df.to_csv('apparel_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,Puma,Live fast,Apparel
1,Puma,Powerhouse,Apparel
2,Puma,Forever faster,Apparel
3,Puma,Body train together,Apparel
4,Puma,Where the action is,Apparel
...,...,...,...
818,Nike,Nike ID. Your Colors. Your Style. Your Shoes.,Apparel
819,Lacoste,Lacoste essentials,Apparel
820,Lacoste,The crocodile is back,Apparel
821,Lacoste,Life is a beautiful sport,Apparel


### Scrape Technology slogans from https://www.sloganlist.com/technology-slogans/

In [16]:
url = 'https://www.sloganlist.com/technology-slogans/'
category = 'Technology'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('technology_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'technology_slogan_{i+1}.csv', index=False)

Page 2 of 7
Page 3 of 7
Page 4 of 7
Page 5 of 7
Page 6 of 7
Page 7 of 7


In [17]:
# Merge the per-page CSVs written by the technology scrape
# (technology_slogan_1.csv ... technology_slogan_7.csv) into a single file.
n_pages = 7
page_frames = [pd.read_csv(f'technology_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)
df.to_csv('technology_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,Orbotech,Improving yields through technology. (1999),Technology
1,Orbotech,With you all the way. (2007),Technology
2,Orbotech,Be sure. (2008),Technology
3,Orbotech,The language of electronics. (2015),Technology
4,Ferranti,Are big in automation now. (1965),Technology
...,...,...,...
709,Panasonic,"Panasonic, The One That I Want. ( 1996–2003 )",Technology
710,Panasonic,What's New Panasonic. ( 1996–2003 ),Technology
711,Panasonic,Ideas for Life. ( 2003–2013 ),Technology
712,Panasonic,"A Better Life, A Better World. ( 2013–present )",Technology


### Scrape Business slogans from https://www.sloganlist.com/business-slogans/

In [18]:
url = 'https://www.sloganlist.com/business-slogans/'
category = 'Business'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('business_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'business_slogan_{i+1}.csv', index=False)

Page 2 of 13
Page 3 of 13
Page 4 of 13
Page 5 of 13
Page 6 of 13
Page 7 of 13
Page 8 of 13
Page 9 of 13
Page 10 of 13
Page 11 of 13
Page 12 of 13
Page 13 of 13


In [19]:
# Merge the per-page CSVs written by the business scrape
# (business_slogan_1.csv ... business_slogan_13.csv) into a single file.
n_pages = 13
page_frames = [pd.read_csv(f'business_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)
df.to_csv('business_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,Hot Wheels,Go With the Winner.,Business
1,Hot Wheels,"Hot Wheels, leading the way!",Business
2,Hot Wheels,Hottest metal cars in the world.,Business
3,Hot Wheels,It's not the same without the flame!,Business
4,7-Eleven,7-Eleven. Oh thank heaven.,Business
...,...,...,...
908,De Beers,right hand ring,Business
909,De Beers,A diamond is forever.,Business
910,De Beers,Diamonds are forever,Business
911,De Beers,orever – And now in London.,Business


### Scrape Company slogans from https://www.sloganlist.com/company-slogans/

In [20]:
url = 'https://www.sloganlist.com/company-slogans/'
category = 'Company'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('company_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'company_slogan_{i+1}.csv', index=False)

Page 2 of 33
Page 3 of 33
Page 4 of 33
Page 5 of 33
Page 6 of 33
Page 7 of 33
Page 8 of 33
Page 9 of 33
Page 10 of 33
Page 11 of 33
Page 12 of 33
Page 13 of 33
Page 14 of 33
Page 15 of 33
Page 16 of 33
Page 17 of 33
Page 18 of 33
Page 19 of 33
Page 20 of 33
Page 21 of 33
Page 22 of 33
Page 23 of 33
Page 24 of 33
Page 25 of 33
Page 26 of 33
Page 27 of 33
Page 28 of 33
Page 29 of 33
Page 30 of 33
Page 31 of 33
Page 32 of 33
Page 33 of 33


In [21]:
# Merge the per-page CSVs written by the company scrape (company_slogan_1.csv
# ... company_slogan_33.csv) into a single file.
n_pages = 33
page_frames = [pd.read_csv(f'company_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)
df.to_csv('company_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,HBO,It's HBO.,Company
1,HBO,So original,Company
2,HBO,Just You Wait,Company
3,HBO,The Home Box,Company
4,HBO,Simply The Best,Company
...,...,...,...
2322,Energizer,Nothing outlasts the Energizer. It keeps going...,Company
2323,John Lewis,For all Life's Moments,Company
2324,John Lewis,Never Knowingly Undersold.,Company
2325,Tesco,Every Little Helps.,Company


### Scrape Cosmetics (beauty) slogans from https://www.sloganlist.com/cosmetics-slogans/

In [29]:
url = 'https://www.sloganlist.com/cosmetics-slogans/'
category = 'Cosmetics'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('cosmetics_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'cosmetics_slogan_{i+1}.csv', index=False)

Page 2 of 6
Page 3 of 6
Page 4 of 6
Page 5 of 6
Page 6 of 6


In [30]:
# Merge the per-page CSVs written by the cosmetics scrape
# (cosmetics_slogan_1.csv ... cosmetics_slogan_6.csv) into a single file.
n_pages = 6
page_frames = [pd.read_csv(f'cosmetics_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)
df.to_csv('cosmetics_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,Clarins,The problem solver in skin care. Premier in Fr...,Cosmetics
1,Clarins,No one understands your skin better. (1997),Cosmetics
2,Clarins,"It’s a fact. With Clarins, life’s more beautif...",Cosmetics
3,Clarins,The European leader in luxury skincare. (2009),Cosmetics
4,Clarins,The UK leader in luxury skin care. (2009),Cosmetics
...,...,...,...
532,Maybelline,Make it happen,Cosmetics
533,Maybelline,Beautiful discovery,Cosmetics
534,Maybelline,"Maybelline, Maybelline Ooh La La",Cosmetics
535,Maybelline,Beautiful eyes for you with Maybelline,Cosmetics


### Scrape Household slogans from https://www.sloganlist.com/household-slogans/

In [40]:
url = 'https://www.sloganlist.com/household-slogans/'
category = 'Household'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional (li[5] for this
# category), so it depends on the page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[5]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('household_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'household_slogan_{i+1}.csv', index=False)

Page 2 of 4
Page 3 of 4
Page 4 of 4


In [41]:
# Merge the per-page CSVs written by the household scrape
# (household_slogan_1.csv ... household_slogan_4.csv) into a single file.
n_pages = 4
page_frames = [pd.read_csv(f'household_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)
df.to_csv('household_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,PlayStation,U R Not e.,Household
1,PlayStation,eNoS Lives.,Household
2,PlayStation,Do Not Underestimate The Power of PlayStation.,Household
3,PlayStation,"Wherever, Whenever, Forever.",Household
4,PlayStation,The Beginning.,Household
...,...,...,...
207,Glade,It's freshness to go!,Household
208,Glade,Create the mood with Glade.,Household
209,Glade,Breathing life into your home.,Household
210,Glade,Your car smells bad?,Household


### Scrape Financial slogans from https://www.sloganlist.com/financial-slogans/

In [42]:
url = 'https://www.sloganlist.com/financial-slogans/'
category = 'Financial'

# Static HTML of the first listing page; click_on_each_company reads the company names from it.
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

driver.get(url)

# Dismiss the cookie-consent dialog when it is shown. find_elements returns an
# empty list if the button is absent, so no blanket `except` is needed.
consent_buttons = driver.find_elements(By.XPATH, "//p[@class='fc-button-label']")
if consent_buttons:
    driver.execute_script("arguments[0].click();", consent_buttons[0])

scroll(driver)  # scroll to the bottom of the page

# Page count: the text of this pagination link is split on '...' and the last
# piece is parsed as an integer. The XPath is positional, so it depends on the
# page layout.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Page 1 is the category URL itself.
df = click_on_each_company(driver, soup)
df['Category'] = category
df.to_csv('financial_slogan_1.csv', index=False)

# Pages 2..total_subpages are served as index_<n>.html under the category URL.
for i in range(1, total_subpages):
    print(f'Page {i+1} of {total_subpages}')

    page_url = url + f'index_{i+1}.html'
    response = requests.get(page_url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(page_url)

    scroll(driver)
    time.sleep(1)  # short pause after scrolling before the page is processed

    df = click_on_each_company(driver, soup).drop_duplicates()
    df['Category'] = category

    # One CSV per listing page, numbered like the page.
    df.to_csv(f'financial_slogan_{i+1}.csv', index=False)

Page 2 of 11
Page 3 of 11
Page 4 of 11
Page 5 of 11
Page 6 of 11
Page 7 of 11
Page 8 of 11
Page 9 of 11
Page 10 of 11
Page 11 of 11


In [43]:
# Merge the per-page CSVs written by the financial scrape
# (financial_slogan_1.csv ... financial_slogan_11.csv) into a single file.
n_pages = 11
page_frames = [pd.read_csv(f'financial_slogan_{page}.csv') for page in range(1, n_pages + 1)]

# Stack the pages, drop rows repeated across pages, and renumber the index.
df = pd.concat(page_frames).drop_duplicates().reset_index(drop=True)
df.to_csv('financial_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,Nationwide Building Society,On your side.,Financial
1,Nationwide Building Society,Building a better society.,Financial
2,Nationwide Building Society,Nationwide is on your side.,Financial
3,Yorkshire Bank,Always thinking.,Financial
4,Yorkshire Bank,We Care About Here.,Financial
...,...,...,...
526,Mastercard,"For Everything Else, There’s MasterCard.",Financial
527,Mastercard,There are some things money can't buy. For eve...,Financial
528,Mastercard,More living. Less limits. ( World Mastercard ),Financial
529,Mastercard,Helping you find profits in new places. ( Mast...,Financial


### Scrape Tours & Travel slogans from https://www.sloganlist.com/tours-slogans/

In [44]:
url = 'https://www.sloganlist.com/tours-slogans/'

# Fetch the listing page for BeautifulSoup parsing. The timeout keeps a
# stalled connection from hanging the cell forever.
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Load the same page in the Selenium driver.
driver.get(url)

# Accept cookies (best effort: the consent button may not be on the page).
# Catch Exception instead of using a bare `except` so that a kernel
# interrupt (KeyboardInterrupt) is not silently swallowed here.
try:
    consent = driver.find_element(By.XPATH, "//p[@class='fc-button-label']")
    driver.execute_script("arguments[0].click();", consent)
except Exception:
    pass

scroll(driver) # load the whole page

# Get the number of subpages: the integer that follows the last '...' in the
# text of the link at this (absolute, layout-dependent) XPath.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Scrape the first page, which is already loaded in the driver.
df = pd.DataFrame(click_on_each_company(driver, soup))

# Tag every row with its category and save the first chunk.
df['Category'] = 'Tours'
df.to_csv('tours_slogan_1.csv', index=False)

# Remaining subpages are addressed as <url>index_<n>.html for n = 2..total_subpages.
for i in range(1, total_subpages):

    print(f'Page {i+1} of {total_subpages}')

    next_page_url = f'index_{i+1}.html'
    response = requests.get(url+next_page_url, timeout=30)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(url+next_page_url)

    scroll(driver)

    # Brief pause before scraping (presumably lets the page settle).
    time.sleep(1)

    df = pd.DataFrame(click_on_each_company(driver, soup)).drop_duplicates()
    df['Category'] = 'Tours'

    # Save this page's rows under a per-page chunk name.
    df.to_csv(f'tours_slogan_{i+1}.csv', index=False)

Page 2 of 8
Page 3 of 8
Page 4 of 8
Page 5 of 8
Page 6 of 8
Page 7 of 8
Page 8 of 8


In [45]:
# Load the eight per-page chunk files (pages 1 through 8) in page order.
tours_pages = [
    pd.read_csv(f'tours_slogan_{page}.csv')
    for page in range(1, 9)
]

# Stack the chunks, drop repeated rows and renumber the index from zero.
stacked = pd.concat(tours_pages)
df = stacked.drop_duplicates().reset_index(drop=True)

# Persist the combined table, then display it as the cell output.
df.to_csv('tours_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,Haven Holidays,Just what you’ve been looking for. (1981),Tours
1,Haven Holidays,There’s one that’s just right for you. (1984),Tours
2,Haven Holidays,Holidays are made in Haven. (1988),Tours
3,Haven Holidays,The family choice. (1990),Tours
4,Haven Holidays,Happy families. (1991),Tours
...,...,...,...
1175,Holiday Inn,Pleasing people the world over.,Tours
1176,Holiday Inn,The best surprise is no surprise.,Tours
1177,Holiday Inn,We put a smile back on your face.,Tours
1178,Holiday Inn,Stay smart. ( Holiday Inn Express ),Tours


### Scrape Airlines slogans from https://www.sloganlist.com/airlines-slogans/

In [26]:
# Record the start time so the total runtime can be reported at the end.
start_time = time.time()

url = 'https://www.sloganlist.com/airlines-slogans/'

# Fetch the listing page for BeautifulSoup parsing. The timeout keeps a
# stalled connection from hanging the cell forever.
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Load the same page in the Selenium driver.
driver.get(url)

# Accept cookies (best effort: the consent button may not be on the page).
# Catch Exception instead of using a bare `except` so that a kernel
# interrupt (KeyboardInterrupt) is not silently swallowed here.
try:
    consent = driver.find_element(By.XPATH, "//p[@class='fc-button-label']")
    driver.execute_script("arguments[0].click();", consent)
except Exception:
    pass

scroll(driver) # load the whole page

# Get the number of subpages: the integer that follows the last '...' in the
# text of the link at this (absolute, layout-dependent) XPath.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[7]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Scrape the first page, which is already loaded in the driver.
df = pd.DataFrame(click_on_each_company(driver, soup))

# Tag every row with its category and save the first chunk.
df['Category'] = 'Airlines'
df.to_csv('airlines_slogan_1.csv', index=False)

# Remaining subpages are addressed as <url>index_<n>.html for n = 2..total_subpages.
for i in range(1, total_subpages):

    print(f'Page {i+1} of {total_subpages}')

    next_page_url = f'index_{i+1}.html'
    response = requests.get(url+next_page_url, timeout=30)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(url+next_page_url)

    scroll(driver)

    # Brief pause before scraping (presumably lets the page settle).
    time.sleep(1)

    df = pd.DataFrame(click_on_each_company(driver, soup)).drop_duplicates()
    df['Category'] = 'Airlines'

    # Save this page's rows under a per-page chunk name.
    df.to_csv(f'airlines_slogan_{i+1}.csv', index=False, encoding='utf-8')

# Report the elapsed time rounded to two decimals. The precision argument must
# be inside round(): `% (round(x), 2)` would hand a 2-tuple to a single `%s`
# and raise TypeError.
print("--- %s seconds ---" % round(time.time() - start_time, 2))

Page 2 of 8
Page 3 of 8
Page 4 of 8
Page 5 of 8
Page 6 of 8
Page 7 of 8
Page 8 of 8
--- 193.62247824668884 seconds ---


In [27]:
# Load every per-page chunk, pages 1 through 8 inclusive. The scraping loop
# writes airlines_slogan_{i+1}.csv up to the last subpage (8 for this
# category), so stopping at 7 would silently drop the final page.
airlines_pages = [
    pd.read_csv(f'airlines_slogan_{page}.csv', encoding='utf-8')
    for page in range(1, 9)
]

# Stack the chunks, drop repeated rows and renumber the index from zero.
df = pd.concat(airlines_pages).drop_duplicates().reset_index(drop=True)

# Persist the combined table, then preview the first 20 rows.
df.to_csv('airlines_slogan.csv', index=False, encoding='utf-8')
df.head(20)

Unnamed: 0,Company,Slogans,Category
0,EgyptAir,Makes all the difference. (1978),Airlines
1,EgyptAir,The international airline of Egypt. (1983),Airlines
2,EgyptAir,The way I like to travel. (2016),Airlines
3,Canadian Air,We ship your world. (1988),Airlines
4,Canadian Air,The easy choice. (1989),Airlines
5,Canadian Air,Focused: On you. (1990),Airlines
6,Aurigny,The channel airline. (2002),Airlines
7,Aurigny,Rediscover what life’s about. (2004),Airlines
8,Aurigny,The Channel Islands airline. (2007),Airlines
9,Aurigny,The islands’ preferred airline. (2014),Airlines


### Scrape Television channels slogans from https://www.sloganlist.com/television-channels-slogan/

In [38]:
url = 'https://www.sloganlist.com/television-channels-slogan/'

# Fetch the listing page for BeautifulSoup parsing. The timeout keeps a
# stalled connection from hanging the cell forever.
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Load the same page in the Selenium driver.
driver.get(url)

# Accept cookies (best effort: the consent button may not be on the page).
# Catch Exception instead of using a bare `except` so that a kernel
# interrupt (KeyboardInterrupt) is not silently swallowed here.
try:
    consent = driver.find_element(By.XPATH, "//p[@class='fc-button-label']")
    driver.execute_script("arguments[0].click();", consent)
except Exception:
    pass

scroll(driver) # load the whole page

# Get the number of subpages: the integer that follows the last '...' in the
# text of the link at this (absolute, layout-dependent) XPath.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[4]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Scrape the first page, which is already loaded in the driver.
df = pd.DataFrame(click_on_each_company(driver, soup))

# Tag every row with its category and save the first chunk.
df['Category'] = 'Television'
df.to_csv('television_slogan_1.csv', index=False)

# Remaining subpages are addressed as <url>index_<n>.html for n = 2..total_subpages.
for i in range(1, total_subpages):

    print(f'Page {i+1} of {total_subpages}')

    next_page_url = f'index_{i+1}.html'
    response = requests.get(url+next_page_url, timeout=30)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(url+next_page_url)

    scroll(driver)

    # Brief pause before scraping (presumably lets the page settle).
    time.sleep(1)

    df = pd.DataFrame(click_on_each_company(driver, soup)).drop_duplicates()
    df['Category'] = 'Television'

    # Save this page's rows under a per-page chunk name.
    df.to_csv(f'television_slogan_{i+1}.csv', index=False)

Page 2 of 3
Page 3 of 3


In [39]:
# Load the three per-page chunk files (pages 1 through 3) in page order.
television_pages = [
    pd.read_csv(f'television_slogan_{page}.csv')
    for page in range(1, 4)
]

# Stack the chunks, drop repeated rows and renumber the index from zero.
stacked = pd.concat(television_pages)
df = stacked.drop_duplicates().reset_index(drop=True)

# Persist the combined table, then display it as the cell output.
df.to_csv('television_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,Hulu,Come TV with us.,Television
1,Hulu,Never miss a moment. Watch Live.,Television
2,d2h,Direct 2 Home.,Television
3,d2h,Demand 2 Have.,Television
4,d2h,Khushiyon Ki Chatri.,Television
...,...,...,...
232,Super Channel,See it first ( Super Channel Fuse ),Television
233,Super Channel,See it together ( Super Channel Heart & Home ),Television
234,Super Channel,See it again ( Super Channel Vault ),Television
235,Super Channel,See it playing ( Ginx eSports TV Canada ),Television


### Scrape Health & Medicine slogans from https://www.sloganlist.com/health-medicine-slogans/

In [37]:
url = 'https://www.sloganlist.com/health-medicine-slogans/'

# Fetch the listing page for BeautifulSoup parsing. The timeout keeps a
# stalled connection from hanging the cell forever.
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Load the same page in the Selenium driver.
driver.get(url)

# Accept cookies (best effort: the consent button may not be on the page).
# Catch Exception instead of using a bare `except` so that a kernel
# interrupt (KeyboardInterrupt) is not silently swallowed here.
try:
    consent = driver.find_element(By.XPATH, "//p[@class='fc-button-label']")
    driver.execute_script("arguments[0].click();", consent)
except Exception:
    pass

scroll(driver) # load the whole page

# Only this one listing page is scraped for the category (no subpage loop).
df = pd.DataFrame(click_on_each_company(driver, soup))

# Tag every row with its category, drop repeated rows and save the result.
df['Category'] = 'Health'
df = df.drop_duplicates().reset_index(drop=True)
df.to_csv('health_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,British Heart Foundation,Help us fight Britain’s biggest killer. (1983),Health
1,British Heart Foundation,There’s so much we need to research. (1983),Health
2,British Heart Foundation,The heart research charity. (1985),Health
3,British Heart Foundation,Help keep British hearts beating. (1990),Health
4,British Heart Foundation,Helps keep British hearts beating. (1990),Health
5,British Heart Foundation,It’s time to tackle heart failure. (2002),Health
6,British Heart Foundation,Seven out of ten people over 45 have high chol...,Health
7,British Heart Foundation,Their hearts are in your hands. (2003),Health
8,British Heart Foundation,Who is your heart telling you to run for? (2003),Health
9,British Heart Foundation,The big red fightback. (2004),Health


### Scrape Sports & Games slogans from https://www.sloganlist.com/sports-games-slogans/

In [31]:
url = 'https://www.sloganlist.com/sports-games-slogans/'

# Fetch the listing page for BeautifulSoup parsing. The timeout keeps a
# stalled connection from hanging the cell forever.
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Load the same page in the Selenium driver.
driver.get(url)

# Accept cookies (best effort: the consent button may not be on the page).
# Catch Exception instead of using a bare `except` so that a kernel
# interrupt (KeyboardInterrupt) is not silently swallowed here.
try:
    consent = driver.find_element(By.XPATH, "//p[@class='fc-button-label']")
    driver.execute_script("arguments[0].click();", consent)
except Exception:
    pass

scroll(driver) # load the whole page

# Get the number of subpages: the integer that follows the last '...' in the
# text of the link at this (absolute, layout-dependent) XPath.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[3]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Scrape the first page, which is already loaded in the driver.
df = pd.DataFrame(click_on_each_company(driver, soup))

# Tag every row with its category and save the first chunk.
df['Category'] = 'Sports'
df.to_csv('sports_slogan_1.csv', index=False)

# Remaining subpages are addressed as <url>index_<n>.html for n = 2..total_subpages.
for i in range(1, total_subpages):

    print(f'Page {i+1} of {total_subpages}')

    next_page_url = f'index_{i+1}.html'
    response = requests.get(url+next_page_url, timeout=30)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(url+next_page_url)

    scroll(driver)

    # Brief pause before scraping (presumably lets the page settle).
    time.sleep(1)

    df = pd.DataFrame(click_on_each_company(driver, soup)).drop_duplicates()
    df['Category'] = 'Sports'

    # Save this page's rows under a per-page chunk name.
    df.to_csv(f'sports_slogan_{i+1}.csv', index=False)

Page 2 of 2


In [32]:
# Load the two per-page chunk files (pages 1 and 2) in page order.
sports_pages = [
    pd.read_csv(f'sports_slogan_{page}.csv')
    for page in range(1, 3)
]

# Stack the chunks, drop repeated rows and renumber the index from zero.
stacked = pd.concat(sports_pages)
df = stacked.drop_duplicates().reset_index(drop=True)

# Persist the combined table, then display it as the cell output.
df.to_csv('sports_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,Helly Hansen,Great clothes for active people. (1988),Sports
1,Helly Hansen,Technical clothing = performance = exhilaratio...,Sports
2,Helly Hansen,Between human will and nature’s forces. (2005),Sports
3,Helly Hansen,Survival – Work – Sport. (2008),Sports
4,Helly Hansen,Confident when it matters. (2012),Sports
...,...,...,...
154,International League T20,A League Apart.,Sports
155,International League T20,Har Ball Bawaal.,Sports
156,International League T20,Every Ball will be Epic.,Sports
157,FIFA,For the Good of the Game.,Sports


### Scrape Education slogans from https://www.sloganlist.com/education-slogans/

In [35]:
url = 'https://www.sloganlist.com/education-slogans/'

# Fetch the listing page for BeautifulSoup parsing. The timeout keeps a
# stalled connection from hanging the cell forever.
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Load the same page in the Selenium driver.
driver.get(url)

# Accept cookies (best effort: the consent button may not be on the page).
# Catch Exception instead of using a bare `except` so that a kernel
# interrupt (KeyboardInterrupt) is not silently swallowed here.
try:
    consent = driver.find_element(By.XPATH, "//p[@class='fc-button-label']")
    driver.execute_script("arguments[0].click();", consent)
except Exception:
    pass

scroll(driver) # load the whole page

# Only this one listing page is scraped for the category (no subpage loop).
df = pd.DataFrame(click_on_each_company(driver, soup))

# Tag every row with its category, drop repeated rows and save the result.
df['Category'] = 'Education'
df = df.drop_duplicates().reset_index(drop=True)
df.to_csv('education_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,InterExec,The one who stands out. (1985),Education
1,InterExec,Acting for senior executives since 1976. (2006),Education
2,InterExec,The direct line to the executive shortlist. (2...,Education
3,InterExec,Achieving career goals since 1976. (2007),Education
4,InterExec,Acting for top executives 1976. (2008),Education
...,...,...,...
61,Texas Lutheran University,Learn boldly. Live to inspire.,Education
62,Texas Lutheran University,"Get an education, not just a degree.",Education
63,University of Richmond,Word of life and the light of knowledge.,Education
64,Feather River College,Academic Adventures in a Mountain Environment,Education


### Scrape Campaign slogans from https://www.sloganlist.com/campaign-slogans/

In [34]:
url = 'https://www.sloganlist.com/campaign-slogans/'

# Fetch the listing page for BeautifulSoup parsing. The timeout keeps a
# stalled connection from hanging the cell forever.
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Load the same page in the Selenium driver.
driver.get(url)

# Accept cookies (best effort: the consent button may not be on the page).
# Catch Exception instead of using a bare `except` so that a kernel
# interrupt (KeyboardInterrupt) is not silently swallowed here.
try:
    consent = driver.find_element(By.XPATH, "//p[@class='fc-button-label']")
    driver.execute_script("arguments[0].click();", consent)
except Exception:
    pass

scroll(driver) # load the whole page

# Only this one listing page is scraped for the category (no subpage loop).
df = pd.DataFrame(click_on_each_company(driver, soup))

# Tag every row with its category, drop repeated rows and save the result.
df['Category'] = 'Campaign'
df = df.drop_duplicates().reset_index(drop=True)
df.to_csv('campaign_slogan.csv', index=False)
df

Unnamed: 0,Company,Slogans,Category
0,National Dairy Council,Drinka pinta - quencha thirst. (1967),Campaign
1,National Dairy Council,Keep up the pintas. (1969),Campaign
2,National Dairy Council,Pinta man is great. (1969),Campaign
3,National Dairy Council,Milk is cool. (1978),Campaign
4,National Dairy Council,Your daily pinta - three foods in one. (1978),Campaign
5,National Dairy Council,No drink can beat it. Milk is supreme. (1979),Campaign
6,National Dairy Council,Enjoy a natural pinta. (1980),Campaign
7,National Dairy Council,The lighter tasting milk. (1990),Campaign
8,UK Tea Council,Join the tea set.,Campaign
9,UK Tea Council,The tea set needs you.,Campaign


### Scrape Uncategorized slogans from https://www.sloganlist.com/uncategorized/

In [31]:
url = 'https://www.sloganlist.com/uncategorized/'

# Fetch the listing page for BeautifulSoup parsing. The timeout keeps a
# stalled connection from hanging the cell forever.
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Load the same page in the Selenium driver.
driver.get(url)

# Accept cookies (best effort: the consent button may not be on the page).
# Catch Exception instead of using a bare `except` so that a kernel
# interrupt (KeyboardInterrupt) is not silently swallowed here.
try:
    consent = driver.find_element(By.XPATH, "//p[@class='fc-button-label']")
    driver.execute_script("arguments[0].click();", consent)
except Exception:
    pass

scroll(driver) # load the whole page

# Get the number of subpages: the integer that follows the last '...' in the
# text of the link at this (absolute, layout-dependent) XPath.
last_page_link = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]/div[2]/ul/li[6]/a')
total_subpages = int(last_page_link.get_attribute('text').split('...')[-1])

# Scrape the first page, which is already loaded in the driver.
df = pd.DataFrame(click_on_each_company(driver, soup))

# Tag every row with its category and save the first chunk.
df['Category'] = 'Uncategorized'
df.to_csv('uncategorized_slogan_1.csv', index=False)

# Remaining subpages are addressed as <url>index_<n>.html for n = 2..total_subpages.
for i in range(1, total_subpages):

    print(f'Page {i+1} of {total_subpages}')

    next_page_url = f'index_{i+1}.html'
    response = requests.get(url+next_page_url, timeout=30)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    driver.get(url+next_page_url)

    scroll(driver)

    # Brief pause before scraping (presumably lets the page settle).
    time.sleep(1)

    df = pd.DataFrame(click_on_each_company(driver, soup)).drop_duplicates()
    df['Category'] = 'Uncategorized'

    # Save this page's rows under a per-page chunk name.
    df.to_csv(f'uncategorized_slogan_{i+1}.csv', index=False)

Page 2 of 5
Page 3 of 5
Page 4 of 5
Page 5 of 5


In [33]:
# Load the five per-page chunk files (pages 1 through 5) in page order.
uncategorized_pages = [
    pd.read_csv(f'uncategorized_slogan_{page}.csv', encoding='utf-8')
    for page in range(1, 6)
]

# Stack the chunks, drop repeated rows and renumber the index from zero.
df = pd.concat(uncategorized_pages).drop_duplicates().reset_index(drop=True)

# Persist the combined table first: a notebook cell only displays its LAST
# expression, so the preview has to come after the write to be shown.
df.to_csv('uncategorized_slogan.csv', index=False)

df.head(20)


Unnamed: 0,Company,Slogans,Category
0,American Cancer Society,We want to wipe out cancer in your lifetime. G...,Uncategorized
1,American Cancer Society,Share the cost of living. (1981),Uncategorized
2,American Cancer Society,Leave your mark on life. (1987),Uncategorized
3,American Cancer Society,Help us keep winning. (1987),Uncategorized
4,American Cancer Society,Colon polyps. Stop them before they go bad. (2...,Uncategorized
5,American Cancer Society,Colon cancer. Get the test. Get the polyp. Get...,Uncategorized
6,American Cancer Society,Hope. Progress. Answers. (2005),Uncategorized
7,American Cancer Society,The official sponsor of birthdays. (2013),Uncategorized
8,Barnardo's,Britain’s largest child care charity. (1976),Uncategorized
9,Barnardo's,Together we can give young people a chance. (1...,Uncategorized
