## Sephora & Ulta Beauty Scraper

For this project, I scrape store location information for Sephora and Ulta Beauty.

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re 

### Sephora

In [4]:
def SephoraScrapper(last_us_index=1087, timeout=30):
    """Scrape Sephora's store list and every store page into a DataFrame.

    The store-list page is fetched first to collect each store's name and
    link; every kept store page is then fetched to read its address text and
    the five-digit number(s) found in it.

    Parameters
    ----------
    last_us_index : int, default 1087
        Last row label (inclusive) of the store list to keep. The cut-off is
        used to select the stores in the US; it depends on the ordering of
        the live page -- TODO confirm the value before relying on it.
    timeout : float, default 30
        Seconds to wait for each HTTP response, so that one stalled
        connection cannot hang the whole scrape.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``link``, ``location`` and ``zip``. ``zip`` holds
        the list returned by ``re.findall`` (it may be empty). ``location``
        is ``None`` when a store page does not contain the expected address
        paragraph.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails or exceeds ``timeout``.
    """
    # Get the store-list page.
    url = 'https://www.sephora.com/happening/storelist'
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    # First, collect the store names and links.
    store_name = []
    store_link = []
    for store in soup.find_all(class_='css-j9u336'):
        href = store.get('href')
        if href is None:
            # An element without a link cannot be followed; skipping it also
            # keeps the name and link lists the same length.
            continue
        store_name.append(store.text)
        store_link.append('http://www.sephora.com' + href)

    # Save into a DataFrame and keep the stores in the US. The copy makes the
    # column assignments below act on an independent frame, not on a slice.
    sephora_df = pd.DataFrame(data={'name': store_name, 'link': store_link})
    sephora_df = sephora_df.loc[0:last_us_index].copy()

    # Next, follow each link and read the location info.
    store_loc = []
    store_zip = []
    for s_link in sephora_df['link']:
        s_request = requests.get(s_link, timeout=timeout)
        s_soup = BeautifulSoup(s_request.content, 'html.parser')
        paragraphs = s_soup.find_all('p', class_='css-a2osvj')

        if len(paragraphs) > 1:
            # The address is read from the second matching paragraph.
            s_loc = paragraphs[1].text
            s_zipcode = re.findall(r"\D(\d{5})\D", s_loc)
        else:
            # Record a gap instead of aborting and losing every page that
            # has already been scraped.
            s_loc = None
            s_zipcode = []

        store_loc.append(s_loc)
        store_zip.append(s_zipcode)

    sephora_df['location'] = store_loc
    sephora_df['zip'] = store_zip

    return sephora_df

In [5]:
# Run the Sephora scrape. This issues one HTTP request per kept store, so the
# cell can take a while to finish.
sephora_df = SephoraScrapper()

### Ulta Beauty

For Ulta, I will use Selenium to scrape because the page has interactive buttons (it is a dynamic website).

In [6]:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

def UltaScrapperI(driver_path='/Users/szuminyu/Desktop/chromedriver'):
    """Open Ulta's store directory in Chrome and expand the location lists.

    Parameters
    ----------
    driver_path : str, default '/Users/szuminyu/Desktop/chromedriver'
        Path to the chromedriver executable. The default is the machine-
        specific location this notebook was written with; pass your own path
        on any other machine.

    Returns
    -------
    selenium.webdriver.Chrome
        The live driver, still showing the directory page. The caller owns it
        and should call ``quit()`` on it when finished.

    Raises
    ------
    Exception
        Any error raised while loading the page or clicking is re-raised
        after the browser has been closed.
    """
    # Imported here so this function does not depend on edits to another cell.
    from selenium.webdriver.common.by import By

    # Set up the driver and open the Ulta website.
    driver = webdriver.Chrome(driver_path)
    try:
        driver.get('https://www.ulta.com/stores/directory')

        # Click through every visible button to open its locations.
        # find_elements(By.CLASS_NAME, ...) is used because the
        # find_elements_by_* shortcuts were removed in Selenium 4.3.
        buttons = driver.find_elements(By.CLASS_NAME, 'sl-all-locations__state__locations')
        for button in buttons:
            if button.is_displayed():
                # Move to the element first, then click it.
                ActionChains(driver).move_to_element(button).perform()
                button.click()
    except Exception:
        # No driver handle is returned on failure, so close the browser here
        # rather than leaving an orphaned Chrome window behind.
        driver.quit()
        raise

    return driver


def UltaScrapperII(driver):
    """Parse the store directory currently loaded in ``driver``.

    Any location lists that did not open automatically have to be clicked
    open manually in Chrome before this is called.

    Parameters
    ----------
    driver : selenium WebDriver
        Driver whose current page is the expanded Ulta store directory.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``location`` and ``zip``. ``zip`` holds the list
        returned by ``re.findall`` (empty when the location text does not end
        in five digits).

    Raises
    ------
    selenium.common.exceptions.NoSuchElementException
        If no element with class ``sl-item__nap`` is present on the page.
    ValueError
        If the page yields a different number of names than locations.
    """
    # Imported here so this function does not depend on edits to another cell.
    from selenium.webdriver.common.by import By

    # Make sure the information needed is on the page; this raises if it is
    # not. find_element(By.CLASS_NAME, ...) replaces the
    # find_element_by_class_name shortcut removed in Selenium 4.3.
    driver.find_element(By.CLASS_NAME, 'sl-item__nap')

    # Then feed the rendered page into BeautifulSoup.
    soup = BeautifulSoup(driver.page_source, 'lxml')

    # class_= filters on the CSS class explicitly; a {'class', name} set
    # literal is not an attribute dict.
    ulta_store = [
        store.text
        for store in soup.find_all('span', class_='sl-item__header__mall')
    ]

    ulta_loc = []
    ulta_zip = []
    for s in soup.find_all('span', class_='sl-item__nap'):
        store_loc = s.text
        # The pattern is anchored to the end of the text, so surrounding
        # whitespace is stripped before matching; the stored location text
        # is left untouched.
        store_zip = re.findall(r"[0-9]{5}$(?![0-9])", store_loc.strip())

        ulta_loc.append(store_loc)
        ulta_zip.append(store_zip)

    # Names and locations are collected separately and paired by position,
    # so a count mismatch would silently misalign rows or fail obscurely.
    if len(ulta_store) != len(ulta_loc):
        raise ValueError(
            'Found %d store names but %d store locations; '
            'the page structure may have changed.'
            % (len(ulta_store), len(ulta_loc))
        )

    # Put them into a DataFrame.
    ulta_df = pd.DataFrame({'name': ulta_store, 'location': ulta_loc, 'zip': ulta_zip})

    return ulta_df

In [7]:
# Launch Chrome on the Ulta store directory and keep the driver handle so the
# next cell can read the page it is showing.
mydriver = UltaScrapperI()

In [8]:
# Parse the page currently shown in the browser into a DataFrame.
# NOTE(review): none of the visible cells closes the browser; consider calling
# mydriver.quit() once the scrape is no longer needed.
ulta_df = UltaScrapperII(mydriver)

### Put the two datasets together

In [9]:
# Drop the helper 'link' column. errors='ignore' keeps this cell re-runnable:
# on a second run the column is already gone and a plain drop would raise
# KeyError.
sephora_df = sephora_df.drop(columns=['link'], errors='ignore')

# Tag every row with the retailer it came from.
sephora_df['source'] = 'Sephora'
ulta_df['source'] = 'Ulta'

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the
# same way and likewise keeps each frame's own row labels.
sephora_ulta = pd.concat([sephora_df, ulta_df])
sephora_ulta.head(10)

Unnamed: 0,name,location,zip,source
0,"Birmingham, Birmingham","205 Summit BlvdBirmingham, AL 35243US",[35243],Sephora
1,"Hoover, Riverchase Galleria","2000 Riverchase GalleriaHoover, AL 35244US",[35244],Sephora
2,"Huntsville, Huntsville","335 The Bridge StHuntsville, AL 35806US",[35806],Sephora
3,"Anchorage, 5TH AVE MALL","320 W. 5th AveAnchorage, AK 99501US",[99501],Sephora
4,"Chandler, Chandler Fashion Center","3111 W. Chandler BlvdChandler, AZ 85226US",[85226],Sephora
5,"Gilbert, San Tan Village","2206 E Williams Field RdGilbert, AZ 85295US",[85295],Sephora
6,"Glendale, Arrowhead Towne Center","7700 W Arrowhead Towne CenterGlendale, AZ 85308US",[85308],Sephora
7,"Mesa, Dana Park","1822 S Val Vista Dr\nSuite 106 \nMesa, AZ 85204US",[85204],Sephora
8,"Phoenix, Biltmore","2502 E. Camelback Rd. Phoenix, AZ 85016US",[85016],Sephora
9,"Scottsdale, Scottsdale Fashion Center","7014 East Camelback RdScottsdale, AZ 85251US",[85251],Sephora


In [10]:
# Write the combined table to a CSV file in the current working directory;
# the row index is written as the first, unnamed column.
sephora_ulta.to_csv('sephora_ulta.csv')