In [1]:
import pandas as pd
import numpy as np
import requests
from retrying import retry
import re
from bs4 import BeautifulSoup
import time
import math
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow,Flow
from google.auth.transport.requests import Request
import os
import pickle
import ulta_functions as ulta
import google_api_functions as gapi
import google_sheets_credentials as creds
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import copy
import concurrent.futures
import json

In [2]:
print("\nstarting...\n")

# Empty containers that the loading and scraping cells fill in, plus one HTTP
# session shared by every request.
products = dict()
all_url_info = dict()
session = requests.Session()


starting...



In [3]:
# Load the saved URL -> info mapping; its keys are the URLs to scrape.
# The context manager closes the file even if the JSON turns out to be malformed.
with open("data/all_url_info_dict.json", "r") as url_info_file:
    all_url_info = json.load(url_info_file)
urls = list(all_url_info.keys())

In [4]:
print('scraping ulta...')
# Fan the URLs out over a pool of worker threads to shorten the total run time.
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
    pending = {}
    for page_url in urls:
        pending[pool.submit(ulta.scrape_url, page_url, session, products, all_url_info)] = page_url
    for finished in concurrent.futures.as_completed(pending):
        url = pending[finished]
        try:
            data = finished.result()
        except Exception as error:
            # A failed page is reported and skipped; the remaining pages still run.
            print(url, ':', error)
        else:
            products = data

scraping ulta...
https://www.ulta.com/skin-care-eye-treatments-eye-cream?N=27hk&No=0&Nrpp=500 186
https://www.ulta.com/skin-care-eye-treatments?N=270k&No=0&Nrpp=500 324
https://www.ulta.com/skin-care-eye-treatments-eye-cream?N=27hk&No=0&Nrpp=500 186
https://www.ulta.com/skin-care-eye-treatments-eye-cream?N=27hk&No=0&Nrpp=500 186
https://www.ulta.com/skin-care-eye-treatments?N=270k&No=0&Nrpp=500 324
https://www.ulta.com/skin-care-eye-treatments-eye-cream?N=27hk&No=0&Nrpp=500 186
https://www.ulta.com/skin-care-eye-treatments?N=270k&No=0&Nrpp=500 324
https://www.ulta.com/skin-care-eye-treatments-eye-cream?N=27hk&No=0&Nrpp=500 186
https://www.ulta.com/skin-care-eye-treatments-eye-cream?N=27hk&No=0&Nrpp=500 : name 'exc' is not defined
https://www.ulta.com/skin-care-eye-treatments?N=270k&No=0&Nrpp=500 324
https://www.ulta.com/skin-care-eye-treatments?N=270k&No=0&Nrpp=500 324
https://www.ulta.com/skin-care-eye-treatments?N=270k&No=0&Nrpp=500 : name 'exc' is not defined


In [5]:
# Scraping is finished, so release the shared HTTP session.
session.close()

In [245]:
# Turn the scraped dict (keyed by product name) into one row per product,
# indexed by product id.
ulta_df = (
    pd.DataFrame.from_dict(products)
    .T
    .reset_index()
    .rename(columns={'index' : 'name'})
    .set_index('id')
)
#loading in yesterday's data to check for price changes

## loading in old data

In [246]:
# Previous run's table, reduced to the columns compared against today's data
# and renamed with an old_ prefix so they do not clash after merging.
old_column_names = {'price' : 'old_price', 'sale' : 'old_sale', 'secret_sale' : 'old_secret_sale', 'options' : 'old_options'}
old_ulta_df = (
    pd.read_csv('data/ulta_df.csv')
    .rename(columns=old_column_names)
    .set_index('id')
)
old_ulta_df = old_ulta_df[list(old_column_names.values())]

## checking for products whose price has changed since yesterday

In [247]:
# Products present in both runs whose price text differs and that were not
# marked as on sale in either run. Missing option labels become the string '0'.
changed_prices_df = (
    pd.merge(ulta_df, old_ulta_df, on='id', how='inner')
    .dropna(subset=['price', 'old_price'])
    .query('price != old_price')
    .query('sale == 0 & old_sale == 0')
    .assign(
        old_options=lambda frame: frame['old_options'].fillna(value=str(0)),
        options=lambda frame: frame['options'].fillna(value=str(0)),
    )
)

In [248]:
def _dollars(price_text):
    """Parse a single price such as '$12.50' into a float (drops the leading '$')."""
    return float(price_text[1:])


def _range_low(price_text):
    """Lower bound of a hyphenated price such as '$4.99 - $9.99'."""
    return _dollars(price_text.split(' - ')[0])


def _range_high(price_text):
    """Upper bound of a hyphenated price such as '$4.99 - $9.99'."""
    return _dollars(price_text.split(' - ')[1])


def _is_not_a_price_drop(row):
    """Return True when a changed price should NOT be treated as a markdown.

    `row` is one row of changed_prices_df with string columns 'price',
    'old_price', 'options' and 'old_options'. Prices are compared as numbers;
    the original compared the raw strings in the first case, so '$9.00' sorted
    above '$10.00'.
    """
    price, old_price = row['price'], row['old_price']
    new_is_range = '-' in price
    old_is_range = '-' in old_price

    if old_is_range and not new_is_range:
        # Was a range yesterday, single price today: not a markdown if today's
        # price is at or above yesterday's upper bound.
        not_lower = _range_high(old_price) <= _dollars(price)
    elif old_is_range and new_is_range:
        # Both are ranges: either bound failing to go down disqualifies the row.
        not_lower = (
            _range_high(price) >= _range_high(old_price)
            or _range_low(price) >= _range_low(old_price)
        )
    elif new_is_range:
        # Single price yesterday, range today: the new lower bound must not
        # exceed yesterday's price.
        not_lower = _range_low(price) > _dollars(old_price)
    else:
        # Neither is a range: today's price must be strictly lower.
        not_lower = _dollars(price) >= _dollars(old_price)
    if not_lower:
        return True

    # A price change that coincides with a change in the available sizes is
    # treated as a different product mix rather than a markdown.
    options, old_options = row['options'], row['old_options']
    if 'Sizes' in options and 'Sizes' in old_options and options != old_options:
        return True
    return old_options == str(0) and 'Sizes' in options


# Collect the labels first and drop once: dropping inside the loop copied the
# frame on every hit and raised KeyError if the same id came up twice.
not_price_drops = [
    label for label, row in changed_prices_df.iterrows() if _is_not_a_price_drop(row)
]
changed_prices_df = (
    changed_prices_df
    .drop(index=list(dict.fromkeys(not_price_drops)))
    .drop(columns=['old_price', 'old_sale', 'old_secret_sale', 'old_options'])
)

In [249]:
# Display the products that survived the price-change filter.
changed_prices_df

Unnamed: 0_level_0,name,url,brand,desc,rating,number_of_reviews,sale,price,secret_sale,offers,options,main_category,sub_category,sub_sub_category,sale_price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1


## checking prices of some items that I suspect might be a secret sale

In [250]:
# Candidates: products whose options mention "Colors", whose listing shows a
# price range, and that have sale == 0 and secret_sale == 0.
ulta_df_t = ulta_df.dropna(subset=['price', 'options'])
check_prices_df = ulta_df_t[ulta_df_t['options'].str.contains("Colors") & ulta_df_t['price'].str.contains("-")].query('sale == 0 & secret_sale == 0')

# A low/high ratio above this means the range is too narrow to suggest that
# some colors are marked down.
MAX_LOW_TO_HIGH_RATIO = .95


def _low_to_high_ratio(price_range):
    """Return low / high for a range such as '$4.99 - $9.99' (each part has a leading '$')."""
    bounds = price_range.split(' - ')
    return float(bounds[0][1:]) / float(bounds[1][1:])


# Skip narrow ranges and anything already caught by the price-change check.
# Labels are collected first and dropped once; dropping inside the loop copied
# the frame on every hit and raised KeyError if the same id came up twice.
already_flagged = set(changed_prices_df.index)
labels_to_drop = [
    label
    for label, price_range in check_prices_df['price'].items()
    if _low_to_high_ratio(price_range) > MAX_LOW_TO_HIGH_RATIO or label in already_flagged
]
check_prices_df = check_prices_df.drop(index=list(dict.fromkeys(labels_to_drop)))

In [251]:
# Display the multi-color products whose price range is wide enough to check.
check_prices_df

Unnamed: 0_level_0,name,url,brand,desc,rating,number_of_reviews,sale,price,secret_sale,offers,options,main_category,sub_category,sub_sub_category,sale_price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
xlsImpprod12251079,It Cosmetics CC+ Cream Illumination SPF 50+,https://www.ulta.com/cc-cream-illumination-spf...,It Cosmetics,CC+ Cream Illumination SPF 50+,4.30,3987,0,$19.00 - $39.50,0,Free Gift with Purchase!,13 Colors,makeup,face,,
pimprod2005671,PÜR 4-In-1 Love Your Selfie Longwear Foundatio...,https://www.ulta.com/4-in-1-love-your-selfie-l...,PÜR,4-In-1 Love Your Selfie Longwear Foundation & ...,4.00,384,0,$25.20 - $36.00,0,,40 Colors,makeup,face,,
pimprod2006201,UOMA Beauty Stay Woke Luminous Brightening Con...,https://www.ulta.com/stay-woke-luminous-bright...,UOMA Beauty,Stay Woke Luminous Brightening Concealer,4.20,36,0,$12.00 - $25.00,0,,20 Colors,makeup,face,,
xlsImpprod16411307,It Cosmetics Perfect Lighting Radiant Touch Ma...,https://www.ulta.com/perfect-lighting-radiant-...,It Cosmetics,Perfect Lighting Radiant Touch Magic Wand,4.30,35,0,$20.30 - $29.00,0,Free Gift with Purchase!,2 Colors,makeup,face,,
pimprod2000278,Revlon PhotoReady Candid Antioxidant Concealer,https://www.ulta.com/photoready-candid-antioxi...,Revlon,PhotoReady Candid Antioxidant Concealer,4.40,337,0,$4.99 - $9.99,0,"Buy 1, get 1 at 50% off!",12 Colors,makeup,face,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xlsImpprod17761957,Loving Tan Deluxe Bronzing Mousse,https://www.ulta.com/deluxe-bronzing-mousse?pr...,Loving Tan,Deluxe Bronzing Mousse,4.00,407,0,$34.95 - $39.95,0,,3 Colors,skin care,suncare,,
pimprod2014445,Australian Glow One Hour Express Self Tan Mousse,https://www.ulta.com/one-hour-express-self-tan...,Australian Glow,One Hour Express Self Tan Mousse,4.70,124,0,$24.99 - $29.99,0,Free Gift with Purchase!,2 Colors,bath & body,suncare,,
pimprod2014454,Australian Glow One Hour Express Self Tan Mous...,https://www.ulta.com/one-hour-express-self-tan...,Australian Glow,One Hour Express Self Tan Mousse Refill,4.90,18,0,$22.99 - $26.99,0,Free Gift with Purchase!,2 Colors,bath & body,suncare,,
xlsImpprod14631251,NuMe Classic Curling Wand 1 1/4'',https://www.ulta.com/classic-curling-wand-1-14...,NuMe,Classic Curling Wand 1 1/4'',4.50,422,0,$69.00 - $89.00,0,,2 Colors,tools & brushes,hair styling tools,,


## getting products with .97 in their price

In [252]:
# Products flagged secret_sale == 1 that are not marked as a regular sale.
flagged_not_on_sale = ulta_df.query('secret_sale == 1 & sale == 0')
price_97_df = (
    copy.deepcopy(flagged_not_on_sale)
    .reset_index()
    .rename(columns={'index' : 'name'})
    .set_index('id')
)

In [253]:
# Display the products flagged with secret_sale == 1.
price_97_df

Unnamed: 0_level_0,name,url,brand,desc,rating,number_of_reviews,sale,price,secret_sale,offers,options,main_category,sub_category,sub_sub_category,sale_price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
pimprod2006566,Derma E Tinted Moisturizing BB Cream with SPF 30+,https://www.ulta.com/tinted-moisturizing-bb-cr...,Derma E,Tinted Moisturizing BB Cream with SPF 30+,2.9,57.0,0,$9.97,1,,2 Colors,makeup,face,,
xlsImpprod14541123,e.l.f. Cosmetics Flawless Finish Foundation,https://www.ulta.com/flawless-finish-foundatio...,e.l.f. Cosmetics,Flawless Finish Foundation,3.7,394.0,0,$2.97 - $6.00,1,Free Gift with Purchase!,26 Colors,makeup,face,,
xlsImpprod4550001,Urban Decay Cosmetics Naked Skin Weightless Ul...,https://www.ulta.com/naked-skin-weightless-ult...,Urban Decay Cosmetics,Naked Skin Weightless Ultra Definition Liquid ...,4.5,7437.0,0,$20.00 - $27.97,1,Free Gift with Purchase!,8 Colors,makeup,face,,
xlsImpprod15921108,MAC Cleanse Off Oil,https://www.ulta.com/cleanse-off-oil?productId...,MAC,Cleanse Off Oil,4.5,95.0,0,$23.97,1,,,skin care,cleansers,,
pimprod2000588,It Cosmetics IT Girl Vol. 2 Your Life-Changing...,https://www.ulta.com/it-girl-vol-2-your-life-c...,It Cosmetics,IT Girl Vol. 2 Your Life-Changing Eye & Cheek ...,4.2,50.0,0,$18.97,1,Free Gift with Purchase!,,makeup,gifts & value sets,,
xlsImpprod16801836,Lancôme Ombre Hypnôse Stylo Shadow Stick Matte...,https://www.ulta.com/ombre-hypnose-stylo-shado...,Lancôme,Ombre Hypnôse Stylo Shadow Stick Matte Metallics,4.4,27.0,0,$17.97 - $25.00,1,Free Gift with Purchase!,11 Colors,makeup,eyes,eyeshadow,
xlsImpprod18481005,Lottie London Lip Foil Iridescent Duo Chrome L...,https://www.ulta.com/lip-foil-iridescent-duo-c...,Lottie London,Lip Foil Iridescent Duo Chrome Lip Topper,4.2,6.0,0,$3.97 - $6.49,1,,2 Colors,makeup,lips,,
xlsImpprod16971043,Lottie London #HOLO Duo Chrome Lip Gloss,https://www.ulta.com/holo-duo-chrome-lip-gloss...,Lottie London,#HOLO Duo Chrome Lip Gloss,4.6,33.0,0,$3.97,1,,,makeup,lips,,
xlsImpprod14851155,Rodial Snake Moisturiser O2 SPF 15,https://www.ulta.com/snake-moisturiser-o2-spf-...,Rodial,Snake Moisturiser O2 SPF 15,0.0,,0,$59.97,1,,,skin care,moisturizers,,
xlsImpprod17991027,Wish Formula I'm Pro Ampoule Pad - C,https://www.ulta.com/im-pro-ampoule-pad-c?prod...,Wish Formula,I'm Pro Ampoule Pad - C,0.0,,0,$15.97,1,,,skin care,treatment & serums,face serums,


## putting them all together and removing duplicates

In [298]:
# Stack the three candidate tables and keep one copy of each identical row.
candidate_tables = [changed_prices_df, check_prices_df, price_97_df]
secret_sales_df = pd.concat(candidate_tables).drop_duplicates()

## making sure I'm not excluding any products in the google sheet

In [299]:
# Ids that were in the previous run's sheet but were not picked up today.
current_ids = secret_sales_df.index.tolist()
query = "id not in {}".format(current_ids)
old_secret_sales_in_stock = pd.read_csv('data/secret_sales_in_stock.csv').set_index('id')
not_in_secret_sales = old_secret_sales_in_stock.query(query).index.tolist()

# Pull today's rows for those ids out of the full scrape.
ulta_df_t = ulta_df.reset_index().rename(columns={'index' : 'id'})
is_carried_over = ulta_df_t['id'].isin(not_in_secret_sales)
not_in_secret_sales_df = ulta_df_t[is_carried_over].set_index('id')

# Add them to today's candidates, drop advertised sales, and build the
# name-keyed dict that the stock checker consumes.
combined_df = pd.concat([secret_sales_df, not_in_secret_sales_df]).drop_duplicates()
secret_sales_df = combined_df.query('sale == 0')
secret_sales = (
    secret_sales_df
    .reset_index()
    .rename(columns={'index' : 'id'})
    .set_index('name')
    .transpose()
    .to_dict()
)

In [300]:
# Build a name-keyed dict restricted to the product ids listed in `ids`.
# NOTE(review): `ids` is not defined in any cell visible in this notebook, so this
# cell raises NameError on a fresh kernel (Restart & Run All). It presumably held a
# hand-picked list of product ids for testing on a subset — confirm, then define
# it explicitly or drop the filter.
secret_sales_df_t = secret_sales_df.reset_index().rename(columns={'index' : 'id'})
secret_sales_t = pd.DataFrame.to_dict(secret_sales_df_t[secret_sales_df_t['id'].isin(ids)].set_index('name').transpose())

## finding out which options are in stock

In [301]:
# Page Ulta redirects to when a product no longer exists.
_NOT_FOUND_URL = 'https://www.ulta.com/404.jsp'
# Badge image shown on variants that are part of an advertised sale.
_SALE_BADGE_SRC = 'https://images.ulta.com/is/image/Ulta/badge-sale?fmt=png-alpha'


def _product_id_from_url(url):
    """Return the text after 'productId=' in `url`, or None when the URL has no product id."""
    parts = url.split('productId=')
    return parts[1] if len(parts) > 1 else None


def _is_secret_sale_price(price, listed_price):
    """Decide whether a variant's price looks like an unadvertised markdown.

    `price` is the variant's price as text without a currency symbol
    (e.g. '9.97'). `listed_price` is the product's listing price with a
    leading '$': a single price ('$12.00') or a range ('$4.99 - $9.99').
    """
    if '.97' in price:
        return True
    if '-' not in listed_price:
        return float(price) <= float(listed_price[1:])
    bounds = listed_price.split(' - ')
    low, high = bounds[0][1:], bounds[1][1:]
    # Prices ending in '0' are treated as regular prices unless they equal the
    # low end of the listed range.
    if price == high and price[-1] != '0':
        return True
    if price == low:
        return True
    return float(price) < float(high) and price[-1] != '0'


def _variant_label(soup):
    """Return the selected variant's label from the parsed page, or 'NA' when none is found."""
    color_meta = soup.find('meta', {'property' : 'product:color'})
    option = (color_meta.get('content') or '') if color_meta is not None else ''
    # Other places the label can live when the meta tag is empty.
    if option == '':
        color_panel = soup.find('div', {'class' : 'ProductDetail__colorPanel'})
        if color_panel is not None:
            spans = color_panel.find_all('span')
            if len(spans) > 1:
                option = spans[1].text
    if option == '':
        description = soup.find('span', {'class' : 'ProductVariantSelector__description'})
        if description is not None:
            option = description.text
    return option if option != '' else 'NA'


def get_products_in_stock(secret_sales, driver):
    """Visit every product page and record which variants are in stock at a markdown price.

    Parameters
    ----------
    secret_sales : dict
        Maps product name to a dict with at least 'url', 'id' and 'price'.
        An entry's 'url' is updated in place when the site search finds the
        product under a different URL.
    driver : selenium WebDriver
        Browser session used to load the pages.
        NOTE(review): the find_element(s)_by_* calls are the Selenium 3 API — confirm
        the installed Selenium version still provides them.

    Returns
    -------
    tuple
        (products_in_stock, secret_sales). products_in_stock maps product id to
        {price text: "option, option, ..."}; products with nothing in stock are
        left out.
    """
    products_in_stock = {}
    for product in secret_sales:
        details = secret_sales[product]
        temp = {}
        #opening product url in the driver/browser
        driver.get(details['url'])
        #if the product doesn't exist anymore ulta will take you to this site
        if driver.current_url == _NOT_FOUND_URL:
            # The original wrote `next` here, which is a no-op expression; the
            # intent was to skip the product.
            continue
        #the stored url occasionally lands on a different product, so look the product up by id instead
        if _product_id_from_url(driver.current_url) != details['id']:
            driver.find_element_by_xpath("//*[@id='navigation__wrapper--sticky']/div/div[1]/div[2]/div/a").click()
            driver.find_element_by_xpath("//*[@id='searchInput']").send_keys(details['id'])
            driver.find_element_by_xpath("//*[@id='js-mobileHeader']/div/div/div/div[1]/div/div[1]/form/button").click()
            if driver.current_url == _NOT_FOUND_URL:
                continue
            if _product_id_from_url(driver.current_url) == details['id']:
                details['url'] = driver.current_url
        #if I don't add this sleep, the page doesn't finish loading. tried to use implicit waits but this just worked better.
        time.sleep(1)
        #getting all the product variants from the page
        product_variants = driver.find_elements_by_class_name('ProductSwatchImage__variantHolder')
        if len(product_variants) == 0:
            #products that only have one color or one size have their variant information in a different location
            product_variants = driver.find_elements_by_class_name('ProductDetail__productSwatches')
        for product_variant in product_variants:
            try:
                #clicking on each variant at a time to get their price and availability
                product_variant.click()
            except Exception:
                #if I can't click on it I want to go to the next variant
                continue
            #same page-load wait as above
            time.sleep(1)
            soup = BeautifulSoup(driver.page_source, features="lxml")
            #some products only label a couple of shades as sale; skip those so no advertised-sale items slip through
            if soup.find('img', {'src' : _SALE_BADGE_SRC}) is not None:
                continue
            price_meta = soup.find('meta', {'property' : 'product:price:amount'})
            price = price_meta.get('content') if price_meta is not None else None
            if not price:
                # No price on the page for this variant; nothing to compare.
                continue
            if not _is_secret_sale_price(price, details['price']):
                continue
            option = _variant_label(soup)
            #only adding the product variant if it's available
            if soup.find('div', {'class' : 'ProductDetail__availabilitySection ProductDetail__availabilitySection--error'}) is None:
                temp[option] = price
        #if there aren't any product variants in stock, I don't want them in the document
        if not temp:
            continue
        #group the variants by price so each price point maps to one string listing the options available at that price
        variants_in_stock = {}
        for option, price in temp.items():
            variants_in_stock.setdefault(price, set()).add(option)
        # Sorted so the joined string is the same from run to run.
        products_in_stock[details['id']] = {
            price: ", ".join(sorted(options)) for price, options in variants_in_stock.items()
        }
    return (products_in_stock, secret_sales)

In [302]:
# The driver location can be overridden with the CHROMEDRIVER_PATH environment
# variable; the default is the original machine-specific path.
CHROMEDRIVER_PATH = os.environ.get('CHROMEDRIVER_PATH', r'C:\Users\elerm\Downloads\chromedriver_win32\chromedriver.exe')
driver = webdriver.Chrome(CHROMEDRIVER_PATH)
try:
    # NOTE(review): this calls ulta.get_products_in_stock from the imported module,
    # not the function of the same name defined in this notebook — confirm which
    # one is meant to run.
    products_in_stock_t, secret_sales_t = ulta.get_products_in_stock(secret_sales_t, driver)
finally:
    # Always end the browser session, even if scraping raises part-way through.
    driver.quit()

In [303]:
# Reshape {id: {price: options}} into one row per (id, price) pair, dropping
# the price columns a product does not have.
products_in_stock_df_t = (
    pd.DataFrame.from_dict(products_in_stock_t)
    .T
    .reset_index()
    .rename(columns={'index' : 'id'})
    .melt(id_vars=['id'], var_name='price2', value_name='options2')
    .dropna()
    .set_index('id')
)

In [304]:
# Display the in-stock variants, one row per product id and price point.
products_in_stock_df_t

Unnamed: 0_level_0,price2,options2
id,Unnamed: 1_level_1,Unnamed: 2_level_1
pimprod2015149,13.5,"Cosmic Violet, 24K Gold"
xlsImpprod17631337,2.62,Soft Black
pimprod2003096,2.62,Jet Black
xlsImpprod18691109,4.0,Pink


In [307]:
# Product details keyed by id, rebuilt from the dict returned by the stock checker.
secret_sales_df_t = (
    pd.DataFrame.from_dict(secret_sales_t)
    .T
    .reset_index()
    .rename(columns={'index' : 'name'})
    .set_index('id')
)

# Earlier prices from the two saved files, each under its own column name so
# they can sit side by side after merging.
previous_sheet_prices = old_secret_sales_in_stock.rename(columns={'old_price' : 'old_secret_sales_old_price'})[['old_secret_sales_old_price']]
previous_listing_prices = old_ulta_df.rename(columns={'old_price' : 'old_ulta_df_price'})[['old_ulta_df_price']]

# Left merges keep every in-stock row even when there is no earlier price.
in_stock_and_secret_sales = products_in_stock_df_t.merge(secret_sales_df_t, on='id', how='left')
add_old_secret_stock = in_stock_and_secret_sales.merge(previous_sheet_prices, on='id', how='left')
add_old_ulta_df = add_old_secret_stock.merge(previous_listing_prices, on='id', how='left')
secret_sales_in_stock_t = copy.deepcopy(add_old_ulta_df)

In [308]:
# Display the in-stock variants joined with product details and earlier prices.
secret_sales_in_stock_t

Unnamed: 0_level_0,price2,options2,name,url,brand,desc,rating,number_of_reviews,sale,price,secret_sale,offers,options,main_category,sub_category,sub_sub_category,sale_price,old_secret_sales_old_price,old_ulta_df_price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
pimprod2015149,13.5,"Cosmic Violet, 24K Gold",IGK House Party Hair + Body Glitter Stick,https://www.ulta.com/house-party-hair-body-gli...,IGK,House Party Hair + Body Glitter Stick,3.0,2,0,$13.50,0,,2 Colors,hair,hair color,,,$18.00,$13.50
xlsImpprod17631337,2.62,Soft Black,ULTA Limitless Lashes Mascara Lengthening + Vo...,https://www.ulta.com/limitless-lashes-mascara-...,ULTA,Limitless Lashes Mascara Lengthening + Volumizing,2.6,221,0,$2.62,0,"Buy 2, get 1 FREE - Add 3 items to qualify!",,makeup,ulta collection,,,$10.50,$2.62
pimprod2003096,2.62,Jet Black,ULTA Bold Ambition Mascara,https://www.ulta.com/bold-ambition-mascara?pro...,ULTA,Bold Ambition Mascara,3.2,81,0,$2.62,0,"Buy 2, get 1 FREE - Add 3 items to qualify!",,makeup,eyes,,,$10.50,$2.62
xlsImpprod18691109,4.0,Pink,ULTA Soothing Glitter Gel Eye Mask,https://www.ulta.com/soothing-glitter-gel-eye-...,ULTA,Soothing Glitter Gel Eye Mask,4.2,17,0,$4.00,0,,,skin care,eye treatments,,,$8.00,$4.00


In [None]:
# NOTE(review): this cell is an unfinished draft — max_price is computed but never
# appended to old_price, and rows with a hyphenated price range are not handled.
# Two crashes are fixed here: the merged column is 'old_ulta_df_price' (the
# original read 'old_ulta_price', a KeyError), and max() needs all candidate
# prices (the original passed a single float, a TypeError). Taking the highest
# of the three prices is presumably the intent — confirm.
# Rows with no earlier price hold NaN after the left merges and would still
# fail the '-' test below.
old_price = []
for i in range(len(secret_sales_in_stock_t)):
    row = secret_sales_in_stock_t.iloc[i]
    candidates = [row['price'], row['old_secret_sales_old_price'], row['old_ulta_df_price']]
    if all('-' not in candidate for candidate in candidates):
        max_price = max(float(candidate[1:]) for candidate in candidates)
    

In [132]:
def _is_nan(value):
    """np.isnan that answers False for values it cannot handle (e.g. price strings)."""
    try:
        return np.isnan(value)
    except TypeError:
        return False


# For each row take the earlier price from whichever merged column has one;
# when both or neither are present, fall back to the row's own 'price'.
old_price = []
for position in range(len(secret_sales_in_stock)):
    record = secret_sales_in_stock.iloc[position]
    x_missing = _is_nan(record['old_price_x'])
    y_missing = _is_nan(record['old_price_y'])
    if x_missing and not y_missing:
        old_price.append(record['old_price_y'])
    elif y_missing and not x_missing:
        old_price.append(record['old_price_x'])
    else:
        old_price.append(record['price'])

In [133]:
old_secret_sales_in_stock = pd.read_csv('data/secret_sales_in_stock.csv')

# Tag each row 'old' when its id is already in the previous run's file, else 'new'.
previous_ids = old_secret_sales_in_stock['id'].tolist()
age = [
    'old' if product_id in previous_ids else 'new'
    for product_id in secret_sales_in_stock.index
]
secret_sales_in_stock['age'] = age

In [134]:
# Drop the listing-level columns, then promote the per-variant columns
# (price2 / options2) and the description to their final names.
superseded_columns = ['product', 'old_price_x', 'old_price_y', 'price', 'options', 'sale', 'secret_sale', 'sale_price']
final_column_names = {'price2' : 'price', 'options2' : 'options', 'desc' : 'product'}
secret_sales_in_stock = (
    secret_sales_in_stock
    .drop(columns=superseded_columns)
    .rename(columns=final_column_names)
)

In [135]:
# Make the variant price numeric so it can be compared with old_price below
# (presumably it arrives as text from the scraper — confirm).
secret_sales_in_stock['price'] = pd.to_numeric(secret_sales_in_stock['price'])
# NOTE(review): 'age' was already assigned from this same list in the cell that
# builds it; this repeat looks redundant — confirm before removing.
secret_sales_in_stock['age'] = age
# One entry per row, in row order, from the old_price list built earlier.
secret_sales_in_stock['old_price'] = old_price

In [136]:
def _is_not_discounted(row):
    """True when old_price is a bare number (no '$') and the current price is not below it."""
    previous = row['old_price']
    return '$' not in str(previous) and row['price'] >= float(previous)


# Collect the labels first and drop once: dropping inside the loop raised
# KeyError when the same id matched twice (presumably possible, since a product
# can have one row per price point — confirm) and copied the frame on every hit.
labels_to_drop = [
    label for label, row in secret_sales_in_stock.iterrows() if _is_not_discounted(row)
]
secret_sales_in_stock = secret_sales_in_stock.drop(index=list(dict.fromkeys(labels_to_drop)))

In [96]:
# URLs are kept in a separate list, in row order, for the hyperlink step;
# the exported table holds only the columns below, with blanks for missing values.
hyperlink_urls = secret_sales_in_stock['url'].tolist()
sheet_columns = [
    'main_category', 'sub_category', 'sub_sub_category', 'name', 'brand',
    'product', 'price', 'old_price', 'options', 'offers', 'rating',
    'number_of_reviews', 'age',
]
df = secret_sales_in_stock[sheet_columns].fillna(' ')

In [138]:
# Display the final table of in-stock markdowns.
secret_sales_in_stock

Unnamed: 0_level_0,price,options,name,url,brand,product,rating,number_of_reviews,offers,main_category,sub_category,sub_sub_category,age,old_price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
pimprod2006566,9.97,"Medium, Light",Derma E Tinted Moisturizing BB Cream with SPF 30+,https://www.ulta.com/tinted-moisturizing-bb-cr...,Derma E,Tinted Moisturizing BB Cream with SPF 30+,2.90,57,,makeup,face,,old,$19.95
pimprod2006971,8.00,"106 Hazelnut, 105 Warm Almond",KIKO Milano New Green Me BB Cream,https://www.ulta.com/new-green-me-bb-cream?pro...,KIKO Milano,New Green Me BB Cream,4.00,3,,makeup,face,,old,$16.00
pimprod2006970,8.00,102 Sunkissed Copper,KIKO Milano New Green Me Compact Highlighter,https://www.ulta.com/new-green-me-compact-high...,KIKO Milano,New Green Me Compact Highlighter,3.00,1,,makeup,face,highlighter,old,$16.00
pimprod2006977,8.00,101 Pure Black,KIKO Milano New Green Me Extreme Volume Mascara,https://www.ulta.com/new-green-me-extreme-volu...,KIKO Milano,New Green Me Extreme Volume Mascara,3.70,7,,makeup,eyes,mascara,old,$16.00
xlsImpprod17951071,8.00,"Dyn-o-mite (yellow), Groovy (green)",Winky Lux Disco Gloss,https://www.ulta.com/disco-gloss?productId=xls...,Winky Lux,Disco Gloss,4.30,4,,makeup,lips,,old,$16.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xlsImpprod5260123,9.50,"Olive You Kelly (olive green creme), Blazing S...",Orly Nail Lacquer,https://www.ulta.com/nail-lacquer?productId=xl...,Orly,,4.2,349,,nails,nail polish,,old,$2.38 - $9.50
xlsImpprod14411013,32.00,Cashmere 06 (for fair neutral skin w/ subtle y...,bareMinerals BAREPRO Performance Wear Powder F...,https://www.ulta.com/barepro-performance-wear-...,bareMinerals,,4.1,2814,,makeup,face,,old,$21.97 - $32.00
xlsImpprod15721097,11.98,"Fair, Medium",Hempz Yuzu & Starfruit Touch of Summer Moistur...,https://www.ulta.com/yuzu-starfruit-touch-of-s...,Hempz,,4.3,15,,bath & body,suncare,,old,$11.97
pimprod2015149,13.50,"Cosmic Violet, 24K Gold",IGK House Party Hair + Body Glitter Stick,https://www.ulta.com/house-party-hair-body-gli...,IGK,,0,,,hair,hair color,,old,$18.00


In [144]:
# NOTE(review): ad-hoc inspection of the ids in rows 129-138 of the sheet table;
# the slice bounds are hard-coded — looks like leftover debugging, confirm
# whether this cell is still needed.
set(df[129:139].index)

{'620',
 'pimprod2003096',
 'pimprod2015149',
 'xlsImpprod14411013',
 'xlsImpprod15721097',
 'xlsImpprod17631337',
 'xlsImpprod18691109',
 'xlsImpprod5260123'}

In [98]:
def Create_Service(client_secret_file, token_write_file, api_service_name, api_version, *scopes):
    """Authenticate with Google and store an API client in the global `service`.

    Parameters
    ----------
    client_secret_file : str
        Path to the OAuth client-secrets file used when no valid token is cached.
    token_write_file : str
        Path of the pickle file where the credentials are cached between runs.
    api_service_name, api_version : str
        Passed to `build`, e.g. 'sheets' and 'v4'.
    *scopes
        The first positional value must be an iterable of OAuth scope URLs.

    Returns
    -------
    The client returned by `build`, or None when building it failed (the error
    is printed). The same value is stored in the global `service`.
    """
    global service
    # Forget any client left over from an earlier call, so a failure below
    # cannot leave callers silently using the previous account.
    service = None
    SCOPES = list(scopes[0])

    cred = None

    if os.path.exists(token_write_file):
        # NOTE(review): pickle.load runs arbitrary code from the file; only point
        # token_write_file at token files this notebook wrote itself.
        with open(token_write_file, 'rb') as token:
            cred = pickle.load(token)

    if not cred or not cred.valid:
        if cred and cred.expired and cred.refresh_token:
            cred.refresh(Request())
        else:
            # No usable cached credentials: run the interactive browser flow.
            flow = InstalledAppFlow.from_client_secrets_file(client_secret_file, SCOPES)
            cred = flow.run_local_server()

        with open(token_write_file, 'wb') as token:
            pickle.dump(cred, token)

    try:
        service = build(api_service_name, api_version, credentials=cred)
        print(api_service_name, 'service created successfully')
    except Exception as e:
        print(e)
    return service

In [99]:
def _hyperlink_formula(url, text):
    """Build a Sheets =HYPERLINK(...) formula for `url` shown as `text`.

    Double quotes inside either value are doubled so they stay inside the
    formula's string literal instead of ending it early.
    """
    def quoted(value):
        return '"' + str(value).replace('"', '""') + '"'

    return "=HYPERLINK({link}, {text})".format(link=quoted(url), text=quoted(text))


def Add_Hyperlinks(sheetId, df, hyperlink_urls, tab_id=0, column_index=3):
    """Turn each product name in the sheet into a link to its product page.

    Uses the global `service` created by Create_Service.

    Parameters
    ----------
    sheetId : str
        Spreadsheet id passed to batchUpdate.
    df : pandas.DataFrame
        Table that was exported to the sheet; only its 'name' column is read.
    hyperlink_urls : list
        One URL per row of `df`, in the same order.
    tab_id : int, optional
        Numeric id of the tab to write to (default 0, as before).
    column_index : int, optional
        Zero-based column that holds the names (default 3, as before).
    """
    names = df['name'].tolist()
    cell_updates = []
    for i, text in enumerate(names):
        cell_updates.append({
            "updateCells": {
                "rows": [
                    {
                        "values": [{
                            "userEnteredValue": {
                                "formulaValue": _hyperlink_formula(hyperlink_urls[i], text)
                            }
                        }]
                    }
                ],
                "fields": "userEnteredValue",
                "start": {
                    "sheetId": tab_id,
                    # Row 0 is skipped, so data row i lands on sheet row i + 1.
                    "rowIndex": i + 1,
                    "columnIndex": column_index
                }
            }
        })

    if not cell_updates:
        # Nothing to write; skip the API call rather than send an empty batch.
        print('No hyperlinks to add')
        return

    body = {"requests": cell_updates}
    service.spreadsheets().batchUpdate(spreadsheetId=sheetId, body=body).execute()
    print('Hyperlinks successfully added')

In [100]:
print("updating sheet hosted on mod's google drive...")
# Authenticate as the 'main_mod' profile, then clear the sheet, write the table
# and resize the filter to the new table dimensions.
account = 'main_mod'
sheets_scopes = ['https://www.googleapis.com/auth/spreadsheets']
gapi.Create_Service(
    creds.get_credentials_file(account),
    creds.get_token_write_file(account),
    'sheets',
    'v4',
    sheets_scopes,
)
gapi.Clear_Sheet(creds.get_sheet_id(account))
gapi.Export_Data_To_Sheets(creds.get_sheet_id(account), df)
row_count, column_count = len(df), len(df.columns)
gapi.Update_Filter(creds.get_sheet_id(account), creds.get_filter_id(account), row_count, column_count)

updating sheet hosted on mod's google drive...
sheets service created successfully
Sheet successfully cleared
Sheet successfully updated
Filter successfully updated


In [101]:
# Add_Hyperlinks (defined in this notebook) reads this notebook's own global
# `service`, which only the local Create_Service sets — hence this second
# authentication call after gapi.Create_Service (presumably that one stores its
# client inside the gapi module; confirm).
Create_Service(creds.get_credentials_file('main_mod'), creds.get_token_write_file('main_mod'), 'sheets', 'v4', ['https://www.googleapis.com/auth/spreadsheets'])

sheets service created successfully


In [102]:
# Replace the plain product names in the 'main_mod' sheet with links to the product pages.
Add_Hyperlinks(creds.get_sheet_id('main_mod'), df, hyperlink_urls)

Hyperlinks successfully added


In [103]:
print('updating sheet hosted on my google drive...')
# Authenticate as the 'main_local' profile, then clear the sheet, write the
# table and resize the filter to the new table dimensions.
account = 'main_local'
sheets_scopes = ['https://www.googleapis.com/auth/spreadsheets']
gapi.Create_Service(
    creds.get_credentials_file(account),
    creds.get_token_write_file(account),
    'sheets',
    'v4',
    sheets_scopes,
)
gapi.Clear_Sheet(creds.get_sheet_id(account))
gapi.Export_Data_To_Sheets(creds.get_sheet_id(account), df)
row_count, column_count = len(df), len(df.columns)
gapi.Update_Filter(creds.get_sheet_id(account), creds.get_filter_id(account), row_count, column_count)

updating sheet hosted on my google drive...
sheets service created successfully
Sheet successfully cleared
Sheet successfully updated
Filter successfully updated


In [104]:
# Point this notebook's global `service` at the 'main_local' account before
# adding hyperlinks; Add_Hyperlinks uses whichever client was created last.
Create_Service(creds.get_credentials_file('main_local'), creds.get_token_write_file('main_local'), 'sheets', 'v4', ['https://www.googleapis.com/auth/spreadsheets'])

sheets service created successfully


In [105]:
# Replace the plain product names in the 'main_local' sheet with links to the product pages.
Add_Hyperlinks(creds.get_sheet_id('main_local'), df, hyperlink_urls)

Hyperlinks successfully added


In [106]:
# Persist this run's results. Both files are read back by earlier cells of this
# notebook on the next run, as the baseline for price and age comparisons.
secret_sales_in_stock.to_csv('data/secret_sales_in_stock.csv')
ulta_df.to_csv('data/ulta_df.csv')