In [1]:
import pandas as pd
import numpy as np
import requests
from retrying import retry
import re
from bs4 import BeautifulSoup
import time
import math
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow,Flow
from google.auth.transport.requests import Request
import os
import pickle
import ulta_functions as ulta
import google_api_functions as gapi
import google_sheets_credentials as creds
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import copy
import concurrent.futures
import json
import datetime

In [None]:
session = requests.Session()
all_url_info = ulta.get_url_dict(session)
urls = all_url_info.keys()

In [3]:
products = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    futures = {executor.submit(ulta.scrape_url, url, session, products, all_url_info): url for url in urls}
    for future in concurrent.futures.as_completed(futures):
        url = futures[future]
        try:
            data = future.result()
        except Exception as exc:
            print(url, ':', exc)
        else:
            products = data

https://www.ulta.com/skin-care-eye-treatments-eye-cream?N=27hk&No=0&Nrpp=500 0
'NoneType' object has no attribute 'text' 

https://www.ulta.com/skin-care-eye-treatments?N=270k&No=0&Nrpp=500 2
'NoneType' object has no attribute 'text' 



In [4]:
session.close()

In [5]:
ulta_df = (
    pd.DataFrame.from_dict(products)
    .transpose()
    .rename_axis('product_id')
    .fillna(' ')
)

In [6]:
ulta_df

Unnamed: 0_level_0,url,brand,product,rating,no_of_reviews,sale,price,offers,options,main_category,sub_category,sub_sub_category,sale_price
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
xlsImpprod5770263,https://www.ulta.com/bye-bye-redness-neutraliz...,It Cosmetics,Bye Bye Redness Neutralizing Color-Correcting ...,4.50,2856,0,$34.00,Free Gift with Purchase!,3 Colors,makeup,face,,
xlsImpprod15711051,https://www.ulta.com/hd-pro-concealer?productI...,L.A. Girl,HD Pro Concealer,4.10,1702,0,$4.99,,43 Colors,makeup,face,,
xlsImpprod14491009,https://www.ulta.com/naked-skin-color-correcti...,Urban Decay Cosmetics,Naked Skin Color Correcting Fluid,4.30,493,0,$29.00,Free Gift with Purchase!,4 Colors,makeup,face,,
xlsImpprod3590053,https://www.ulta.com/photo-finish-reduce-redne...,Smashbox,Photo Finish Reduce Redness Primer,4.10,854,0,$15.00 - $39.00,,2 Sizes,makeup,face,,
xlsImpprod10791925,https://www.ulta.com/redness-solutions-makeup-...,Clinique,Redness Solutions Makeup Broad Spectrum SPF 15...,4.60,192,0,$29.50,Online Only Purchase $50 in Clinique product a...,4 Colors,makeup,face,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
pimprod2006610,https://www.ulta.com/seb-man-player-medium-hol...,Sebastian,SEB MAN The Player Medium Hold Gel,0.00,,0,$18.00,,,men,hair,,
xlsImpprod18571057,https://www.ulta.com/plaster-master-hard-hold-...,Billy Jealousy,Plaster Master Hard Hold Pomade,0,,0,$16.00,,,men,hair,,
xlsImpprod17361149,https://www.ulta.com/cedarwood-grooming-paste?...,Every Man Jack,Cedarwood Grooming Paste,0.00,,0,$9.99,,,men,hair,,
pimprod2006617,https://www.ulta.com/seb-man-dandy-light-hold-...,Sebastian,SEB MAN The Dandy Light Hold Pomade,0,,0,$18.00,,,men,hair,,


## loading in old data

In [7]:
old_ulta_df = (
    pd.read_csv('data/ulta_df.csv')
    .rename(columns={'price' : 'old_price', 'sale' : 'old_sale', 'options' : 'old_options'})
    .set_index('product_id')
    .loc[:, ['old_price', 'old_sale', 'old_options']]
)

old_secret_sales_in_stock = (
    pd.read_csv('data/secret_sales_in_stock.csv')
    .set_index('product_id')
    .groupby('product_id')
    .first()
)

## checking for products whose price has changed since yesterday

In [8]:
changed_prices_df = (
    pd.merge(ulta_df, old_ulta_df, on='product_id', how='inner')
    .dropna(subset=['price', 'old_price'])
    .query('price != old_price')
    .query('sale == 0 & old_sale == 0')
    .fillna(value={'old_options': ' ', 'options': ' '})
    .pipe(ulta.clean_changed_prices_df)
)

In [9]:
changed_prices_df

Unnamed: 0_level_0,url,brand,product,rating,no_of_reviews,sale,price,offers,options,main_category,sub_category,sub_sub_category,sale_price
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
xlsImpprod14111342,https://www.ulta.com/anti-residue-cleanse?prod...,Unwash,Anti-Residue Cleanse,4.5,53,0,$17.97,,,hair,shampoo & conditioner,,
pimprod2000379,https://www.ulta.com/curls-dry-cleanser?produc...,Unwash,Curls Dry Cleanser,4.2,9,0,$18.97,,,hair,shampoo & conditioner,,
pimprod2000481,https://www.ulta.com/volumizing-foam-cleanser?...,Unwash,Volumizing Foam Cleanser,4.0,22,0,$20.97,,,hair,shampoo & conditioner,,
pimprod2006353,https://www.ulta.com/revitalising-scalp-treatm...,Unwash,Revitalising Scalp Treatment,2.0,1,0,$18.97,,,hair,treatment,leave-in treatment,
xlsImpprod14771013,https://www.ulta.com/travel-size-dry-cleanser?...,Unwash,Travel Size Dry Cleanser,4.4,36,0,$6.97,,,hair,travel size,,
pimprod2004780,https://www.ulta.com/magical-leave-in-conditio...,Unwash,Magical Leave-In Conditioner,5.0,1,0,$17.97,,,hair,treatment,leave-in treatment,
pimprod2000380,https://www.ulta.com/overnight-hair-masque?pro...,Unwash,Overnight Hair Masque,3.3,19,0,$20.97,,,hair,treatment,masks,


## getting products with different color options and more than one price listed

In [10]:
ulta_df_t = ulta_df.dropna(subset=['price', 'options'])
check_prices_df = (
    ulta_df_t[ulta_df_t['options'].str.contains("Colors") & ulta_df_t['price'].str.contains("-")]
    .query('sale == 0')
)

In [11]:
check_prices_df

Unnamed: 0_level_0,url,brand,product,rating,no_of_reviews,sale,price,offers,options,main_category,sub_category,sub_sub_category,sale_price
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
pimprod2003500,https://www.ulta.com/multiglow?productId=pimpr...,ICONIC LONDON,Multiglow,3.70,3,0,$23.80 - $34.00,,3 Colors,makeup,face,highlighter,
pimprod2018114,https://www.ulta.com/shade-light-powder-contou...,KVD Vegan Beauty,Shade + Light Powder Contour Palette Refill,0,,0,$14.00 - $16.00,,6 Colors,makeup,gifts & value sets,,
pimprod2005671,https://www.ulta.com/4-in-1-love-your-selfie-l...,PÜR,4-In-1 Love Your Selfie Longwear Foundation & ...,4.00,395,0,$25.20 - $36.00,,40 Colors,makeup,face,,
xlsImpprod19111013,https://www.ulta.com/studio-fix-24-hour-smooth...,MAC,Studio Fix 24-Hour Smooth Wear Concealer,3.90,159,0,$22.00 - $23.00,,34 Colors,makeup,face,,
pimprod2006201,https://www.ulta.com/stay-woke-luminous-bright...,UOMA Beauty,Stay Woke Luminous Brightening Concealer,4.20,36,0,$12.00 - $25.00,,20 Colors,makeup,face,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
xlsImpprod17761957,https://www.ulta.com/deluxe-bronzing-mousse?pr...,Loving Tan,Deluxe Bronzing Mousse,4.00,412,0,$34.95 - $39.95,,3 Colors,skin care,suncare,,
pimprod2014445,https://www.ulta.com/one-hour-express-self-tan...,Australian Glow,One Hour Express Self Tan Mousse,4.70,127,0,$24.99 - $29.99,Free Gift with Purchase!,2 Colors,bath & body,suncare,,
pimprod2014454,https://www.ulta.com/one-hour-express-self-tan...,Australian Glow,One Hour Express Self Tan Mousse Refill,4.90,18,0,$22.99 - $26.99,Free Gift with Purchase!,2 Colors,bath & body,suncare,,
xlsImpprod14631251,https://www.ulta.com/classic-curling-wand-1-14...,NuMe,Classic Curling Wand 1 1/4'',4.50,422,0,$69.00 - $89.00,,2 Colors,tools & brushes,hair styling tools,,


## getting products with .97 in their price

In [12]:
price_97_df = (
    ulta_df[ulta_df['price'].str.contains('.97')]
    .pipe(copy.deepcopy)
)

## putting them all together removing duplicates

In [13]:
secret_sales_df = (
    pd.concat([changed_prices_df, check_prices_df, price_97_df])
    .groupby('product_id')
    .first()
)

## making sure I'm not excluding any products in the google sheet

In [14]:
not_in_secret_sales_df = ulta.get_secret_sales_not_in_df(secret_sales_df, old_secret_sales_in_stock, ulta_df)

secret_sales_df = (
    pd.concat([secret_sales_df, not_in_secret_sales_df])
    .groupby('product_id')
    .first()
    .query('sale == 0')
)

secret_sales = (
    secret_sales_df
    .transpose()
    .pipe(pd.DataFrame.to_dict)
)

In [15]:
secret_sales_df

Unnamed: 0_level_0,url,brand,product,rating,no_of_reviews,sale,price,offers,options,main_category,sub_category,sub_sub_category,sale_price
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2681,https://www.ulta.com/colour-riche-lipcolour?pr...,L'Oréal,Colour Riche Lipcolour,4.30,1173,0,$8.95 - $10.99,"Buy 2, get 1 FREE - Add 3 items to qualify!",33 Colors,makeup,lips,,
VP10636,https://www.ulta.com/leau-dissey-pour-homme-na...,Issey Miyake,L'Eau D'Issey Pour Homme Natural Spray,4.90,68,0,$57.00 - $97.00,Free Gift with Purchase!,3 Sizes,men,cologne,,
pimprod2000278,https://www.ulta.com/photoready-candid-antioxi...,Revlon,PhotoReady Candid Antioxidant Concealer,4.40,338,0,$4.99 - $9.99,"Buy 1, get 1 at 50% off!",12 Colors,makeup,face,,
pimprod2000323,https://www.ulta.com/conceal-define-full-cover...,Makeup Revolution,Conceal & Define Full Coverage Foundation,3.90,1409,0,$5.99 - $12.00,,49 Colors,makeup,face,,
pimprod2000379,https://www.ulta.com/curls-dry-cleanser?produc...,Unwash,Curls Dry Cleanser,4.20,9,0,$18.97,,,hair,shampoo & conditioner,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
xlsImpprod4070011,https://www.ulta.com/eye-studio-master-precise...,Maybelline,Eye Studio Master Precise Liquid Eyeliner,3.90,819,0,$7.99 - $8.99,"Buy 1, get 1 at 50% off!",3 Colors,makeup,eyes,eyeliner,
xlsImpprod4070041,https://www.ulta.com/studio-secrets-magic-lumi...,L'Oréal,Studio Secrets Magic Lumi Highlighter,3.90,126,0,$12.99 - $13.49,"Buy 2, get 1 FREE - Add 3 items to qualify!",4 Colors,makeup,face,,
xlsImpprod4550001,https://www.ulta.com/naked-skin-weightless-ult...,Urban Decay Cosmetics,Naked Skin Weightless Ultra Definition Liquid ...,4.50,7440,0,$20.00 - $27.97,Free Gift with Purchase!,7 Colors,makeup,face,,
xlsImpprod5050063,https://www.ulta.com/stay-matte-but-not-flat-l...,NYX Professional Makeup,Stay Matte But Not Flat Liquid Foundation,3.70,1332,0,$3.75 - $7.50,,4 Colors,makeup,face,,


## finding out which options are in stock

In [16]:
start = time.time()
driver = webdriver.Chrome(r'C:\Users\elerm\Downloads\chromedriver_win32\chromedriver.exe')
products_in_stock, secret_sales = ulta.get_products_in_stock(secret_sales, driver)
driver.close()
driver.quit()
end = time.time()

AttributeError: 'NoneType' object has no attribute 'get'

In [None]:
products_in_stock_df = (
    pd.DataFrame.from_dict(products_in_stock)
    .transpose()
    .reset_index()
    .rename(columns={'index' : 'product_id'})
    .pipe(pd.melt, id_vars=['product_id'], var_name='price2', value_name='options2')
    .dropna()
    .set_index('product_id')
)

secret_sales_df = (
    pd.DataFrame.from_dict(secret_sales)
    .transpose()
    .rename_axis('product_id')
)

secret_sales_in_stock = (
    products_in_stock_df
    .pipe(pd.merge, secret_sales_df, on='product_id', how='left')
    .pipe(pd.merge, old_secret_sales_in_stock.rename(columns={'old_price' : 'old_secret_sales_old_price'})[['old_secret_sales_old_price']], on='product_id', how='left')
    .pipe(pd.merge, old_ulta_df.rename(columns={'old_price' : 'old_ulta_df_price'})[['old_ulta_df_price']], on='product_id', how='left')
    .rename(columns={'price' : 'ulta_df_price'})
    .fillna(value={'old_secret_sales_old_price': '$0.00', 'old_ulta_df_price': '$0.00', 'ulta_df_price': '$0.00', 'options': ' '})
    .pipe(ulta.add_old_price)
    .pipe(ulta.remove_bad_deals)
    .pipe(ulta.add_age, old_secret_sales_in_stock)
    .drop(columns={'old_secret_sales_old_price', 'old_ulta_df_price', 'ulta_df_price', 'options', 'sale', 'sale_price'})
    .rename(columns={'price2' : 'price', 'options2' : 'options'})
    .pipe(ulta.convert_price_to_float)
    .pipe(ulta.add_precent_off)
    .query('percent_off != -1')
)

hyperlink_urls = secret_sales_in_stock['url'].tolist()

df = (
    secret_sales_in_stock
    .pipe(copy.deepcopy)
    .pipe(ulta.add_name)
    .loc[:, ['main_category', 'sub_category', 'sub_sub_category', 'name', 'brand', 'product', 'price', 'old_price', 'percent_off', 'options', 'offers', 'rating', 'no_of_reviews', 'age']]
    .fillna(' ')
)

In [None]:
df

In [None]:
print('updating sheet hosted on my google drive...')
#update the sheet hosted on my google drive
gapi.Create_Service(creds.get_credentials_file('main_mod'), creds.get_token_write_file('main_mod'), 'sheets', 'v4', ['https://www.googleapis.com/auth/spreadsheets'])
gapi.Clear_Sheet(creds.get_sheet_id('main_mod'))
gapi.Export_Data_To_Sheets(creds.get_sheet_id('main_mod'), df)
gapi.Update_Filter(creds.get_sheet_id('main_mod'), creds.get_filter_id('main_mod'), len(df), len(df.columns))
gapi.Add_Hyperlinks(creds.get_sheet_id('main_mod'), df, hyperlink_urls)
gapi.Add_Percent_Format(creds.get_sheet_id('main_mod'), len(df))

In [None]:
print('updating sheet hosted on my google drive...')
#update the sheet hosted on my google drive
gapi.Create_Service(creds.get_credentials_file('main_local'), creds.get_token_write_file('main_local'), 'sheets', 'v4', ['https://www.googleapis.com/auth/spreadsheets'])
gapi.Clear_Sheet(creds.get_sheet_id('main_local'))
gapi.Export_Data_To_Sheets(creds.get_sheet_id('main_local'), df)
gapi.Update_Filter(creds.get_sheet_id('main_local'), creds.get_filter_id('main_local'), len(df), len(df.columns))
gapi.Add_Hyperlinks(creds.get_sheet_id('main_local'), df, hyperlink_urls)
gapi.Add_Percent_Format(creds.get_sheet_id('main_local'), len(df))

In [None]:
secret_sales_in_stock.to_csv('data/new_secret_sales_in_stock.csv')
ulta_df.to_csv('data/new_ulta_df.csv')