In [1]:
import pandas as pd
import numpy as np
import requests
from retrying import retry
import re
from bs4 import BeautifulSoup
import time
import math
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow,Flow
from google.auth.transport.requests import Request
import os
import pickle
import ulta_functions as ulta
import google_api_functions as gapi
import google_sheets_credentials as creds
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import copy
import concurrent.futures
import json
import datetime

In [2]:
# One HTTP session, handed to ulta.get_url_dict here and to ulta.scrape_url in the scraping cell.
session = requests.Session()
# Project helper result; only `.keys()` is used directly in this notebook.
# NOTE(review): presumably a dict keyed by the category page URL to scrape — confirm in ulta_functions.
all_url_info = ulta.get_url_dict(session)
# Kept as the object returned by `.keys()` (not copied to a list); iterated once when the jobs are submitted.
urls = all_url_info.keys()

In [3]:
# Scrape every URL on a pool of 10 worker threads.
# Each job is given the same `products` dict, and whatever a finished job
# returns is rebound to `products`; a job that raises is reported and skipped.
# NOTE(review): this only accumulates results if ulta.scrape_url returns the
# shared dict it was given — confirm, otherwise earlier results are overwritten.
products = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    future_to_url = {}
    for url in urls:
        job = executor.submit(ulta.scrape_url, url, session, products, all_url_info)
        future_to_url[job] = url

    for finished in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[finished]
        try:
            scraped = finished.result()
        except Exception as err:
            # Keep going: one failing page should not abort the whole scrape.
            print(url, ':', err)
            continue
        products = scraped

https://www.ulta.com/skin-care-eye-treatments-eye-cream?N=27hk&No=0&Nrpp=500 0
'NoneType' object has no attribute 'text' 

https://www.ulta.com/skin-care-eye-treatments?N=270k&No=0&Nrpp=500 2
'NoneType' object has no attribute 'text' 



In [4]:
# Close the requests session opened at the top of the notebook; no later visible cell uses it.
session.close()

In [5]:
# from_dict builds one column per top-level key of `products`; transposing
# turns those keys into the row index, which is then named 'id'.
# NOTE(review): presumably products maps product id -> dict of fields — confirm.
ulta_df = pd.DataFrame.from_dict(products).T.rename_axis('id')

## loading in old data

In [6]:
# Columns of the previous snapshot that are compared against today's scrape,
# renamed with an `old_` prefix so they stay distinct after merging.
old_column_names = {
    'price' : 'old_price',
    'sale' : 'old_sale',
    'secret_sale' : 'old_secret_sale',
    'options' : 'old_options',
}

# Previous snapshot, indexed by id and reduced to the renamed columns only.
previous_snapshot = pd.read_csv('data/ulta_df.csv').rename(columns=old_column_names)
old_ulta_df = previous_snapshot.set_index('id')[list(old_column_names.values())]

# Previously exported in-stock table, collapsed to one row per id
# (first() takes, per column, the first non-null value within each id group).
previous_in_stock = pd.read_csv('data/secret_sales_in_stock.csv').set_index('id')
old_secret_sales_in_stock = previous_in_stock.groupby('id').first()

## checking for products whose price has changed since yesterday

In [7]:
# Products present in both today's scrape and the previous snapshot.
in_both_snapshots = ulta_df.merge(old_ulta_df, on='id', how='inner')

# Keep rows where both prices are known, the price differs from the old one,
# and the product is not flagged as on sale in either snapshot.
price_moved = (
    in_both_snapshots
    .dropna(subset=['price', 'old_price'])
    .query('price != old_price')
    .query('sale == 0 & old_sale == 0')
)

# Missing option text becomes a single space before the project helper runs.
changed_prices_df = ulta.clean_changed_prices_df(
    price_moved.fillna(value={'old_options': ' ', 'options': ' '})
)

## getting products with different color options and more than one price listed

In [None]:
# Rows need both a price and an options string for the text tests below.
ulta_df_t = ulta_df.dropna(subset=['price', 'options'])

# Options text mentions "Colors" and the price text contains a "-".
# NOTE(review): a "-" in the price presumably marks a price range — confirm against the scraper.
mentions_colors = ulta_df_t['options'].str.contains("Colors")
price_has_dash = ulta_df_t['price'].str.contains("-")

# Of those, keep only products not flagged as sale or secret sale.
check_prices_df = ulta_df_t[mentions_colors & price_has_dash].query('sale == 0 & secret_sale == 0')

## getting products with .97 in their price

In [None]:
# Independent copy of the rows flagged secret_sale == 1 and not flagged sale.
# NOTE(review): the heading says ".97 in their price"; presumably the scraper
# sets secret_sale for such prices — confirm in ulta_functions.
price_97_df = copy.deepcopy(ulta_df.query('secret_sale == 1 & sale == 0'))

In [None]:
# Display the frame built in the previous cell for a visual check.
price_97_df

## putting them all together and removing duplicates

In [None]:
# Stack the three candidate sets, then collapse to one row per id
# (first() takes, per column, the first non-null value within each id group,
# so earlier frames in the list win where they have a value).
candidate_frames = [changed_prices_df, check_prices_df, price_97_df]
secret_sales_df = pd.concat(candidate_frames).groupby('id').first()

## making sure I'm not excluding any products in the google sheet

In [None]:
# Project helper; given today's candidates, the previously exported table and the full scrape.
# NOTE(review): presumably returns previously listed products missing from today's candidates — confirm.
not_in_secret_sales_df = ulta.get_secret_sales_not_in_df(secret_sales_df, old_secret_sales_in_stock, ulta_df)

# Append them, collapse back to one row per id, and drop anything flagged as on sale.
with_carried_over = pd.concat([secret_sales_df, not_in_secret_sales_df])
secret_sales_df = with_carried_over.groupby('id').first().query('sale == 0')

# Nested dict keyed by id ({id: {column: value}}), the form handed to the stock lookup.
secret_sales = secret_sales_df.T.to_dict()

## finding out which options are in stock

In [None]:
# Location of the chromedriver binary: taken from the CHROMEDRIVER_PATH
# environment variable when it is set, otherwise the original local path.
chromedriver_path = os.environ.get(
    'CHROMEDRIVER_PATH',
    r'C:\Users\elerm\Downloads\chromedriver_win32\chromedriver.exe',
)

# `start` / `end` bracket the browser-driven lookup so its duration can be inspected.
start = time.time()
driver = webdriver.Chrome(chromedriver_path)
try:
    # NOTE(review): get_products_in_stock is neither defined nor imported in the
    # visible notebook cells — presumably it lives in ulta_functions; confirm,
    # otherwise this raises NameError on a fresh kernel.
    products_in_stock, secret_sales = get_products_in_stock(secret_sales, driver)
finally:
    # Always end the WebDriver session, even when the lookup raises, so no
    # browser/chromedriver process is left running. quit() closes every window,
    # which makes a separate close() call unnecessary.
    driver.quit()
end = time.time()

In [None]:
# Long-form table of in-stock entries: after the transpose each top-level key of
# `products_in_stock` is a row ('id'); melt then turns every remaining column
# label into `price2` and its cell into `options2`, dropping empty combinations.
# NOTE(review): presumably products_in_stock is {id: {price: options}} — confirm.
products_in_stock_df = (
    pd.DataFrame.from_dict(products_in_stock)
    .T
    .reset_index()
    .rename(columns={'index' : 'id'})
    .melt(id_vars=['id'], var_name='price2', value_name='options2')
    .dropna()
    .set_index('id')
)

# Candidate products as returned by the stock lookup, back in frame form.
secret_sales_df = pd.DataFrame.from_dict(secret_sales).T.rename_axis('id')

# Single-column lookups of earlier prices, renamed so the three price sources
# stay distinguishable once merged.
previous_listing_price = (
    old_secret_sales_in_stock
    .rename(columns={'old_price' : 'old_secret_sales_old_price'})
    [['old_secret_sales_old_price']]
)
previous_catalogue_price = (
    old_ulta_df
    .rename(columns={'old_price' : 'old_ulta_df_price'})
    [['old_ulta_df_price']]
)

# Placeholders for missing values, applied before the project helpers run.
placeholder_values = {'old_secret_sales_old_price': '$0.00', 'old_ulta_df_price': '$0.00', 'ulta_df_price': '$0.00', 'options': ' '}
# Columns dropped once the helpers have run.
helper_columns = {'old_secret_sales_old_price', 'old_ulta_df_price', 'ulta_df_price', 'options', 'sale', 'secret_sale', 'sale_price'}

secret_sales_in_stock = (
    products_in_stock_df
    .merge(secret_sales_df, on='id', how='left')
    .merge(previous_listing_price, on='id', how='left')
    .merge(previous_catalogue_price, on='id', how='left')
    .rename(columns={'price' : 'ulta_df_price'})
    .fillna(value=placeholder_values)
    .pipe(ulta.add_old_price)
    .pipe(ulta.add_age)
    .drop(columns=helper_columns)
    # The per-option price/options from the stock lookup become the final columns.
    .rename(columns={'price2' : 'price', 'options2' : 'options'})
    .pipe(ulta.convert_price_to_float)
    .pipe(ulta.add_precent_off)
    .query('percent_off != -1')
)

# URLs in row order, passed to gapi.Add_Hyperlinks alongside `df`.
hyperlink_urls = secret_sales_in_stock['url'].tolist()

# Columns exported to the sheet, in display order; blanks shown as a single space.
sheet_columns = ['main_category', 'sub_category', 'sub_sub_category', 'name', 'brand', 'product', 'price', 'old_price', 'percent_off', 'options', 'offers', 'rating', 'no_of_reviews', 'age']
df = copy.deepcopy(secret_sales_in_stock)[sheet_columns].fillna(' ')

In [None]:
# Display the table that the following cells export to Google Sheets.
df

In [None]:
# Push `df` to the Google Sheet registered under this credentials key:
# create the API service, clear the sheet, write the data, then refresh the
# filter range and the hyperlinks.
sheet_key = 'main_mod'
sheets_scopes = ['https://www.googleapis.com/auth/spreadsheets']

print('updating sheet hosted on my google drive...')
gapi.Create_Service(
    creds.get_credentials_file(sheet_key),
    creds.get_token_write_file(sheet_key),
    'sheets',
    'v4',
    sheets_scopes,
)
gapi.Clear_Sheet(creds.get_sheet_id(sheet_key))
gapi.Export_Data_To_Sheets(creds.get_sheet_id(sheet_key), df)
gapi.Update_Filter(creds.get_sheet_id(sheet_key), creds.get_filter_id(sheet_key), len(df), len(df.columns))
gapi.Add_Hyperlinks(creds.get_sheet_id(sheet_key), df, hyperlink_urls)

In [None]:
# Same export as for the other sheet, against the 'main_local' credentials key:
# create the API service, clear the sheet, write the data, then refresh the
# filter range and the hyperlinks.
sheet_key = 'main_local'
sheets_scopes = ['https://www.googleapis.com/auth/spreadsheets']

print('updating sheet hosted on my google drive...')
gapi.Create_Service(
    creds.get_credentials_file(sheet_key),
    creds.get_token_write_file(sheet_key),
    'sheets',
    'v4',
    sheets_scopes,
)
gapi.Clear_Sheet(creds.get_sheet_id(sheet_key))
gapi.Export_Data_To_Sheets(creds.get_sheet_id(sheet_key), df)
gapi.Update_Filter(creds.get_sheet_id(sheet_key), creds.get_filter_id(sheet_key), len(df), len(df.columns))
gapi.Add_Hyperlinks(creds.get_sheet_id(sheet_key), df, hyperlink_urls)

In [None]:
# Persist today's results. These are the same two files the "loading in old data"
# cell reads, so after this cell runs the next execution compares against today's data.
secret_sales_in_stock.to_csv('data/secret_sales_in_stock.csv')
ulta_df.to_csv('data/ulta_df.csv')

In [8]:
# Load a saved table from disk for inspection.
# NOTE(review): this rebinds `df`, the name the pipeline above uses for the frame
# exported to Google Sheets — presumably a scratch cell; confirm before running
# the export cells after this one.
df = pd.read_csv('data/current.csv')

In [9]:
# Display the table loaded from 'data/current.csv' in the previous cell.
df

Unnamed: 0,main_category,sub_category,sub_sub_category,name,brand,product,price,old_price,percent_off,options,offers,rating,no_of_reviews,age
0,makeup,lips,,L'OréalColour Riche Lipcolour,L'Oréal,Colour Riche Lipcolour,8.95,$10.99,18.56%,"Seine Sunset, S'Il Vous Plait, Maison Marais, ...","Buy 2, get 1 FREE - Add 3 items to qualify!",4.3,1173.0,old
1,makeup,face,,RevlonPhotoReady Candid Antioxidant Concealer,Revlon,PhotoReady Candid Antioxidant Concealer,4.99,$9.99,50.05%,"Hazelnut 075, Oat 028","Buy 1, get 1 at 50% off!",4.4,338.0,old
2,makeup,lips,,RevlonSuper Lustrous Glass Shine Lipstick,Revlon,Super Lustrous Glass Shine Lipstick,4.99,$9.99,50.05%,Cherries in the Snow,"Buy 1, get 1 at 50% off!",4.0,50.0,old
3,makeup,lips,lipstick,Wet n WildMegalast Liquid Catsuit Metallic Lip...,Wet n Wild,Megalast Liquid Catsuit Metallic Lipstick,4.99,$5.29,5.67%,Harbor A Crush,,4.1,67.0,old
4,makeup,lips,,Wet n WildMegaLast Liquid Catsuit Matte Lipstick,Wet n Wild,MegaLast Liquid Catsuit Matte Lipstick,4.99,$5.49,9.11%,"The Shade is teal, So Noir Cool",,4.2,371.0,old
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,makeup,eyes,eyeliner,Sleek MakeUPLifeproof 12 Hour Wear Metallic Ey...,Sleek MakeUP,Lifeproof 12 Hour Wear Metallic Eyeliner,3.49,$6.99,50.07%,Misinformation (blue),"Buy 1, get 1 at 50% off!",4.3,683.0,old
103,makeup,face,,RevlonPhotoReady Candid Natural Finish Anti-Po...,Revlon,PhotoReady Candid Natural Finish Anti-Pollutio...,2.74,$10.99,75.07%,"Espresso 560, Walnut 540","Buy 1, get 1 at 50% off!",4.2,565.0,old
104,makeup,face,,Urban Decay CosmeticsNaked Skin Weightless Ult...,Urban Decay Cosmetics,Naked Skin Weightless Ultra Definition Liquid ...,27.97,$27.97,,12.5 (dark w/subtle pink undertone),Free Gift with Purchase!,4.4,5113.0,old
105,makeup,face,,NYX Professional MakeupStay Matte But Not Flat...,NYX Professional Makeup,Stay Matte But Not Flat Liquid Foundation,3.75,$7.50,50.00%,Chestnut,,3.7,1332.0,old
