In [1]:
import pandas as pd
import requests
import datetime
import copy
import concurrent.futures
from bs4 import BeautifulSoup
import ulta_functions as ulta

In [19]:
# Load the saved product snapshot from CSV, index it by product_id, and
# replace every missing value with a single-space string.
ulta_df = (
    pd.read_csv('data/ulta_df.csv')
    .set_index('product_id')
    .fillna(' ')
)

In [20]:
# Split the snapshot into three independent tables that all keep the
# product_id index of `ulta_df`.
#
# Columns are selected first and the (smaller) result is copied with
# DataFrame.copy(), instead of deep-copying the whole snapshot three times via
# copy.deepcopy and only then selecting. The resulting frames are the same:
# each one owns its data, so adding columns later does not touch `ulta_df`.

# Descriptive attributes of each product.
products = ulta_df.loc[
    :, ['main_category', 'sub_category', 'sub_sub_category', 'brand', 'product', 'url']
].copy()

# Pricing attributes of each product.
prices = ulta_df.loc[:, ['sale', 'price', 'sale_price', 'options']].copy()

# Review statistics and promotional offers of each product.
ratings_and_offers = ulta_df.loc[:, ['rating', 'no_of_reviews', 'offers']].copy()

In [21]:
# Tag every ratings/offers row with the same hard-coded date string.
# NOTE(review): presumably the day the CSV snapshot was taken -- confirm.
date = ['08/13/2020' for _ in range(len(ratings_and_offers))]
ratings_and_offers['date'] = date

In [33]:
# Keep only the rows whose `options` text contains '2 Sizes' and whose `price`
# text contains '-', as an independent copy so the cells below can add columns
# without affecting `prices`.
prices_t = prices.loc[
    lambda frame: frame['options'].str.contains('2 Sizes') & frame['price'].str.contains('-')
].copy()

In [34]:
# Split each `price` string on ' - ' and store the two pieces as the new
# columns price1 and price2.
prices_t[['price1', 'price2']] = (
    prices_t['price'].str.split(' - ', expand=True)
)

In [37]:
# Reshape the two-size price table from wide to long: drop the original
# combined `price` column, turn the product_id index back into a column, and
# melt every remaining column into (variable, value) pairs per product_id.
#
# Fixes: the original cell never closed the opening parenthesis (SyntaxError),
# and passed the column to drop as a set rather than a list.
#
# NOTE(review): this cell overwrites `prices_t` with a frame that no longer has
# a `price` column, so running it a second time raises a KeyError.
# NOTE(review): var_name='price2' reuses the name of one of the columns being
# melted, and value_name='options2' holds values from all melted columns --
# confirm these are the intended output column names.
prices_t = (
    prices_t
    .drop(columns=['price'])
    .reset_index()
    .pipe(pd.melt, id_vars=['product_id'], var_name='price2', value_name='options2')
)

In [None]:
# Build a long-format table from the `products_in_stock` mapping: one row per
# (product_id, variable) pair, with missing values dropped and product_id as
# the index.
#
# Fix: the melt previously used id_vars=['id'], a column this chain never
# creates (the former index is renamed to 'product_id'), and which would in any
# case have melted 'product_id' away so the final set_index('product_id')
# could not succeed. The identifier column is now 'product_id' throughout.
#
# NOTE(review): `products_in_stock` is not defined in any visible cell; it must
# exist in the kernel before this cell runs -- confirm where it comes from.
products_in_stock_df = (
    pd.DataFrame.from_dict(products_in_stock)
    .transpose()
    .reset_index()
    .rename(columns={'index': 'product_id'})
    .pipe(pd.melt, id_vars=['product_id'], var_name='price2', value_name='options2')
    .dropna()
    .set_index('product_id')
)

In [None]:
# One HTTP session shared by every request made in the cells below.
session = requests.Session()
# NOTE(review): presumably a dict keyed by URL whose values describe each URL
# (it is later passed to ulta.scrape_url) -- confirm against ulta_functions.
all_url_info = ulta.get_url_dict(session)
# The keys of that mapping are the URLs handed to the scraper.
urls = all_url_info.keys()

In [None]:
# Scrape every URL on a pool of 10 worker threads. Each task receives the URL,
# the shared session, the `current_inventory` dict and `all_url_info`.
# NOTE(review): every task is handed the same dict object and the name is then
# rebound to each task's return value; this only accumulates results if
# ulta.scrape_url returns that same (mutated) dict -- confirm.
current_inventory = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Remember which URL each submitted task belongs to, for error reporting.
    futures = {}
    for url in urls:
        futures[executor.submit(ulta.scrape_url, url, session, current_inventory, all_url_info)] = url

    # Handle tasks in the order they finish rather than the order submitted.
    for future in concurrent.futures.as_completed(futures):
        url = futures[future]
        try:
            data = future.result()
        except Exception as exc:
            # A failed URL is reported and skipped; the other tasks carry on.
            print(url, ':', exc)
            continue
        current_inventory = data

In [None]:
# Scraping is finished: release the HTTP session created earlier.
session.close()

In [None]:
# Turn the scraped inventory into a table with one row per product_id.
# The frame is built once and then sliced, instead of constructing the same
# DataFrame from `current_inventory` twice.
current_inventory_df = (
    pd.DataFrame.from_dict(current_inventory)
    .transpose()
    .rename_axis('product_id')
)

# Descriptive attributes of the freshly scraped products.
# .copy() makes each slice an independent frame, so columns can be added to
# one of them later without affecting the other or `current_inventory_df`.
current_products = current_inventory_df.loc[
    :, ['main_category', 'sub_category', 'sub_sub_category', 'brand', 'product', 'url']
].copy()

# Pricing, offer and review attributes of the freshly scraped products.
current_prices = current_inventory_df.loc[
    :, ['sale', 'price', 'sale_price', 'options', 'offers', 'rating', 'no_of_reviews']
].copy()

In [None]:
# Add newly scraped products that are not yet in `products`.
# Membership is tested directly on the index with isin() rather than by
# formatting every known product_id into a query string, which had to be
# re-parsed and depended on each id having a safe repr.
is_new_product = ~current_products.index.isin(products.index)
products = pd.concat([current_products.loc[is_new_product], products])

# Stamp today's date (MM/DD/YYYY) on the scraped prices and append them to the
# price history.
# NOTE(review): this cell rebinds `products` and `prices`, so running it again
# appends `current_prices` a second time. Rows already in `prices` have no
# date/offers/rating/no_of_reviews columns and will hold NaN there after the
# concat -- confirm this is intended.
date = [datetime.datetime.today().strftime('%m/%d/%Y')] * len(current_prices)
current_prices['date'] = date
prices = pd.concat([prices, current_prices])

In [None]:
# Join product attributes to their price rows on product_id, then replace any
# remaining missing values with a single-space string.
dat = (
    products
    .merge(prices, on='product_id')
    .fillna(' ')
)