In [1]:
import pandas as pd
import numpy as np
import requests
from retrying import retry
import re
from bs4 import BeautifulSoup
import time
import math
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow,Flow
from google.auth.transport.requests import Request
import os
import pickle
import ulta_functions as ulta
import google_api_functions as gapi
import google_sheets_credentials as creds
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import copy
import concurrent.futures
import json
import datetime
import psycopg2
from psycopg2.errors import UniqueViolation
import config

In [2]:
def get_products_in_stock(driver, secret_sales):
    wait = WebDriverWait(driver, 30)
    products_in_stock = {}
    for product_id in secret_sales:
        temp = {} #used to temporarily store product data until 
        #opening product url in the driver/browser
        driver.get(secret_sales[product_id]['url'])
        #if the product doesn't exist anymore ulta wil take you to this site
        if driver.current_url == 'https://www.ulta.com/404.jsp':
            next
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ProductSwatchImage__variantHolder')))
        #getting all the product variants from the page
        product_variants = driver.find_elements_by_class_name('ProductSwatchImage__variantHolder')
        if len(product_variants) == 0:
            #products that only have one color or one size or whatever have their product variant information in a different lcoation
            product_variants = driver.find_elements_by_class_name('ProductDetail__productSwatches')
        for product_variant in product_variants:
            try:
                product_variant.click() #clicking on each variant at a time to get their price and availability
            except:         
                next #if I can't click on it I want to go to the next variant
            else:
                wait.until(EC.presence_of_element_located((By.XPATH, "/html/head/meta[10]")))
                #creating a BeautifulSoup object to extract data
                soup = BeautifulSoup(driver.page_source, features="lxml")
                #there are products that only a couple of shades are labeled as sale so I'm removing those to make sure no sale items slip through
                if soup.find('img', {'src' : 'https://images.ulta.com/is/image/Ulta/badge-sale?fmt=png-alpha'}) is not None:
                    next
                #getting price
                price = soup.find('meta', {'property' : 'product:price:amount'}).get('content')
                option = ulta.get_option(soup)
                #only adding the product variant if it's available
                if soup.find('div', {'class' : 'ProductDetail__availabilitySection ProductDetail__availabilitySection--error'}) is None:
                    temp[option] = price
        #checking if the temp dictionary is empty to make sure if there are indeed product variants in stock
        if bool(temp):
            variants_in_stock = ulta.rearrange_product_dict(temp)
            products_in_stock[product_id] = variants_in_stock
        else:
            #if there aren't any product variants in stock, I don't want them in the document
            next
    return(products_in_stock)

In [4]:
def add_new_product_2(conn, product_id, brand, product, main_category, sub_category, sub_sub_category, url, rating, no_of_reviews, offers, date, price, options):
    bool_continue = False
    cur = conn.cursor()
    #insert product into products table
    sql = """INSERT INTO products (product_id, brand, product, main_category, sub_category, sub_sub_category, url) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING products_pkey"""
    try:
        cur.execute(sql, (product_id, brand, product, main_category, sub_category, sub_sub_category, url))
        products_pkey = cur.fetchone()[0]
        bool_continue = True
    except UniqueViolation:
        cur.execute("SELECT products_pkey FROM products WHERE product_id=%s", (product_id,))
        products_pkey = cur.fetchone()[0]
        cur.execute("SELECT extra_pkey FROM extra WHERE products_pkey_foreign=%s", (products_pkey,))
        if cur.fetchone() is None: #that means a product has data in the products table but not in the extra table so we still want to add the product data in the extra table
            bool_continue = True
        else:
            bool_continue = False #that means the product already has data in the products table so it's not actually a new product
    finally:
        if bool_continue: #aka if bool_continue == True
            #insert product into extra table
            sql = """INSERT INTO extra (rating, no_of_reviews, offers, date, price, options, products_pkey_foreign) VALUES (%s, %s, %s, %s, %s, %s, %s)"""
            cur.execute(sql, (rating, no_of_reviews, offers, date, price, options, products_pkey))
        else:
            return #return aka don't do anything

In [None]:
def add_new_product(conn, product_id, brand, product, main_category, sub_category, sub_sub_category, url, rating, no_of_reviews, offers, date):
    bool_continue = False
    cur = conn.cursor()
    #insert product into products table
    sql = """INSERT INTO products (product_id, brand, product, main_category, sub_category, sub_sub_category, url) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING products_pkey"""
    try:
        cur.execute(sql, (product_id, brand, product, main_category, sub_category, sub_sub_category, url))
        products_pkey = cur.fetchone()[0]
        bool_continue = True
    except UniqueViolation:
        cur.execute("SELECT products_pkey FROM products WHERE product_id=%s", (product_id,))
        products_pkey = cur.fetchone()[0]
        cur.execute("SELECT extra_pkey FROM extra WHERE products_pkey_foreign=%s", (products_pkey,))
        if cur.fetchone() is None: #that means a product has data in the products table but not in the extra table so we still want to add the product data in the extra table
            bool_continue = True
        else:
            bool_continue = False #that means the product already has data in the products table so it's not actually a new product
    finally:
        if bool_continue: #aka if bool_continue == True
            #insert product into extra table
            sql = """INSERT INTO ratings_and_offers (rating, no_of_reviews, offers, date, products_pkey_foreign) VALUES (%s, %s, %s, %s, %s)"""
            cur.execute(sql, (rating, no_of_reviews, offers, date, products_pkey))
        else:
            return #return aka don't do anything

In [3]:
def remove_padding(df, column_name):
    no_padding = []
    for i in range(len(df)):
        no_padding.append(df.iloc[i][column_name].strip())
    df[column_name] = no_padding
    return(df)

# step 1: scrape current data from ulta.com

In [5]:
session = requests.Session()
all_url_info = ulta.get_url_dict(session)
urls = all_url_info.keys()

In [6]:
products = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    futures = {executor.submit(ulta.scrape_url, url, session, products, all_url_info): url for url in urls}
    for future in concurrent.futures.as_completed(futures):
        url = futures[future]
        try:
            data = future.result()
        except Exception as exc:
            print(url, ':', exc)
        else:
            products = data

In [7]:
session.close()

In [8]:
current_ulta_df = (
    pd.DataFrame.from_dict(products)
    .transpose()
    .rename_axis('product_id')
)

In [9]:
current_ulta_df

Unnamed: 0_level_0,brand,product,url,rating,no_of_reviews,sale,price,options,main_category,sub_category,sub_sub_category,offers,sale_price
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
xlsImpprod5770263,It Cosmetics,Bye Bye Redness Neutralizing Color-Correcting ...,https://www.ulta.com/bye-bye-redness-neutraliz...,4.50,2858,0,$34.00,3 Colors,makeup,face,,,
xlsImpprod15711051,L.A. Girl,HD Pro Concealer,https://www.ulta.com/hd-pro-concealer?productI...,4.10,1705,0,$4.99,43 Colors,makeup,face,,,
xlsImpprod14491009,Urban Decay Cosmetics,Naked Skin Color Correcting Fluid,https://www.ulta.com/naked-skin-color-correcti...,4.40,832,0,$29.00,3 Colors,makeup,face,,,
xlsImpprod3590053,Smashbox,Photo Finish Reduce Redness Primer,https://www.ulta.com/photo-finish-reduce-redne...,4.10,854,0,$15.00 - $39.00,2 Sizes,makeup,face,,,
xlsImpprod10791925,Clinique,Redness Solutions Makeup Broad Spectrum SPF 15...,https://www.ulta.com/redness-solutions-makeup-...,4.60,192,0,$31.00,4 Colors,makeup,face,,Online Only Purchase $50 in Clinique product a...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
xlsImpprod18571057,Billy Jealousy,Plaster Master Hard Hold Pomade,https://www.ulta.com/plaster-master-hard-hold-...,0,,0,$16.00,,men,hair,,,
pimprod2016011,Frederick Benjamin,Crown Control Forming Creme,https://www.ulta.com/crown-control-forming-cre...,0,,0,$16.00,,men,hair,,,
xlsImpprod17361149,Every Man Jack,Cedarwood Grooming Paste,https://www.ulta.com/cedarwood-grooming-paste?...,0.00,,0,$9.99,,men,hair,,,
pimprod2006617,Sebastian,SEB MAN The Dandy Light Hold Pomade,https://www.ulta.com/seb-man-dandy-light-hold-...,0,,0,$18.00,,men,hair,,,


In [10]:
price = []
for i in range(len(current_ulta_df)):
    if current_ulta_df.iloc[i]['sale'] == 1:
        price.append(current_ulta_df.iloc[i]['sale_price'])
    else:
        price.append(current_ulta_df.iloc[i]['price'])
        
current_ulta_df['price'] = price
current_ulta_df['date'] = [datetime.date.today().strftime('%Y-%m-%d')] * len(current_ulta_df)

current_ulta_df = (
    current_ulta_df
    .drop(columns={'sale_price'})
    .reset_index()
    .fillna(' ')
    .pipe(remove_padding, 'price')
)

In [11]:
current_ulta_df

Unnamed: 0,product_id,brand,product,url,rating,no_of_reviews,sale,price,options,main_category,sub_category,sub_sub_category,offers,date
0,xlsImpprod5770263,It Cosmetics,Bye Bye Redness Neutralizing Color-Correcting ...,https://www.ulta.com/bye-bye-redness-neutraliz...,4.50,2858,0,$34.00,3 Colors,makeup,face,,,2020-08-27
1,xlsImpprod15711051,L.A. Girl,HD Pro Concealer,https://www.ulta.com/hd-pro-concealer?productI...,4.10,1705,0,$4.99,43 Colors,makeup,face,,,2020-08-27
2,xlsImpprod14491009,Urban Decay Cosmetics,Naked Skin Color Correcting Fluid,https://www.ulta.com/naked-skin-color-correcti...,4.40,832,0,$29.00,3 Colors,makeup,face,,,2020-08-27
3,xlsImpprod3590053,Smashbox,Photo Finish Reduce Redness Primer,https://www.ulta.com/photo-finish-reduce-redne...,4.10,854,0,$15.00 - $39.00,2 Sizes,makeup,face,,,2020-08-27
4,xlsImpprod10791925,Clinique,Redness Solutions Makeup Broad Spectrum SPF 15...,https://www.ulta.com/redness-solutions-makeup-...,4.60,192,0,$31.00,4 Colors,makeup,face,,Online Only Purchase $50 in Clinique product a...,2020-08-27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16878,xlsImpprod18571057,Billy Jealousy,Plaster Master Hard Hold Pomade,https://www.ulta.com/plaster-master-hard-hold-...,0,,0,$16.00,,men,hair,,,2020-08-27
16879,pimprod2016011,Frederick Benjamin,Crown Control Forming Creme,https://www.ulta.com/crown-control-forming-cre...,0,,0,$16.00,,men,hair,,,2020-08-27
16880,xlsImpprod17361149,Every Man Jack,Cedarwood Grooming Paste,https://www.ulta.com/cedarwood-grooming-paste?...,0.00,,0,$9.99,,men,hair,,,2020-08-27
16881,pimprod2006617,Sebastian,SEB MAN The Dandy Light Hold Pomade,https://www.ulta.com/seb-man-dandy-light-hold-...,0,,0,$18.00,,men,hair,,,2020-08-27


# step 2: add data to database

### connect to database

In [12]:
params = config.config()
conn = psycopg2.connect(**params)

### get products currently in database

In [80]:
cur = conn.cursor()

In [81]:
cur.execute("""SELECT product_id, brand, product FROM products""")

In [82]:
r = cur.fetchall()

In [87]:
db_dat

Unnamed: 0,product_id,brand,product
0,xlsImpprod5770263,It Cosmetics,Bye Bye Redness Neutralizing Color-Correcting ...
1,xlsImpprod15711051,L.A. Girl,HD Pro Concealer
2,xlsImpprod14491009,Urban Decay Cosmetics,Naked Skin Color Correcting Fluid
3,xlsImpprod3590053,Smashbox,Photo Finish Reduce Redness Primer
4,xlsImpprod10791925,Clinique,Redness Solutions Makeup Broad Spectrum SPF 15...
...,...,...,...
16876,pimprod2006617,Sebastian,SEB MAN The Dandy Light Hold Pomade
16877,xlsImpprod18241089,American Crew,Techseries Boost Spray
16878,pimprod2007998,ULTA,Flamingo Shower Cap
16879,pimprod2017094,ULTA,WHIM by Ulta Beauty Pineapple Sanitizer Sling


In [84]:
cur.close()

### merge current_ulta_df and db_dat to find which products aren't in db_dat

In [85]:
merged_dat = (
    current_ulta_df
    .pipe(pd.merge, db_dat, on='product_id', how = 'left')
    .fillna(' ')
)

new = []
for i in range(len(merged_dat)):
    if merged_dat.iloc[i]['brand_y'] == ' ' or merged_dat.iloc[i]['product_y'] == ' ':
        new.append(True)
    else:
        new.append(False)

merged_dat = (
    merged_dat
    .drop(columns={'brand_y', 'product_y'})
    .rename(columns={'brand_x' : 'brand', 'product_x' : 'product'})
)

merged_dat['new'] = new
#new_products.columns = new_products.columns.astype(str).str.replace('_x', '')

In [86]:
merged_dat

Unnamed: 0,product_id,brand,product,url,rating,no_of_reviews,sale,price,options,main_category,sub_category,sub_sub_category,offers,date,new
0,xlsImpprod5770263,It Cosmetics,Bye Bye Redness Neutralizing Color-Correcting ...,https://www.ulta.com/bye-bye-redness-neutraliz...,4.50,2858,0,$34.00,3 Colors,makeup,face,,,2020-08-27,False
1,xlsImpprod15711051,L.A. Girl,HD Pro Concealer,https://www.ulta.com/hd-pro-concealer?productI...,4.10,1705,0,$4.99,43 Colors,makeup,face,,,2020-08-27,False
2,xlsImpprod14491009,Urban Decay Cosmetics,Naked Skin Color Correcting Fluid,https://www.ulta.com/naked-skin-color-correcti...,4.40,832,0,$29.00,3 Colors,makeup,face,,,2020-08-27,False
3,xlsImpprod3590053,Smashbox,Photo Finish Reduce Redness Primer,https://www.ulta.com/photo-finish-reduce-redne...,4.10,854,0,$15.00 - $39.00,2 Sizes,makeup,face,,,2020-08-27,False
4,xlsImpprod10791925,Clinique,Redness Solutions Makeup Broad Spectrum SPF 15...,https://www.ulta.com/redness-solutions-makeup-...,4.60,192,0,$31.00,4 Colors,makeup,face,,Online Only Purchase $50 in Clinique product a...,2020-08-27,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16878,xlsImpprod18571057,Billy Jealousy,Plaster Master Hard Hold Pomade,https://www.ulta.com/plaster-master-hard-hold-...,0,,0,$16.00,,men,hair,,,2020-08-27,False
16879,pimprod2016011,Frederick Benjamin,Crown Control Forming Creme,https://www.ulta.com/crown-control-forming-cre...,0,,0,$16.00,,men,hair,,,2020-08-27,False
16880,xlsImpprod17361149,Every Man Jack,Cedarwood Grooming Paste,https://www.ulta.com/cedarwood-grooming-paste?...,0.00,,0,$9.99,,men,hair,,,2020-08-27,False
16881,pimprod2006617,Sebastian,SEB MAN The Dandy Light Hold Pomade,https://www.ulta.com/seb-man-dandy-light-hold-...,0,,0,$18.00,,men,hair,,,2020-08-27,False


In [None]:
cur = conn.cursor()

In [337]:
query = """
    SELECT 
        tbl.product_id, rao.rating, rao.no_of_reviews, rao.offers, tbl.max_date, prices.price_str, tbl.max_price, tbl.option
    FROM (
        SELECT 
            products.products_pkey, products.product_id, prices.option, MAX(prices.price) as max_price, MAX(rao.date) as max_date
        FROM products
        LEFT JOIN ratings_and_offers rao ON products.products_pkey = rao.products_pkey_foreign
        LEFT JOIN prices ON rao.ratings_and_offers_pkey = prices.ratings_and_offers_pkey_foreign
        GROUP BY products.products_pkey, products.product_id, prices.option
        ) tbl
    LEFT JOIN ratings_and_offers rao ON rao.products_pkey_foreign = tbl.products_pkey AND rao.date = tbl.max_date
    LEFT JOIN prices ON prices.ratings_and_offers_pkey_foreign = rao.ratings_and_offers_pkey AND prices.option = tbl.option
    ORDER BY tbl.product_id
"""

In [338]:
cur.execute(query)

In [339]:
r = cur.fetchall()

In [345]:
db_dat = (
    pd.DataFrame(r, columns=['product_id', 'rating', 'no_of_reviews', 'offers', 'max_date', 'price_str', 'max_price', 'option'])
    .drop(columns={'max_price', 'option'})
    .pipe(pd.DataFrame.drop_duplicates)
    .sort_values(['product_id', 'max_date'])
    .pipe(pd.DataFrame.drop_duplicates, 'product_id', keep='last')
)

In [346]:
db_dat

Unnamed: 0,product_id,rating,no_of_reviews,offers,max_date,price_str
0,11077,3.30,922.0,,2020-08-23,$6.99
1,11531,4.20,64.0,,2020-08-23,$12.00 - $22.00
3,1378,4.40,249.0,,2020-08-23,$14.99
4,14353,4.20,504.0,,2020-08-23,$8.99
5,1612,4.10,6184.0,"Buy 1, get 1 at 50% off!",2020-08-23,$8.00
...,...,...,...,...,...,...
18283,xlsImpprod860352,2.90,461.0,"Buy 1, get 1 at 50% off!",2020-08-23,$9.49
18284,xlsImpprod890582,4.90,117.0,Free Gift with Purchase!,2020-08-23,$67.00 - $115.00
18287,xlsImpprod930182,4.20,503.0,,2020-08-23,$9.49
18288,xlsImpprod930218,4.30,409.0,"Buy 1, get 1 at 50% off!\r\n\t\t\t\t\t\t\n\r\n...",2020-08-23,$8.99


In [353]:
not_in_db = (
    current_ulta_df
    .pipe(pd.merge, db_dat, on='product_id', how='left')
    .dropna(subset=['price_str'])
    .query('price != price_str | rating_x != rating_y')
    .drop(columns={'rating_y', 'no_of_reviews_y', 'offers_y', 'max_date', 'price_str'})
    .rename(columns={'rating_x' : 'rating', 'no_of_reviews_x' : 'no_of_reviews', 'offers_x' : 'offers'})
)

In [355]:
not_in_db.to_csv('data/not_in_db.csv')

In [296]:
df.query("product_id == 'xlsImpprod13631035'")

Unnamed: 0,product_id,rating,no_of_reviews,offers,max_date,price_str
11207,xlsImpprod13631035,4.7,4005.0,,2020-08-23,$21.00


In [295]:
db_dat.query("product_id == 'xlsImpprod13631035'")

Unnamed: 0,product_id,rating,no_of_reviews,offers,max_date,price_str,max_price,option
11207,xlsImpprod13631035,4.7,4005.0,,2020-08-23,$21.00,21.0,99 Colors
11208,xlsImpprod13631035,4.7,2533.0,,2020-08-13,$21.00,21.0,100 Colors


In [276]:
for i in range(len(df)):
    if df.iloc[i]['product_id'] == df.iloc[i - 1]['product_id'] or df.iloc[i]['product_id'] == df.iloc[i + 1]['product_id']:
        print(df.iloc[i]['product_id'])

fs957496
fs957496
pimprod2000323
pimprod2000323
pimprod2000550
pimprod2000550
pimprod2000984
pimprod2000984
pimprod2002011
pimprod2002011
pimprod2002111
pimprod2002111
pimprod2002622
pimprod2002622
pimprod2002661
pimprod2002661
pimprod2002764
pimprod2002764
pimprod2002994
pimprod2002994
pimprod2004801
pimprod2004801
pimprod2006205
pimprod2006205
pimprod2007476
pimprod2007476
pimprod2007540
pimprod2007540
pimprod2007567
pimprod2007567
pimprod2007760
pimprod2007760
pimprod2013287
pimprod2013287
pimprod2013446
pimprod2013446
pimprod2013678
pimprod2013678
pimprod2014241
pimprod2014241
pimprod2014456
pimprod2014456
pimprod2014718
pimprod2014718
pimprod2015403
pimprod2015403
pimprod2017928
pimprod2017928
pimprod2018153
pimprod2018153
VP11355
VP11355
xlsImpprod10792013
xlsImpprod10792013
xlsImpprod11821031
xlsImpprod11821031
xlsImpprod11911129
xlsImpprod11911129
xlsImpprod12291931
xlsImpprod12291931
xlsImpprod13511109
xlsImpprod13511109
xlsImpprod13561011
xlsImpprod13561011
xlsImpprod13621057

IndexError: single positional indexer is out-of-bounds

In [267]:
len(set(db_dat['product_id'].tolist()))

17005

In [250]:
cur.execute("SELECT ratings_and_offers_pkey, date FROM ratings_and_offers WHERE products_pkey_foreign = 17086")

In [251]:
r = cur.fetchall()

In [252]:
r

[(17084, datetime.date(2020, 8, 23)), (33905, datetime.date(2020, 8, 13))]

In [256]:
cur.execute("SELECT price, price_str, option FROM prices WHERE ratings_and_offers_pkey_foreign = 33905")

In [257]:
r = cur.fetchall()

In [258]:
r

[(6.0, '$6.00 ', '22 Colors')]

### commit database changes and close the connection

In [None]:
conn.commit()
conn.close()

# step 3: find secret sales

### connect to database

In [61]:
conn = psycopg2.connect(**params)

In [62]:
cur = conn.cursor()

### query product data

In [None]:
cur.close()

In [None]:
last_ulta_df = (
    pd.DataFrame(r, columns=['product_id', 'old_price', 'old_offers', 'old_options', 'old_date'])
    .fillna(' ')
    .pipe(remove_padding, 'old_price')
)

### merge the two datasets together

In [None]:
merged_df = pd.merge(current_ulta_df, last_ulta_df, on='product_id', how='left')

### checking for products whose price has changed 

In [None]:
(
    merged_df
    .query("price != old_price & sale == 0")
    .pipe(ulta.clean_changed_prices_df)
)

### getting products with different color options and more than one price listed

In [None]:
(
    merged_df[merged_df['options'].str.contains("Colors") & merged_df['price'].str.contains("-")]
    .query('sale == 0')
    .pipe(copy.deepcopy)
)

### getting products with .97 in their price

In [None]:
(
    merged_df[merged_df['price'].str.contains('.97')]
    .pipe(copy.deepcopy)
)

In [None]:
conn.close()

In [None]:
def add_rows_products_tbl(conn, df):
    cur = conn.cursor()
    df_rows_tuple = ()
    for i in range(len(df)):
        row = (df.iloc[i]['product_id'], df.iloc[i]['brand'], df.iloc[i]['product'], df.iloc[i]['main_category'], df.iloc[i]['sub_category'], df.iloc[i]['sub_sub_category'], df.iloc[i]['url'])
        df_rows_tuple = (row,) + df_rows_tuple
    sql = """INSERT INTO products (product_id, brand, product, main_category, sub_category, sub_sub_category, url) VALUES (%s, %s, %s, %s, %s, %s, %s)"""
    cur.executemany(sql, df_rows_tuple)
    cur.close()

In [67]:
conn.commit()
conn.close()

In [None]:
add_rows_products_tbl(conn, new_products)

In [None]:
params = config.config()
conn = psycopg2.connect(**params)

In [None]:
def add_row_extra_tbl(conn, product_id, rating, no_of_reviews, offers, date, price, options):
    cur = conn.cursor()
    cur.execute("rollback;")
    cur.execute("SELECT products_pkey FROM products WHERE product_id=%s", (product_id,))
    products_pkey = cur.fetchone()[0]
    
    sql = """INSERT INTO extra (rating, no_of_reviews, offers, date, price, options, products_pkey_foreign) VALUES (%s, %s, %s, %s, %s, %s, %s)"""
    try:
        cur.execute(sql, (rating, no_of_reviews, offers, date, price, options, products_pkey))
    except UniqueViolation:
        cur.close()
        return
    else:
        cur.close()

In [None]:
for i in range(len(current_ulta_df)):
    add_row_extra_tbl(conn, current_ulta_df.iloc[i]['product_id'], current_ulta_df.iloc[i]['rating'], current_ulta_df.iloc[i]['no_of_reviews'], current_ulta_df.iloc[i]['offers'], current_ulta_df.iloc[i]['date'], current_ulta_df.iloc[i]['price'], current_ulta_df.iloc[i]['options'])

In [356]:
cur.close()

In [357]:
conn.commit()
conn.close()

In [None]:
params = config.config()
conn = psycopg2.connect(**params)

In [None]:
cur = conn.cursor()

In [None]:
sql = """SELECT products.product_id, extra.price, extra.date from products
            LEFT JOIN extra ON extra.products_pkey_foreign = products.products_pkey"""

In [None]:
cur.execute(sql)

In [None]:
r = cur.fetchall()

In [None]:
cur.close()

In [None]:
prices = pd.DataFrame(r, columns=['product_id', 'price', 'date']).astype({'date': 'str'}).query("date == '2020-08-13'")

In [None]:
merged_df = pd.merge(current_ulta_df, prices, on='product_id')

In [None]:
price_x = []
price_y = []
for i in range(len(merged_df)):
    price_x.append(merged_df.iloc[i]['price_x'].strip())
    price_y.append(merged_df.iloc[i]['price_y'].strip())
merged_df['price_x'] = price_x
merged_df['price_y'] = price_y

In [None]:
merged_df.query('price_x != price_y')

In [68]:
conn = psycopg2.connect(**params)

In [89]:
cur = conn.cursor()

In [90]:
query = """(SELECT rating, no_of_reviews, offers, date FROM ratings_and_offers rao 
            INNER JOIN (SELECT products_pkey_foreign, MAX(date) AS max_date FROM ratings_and_offers GROUP BY products_pkey_foreign) edt 
                ON rao.products_pkey_foreign = edt.products_pkey_foreign AND rao.date = edt.max_date) rao_edt"""

In [91]:
query = """
    SELECT 
        prod.products_pkey, prod.product_id, prod.brand, prod.product, rao_edt.rating, rao_edt.no_of_reviews, rao_edt.offers, rao_edt.date, pri.price, pri.price_str, pri.option
    FROM 
        products prod
    INNER JOIN 
        (SELECT rao.rating, rao.no_of_reviews, rao.offers, rao.date, rao.ratings_and_offers_pkey, rao.products_pkey_foreign FROM ratings_and_offers rao
            INNER JOIN (SELECT products_pkey_foreign, MAX(date) AS max_date FROM ratings_and_offers GROUP BY products_pkey_foreign) edt 
            ON rao.products_pkey_foreign = edt.products_pkey_foreign AND rao.date = edt.max_date
            ) AS rao_edt
    ON 
        rao_edt.products_pkey_foreign = prod.products_pkey
    LEFT JOIN 
        prices pri
    ON 
        rao_edt.ratings_and_offers_pkey = pri.ratings_and_offers_pkey_foreign
    """

In [92]:
cur.execute(query)

In [93]:
r = cur.fetchall()

In [95]:
(
    pd.DataFrame(r, columns=['products_pkey', 'product_id', 'brand', 'product', 'rating', 'no_of_reviews', 'offers', 'max_date', 'price', 'price_str', 'option'])
    .drop(columns={'price', 'option'})
    .pipe(pd.DataFrame.drop_duplicates)
)

Unnamed: 0,products_pkey,product_id,brand,product,rating,no_of_reviews,offers,max_date,price_str
0,16889,xlsImpprod5770263,It Cosmetics,Bye Bye Redness Neutralizing Color-Correcting ...,4.50,2857.0,,2020-08-23,$34.00
1,16890,xlsImpprod15711051,L.A. Girl,HD Pro Concealer,4.10,1705.0,,2020-08-23,$4.99
2,16891,xlsImpprod14491009,Urban Decay Cosmetics,Naked Skin Color Correcting Fluid,4.30,496.0,,2020-08-23,$29.00
3,16892,xlsImpprod3590053,Smashbox,Photo Finish Reduce Redness Primer,4.10,854.0,,2020-08-23,$15.00 - $39.00
5,16893,xlsImpprod10791925,Clinique,Redness Solutions Makeup Broad Spectrum SPF 15...,4.60,192.0,Online Only Purchase $50 in Clinique product a...,2020-08-23,$31.00
...,...,...,...,...,...,...,...,...,...
18083,33765,pimprod2006617,Sebastian,SEB MAN The Dandy Light Hold Pomade,0,,,2020-08-23,$18.00
18084,33766,xlsImpprod18241089,American Crew,Techseries Boost Spray,0,,,2020-08-23,$17.95
18085,33767,pimprod2007998,ULTA,Flamingo Shower Cap,2.50,4.0,,2020-08-23,$4.00
18086,33768,pimprod2017094,ULTA,WHIM by Ulta Beauty Pineapple Sanitizer Sling,3.00,3.0,Free Gift with Purchase!,2020-08-23,$2.50


In [60]:
cur.close()

In [None]:
cur = conn.cursor()

In [None]:
no_price = pd.DataFrame(r, columns=['products_pkey', 'product_id', 'brand', 'product', 'rating', 'no_of_reviews', 'offers', 'date', 'price', 'price_string', 'option']).fillna(' ').query("price == ' '")['product_id'].tolist()

In [None]:
"product_id in {listt}".format(listt = no_price)

In [None]:
prices.query("product_id in {listt}".format(listt = no_price))

In [None]:
df = (
    ulta_df
    .pipe(pd.merge, db_dat.set_index('product_id'), on='product_id', how='left')
    .fillna(' ')
)
diff_price = df[df['price'] != df['price_string']]

#### products not in products table

In [None]:
diff_price.query("price_string == ' '").loc[:, ['b']]

In [None]:
conn.commit()

In [None]:
conn.close()

In [None]:
#if you make a mistake, execute following code afterwards
#cur.execute("rollback;")

In [183]:
cur.execute("rollback;")