In [1]:
import pandas as pd
import requests
from selenium import webdriver
import datetime
import copy
import time
import psycopg2
from psycopg2.errors import UniqueViolation
import concurrent.futures
from bs4 import BeautifulSoup
import ulta_functions as ulta
import config

In [14]:
def add_row_ratings_and_offers(conn, arg, rating, no_of_reviews, offers, date):
    """Insert one row into ``ratings_and_offers`` and return its primary key.

    Parameters
    ----------
    conn : open database connection (psycopg2-style; must provide ``cursor()``).
    arg : int or str
        Either the ``products_pkey`` itself (any integral value) or a
        ``product_id`` string that is resolved to its ``products_pkey``.
    rating, no_of_reviews, offers, date :
        Column values; each is passed to the database as ``str(value)``.

    Returns
    -------
    The ``ratings_and_offers_pkey`` of the inserted row, or of the already
    existing identical row when the insert hits a unique constraint.

    Raises
    ------
    TypeError
        If ``arg`` is neither an integer nor a string.
    ValueError
        If ``arg`` is a ``product_id`` that does not match exactly one product.
    UniqueViolation
        If the insert violates a unique constraint but no row with exactly
        these values can be found afterwards.
    """
    import numbers  # local import: only needed for the primary-key type check

    insert_sql = """INSERT INTO ratings_and_offers (rating, no_of_reviews, offers, date, products_pkey_foreign) VALUES (%s, %s, %s, %s, %s) RETURNING ratings_and_offers_pkey"""
    select_sql = "SELECT ratings_and_offers_pkey FROM ratings_and_offers WHERE rating=%s AND no_of_reviews=%s AND offers=%s AND date=%s AND products_pkey_foreign=%s"
    savepoint = "add_row_ratings_and_offers_sp"

    cur = conn.cursor()
    try:
        if isinstance(arg, str):
            # arg is a product_id, so the primary key has to be looked up.
            cur.execute("SELECT products_pkey FROM products WHERE product_id=%s", (arg,))
            matches = cur.fetchall()
            if len(matches) != 1:
                # Fail loudly here instead of continuing without a foreign key.
                raise ValueError(
                    "PRODUCT_ID NOT UNIQUE! product_id {!r} matched {} rows in products".format(
                        arg, len(matches)
                    )
                )
            products_pkey_foreign = matches[0][0]
        elif isinstance(arg, numbers.Integral):
            # arg already is a primary key (also accepts numpy integers from pandas).
            products_pkey_foreign = arg
        else:
            raise TypeError(
                "arg must be a products_pkey (int) or a product_id (str), got {}".format(
                    type(arg).__name__
                )
            )

        values = (str(rating), str(no_of_reviews), str(offers), str(date), int(products_pkey_foreign))

        # Inside a transaction a failed INSERT aborts the whole transaction, so
        # the fallback SELECT below would be rejected. A savepoint lets us undo
        # just the failed statement. In autocommit mode there is no transaction
        # block and SAVEPOINT is not allowed, so it is skipped.
        use_savepoint = not getattr(conn, "autocommit", False)
        if use_savepoint:
            cur.execute("SAVEPOINT " + savepoint)
        try:
            cur.execute(insert_sql, values)
            ratings_and_offers_pkey = cur.fetchone()[0]
        except UniqueViolation:
            if use_savepoint:
                cur.execute("ROLLBACK TO SAVEPOINT " + savepoint)
            cur.execute(select_sql, values)
            existing = cur.fetchone()
            if existing is None:
                # The constraint fired on a row that differs in some column;
                # surface the original UniqueViolation rather than a TypeError.
                raise
            ratings_and_offers_pkey = existing[0]
        if use_savepoint:
            cur.execute("RELEASE SAVEPOINT " + savepoint)
        return ratings_and_offers_pkey
    finally:
        # Only clean up here: a `return` in `finally` would swallow exceptions.
        cur.close()

In [8]:
def add_rows_prices(conn, ratings_and_offers_pkey, ratings_and_offers_id, prices):
    """Insert the price rows that belong to one ratings_and_offers entry.

    Parameters
    ----------
    conn : open database connection (psycopg2-style; must provide ``cursor()``).
    ratings_and_offers_pkey :
        Value stored in ``ratings_and_offers_pkey_foreign`` for every inserted row.
    ratings_and_offers_id :
        Rows of ``prices`` whose ``ratings_and_offers_id`` column equals this
        value are inserted.
    prices : pandas.DataFrame
        Must contain the columns ``ratings_and_offers_id``, ``price``,
        ``option`` and ``price_string``.

    Returns
    -------
    None. If the batch violates a unique constraint it is skipped as a whole.
    """
    # A boolean mask instead of a string-built query: ids containing quotes no
    # longer break the expression and numeric ids compare against numeric columns.
    filtered_prices = prices[prices['ratings_and_offers_id'] == ratings_and_offers_id]

    # Rows are collected last-to-first, the order the batch has always been sent in.
    price_rows = []
    for i in reversed(range(len(filtered_prices))):
        row = filtered_prices.iloc[i]
        price_rows.append((row['price'], row['option'], ratings_and_offers_pkey, row['price_string']))
    if not price_rows:
        return  # nothing to insert for this id

    sql = """INSERT INTO prices (price, option, ratings_and_offers_pkey_foreign, price_string) VALUES (%s, %s, %s, %s)"""
    savepoint = "add_rows_prices_sp"
    # Inside a transaction a failed INSERT aborts the whole transaction, which
    # would make every later statement fail and turn the final commit into a
    # rollback. The savepoint confines the damage to this batch. In autocommit
    # mode there is no transaction block, so no savepoint is used.
    use_savepoint = not getattr(conn, "autocommit", False)

    cur = conn.cursor()
    try:
        if use_savepoint:
            cur.execute("SAVEPOINT " + savepoint)
        try:
            cur.executemany(sql, price_rows)
        except UniqueViolation:
            # Already-present prices are skipped on purpose; undo the failed batch.
            if use_savepoint:
                cur.execute("ROLLBACK TO SAVEPOINT " + savepoint)
        if use_savepoint:
            cur.execute("RELEASE SAVEPOINT " + savepoint)
    finally:
        cur.close()

In [4]:
# Read the two CSV inputs used by the load loop further down.
ratings_and_offers = pd.read_csv(filepath_or_buffer='data/ratings_and_offers.csv')
prices = pd.read_csv(filepath_or_buffer='data/prices.csv')

In [5]:
# If a statement fails, the open transaction is left in an aborted state.
# Roll it back on the connection before running anything else:
# conn.rollback()

In [6]:
# Connection settings come from the project's config module; the result is
# unpacked as keyword arguments, so it is presumably a dict of connect() options.
params = config.config()
# One connection is shared by all inserts below and is committed/closed at the end.
conn = psycopg2.connect(**params)

In [9]:
# Walk the ratings/offers frame by position. Each entry is inserted first so
# that the key it returns can be attached to the matching price rows.
for i in range(len(ratings_and_offers)):
    row = ratings_and_offers.iloc[i]  # fetch the row once, then read its fields
    product_id = row['product_id']
    ratings_and_offers_id = row['ratings_and_offers_id']
    ratings_and_offers_pkey = add_row_ratings_and_offers(
        conn,
        product_id,
        row['rating'],
        row['no_of_reviews'],
        row['offers'],
        row['date'],
    )
    add_rows_prices(conn, ratings_and_offers_pkey, ratings_and_offers_id, prices)
print('DONE!')

DONE!


In [10]:
# Commit the inserts issued through `conn` by the load loop.
conn.commit()

In [11]:
# Close the database connection now that the load is finished.
conn.close()

1. Pull the latest Ulta data.
2. Query the table for the price stored for the most recent (max) date.
3. Compare the newly pulled Ulta price with the price in the table.
4. If a product is not in the table yet, add the product and its data to the three tables.
5. If the ratings and offers data has changed, add a new row to the table.
6. If the price has changed, add a new row to the table.