In [14]:
import requests
import pandas as pd
import json
import psycopg2 as ps
import os

In [15]:
# Name of the folder to load from. It is not referenced in the visible cells;
# presumably it is the directory handed to list_csv_files / create_df_from_csv
# -- TODO confirm.
load_dir = 'initial_load'

In [16]:
def list_csv_files(directory):
    """Return the names of the entries in ``directory`` that end with ".csv".

    Only bare entry names are returned (no directory prefix), in whatever
    order ``os.listdir`` yields them. The suffix match is case-sensitive.
    """
    return [entry for entry in os.listdir(directory) if entry.endswith(".csv")]

In [17]:
def create_df_from_csv(directory, csv_files):
    """Read each named CSV file in ``directory`` into a pandas DataFrame.

    Parameters
    ----------
    directory : str
        Folder containing the files. An empty string means the current
        working directory.
    csv_files : iterable of str
        File names (relative to ``directory``) to load.

    Returns
    -------
    dict
        Maps each file name from ``csv_files`` to the DataFrame read from it.
    """
    frames = {}
    for file in csv_files:
        # os.path.join copes with a trailing separator and with an empty
        # directory; plain ``directory + '/'`` would turn '' into the
        # filesystem root.
        csv_path = os.path.join(directory, file)
        try:
            frames[file] = pd.read_csv(csv_path)
        except UnicodeDecodeError:
            # The file is not valid in pandas' default encoding; retry with
            # ISO-8859-1, which accepts any byte sequence.
            frames[file] = pd.read_csv(csv_path, encoding="ISO-8859-1")
    return frames


In [8]:
# Read the API key from a local JSON file so it is not hard-coded in the
# notebook. The context manager closes the file handle, which the previous
# ``json.load(open(...))`` form left open.
with open('keys.json') as keys_file:
    keys = json.load(keys_file)
API_KEY = keys['API_KEY']


In [9]:
page_size = '10'
url = 'https://rebrickable.com/api/v3/lego/sets/'

# Let requests build and URL-encode the query string instead of concatenating
# it by hand; the timeout keeps the cell from hanging on a stalled connection.
response = requests.get(
    url,
    params={'key': API_KEY, 'page_size': page_size},
    timeout=30,
)
# Surface HTTP errors (e.g. a rejected key) here rather than as a confusing
# KeyError on 'results' below.
response.raise_for_status()

results = response.json()['results']
df = pd.json_normalize(results)
df.head()

Unnamed: 0,set_num,name,year,theme_id,num_parts,set_img_url,set_url,last_modified_dt
0,001-1,Gears,1965,1,43,https://cdn.rebrickable.com/media/sets/001-1/1...,https://rebrickable.com/sets/001-1/gears/,2018-05-05T20:39:47.277922Z
1,0011-2,Town Mini-Figures,1979,67,12,https://cdn.rebrickable.com/media/sets/0011-2/...,https://rebrickable.com/sets/0011-2/town-mini-...,2021-07-04T19:03:52.273186Z
2,0011-3,Castle 2 for 1 Bonus Offer,1987,199,0,https://cdn.rebrickable.com/media/sets/0011-3/...,https://rebrickable.com/sets/0011-3/castle-2-f...,2012-04-01T04:47:31.488559Z
3,0012-1,Space Mini-Figures,1979,143,12,https://cdn.rebrickable.com/media/sets/0012-1/...,https://rebrickable.com/sets/0012-1/space-mini...,2013-12-12T23:12:14.245364Z
4,0013-1,Space Mini-Figures,1979,143,12,https://cdn.rebrickable.com/media/sets/0013-1/...,https://rebrickable.com/sets/0013-1/space-mini...,2013-11-08T20:55:48.506908Z


In [None]:
def connect_to_db(host_name, dbname, username, password, port):
    """Open a database connection through ``ps.connect`` and return it.

    The arguments are forwarded as ``host``, ``database``, ``user``,
    ``password`` and ``port``. On success ``'Connected!'`` is printed.

    Raises
    ------
    ps.OperationalError
        Propagated unchanged when the connection attempt fails.
    """
    connection = ps.connect(
        host=host_name,
        database=dbname,
        user=username,
        password=password,
        port=port,
    )
    print('Connected!')
    return connection

In [None]:
def create_table(curr):
    create_table_command = ("""CREATE TABLE IF NOT EXISTS sets (
                    set_num VARCHAR(255) PRIMARY KEY,
                    name TEXT,
                    year INTEGER,
                    theme_id INTEGER,
                    num_parts INTEGER,
                    set_img_url TEXT,
    )""")

    curr.execute(create_table_command)

In [None]:
def check_if_row_exists(curr, set_num):
    """Tell whether ``sets`` already holds a row with this ``set_num``.

    Runs a parameterised SELECT on ``curr`` and returns ``True`` when
    ``fetchone`` yields a row, ``False`` when it yields ``None``.
    """
    curr.execute("SELECT set_num FROM sets WHERE set_num = %s", (set_num,))
    match = curr.fetchone()
    return match is not None

In [None]:
def update_row(curr, name, year, theme_id, num_parts, set_img_url,
               set_url=None, last_modified_dt=None, set_num=None):
    """Overwrite the stored attributes of one existing row in ``sets``.

    Parameters
    ----------
    curr : cursor
        Open database cursor the UPDATE is executed on.
    name, year, theme_id, num_parts, set_img_url
        New values for the columns of the same names.
    set_url, last_modified_dt
        Accepted for compatibility with the earlier signature and ignored:
        the ``sets`` table has no such columns.
    set_num
        Primary key of the row to update. Required.

    Raises
    ------
    ValueError
        If ``set_num`` is not supplied, because the row cannot be identified.
    """
    if set_num is None:
        raise ValueError("set_num is required to identify the row to update")

    # Target the ``sets`` table (not ``set_num``), keep no comma before
    # WHERE, and pass exactly one value per placeholder.
    query = ("""UPDATE sets
            SET name = %s,
                year = %s,
                theme_id = %s,
                num_parts = %s,
                set_img_url = %s
            WHERE set_num = %s""")

    vars_to_update = (name, year, theme_id, num_parts, set_img_url, set_num)
    curr.execute(query, vars_to_update)


def update_db(curr, df):
    """Update rows already stored in ``sets`` and collect the ones that are not.

    For every row of ``df``, ``check_if_row_exists`` is asked whether its
    ``set_num`` is already in the table. Existing rows are updated in place
    through ``update_row``; the others are gathered and returned so the
    caller can insert them.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` whose ``set_num`` was not found. When there are
        none, an empty frame with the six ``sets`` columns is returned.
    """
    set_columns = ['set_num', 'name', 'year', 'theme_id', 'num_parts', 'set_img_url']

    # Collect rows in a list and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the frame on every iteration.
    new_rows = []
    for _, row in df.iterrows():
        if check_if_row_exists(curr, row['set_num']):
            update_row(curr, row['name'], row['year'], row['theme_id'],
                       row['num_parts'], row['set_img_url'],
                       set_num=row['set_num'])
        else:
            new_rows.append(row)

    if not new_rows:
        return pd.DataFrame(columns=set_columns)
    return pd.DataFrame(new_rows)

In [None]:
def insert_into_table(curr, set_num, name, year, theme_id, num_parts, set_img_url,
                      set_url=None, last_modified_dt=None):
    """Insert one row into the ``sets`` table.

    Parameters
    ----------
    curr : cursor
        Open database cursor the INSERT is executed on.
    set_num, name, year, theme_id, num_parts, set_img_url
        Values for the columns of the same names.
    set_url, last_modified_dt
        Optional and ignored: the INSERT lists no such columns. They remain
        in the signature so calls that still pass them keep working.
    """
    insert_into_sets = (
        """INSERT INTO sets (set_num, name, year, theme_id, num_parts, set_img_url) VALUES(%s, %s, %s, %s, %s, %s)""")

    # Exactly six values for six placeholders; passing the two extra fields
    # made the driver reject the statement.
    row_to_insert = (set_num, name, year, theme_id, num_parts, set_img_url)
    curr.execute(insert_into_sets, row_to_insert)


In [None]:
def append_from_df_to_db(curr, df):
    """Insert every row of ``df`` into the database via ``insert_into_table``.

    For each row the ``set_num``, ``name``, ``year``, ``theme_id``,
    ``num_parts`` and ``set_img_url`` values are passed, in that order,
    after the cursor.
    """
    fields = ('set_num', 'name', 'year', 'theme_id', 'num_parts', 'set_img_url')
    for _, record in df.iterrows():
        insert_into_table(curr, *(record[field] for field in fields))


In [18]:
# Load the connection settings from a local JSON file. The context manager
# closes the file handle, which ``json.load(open(...))`` left open.
with open('connection_data.json') as connection_file:
    connection_data = json.load(connection_file)

host_name = connection_data['host_name']
dbname = connection_data['dbname']
username = connection_data['username']
password = connection_data['password']
port = connection_data['port']

# Bind the name first so ``conn`` is None, not undefined, if connecting fails.
conn = None

conn = connect_to_db(host_name, dbname, username, password, port)

NameError: name 'connect_to_db' is not defined

In [None]:
# Open a cursor on the connection; the table-creation, update and insert
# helpers below all execute their SQL through it.
curr = conn.cursor()

In [None]:
# Ensure the target table exists before any rows are checked or written.
create_table(curr)

In [None]:
# Update the rows of df that are already stored; update_db returns the rows
# that are not, which still have to be inserted.
new_set_df = update_db(curr, df)

In [None]:
# Insert the rows that update_db reported as not yet stored.
append_from_df_to_db(curr, new_set_df)

In [None]:
# Commit the transaction so the statements executed through the cursor are
# persisted.
conn.commit()