# Imports

In [None]:
import json
import numpy
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2

# Create SQL DB connection

In [None]:
# Define a database name (we're using a dataset on births, so we'll call it birth_db)
# Set your postgres username/password, and connection specifics
username = 'postgres'
password = 'password'     # change this
host     = 'localhost'
port     = '5432'            # default port that postgres listens on
db_name  = 'books'




## 'engine' is a connection to a database
## Here, we're using postgres, but sqlalchemy can connect to other things too.
engine = create_engine( 'postgresql://{}:{}@{}:{}/{}'.format(username, password, host, port, db_name) )
print(engine.url)






## create a database (if it doesn't exist)
if not database_exists(engine.url):
    create_database(engine.url)
print(database_exists(engine.url))




con = psycopg2.connect(database = db_name, user = username, password = password, host = host)
cursor = con.cursor()

In [None]:
cursor.execute('''CREATE TABLE IF NOT EXISTS titles (
                isbn_13 text primary key,
                isbn_10 text,
                title text,
                subtitle text,
                publisher text
               );''')

con.commit()

# Create titles database

In [4]:
import random
import string
N = 10
def random_isbn():
    return 'XXX'+''.join(random.choices(string.ascii_uppercase + string.digits, k=N))

In [5]:
file_name = '../data/jsondump.json'

table_name = 'titles'

batch_size = 10000

with open(file_name, 'r') as file_handle:

    ix = 0
    while True:
        titles = []
        subtitles = [] 
        authors = []
        publishers = []
        isbn10s = []
        isbns13 = []

        print('starting a new block, num_blocks = ', ix)
        ix += 1

        for i in range(batch_size):

            data = next(file_handle)
            json_data = json.loads(data)

            keys = json_data.keys()



            if json_data['type']['key'] == '/type/edition':




                if 'isbn_13' in keys:
                    isbn_13 = json_data['isbn_13'][0]
                else:
                    isbn_13 = random_isbn()


                if 'isbn_10' in keys:
                    isbn_10 = json_data['isbn_10'][0]
                else:
                    isbn_10 = 'NULL'


                if 'title' in keys:
                    title = json_data['title']
                else:
                    title = 'NULL'

                if 'subtitle' in keys:
                    subtitle = json_data['subtitle']
                else:
                    subtitle = 'NULL'

                if 'publishers' in keys:
                    publisher = json_data['publishers'][0]
                else:
                    publisher = []



                command = '''
                INSERT INTO titles (isbn_13, isbn_10, title, subtitle, publisher) VALUES (%s, %s, %s, %s, %s);
                '''

                try:
                    cursor.execute(command, (isbn_13, isbn_10, title, subtitle, publisher))
                except Exception as e:
                    print(str(e))
                    print('error!', ix)
                    con.commit()
                    
                    
                    

        con.commit()



starting a new block, num_blocks =  0
starting a new block, num_blocks =  1
starting a new block, num_blocks =  2
starting a new block, num_blocks =  3
starting a new block, num_blocks =  4
starting a new block, num_blocks =  5
starting a new block, num_blocks =  6
starting a new block, num_blocks =  7
starting a new block, num_blocks =  8
starting a new block, num_blocks =  9
starting a new block, num_blocks =  10
starting a new block, num_blocks =  11
duplicate key value violates unique constraint "titles_pkey"
DETAIL:  Key (isbn_13)=(9780201712773) already exists.

error! 12
starting a new block, num_blocks =  12
starting a new block, num_blocks =  13
starting a new block, num_blocks =  14
duplicate key value violates unique constraint "titles_pkey"
DETAIL:  Key (isbn_13)=(9780738811529) already exists.

error! 15
starting a new block, num_blocks =  15
duplicate key value violates unique constraint "titles_pkey"
DETAIL:  Key (isbn_13)=(9780373751693) already exists.

error! 16
dupli

IndexError: list index out of range

# Create authors database

In [None]:
cursor.execute('''CREATE TABLE IF NOT EXISTS authors (
                author_id text primary key,
                author_last_name,
                author_first_name,
               );''')

con.commit()

In [None]:
cursor.execute('''CREATE TABLE IF NOT EXISTS titles_authors (
                isbn_13 text primary key,
                author_id text
               );''')

con.commit()