# Imports

In [1]:
import csv
import json
import sys


import numpy


from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2


import shelfy

##### Create SQL DB connection

In [2]:
# Connection settings for the postgres database that stores the book data.
# Set your postgres username/password, and connection specifics
username = 'postgres'
password = 'password'     # change this
host     = 'localhost'
port     = '5432'            # default port that postgres listens on
db_name  = 'book_info'




## 'engine' is a connection to a database
## Here, we're using postgres, but sqlalchemy can connect to other things too.
engine = create_engine( 'postgresql://{}:{}@{}:{}/{}'.format(username, password, host, port, db_name) )
print(engine.url)


## create a database (if it doesn't exist)
if not database_exists(engine.url):
    create_database(engine.url)
print(database_exists(engine.url))



# Create connection and cursor object to insert info into db.
# The port is passed explicitly so this connection targets the same server as
# 'engine'; without it the driver falls back to its own default port and the
# 'port' setting above would be silently ignored here.
con = psycopg2.connect(database = db_name, user = username, password = password, host = host, port = port)
cursor = con.cursor()

postgresql://postgres:password@localhost:5432/book_info
True


##### Create works, editions, authors and publishers tables

In [3]:
# One CREATE statement per table; IF NOT EXISTS makes re-running this cell safe.
table_statements = (
    '''CREATE TABLE IF NOT EXISTS works (
                index BIGSERIAL PRIMARY KEY,
                titles TEXT);''',
    '''CREATE TABLE IF NOT EXISTS editions (
                index BIGSERIAL PRIMARY KEY,
                titles TEXT);''',
    '''CREATE TABLE IF NOT EXISTS authors (
                index BIGSERIAL PRIMARY KEY,
                authors TEXT);''',
    '''CREATE TABLE IF NOT EXISTS publishers (
                index BIGSERIAL PRIMARY KEY,
                publishers TEXT);''',
)

# Run the statements in the order works, editions, authors, publishers.
for statement in table_statements:
    cursor.execute(statement)

# The table creation only takes effect once it is committed.
con.commit()

##### Fill titles

In [None]:
# Works: insert the 'title' of every record in the works dump into 'works'.
command = '''INSERT INTO works (titles) VALUES (%s);'''


titles_path = shelfy.SHELFY_BASE_PATH + '/raw_data/dumps/' + 'ol_dump_works_2017-12-31.txt'


num_fails = 0
# Decode as UTF-8 explicitly so the result does not depend on the platform
# locale.  NOTE(review): assumes the dump is UTF-8 encoded - confirm.
with open(titles_path, 'r', encoding='utf-8') as file_handle:

    for row in file_handle:
        try:
            # The JSON record is the last tab-separated field of the line.
            title = json.loads(row.split('\t')[-1])['title']
            cursor.execute(command, (title,))
            con.commit()
        except Exception:
            # Best-effort load: count the bad row and keep going.  Catching
            # Exception (not a bare except) lets Ctrl-C still stop the loop.
            # The rollback clears a transaction left aborted by a failed
            # INSERT; without it every following INSERT would fail as well.
            con.rollback()
            num_fails += 1
            print('failed', num_fails)

failed 1
failed 2
failed 3
failed 4
failed 5
failed 6


##### Fill editions

In [None]:
# Editions: insert the 'title' of every record in the editions dump into
# 'editions'.

command = '''INSERT INTO editions (titles) VALUES (%s);'''


titles_path = shelfy.SHELFY_BASE_PATH + '/raw_data/dumps/' + 'ol_dump_editions_2017-12-31.txt'


num_fails = 0
# newline='' is what the csv module asks for when it is handed a file object;
# the explicit encoding keeps decoding independent of the platform locale.
# NOTE(review): assumes the dump is UTF-8 encoded - confirm.
with open(titles_path, 'r', encoding='utf-8', newline='') as file_handle:
    # Tab-separated reader with quoting disabled, so quote characters inside
    # the JSON field are kept as-is.
    reader = csv.reader(file_handle, delimiter = '\t', quoting=csv.QUOTE_NONE)

    for row in reader:
        try:
            # The JSON record is read from the fifth tab-separated field.
            title = json.loads(row[4])['title']
            cursor.execute(command, (title,))
            con.commit()
        except Exception:
            # Best-effort load: count the bad row and keep going.  Catching
            # Exception (not a bare except) lets Ctrl-C still stop the loop.
            # The rollback clears a transaction left aborted by a failed
            # INSERT; without it every following INSERT would fail as well.
            con.rollback()
            num_fails += 1
            print('failed', num_fails)


##### Parse edition records from the JSON dump (dry run)

In [None]:
# Dry run over the JSON dump: reads up to 10 blocks of 'batch_size' lines,
# prints every edition record and extracts the fields for the 'titles' table.
file_name = '../data/jsondump.json'

table_name = 'titles'

batch_size = 10000

# Parameterized INSERT for one edition row; built once, outside the loops.
command = '''
                INSERT INTO titles (isbn_13, isbn_10, title, subtitle, publisher) VALUES (%s, %s, %s, %s, %s);
                '''

with open(file_name, 'r') as file_handle:

    ix = 0
    reached_eof = False
    while ix < 10 and not reached_eof:

        print('starting a new block, num_blocks = ', ix)
        ix += 1

        for i in range(batch_size):

            # next() with a default returns None at end of file instead of
            # raising StopIteration, so a short file ends the run cleanly.
            data = next(file_handle, None)
            if data is None:
                reached_eof = True
                break
            json_data = json.loads(data)

            # Only edition records are of interest here.
            if json_data['type']['key'] != '/type/edition':
                continue

            print(json_data)

            # For the list-valued fields take the first entry; a missing key
            # and an empty list are both treated as "no value".
            isbn_13_values = json_data.get('isbn_13')
            if isbn_13_values:
                isbn_13 = isbn_13_values[0]
            else:
                # NOTE(review): random_isbn is not defined in the visible part
                # of this file - confirm it exists before running this cell.
                isbn_13 = random_isbn()

            isbn_10_values = json_data.get('isbn_10')
            isbn_10 = isbn_10_values[0] if isbn_10_values else 'NULL'

            title = json_data.get('title', 'NULL')
            subtitle = json_data.get('subtitle', 'NULL')

            # Missing publishers use the same 'NULL' placeholder text as the
            # other fields (previously an empty list).
            publisher_values = json_data.get('publishers')
            publisher = publisher_values[0] if publisher_values else 'NULL'

            # The INSERT itself is still disabled, so nothing is written to
            # the database.  To load rows, execute 'command' with the tuple
            # (isbn_13, isbn_10, title, subtitle, publisher) here.

        con.commit()



## Create titles_2 database

In [None]:
# Schema for a 'works' table keyed by work_key, with the title alongside.
# NOTE(review): 'works' is also created earlier in this file with different
# columns; IF NOT EXISTS leaves an existing table untouched - confirm which
# schema is intended.
works_ddl = '''CREATE TABLE IF NOT EXISTS works (
                work_key text primary key,
                title text
               );'''

cursor.execute(works_ddl)
con.commit()

In [None]:
# Path of the works dump file that the following cell opens and loads.
file_name = '../data/ol_dump_works_2017-12-31.txt'

In [None]:
# Load (work_key, title) pairs from the dump at 'file_name' into 'works'.
# Parameterized INSERT, built once outside the loop.
command = '''
            INSERT INTO works (work_key, title) VALUES (%s, %s)
            '''

with open(file_name) as file_handle:

    # Iterating the handle ends cleanly at end of file (the earlier
    # 'while True' / next() form ended with an uncaught StopIteration).
    for line in file_handle:
        try:
            # The JSON record starts at the first '{' on the line.
            json_start = line.find('{')
            json_data = json.loads(line[json_start:])

            title = json_data['title']
            # Keep only the last path segment of the record's key.
            work_key = json_data['key'].split('/')[-1]

            cursor.execute(command, (work_key, title))
            con.commit()
        except Exception as e:
            # Best-effort load: report the bad line and continue.  Parsing
            # now happens inside the try so one malformed line no longer
            # aborts the whole load.
            print('error')
            print(str(e))
            # Roll back so a failed INSERT does not leave the transaction
            # in an aborted state for the next row.
            con.rollback()
            

In [None]:
# Scratch cell: prints a placeholder string.
print('asdf')

# Create authors table

In [None]:
# Schema for an 'authors' table keyed by author_id, with the name alongside.
# NOTE(review): 'authors' is also created earlier in this file with different
# columns; IF NOT EXISTS leaves an existing table untouched - confirm which
# schema is intended.
authors_ddl = '''CREATE TABLE IF NOT EXISTS authors (
                author_id text primary key,
                author_name text
               );'''

cursor.execute(authors_ddl)
con.commit()

In [None]:
# Load (author_id, author_name) pairs from the authors dump into 'authors'.
file_name = '../data/ol_dump_authors_2017-12-31.txt'

# Parameterized INSERT, built once outside the loop.
command = '''
        INSERT INTO authors (author_id, author_name) VALUES (%s, %s);
        '''

with open(file_name) as file_handle:
    # Iterating the handle ends cleanly at end of file (the earlier
    # 'while True' / next() form ended with an uncaught StopIteration).
    for line in file_handle:
        try:
            # Parse the JSON record instead of splitting the raw text: text
            # splitting cut names at their first comma and returned junk
            # whenever "name" was not the first key of the record.
            json_data = json.loads(line[line.find('{'):])

            # NOTE(review): assumes the record's 'key' field holds
            # '/authors/<id>', mirroring how the works loader reads its key -
            # confirm against the dump.
            author_id = json_data['key'].split('/')[-1]
            author_name = json_data['name']

            cursor.execute(command, (author_id, author_name))
            con.commit()
        except Exception as e:
            # Best-effort load: report the bad line and continue.
            print('error')
            print(str(e))
            # Roll back so a failed INSERT does not leave the transaction
            # in an aborted state for the next row.
            con.rollback()

In [None]:
# Link table between titles (by isbn_13) and authors (by author_id).
titles_authors_ddl = '''CREATE TABLE IF NOT EXISTS titles_authors (
                isbn_13 text primary key,
                author_id text
               );'''

cursor.execute(titles_authors_ddl)
con.commit()

# Test queries

##### Titles

In [None]:
# Fetch the isbn_10 column of 'titles' and lower-case each value in one pass.
# NOTE(review): this section is labelled "Titles" but the query selects
# isbn_10 - confirm the intended column.
cursor.execute('''SELECT isbn_10 FROM titles;''')
titles = [record[0].lower() for record in cursor.fetchall()]

In [None]:
# Print every entry containing the search text.  The empty string is a
# substring of every string, so as written this prints all entries.
search_text = ''
for title in titles:
    if search_text not in title:
        continue
    print(title)

##### Authors

In [None]:
# Fetch the author_name column of 'authors' as a flat list of values.
cursor.execute('''SELECT author_name FROM authors;''')
authors = [record[0] for record in cursor.fetchall()]

In [None]:
# Print every author name that contains all of the fragments below
# (case-sensitive substring match).
name_fragments = ('randon', 'anderson')
for author in authors:
    if all(fragment in author for fragment in name_fragments):
        print(author)

##### isbn-10

In [None]:
# Fetch the isbn_10 column of 'titles' and lower-case each value in one pass.
cursor.execute('''SELECT isbn_10 FROM titles;''')
titles = [record[0].lower() for record in cursor.fetchall()]

In [None]:
# Print every fetched value that contains the ISBN-10 being looked up.
wanted_isbn = '0886773849'
for title in titles:
    if wanted_isbn not in title:
        continue
    print(title)