# Title matching

Given the actual (manually entered) titles of the books on a bookshelf, this notebook queries each book-information source for the title it returns for each book. The returned titles are later used to decide whether book information obtained from a source is a true match for the book or not.

Saves the results to per-source files in the shelf's data directory.

## Imports

In [None]:
import csv
import os
import time

import shelfy
import shelfy.models.scraper

## Load a bookshelf

In [None]:
# Shelf to process and where its data lives on disk
bookshelf_name = 'home_6'
bookshelf_directory = shelfy.SHELFY_BASE_PATH + '/data/shelves/'
shelf_prefix = bookshelf_directory + bookshelf_name

# Input: hand-entered titles, one per line
bookshelf_path = shelf_prefix + '/titles'

# Outputs: one file per information source, all inside the shelf's directory
isbn_output_path = shelf_prefix + '/isbns'
google_titles_output_path = shelf_prefix + '/titles_google'
goodreads_titles_output_path = shelf_prefix + '/titles_goodreads'
amazon_titles_output_path = shelf_prefix + '/titles_amazon'
amazon_products_titles_output_path = shelf_prefix + '/titles_amazon_products'

# Read the titles from the plain text file, dropping the newline characters
with open(bookshelf_path, 'r') as titles_file:
    book_titles = [line.replace('\n', '') for line in titles_file]

print(book_titles)

## Perform the queries

# Run every title through the scraper's Google-search URL helper, keeping shelf order
query_google_urls = list(map(shelfy.models.scraper.get_google_search_url_from_query, book_titles))

##### Write ISBNs to file

# Get the info: ask the scraper for an ISBN-10 for every Google search URL.
#
# A lookup that returns None is retried, but only a bounded number of times and
# with a pause in between, so a title that never yields an ISBN cannot hang the
# notebook. Such titles are recorded with the same 'NONE' placeholder the
# title-lookup cells use, which keeps `isbns` aligned with `book_titles`.

max_isbn_attempts = 4

isbns = []
for i, query_url in enumerate(query_google_urls):
    print(i+1, '/', len(query_google_urls))

    isbn = None
    for attempt in range(max_isbn_attempts):
        if attempt:
            # Only wait between retries; the first attempt goes out immediately
            time.sleep(1)
        isbn = shelfy.models.scraper.get_isbn10_from_google_search(query_url)
        if isbn is not None:
            break

    if isbn is None:
        print('\tfailed')
        isbn = 'NONE'

    print(isbn)
    isbns.append(isbn)
    
        

# Write results to file: one ISBN per row.
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate '\n' end up with an extra blank line between rows.
with open(isbn_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows([isbn] for isbn in isbns)

##### Load ISBNs

In [None]:
# Load isbns: the first column of every row in the ISBN file.
with open(isbn_output_path, 'r', newline='') as file_handle:
    reader = csv.reader(file_handle, delimiter=',')
    # csv.reader yields an empty list for a blank line; skip those rows
    # instead of failing with IndexError on row[0]
    isbns = [row[0] for row in reader if row]

print(isbns)

##### Google books

In [None]:
# Get the info: look up each ISBN's title through the scraper's Google Books helper.
# Query:
# https://www.googleapis.com/books/v1/volumes?key=[google_books_api_key]&q=isbn:[isbn10]
# Key: supply it through configuration -- do not paste API keys into the notebook

max_attempts = 4

google_titles = []
for i, isbn in enumerate(isbns):
    print(i, '/', len(isbns) - 1)

    # Placeholder that is recorded when every attempt fails
    title = 'NONE'

    for _ in range(max_attempts):
        # Pause before every request (presumably to stay under the service's rate limit)
        time.sleep(1)
        try:
            book_info = shelfy.models.scraper.query_google_books_api(isbn)
            title = book_info['title']
        except Exception as error:
            # Catch Exception rather than everything so that interrupting the
            # kernel still stops the loop, and show what went wrong
            print('\tfailed:', repr(error))
            continue

        if title != 'NONE':
            break

    print('\t', title, '\t', isbn)

    google_titles.append(title)
    

In [None]:
# Write results to file: one "isbn,title" row per book.
# Check the lists line up before opening the file, so stale results from another
# shelf cannot overwrite it with mismatched or partial rows.
if len(google_titles) != len(isbns):
    raise ValueError('google_titles and isbns have different lengths; re-run the query cell')

# newline='' lets the csv module control line endings itself
with open(google_titles_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows(zip(isbns, google_titles))

##### Goodreads

In [None]:
# Get the info: look up each ISBN's title through the scraper's Goodreads helper.

max_attempts = 4

goodreads_titles = []
for i, isbn in enumerate(isbns):
    print(i, '/', len(isbns) - 1)

    # Placeholder that is recorded when every attempt fails
    title = 'NONE'

    for _ in range(max_attempts):
        # Pause before every request (presumably to stay under the service's rate limit)
        time.sleep(1)
        try:
            book_info = shelfy.models.scraper.query_goodreads_api(isbn)
            title = book_info['title']
        except Exception as error:
            # Catch Exception rather than everything so that interrupting the
            # kernel still stops the loop, and show what went wrong
            print('\tfailed:', repr(error))
            continue

        if title != 'NONE':
            break

    print('\t', title, '\t', isbn)
    goodreads_titles.append(title)
    

In [None]:
# Write results to file: one "isbn,title" row per book.
# Check the lists line up before opening the file, so stale results from another
# shelf cannot overwrite it with mismatched or partial rows.
if len(goodreads_titles) != len(isbns):
    raise ValueError('goodreads_titles and isbns have different lengths; re-run the query cell')

# newline='' lets the csv module control line endings itself
with open(goodreads_titles_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows(zip(isbns, goodreads_titles))

##### Amazon

In [None]:
# Get the info: look up each ISBN's title through the scraper's Amazon page helper.

max_attempts = 4

amazon_titles = []
for i, isbn in enumerate(isbns):
    print(i, '/', len(isbns) - 1)

    # Placeholder that is recorded when every attempt fails
    title = 'NONE'

    for _ in range(max_attempts):
        # Pause before every request (presumably to avoid hammering the site)
        time.sleep(1)
        try:
            book_info = shelfy.models.scraper.query_amazon_page(isbn)
            print(book_info)
            title = book_info['title']
        except Exception as error:
            # Catch Exception rather than everything so that interrupting the
            # kernel still stops the loop, and show what went wrong
            print('\tfailed:', repr(error))
            continue

        if title != 'NONE':
            break

    print('\t', title, '\t', isbn)
    amazon_titles.append(title)
    

In [None]:
# Write results to file: one "isbn,title" row per book.
# Check the lists line up before opening the file, so stale results from another
# shelf cannot overwrite it with mismatched or partial rows.
if len(amazon_titles) != len(isbns):
    raise ValueError('amazon_titles and isbns have different lengths; re-run the query cell')

# newline='' lets the csv module control line endings itself
with open(amazon_titles_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows(zip(isbns, amazon_titles))

##### Amazon Products API

In [None]:
# Get the info: look up each ISBN's title through the scraper's Amazon Products API helper.

max_attempts = 4

# One API object from the scraper is shared by every query below
amazon = shelfy.models.scraper.get_amazon_object()
amazon_products_titles = []
for i, isbn in enumerate(isbns):
    print(i, '/', len(isbns) - 1)

    # Placeholder that is recorded when every attempt fails
    title = 'NONE'

    # Unlike the other lookup cells, no pause is inserted between requests here
    for _ in range(max_attempts):
        try:
            book_info = shelfy.models.scraper.query_amazon_products_api(isbn, amazon)
            print(book_info)
            title = book_info['title']
        except Exception as error:
            # Catch Exception rather than everything so that interrupting the
            # kernel still stops the loop, and show what went wrong
            print('\tfailed:', repr(error))
            continue

        if title != 'NONE':
            break

    print('\t', title, '\t', isbn)
    amazon_products_titles.append(title)


In [None]:
# Write results to file: one "isbn,title" row per book.
# Check the lists line up before opening the file, so stale results from another
# shelf cannot overwrite it with mismatched or partial rows.
if len(amazon_products_titles) != len(isbns):
    raise ValueError('amazon_products_titles and isbns have different lengths; re-run the query cell')

# newline='' lets the csv module control line endings itself
with open(amazon_products_titles_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows(zip(isbns, amazon_products_titles))

In [None]:
# Audible signal that the cells above have finished: the Audio widget is the
# cell's final expression, so the notebook renders it with autoplay enabled
import IPython.display
beep_path = shelfy.SHELFY_BASE_PATH + '/beep.mp3'
IPython.display.Audio(beep_path, autoplay = True)

### Write results to SQL database

In [None]:
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2

In [None]:
# Create the database if it does not exist and open a connection to it.
# Set your postgres username/password, and connection specifics
username = 'postgres'
password = 'password'     # change this
host     = 'localhost'
port     = '5432'            # default port that postgres listens on
db_name  = 'shelves'         # database that holds the bookshelf tables


## 'engine' is a connection to a database
## Here, we're using postgres, but sqlalchemy can connect to other things too.
engine = create_engine( 'postgresql://{}:{}@{}:{}/{}'.format(username, password, host, port, db_name) )
# repr() of the URL masks the password; depending on the SQLAlchemy version,
# printing the URL directly can show it in clear text
print(repr(engine.url))


## create a database (if it doesn't exist)
if not database_exists(engine.url):
    create_database(engine.url)
print(database_exists(engine.url))


# Create connection and cursor object to insert info into db.
# The port is passed explicitly so this connection always targets the same
# server as `engine`, even when `port` is changed from the default
con = psycopg2.connect(database = db_name, user = username, password = password, host = host, port = port)
cursor = con.cursor()

##### Load shelves

In [None]:
# Shelf names by collection: 'home_0'..'home_5' and 'insight_0'..'insight_7'
# NOTE(review): the first cell of this notebook processes 'home_6', which is
# not part of home_shelves -- confirm whether the range should be extended
home_shelves = ['home_{}'.format(index) for index in range(6)]
insight_shelves = ['insight_{}'.format(index) for index in range(8)]

for shelf_group in (home_shelves, insight_shelves):
    print(shelf_group)

In [None]:
# Select the shelf whose titles and ISBNs are loaded for the database
bookshelf_name = 'home_0'


# Set directories
bookshelf_directory = shelfy.SHELFY_BASE_PATH + '/data/shelves/'
bookshelf_path = bookshelf_directory + bookshelf_name + '/titles'


# Output file paths
isbn_output_path = bookshelf_directory + bookshelf_name + '/isbns'
google_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_google'
goodreads_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_goodreads'
amazon_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_amazon'
# Refreshed along with the other paths so it cannot keep pointing at the
# previously selected shelf
amazon_products_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_amazon_products'


# Read book titles in from plain text file (one title per line)
with open(bookshelf_path, 'r') as file_handle:
    book_titles = [line.replace('\n', '') for line in file_handle]

print(book_titles)

# Load isbns for shelf (first column of every row)
with open(isbn_output_path, 'r', newline='') as file_handle:
    reader = csv.reader(file_handle, delimiter = ',')
    # csv.reader yields an empty list for a blank line; skip those rows
    # instead of failing with IndexError on row[0]
    isbns = [row[0] for row in reader if row]

print(isbns)

In [None]:
# Create the table (if it doesn't exist) and insert the loaded shelf's books.
from psycopg2 import sql

# NOTE(review): the original CREATE TABLE left its table-name placeholder
# unfilled and the insert targeted an unrelated `works` table with an undefined
# `work_key`. 'titles' is an assumed name -- confirm the intended table.
table_name = 'titles'
table = sql.Identifier(table_name)

# Identifiers cannot be bound as query parameters, so the table name is
# composed into the statement (safely quoted) with psycopg2.sql
cursor.execute(sql.SQL('''CREATE TABLE IF NOT EXISTS {} (
                isbn_10 char(10) primary key,
                title text,
                title_amazon text,
                title_goodreads text,
                title_google text,
                instances int
               );''').format(table))

# Have to commit the table creation
con.commit()


# Insert command, tailor as needed.
# NOTE(review): assumes `instances` counts how often an ISBN has been inserted;
# a repeated ISBN bumps the counter instead of violating the primary key.
# The per-source title columns are left NULL here -- TODO confirm how they
# should be filled.
command = sql.SQL('''
            INSERT INTO {table} (isbn_10, title, instances) VALUES (%s, %s, 1)
            ON CONFLICT (isbn_10) DO UPDATE SET instances = {table}.instances + 1
            ''').format(table=table)

# One row per (isbn, title) pair of the shelf loaded above
cursor.executemany(command, list(zip(isbns, book_titles)))
con.commit()