# Title matching

Given the actual titles of the books on a bookshelf (entered manually), this notebook queries each book-information source (Google Books, Goodreads, and Amazon) for the title that source returns for each book. These source-specific titles are used to decide whether book information obtained from a source is a true match for the actual book.

Saves the results in the shelf's data directory (the `isbns`, `titles_google`, `titles_goodreads`, and `titles_amazon` files).

## Imports

In [1]:
import csv
import os
import time

import shelfy
import shelfy.models.scraper

## Load a bookshelf

In [2]:
# Choose the shelf to process; all of its input and output files share one directory
bookshelf_name = 'home_4'
bookshelf_directory = shelfy.SHELFY_BASE_PATH + '/data/shelves/'
shelf_root = bookshelf_directory + bookshelf_name

# Input: the manually entered titles, one per line
bookshelf_path = shelf_root + '/titles'

# Outputs: the ISBNs, plus the titles reported by each information source
isbn_output_path = shelf_root + '/isbns'
google_titles_output_path = shelf_root + '/titles_google'
goodreads_titles_output_path = shelf_root + '/titles_goodreads'
amazon_titles_output_path = shelf_root + '/titles_amazon'

# Read the titles from the plain text file, removing the newline characters
book_titles = []
with open(bookshelf_path, 'r') as file_handle:
    book_titles.extend(raw_line.replace('\n', '') for raw_line in file_handle)

print(book_titles)

["The Handmaid's Tale", 'The Blind Assassin', 'The Poisonwood Bible', 'Animal Farm', 'The Doomsday Book', 'The Cider House Rules', 'Stardust', 'The Marriage Plot', 'The World According to Garp', 'I Sing the Body Electric!', 'The Iliad', 'The Adventures of Tom Sawyer', 'King John', 'The Declaration of Independence and Other Great Documents of American History', 'The Killer Angels', 'Dracula', 'Spin', 'To Say Nothing of the Dog', 'Red Mars', 'Something Wicked This Way Comes', "Ender's Game", 'The Fall of Hyperion', 'Endymion', 'The Rise of Endymion', 'Dune', 'The Time Machine', '2001 A Space Odyssey', 'The Martian Chronicles', 'The Difference Engine', 'Contact']


## Perform the queries

In [3]:
# Build one Google search URL per title, in the same order as book_titles
query_google_urls = []
for shelf_title in book_titles:
    search_url = shelfy.models.scraper.get_google_search_url_from_query(shelf_title)
    query_google_urls.append(search_url)

##### Write ISBNs to file

In [4]:
# Look up an ISBN-10 for every title through its Google search URL.
#
# Each lookup is attempted a bounded number of times with a pause between
# attempts; retrying forever with no delay could hang the cell and hammer the
# search endpoint. A title whose lookup never succeeds is recorded as 'NONE',
# the same sentinel the title-lookup cells in this notebook use.
max_attempts = 4

isbns = []
for position, query_url in enumerate(query_google_urls, start=1):
    print(position, '/', len(query_google_urls))

    isbn = None
    for attempt in range(max_attempts):
        if attempt:
            time.sleep(1)   # pause before retrying a failed lookup
        isbn = shelfy.models.scraper.get_isbn10_from_google_search(query_url)
        if isbn is not None:
            break

    if isbn is None:
        isbn = 'NONE'       # keep the list aligned with the titles

    print(isbn)
    isbns.append(isbn)
    
        

1 / 30
1480560103
2 / 30
0385720955
3 / 30
0060786507
4 / 30
0451526341
5 / 30
0553562738
6 / 30
0345387651
7 / 30
0061689246
8 / 30
125001476X
9 / 30
034536676X
10 / 30
0380789620
11 / 30
0140275363
12 / 30
0486291561
13 / 30
1605988855
14 / 30
0486411249
15 / 30
0345348109
16 / 30
185326086X
17 / 30
076534825X
18 / 30
0553575384
19 / 30
0553560735
20 / 30
0380729407
21 / 30
0812550706
22 / 30
0553288202
23 / 30
0553572946
24 / 30
0553572989
25 / 30
0441172717
26 / 30
1580493807
27 / 30
0451457994
28 / 30
1451678193
29 / 30
1501245945
30 / 30
0671004107


In [5]:
# Write the ISBNs to the shelf's ISBN file, one per row.
# newline='' lets the csv module control line endings itself; without it the
# writer's '\r\n' terminator is translated again on Windows ('\r\r\n').
with open(isbn_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows([isbn] for isbn in isbns)

##### Load ISBNs

In [6]:
# Load the ISBNs back from the shelf's ISBN file (first column of each row).
# newline='' is what the csv module expects of files handed to csv.reader.
isbns = []
with open(isbn_output_path, 'r', newline='') as file_handle:
    reader = csv.reader(file_handle, delimiter=',')
    # A blank line parses as an empty row; skip it instead of failing on row[0]
    isbns = [row[0] for row in reader if row]

print(isbns)

['1480560103', '0385720955', '0060786507', '0451526341', '0553562738', '0345387651', '0061689246', '125001476X', '034536676X', '0380789620', '0140275363', '0486291561', '1605988855', '0486411249', '0345348109', '185326086X', '076534825X', '0553575384', '0553560735', '0380729407', '0812550706', '0553288202', '0553572946', '0553572989', '0441172717', '1580493807', '0451457994', '1451678193', '1501245945', '0671004107']


##### Google books

In [7]:
# Ask the Google Books API for the title it associates with each ISBN.
# Query:
# https://www.googleapis.com/books/v1/volumes?key=[google_books_api_key]&q=isbn:[isbn10]
# NOTE: never paste a real API key into this notebook; the key is presumably
# configured inside shelfy.models.scraper -- TODO confirm.
#
# Each ISBN is tried up to max_attempts times, one second apart. A title of
# 'NONE' means every attempt failed (or the source itself returned 'NONE').
max_attempts = 4

google_titles = []
for i, isbn in enumerate(isbns):
    print(i, '/', len(isbns) - 1)

    title = 'NONE'
    for _ in range(max_attempts):
        time.sleep(1)   # throttle requests to the API
        try:
            book_info = shelfy.models.scraper.query_google_books_api(isbn)
            title = book_info['title']
        except Exception:
            # A failed request or a response without a title: try again.
            # (Not a bare except, so Ctrl-C can still interrupt the cell.)
            continue
        if title != 'NONE':
            break

    print('\t', title, '\t', isbn)

    google_titles.append(title)
    

0 / 29
	 NONE 	 1480560103
1 / 29
	 The Blind Assassin 	 0385720955
2 / 29
	 The Poisonwood Bible 	 0060786507
3 / 29
	 Animal Farm 	 0451526341
4 / 29
	 Doomsday Book Book Discussion Kit 	 0553562738
5 / 29
	 The Cider House Rules 	 0345387651
6 / 29
	 Stardust 	 0061689246
7 / 29
	 The Marriage Plot 	 125001476X
8 / 29
	 The World According to Garp 	 034536676X
9 / 29
	 I Sing the Body Electric 	 0380789620
10 / 29
	 The Iliad 	 0140275363
11 / 29
	 Tom Sawyer 	 0486291561
12 / 29
	 King John 	 1605988855
13 / 29
	 The Declaration of Independence and Other Great Documents of American History, 1775-1864 	 0486411249
14 / 29
	 The Killer Angels /cby Michael Shaara ; Maps by Don Pitcher 	 0345348109
15 / 29
	 Dracula 	 185326086X
16 / 29
	 Spin 	 076534825X
17 / 29
	 To Say Nothing of the Dog 	 0553575384
18 / 29
	 Red Mars and Green Mars 	 0553560735
19 / 29
	 Something Wicked This Way Comes 	 0380729407
20 / 29
	 Ender's Game 	 0812550706
21 / 29
	 The Fall of Hyperion 	 0553288202
22

In [8]:
# Write (isbn, Google Books title) rows to the shelf's Google titles file.

# Check for missing titles before opening the file: opening in 'w' mode
# truncates any previous results, so fail first if the lookup is incomplete.
if len(google_titles) < len(isbns):
    raise ValueError('google_titles has {} entries but there are {} ISBNs'.format(
        len(google_titles), len(isbns)))

# newline='' lets the csv module control line endings itself
with open(google_titles_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows(zip(isbns, google_titles))

##### Goodreads

In [9]:
# Ask Goodreads for the title it associates with each ISBN, using
# shelfy.models.scraper.query_goodreads_api.
#
# Each ISBN is tried up to max_attempts times, one second apart. A title of
# 'NONE' means every attempt failed (or the source itself returned 'NONE').
max_attempts = 4

goodreads_titles = []
for i, isbn in enumerate(isbns):
    print(i, '/', len(isbns) - 1)

    title = 'NONE'
    for _ in range(max_attempts):
        time.sleep(1)   # throttle requests to the API
        try:
            book_info = shelfy.models.scraper.query_goodreads_api(isbn)
            title = book_info['title']
        except Exception:
            # A failed request or a response without a title: report and retry.
            # (Not a bare except, so Ctrl-C can still interrupt the cell.)
            print('\tfailed')
            continue
        if title != 'NONE':
            break

    print('\t', title, '\t', isbn)
    goodreads_titles.append(title)
    

0 / 29
	 The Handmaid's Tale 	 1480560103
1 / 29
	 The Blind Assassin 	 0385720955
2 / 29
	 The Poisonwood Bible 	 0060786507
3 / 29
	 Animal Farm: A Fairy Story 	 0451526341
4 / 29
	 Doomsday Book 	 0553562738
5 / 29
	 The Cider House Rules 	 0345387651
6 / 29
	 Stardust 	 0061689246
7 / 29
	 The Marriage Plot 	 125001476X
8 / 29
	 The World According to Garp 	 034536676X
9 / 29
	 I Sing the Body Electric!  	 0380789620
10 / 29
	 Ἰλιάς 	 0140275363
11 / 29
	 The Adventures of Tom Sawyer 	 0486291561
12 / 29
	 NONE 	 1605988855
13 / 29
	 The Declaration of Independence and Other Great Documents of American History 1775-1865 	 0486411249
14 / 29
	 The Killer Angels 	 0345348109
15 / 29
	 Dracula 	 185326086X
16 / 29
	 Spin 	 076534825X
17 / 29
	 To Say Nothing of the Dog 	 0553575384
18 / 29
	 Red Mars 	 0553560735
19 / 29
	 Something Wicked This Way Comes 	 0380729407
20 / 29
	 Ender's Game 	 0812550706
21 / 29
	 The Fall of Hyperion 	 0553288202
22 / 29
	 Endymion 	 0553572946
23 / 29

In [10]:
# Write (isbn, Goodreads title) rows to the shelf's Goodreads titles file.

# Check for missing titles before opening the file: opening in 'w' mode
# truncates any previous results, so fail first if the lookup is incomplete.
if len(goodreads_titles) < len(isbns):
    raise ValueError('goodreads_titles has {} entries but there are {} ISBNs'.format(
        len(goodreads_titles), len(isbns)))

# newline='' lets the csv module control line endings itself
with open(goodreads_titles_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows(zip(isbns, goodreads_titles))

##### Amazon

In [11]:
# Ask Amazon for the title it associates with each ISBN, using
# shelfy.models.scraper.query_amazon_page. The full record returned for each
# book is printed for inspection.
#
# Each ISBN is tried up to max_attempts times, one second apart. A title of
# 'NONE' means every attempt failed (or the source itself returned 'NONE').
max_attempts = 4

amazon_titles = []
for i, isbn in enumerate(isbns):
    print(i, '/', len(isbns) - 1)

    title = 'NONE'
    for _ in range(max_attempts):
        time.sleep(1)   # throttle page requests
        try:
            book_info = shelfy.models.scraper.query_amazon_page(isbn)
            print(book_info)
            title = book_info['title']
        except Exception:
            # A failed request or a record without a title: report and retry.
            # (Not a bare except, so Ctrl-C can still interrupt the cell.)
            print('\tfailed')
            continue
        if title != 'NONE':
            break

    print('\t', title, '\t', isbn)
    amazon_titles.append(title)
    

0 / 29
{'title': "The Handmaid's Tale (The Classic Collection)", 'authors': 'Margaret Atwood', 'publisher': ' Brilliance Audio; Unabridged edition (July 22, 2014)', 'isbn10': 'NONE', 'isbn13': 'NONE'}
	 The Handmaid's Tale (The Classic Collection) 	 1480560103
1 / 29
{'title': 'The Blind Assassin: A Novel', 'authors': 'Margaret Atwood', 'publisher': ' Anchor (August 28, 2001)', 'isbn10': 'NONE', 'isbn13': 'NONE'}
	 The Blind Assassin: A Novel 	 0385720955
2 / 29
{'title': 'The Poisonwood Bible: A Novel', 'authors': 'Barbara Kingsolver', 'publisher': ' Harper Perennial Modern Classics; 1st edition (May 31, 2005)', 'isbn10': 'NONE', 'isbn13': 'NONE'}
	 The Poisonwood Bible: A Novel 	 0060786507
3 / 29
{'title': 'Animal farm: A Fairy Story', 'authors': 'George Orwell', 'publisher': ' Signet; 50th Anniversary edition (1996)', 'isbn10': 'NONE', 'isbn13': 'NONE'}
	 Animal farm: A Fairy Story 	 0451526341
4 / 29
{'title': 'Doomsday Book', 'authors': 'Connie Willis', 'publisher': ' Spectra (Au

In [12]:
# Write (isbn, Amazon title) rows to the shelf's Amazon titles file.

# Check for missing titles before opening the file: opening in 'w' mode
# truncates any previous results, so fail first if the lookup is incomplete.
if len(amazon_titles) < len(isbns):
    raise ValueError('amazon_titles has {} entries but there are {} ISBNs'.format(
        len(amazon_titles), len(isbns)))

# newline='' lets the csv module control line endings itself
with open(amazon_titles_output_path, 'w', newline='') as file_handle:
    writer = csv.writer(file_handle, delimiter=',')
    writer.writerows(zip(isbns, amazon_titles))

In [13]:
# Play a beep so it is audible when the cells above have finished running
import IPython.display

beep_path = shelfy.SHELFY_BASE_PATH + '/beep.mp3'
IPython.display.Audio(beep_path, autoplay=True)

### Write results to SQL database

In [None]:
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2

In [None]:
# Create the database if it does not exist, then open a connection to it.

# Postgres username/password and connection specifics
username = 'postgres'
password = 'password'     # change this
host     = 'localhost'
port     = '5432'            # default port that postgres listens on
db_name  = 'shelves'         # database that holds the bookshelf tables

# 'engine' is a connection to a database.
# Here, we're using postgres, but sqlalchemy can connect to other things too.
engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(username, password, host, port, db_name))
print(engine.url)

# Create the database (if it doesn't exist) and confirm that it now does
if not database_exists(engine.url):
    create_database(engine.url)
print(database_exists(engine.url))

# Create connection and cursor object to insert info into db.
# The port is passed explicitly so this connection reaches the same server as
# 'engine' even when the port is changed from the default.
con = psycopg2.connect(database=db_name, user=username, password=password, host=host, port=port)
cursor = con.cursor()

##### Load shelves

In [None]:
# Shelf names: six "home" shelves (home_0..home_5) and eight "insight" shelves (insight_0..insight_7)
home_shelves = ['home_{}'.format(shelf_number) for shelf_number in range(6)]
insight_shelves = ['insight_{}'.format(shelf_number) for shelf_number in range(8)]

for shelf_group in (home_shelves, insight_shelves):
    print(shelf_group)

In [None]:
# Shelf whose titles and ISBNs are loaded for insertion into the database
bookshelf_name = 'home_0'


# Set directories
bookshelf_directory = shelfy.SHELFY_BASE_PATH + '/data/shelves/'
bookshelf_path = bookshelf_directory + bookshelf_name + '/titles'


# Output file paths
isbn_output_path = bookshelf_directory + bookshelf_name + '/isbns'
google_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_google'
goodreads_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_goodreads'
amazon_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_amazon'


# Read book titles in from plain text file (one title per line)
book_titles = []
with open(bookshelf_path, 'r') as file_handle:
    for book_title in file_handle:
        book_titles.append(book_title.replace('\n', ''))

print(book_titles)

# Load isbns for shelf (first column of each row).
# newline='' is what the csv module expects of files handed to csv.reader.
isbns = []
with open(isbn_output_path, 'r', newline='') as file_handle:
    reader = csv.reader(file_handle, delimiter=',')
    # A blank line parses as an empty row; skip it instead of failing on row[0]
    isbns = [row[0] for row in reader if row]

print(isbns)

In [None]:
# Create the table (if it doesn't exist) and insert the loaded shelf into it.
from psycopg2 import sql

# Table name taken from the insert statement this cell already targeted.
# NOTE(review): confirm 'works' is the intended table name.
table_name = 'works'

# The table name is an identifier, not a value, so it cannot be supplied as a
# '%s' query parameter; it is composed into the statement with psycopg2.sql.
create_command = sql.SQL('''CREATE TABLE IF NOT EXISTS {} (
                isbn_10 char(10) primary key,
                title text,
                title_amazon text,
                title_goodreads text,
                title_google text,
                instances int
               );''').format(sql.Identifier(table_name))
cursor.execute(create_command)

# Have to commit the table creation
con.commit()


# Insert command, tailor as needed.
# Writes the ISBN and the manually entered title of every book on the shelf.
# ON CONFLICT keeps a re-run (or a book already stored) from aborting the
# transaction on the primary key.
# NOTE(review): the title_* and instances columns are left unset here --
# confirm how they are meant to be filled.
command = sql.SQL('''
            INSERT INTO {} (isbn_10, title) VALUES (%s, %s)
            ON CONFLICT (isbn_10) DO NOTHING
            ''').format(sql.Identifier(table_name))

for isbn, book_title in zip(isbns, book_titles):
    cursor.execute(command, (isbn, book_title))
con.commit()