# Title matching

Given the actual titles of books on bookshelves (manually entered), queries each of the book information sources for the titles that they return; this is used in order to actually determine whether a true match is made or not when some book information is obtained

Saves the information to the correct place.

## Imports

In [1]:
import csv
import os
import time

import shelfy
import shelfy.models.scraper

## Load a bookshelf

In [2]:
# Set directories
bookshelf_directory = shelfy.SHELFY_BASE_PATH + '/data/shelves/'
bookshelf_name = 'fantasy_0'
bookshelf_path = bookshelf_directory + bookshelf_name + '/titles'


# Output file paths
isbn_output_path = bookshelf_directory + bookshelf_name + '/isbns'
google_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_google'
goodreads_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_goodreads'
amazon_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_amazon'



# Read book titles in from plain text file
book_titles = []
with open(bookshelf_path, 'r') as file_handle:
    for book_title in file_handle:
        book_titles.append(book_title.replace('\n', ''))
        
print(book_titles)

['1Q84', 'The Three Body Problem', 'Cryptonomicon', 'The Magicians', 'The Magician King', 'Good Omens', 'The Golem and the Jinni', 'Norse Mythology', 'The Halloween Tree', 'The Early Asimov', '1984', 'The Princess Bride', 'The Dragonbone Chair', 'Game of Thrones', 'Clash of Kings', 'Feast of Crows', 'Storm of Swords', 'A Dance with Dragons']


## Perform the queries

In [3]:
query_google_urls = [shelfy.models.scraper.get_google_search_url_from_query(book_title) for book_title in book_titles]

##### Write ISBN's to file

In [4]:
# Get the info

isbns = []
for i in range(len(query_google_urls)):
    print(i+1, '/', len(query_google_urls))
    
    isbn = None
    while isbn == None:
        isbn = shelfy.models.scraper.get_isbn10_from_google_search(query_google_urls[i])
    print(isbn)
    isbns.append(isbn)
    
        

1 / 18
0307476464
2 / 18
0765382032
3 / 18
0060512806
4 / 18
0452296293
5 / 18
0452298016
6 / 18
0060853980
7 / 18
0062110845
8 / 18
039360909X
9 / 18
0375803017
10 / 18
0385039794
11 / 18
0451524934
12 / 18
0156035219
13 / 18
0886773849
14 / 18
0553593714
15 / 18
0553579908
16 / 18
055358202X
17 / 18
034554398X
18 / 18
0553582011


In [5]:
# Write results to file

with open(isbn_output_path, 'w') as file_handle:
    writer = csv.writer(file_handle, delimiter = ',')
    for isbn in isbns:
        writer.writerow([isbn])

##### Load ISBNs

In [6]:
# Load isbns
isbns = []
with open(isbn_output_path, 'r') as file_handle:
    reader = csv.reader(file_handle, delimiter = ',')
    for isbn in reader:
        isbns.append(isbn[0])
        
print(isbns)

['0307476464', '0765382032', '0060512806', '0452296293', '0452298016', '0060853980', '0062110845', '039360909X', '0375803017', '0385039794', '0451524934', '0156035219', '0886773849', '0553593714', '0553579908', '055358202X', '034554398X', '0553582011']


##### Google books

In [7]:
# Get the info
# Query:
# https://www.googleapis.com/books/v1/volumes?key=[google_books_api_key]&q=isbn:[isbn10]
# Key: AIzaSyBueagspvDe8R-prJ3bmqtEnr7fPTH10Xo

google_titles = []
for i in range(len(isbns)):
    print(i, '/', len(isbns) - 1)
    
    title = 'NONE'
    
    num_attempts = 0
    while title == 'NONE':
        try:
            time.sleep(1)
            num_attempts += 1
            book_info = shelfy.models.scraper.query_google_books_api(isbns[i])
            title = book_info['title']
            
            
        except:
            pass
        
        if num_attempts > 3:
            break
        
    print('\t', title, '\t', isbns[i])

    google_titles.append(title)
    

0 / 17
	 1Q84 	 0307476464
1 / 17
	 The Three-Body Problem 	 0765382032
2 / 17
	 Cryptonomicon 	 0060512806
3 / 17
	 NONE 	 0452296293
4 / 17
	 The Magician King 	 0452298016
5 / 17
	 Good Omens 	 0060853980
6 / 17
	 The Golem and the Jinni 	 0062110845
7 / 17
	 Norse Mythology 	 039360909X
8 / 17
	 The Halloween tree 	 0375803017
9 / 17
	 NONE 	 0385039794
10 / 17
	 Nineteen Eighty-four 	 0451524934
11 / 17
	 The Princess Bride 	 0156035219
12 / 17
	 NONE 	 0886773849
13 / 17
	 A Game of Thrones 	 0553593714
14 / 17
	 NONE 	 0553579908
15 / 17
	 NONE 	 055358202X
16 / 17
	 A Storm of Swords 	 034554398X
17 / 17
	 A Dance with Dragons 	 0553582011


In [8]:
# Write results to file
with open(google_titles_output_path, 'w') as file_handle:
    writer = csv.writer(file_handle, delimiter = ',')
    for i in range(len(isbns)):
        writer.writerow([isbns[i], google_titles[i]])

##### Goodreads

In [9]:
# Get the info
# Query:
# https://www.googleapis.com/books/v1/volumes?key=[google_books_api_key]&q=isbn:[isbn10]
# Key: AIzaSyBueagspvDe8R-prJ3bmqtEnr7fPTH10Xo

goodreads_titles = []
for i in range(len(isbns)):
    print(i, '/', len(isbns) - 1)
    
    title = 'NONE'
    
    num_attempts = 0
    while title == 'NONE':
        try:
            time.sleep(1)
            num_attempts += 1
            book_info = shelfy.models.scraper.query_goodreads_api(isbns[i])
            title = book_info['title']
            
            
        except:
            print('\tfailed')
            pass
        
        if num_attempts > 3:
            break
        
    print('\t', title, '\t', isbns[i])
    goodreads_titles.append(title)
    

0 / 17
	 いちきゅうはちよん [Ichi-Kyū-Hachi-Yon] 	 0307476464
1 / 17
	 三体 	 0765382032
2 / 17
	 Cryptonomicon 	 0060512806
3 / 17
	 The Magicians 	 0452296293
4 / 17
	 The Magician King 	 0452298016
5 / 17
	 Good Omens: The Nice and Accurate Prophecies of Agnes Nutter, Witch 	 0060853980
6 / 17
	 The Golem and the Jinni 	 0062110845
7 / 17
	 Norse Mythology 	 039360909X
8 / 17
	 The Halloween Tree 	 0375803017
9 / 17
	 The early Asimov or, Eleven Years of Trying 	 0385039794
10 / 17
	 Nineteen Eighty-Four 	 0451524934
11 / 17
	 The Princess Bride 	 0156035219
12 / 17
	 The Dragonbone Chair 	 0886773849
13 / 17
	 A Game of Thrones 	 0553593714
14 / 17
	 A Clash of Kings 	 0553579908
15 / 17
	 A Feast for Crows 	 055358202X
16 / 17
	 A Storm of Swords 	 034554398X
17 / 17
	 A Dance with Dragons 	 0553582011


In [10]:
# Write results to file
with open(goodreads_titles_output_path, 'w') as file_handle:
    writer = csv.writer(file_handle, delimiter = ',')
    for i in range(len(isbns)):
        writer.writerow([isbns[i], goodreads_titles[i]])

##### Amazon

In [11]:
# Get the info

amazon_titles = []
for i in range(len(isbns)):
    print(i, '/', len(isbns) - 1)
    
    title = 'NONE'
    
    num_attempts = 0
    while title == 'NONE':
        try:
            time.sleep(1)
            num_attempts += 1
            book_info = shelfy.models.scraper.query_amazon_page(isbns[i])
            print(book_info)
            title = book_info['title']
            
            
        except:
            print('\tfailed')
            pass
        
        if num_attempts > 3:
            break
        
    print('\t', title, '\t', isbns[i])
    amazon_titles.append(title)
    

0 / 17
{'title': '1Q84 (Vintage International)', 'authors': 'Haruki Murakami', 'publisher': ' Vintage; Reprint edition (January 22, 2013)', 'isbn10': 'NONE', 'isbn13': 'NONE'}
	 1Q84 (Vintage International) 	 0307476464
1 / 17
{'title': 'The Three-Body Problem', 'authors': 'Cixin Liu', 'publisher': ' Tor Books (January 12, 2016)', 'isbn10': 'NONE', 'isbn13': 'NONE'}
	 The Three-Body Problem 	 0765382032
2 / 17
{'title': 'Cryptonomicon', 'authors': 'Neal Stephenson', 'publisher': ' Avon Books (November 5, 2002)', 'isbn10': 'NONE', 'isbn13': 'NONE'}
	 Cryptonomicon 	 0060512806
3 / 17
{'title': 'The Magicians: A Novel (Magicians Trilogy)', 'authors': 'Lev Grossman', 'publisher': ' Penguin Books (May 25, 2010)', 'isbn10': 'NONE', 'isbn13': 'NONE'}
	 The Magicians: A Novel (Magicians Trilogy) 	 0452296293
4 / 17
{'title': 'The Magician King: A Novel (Magicians Trilogy)', 'authors': 'Lev Grossman', 'publisher': ' Penguin Books; Reprint edition (May 29, 2012)', 'isbn10': 'NONE', 'isbn13': 'N

In [12]:
# Write results to file
with open(amazon_titles_output_path, 'w') as file_handle:
    writer = csv.writer(file_handle, delimiter = ',')
    for i in range(len(isbns)):
        writer.writerow([isbns[i], amazon_titles[i]])

In [13]:
print('asdf')

asdf


In [2]:
import datetime

datetime = datetime.datetime

print(type(datetime.now()))

<class 'datetime.datetime'>


In [3]:
print(datetime.datetime.now())

AttributeError: type object 'datetime.datetime' has no attribute 'datetime'

In [4]:
datetime.datetime

AttributeError: type object 'datetime.datetime' has no attribute 'datetime'