# Title matching

Given the actual titles of books on bookshelves (manually entered), queries each of the book information sources for the titles that they return; this is used in order to actually determine whether a true match is made or not when some book information is obtained

Saves the information to the correct place.

## Imports

In [1]:
import csv
import os
import time
import sys


sys.path.append('..')
import main

sys.path.append('../models')
import scraper

## Load a bookshelf

In [6]:
# Set directories
bookshelf_directory = main.SHELFY_BASE_PATH + '/data/shelves/'
bookshelf_name = 'home_6'
bookshelf_path = bookshelf_directory + bookshelf_name + '/titles'


# Output file paths
isbn_output_path = bookshelf_directory + bookshelf_name + '/isbns'
amazon_products_titles_output_path = bookshelf_directory + bookshelf_name + '/titles_amazon_products'



# Read book titles in from plain text file
book_titles = []
with open(bookshelf_path, 'r') as file_handle:
    for book_title in file_handle:
        book_titles.append(book_title.replace('\n', ''))
        
print(book_titles)

['The Astronomical Almanac 2002', 'The Early Universe Frontiers in Physics', 'Galactic Dynamics', 'The Physics of Stars', 'Galactic Astronomy', 'New Worlds, New Horizons in Astronomy and Astrophysics', 'Modern Cosmology', 'Introduction to Cosmology', 'New Worlds, New Horizons in Astronomy and Astrophysics', 'High Energy Astrophysics', 'Foundations of Astrophysics', '21st Century Astronomy', 'Classical Mechanics', 'Introduction to Quantum Mechanics', 'Introduction to Quantum Mechanics', 'Introduction to Electrodynamics', 'Classical Electrodynamics', 'Modern Quantum Mechanics', 'Modern Quantum Mechanics', 'Statistical Mechanics', 'Classical Dynamics of Particles and Systems', 'Condensed Matter Physics', 'Basic Training in Mathematics', 'Vector Calculus', 'Statistics, Data Mining, and Machine Learning in Astronomy', 'Hands-On Machine Learning with Scikit-Learn and TensorFlow', 'Effective C++ Third Edition', 'Engineering Problem Solving with C++', 'Lectures on Probability Theory and Mathem

## Perform the queries

In [7]:
query_google_urls = [scraper.get_google_search_url_from_query(book_title) for book_title in book_titles]

##### Write ISBN's to file

In [8]:
# Get the info

isbns = []
for i in range(len(query_google_urls)):
    print(i+1, '/', len(query_google_urls))
    
    isbn = None
    while isbn == None:
        isbn = scraper.get_isbn10_from_google_search(query_google_urls[i])
    print(isbn)
    isbns.append(isbn)
    
        

1 / 47
0118873156
2 / 47
0201626748
3 / 47
0691130272
4 / 47
0471987980
5 / 47
0691025657
6 / 47
0309157994
7 / 47
0122191412
8 / 47
0805389121
9 / 47
0309157994
10 / 47
0521756189
11 / 47
0321595580
12 / 47
0393603326
13 / 47
189138922X
14 / 47
1107179866
15 / 47
1107179866
16 / 47
1108420419
17 / 47
0977858278
18 / 47
0805382917
19 / 47
0805382917
20 / 47
0123821886
21 / 47
0534408966
22 / 47
0470617985
23 / 47
0306450364
24 / 47
1429215089
25 / 47
0691151687
26 / 47
1491962291
27 / 47
0321334876
28 / 47
B009DWN19I
29 / 47
1480215236
30 / 47
0977858278
31 / 47
0136020402
32 / 47
0306450364
33 / 47
0030105676
34 / 47
093570275X
35 / 47
0813350190
36 / 47
0201380277
37 / 47
1108420419
38 / 47
1421415658
39 / 47
1107028728
40 / 47
0123821886
41 / 47
0123821886
42 / 47
0486632288
43 / 47
047141526X
44 / 47
0521876222
45 / 47
0521876222
46 / 47
189138922X
47 / 47
1259587541


In [9]:
# Write results to file

with open(isbn_output_path, 'w') as file_handle:
    writer = csv.writer(file_handle, delimiter = ',')
    for isbn in isbns:
        writer.writerow([isbn])

##### Load ISBNs

In [10]:
# Load isbns
isbns = []
with open(isbn_output_path, 'r') as file_handle:
    reader = csv.reader(file_handle, delimiter = ',')
    for isbn in reader:
        isbns.append(isbn[0])
        
print(isbns)

['0118873156', '0201626748', '0691130272', '0471987980', '0691025657', '0309157994', '0122191412', '0805389121', '0309157994', '0521756189', '0321595580', '0393603326', '189138922X', '1107179866', '1107179866', '1108420419', '0977858278', '0805382917', '0805382917', '0123821886', '0534408966', '0470617985', '0306450364', '1429215089', '0691151687', '1491962291', '0321334876', 'B009DWN19I', '1480215236', '0977858278', '0136020402', '0306450364', '0030105676', '093570275X', '0813350190', '0201380277', '1108420419', '1421415658', '1107028728', '0123821886', '0123821886', '0486632288', '047141526X', '0521876222', '0521876222', '189138922X', '1259587541']


##### Amazon Products API

In [11]:
# Get the info

amazon = scraper.get_amazon_object()
amazon_products_titles = []
for i in range(len(isbns)):
    print(i, '/', len(isbns) - 1)
    
    title = 'NONE'
    
    num_attempts = 0
    while title == 'NONE':
        try:
            num_attempts += 1
            book_info = scraper.query_amazon_products_api(isbns[i], amazon)
            print(book_info)
            title = book_info['title']
            
            
        except:
            print('\tfailed')
            pass
        
        if num_attempts > 10:
            break
        
    print('\t', title, '\t', isbns[i])
    amazon_products_titles.append(title)


0 / 46
({'isbn10': '0118873156', 'title': 'Astronomical Almanac for the Year 2002: Data for Astronomy, Space Sciences, Geodesy, Surveying, Navigation and Other Applications', 'authors': 'U. S. Naval Observatory', 'publisher': 'The Stationery Office/Tso'}, '1500')
	failed
({'isbn10': '0118873156', 'title': 'Astronomical Almanac for the Year 2002: Data for Astronomy, Space Sciences, Geodesy, Surveying, Navigation and Other Applications', 'authors': 'U. S. Naval Observatory', 'publisher': 'The Stationery Office/Tso'}, '1500')
	failed
({'isbn10': '0118873156', 'title': 'Astronomical Almanac for the Year 2002: Data for Astronomy, Space Sciences, Geodesy, Surveying, Navigation and Other Applications', 'authors': 'U. S. Naval Observatory', 'publisher': 'The Stationery Office/Tso'}, '1500')
	failed
({'isbn10': '0118873156', 'title': 'Astronomical Almanac for the Year 2002: Data for Astronomy, Space Sciences, Geodesy, Surveying, Navigation and Other Applications', 'authors': 'U. S. Naval Observ

	failed
	failed
({'isbn10': '1107179866', 'title': 'Introduction to Quantum Mechanics', 'authors': 'David J. Griffiths', 'publisher': 'Cambridge University Press'}, '3295')
	failed
	 NONE 	 1107179866
15 / 46
	failed
({'isbn10': '1108420419', 'title': 'Introduction to Electrodynamics', 'authors': 'David J. Griffiths', 'publisher': 'Cambridge University Press'}, '3860')
	failed
	failed
	failed
	 NONE 	 1108420419
16 / 46
({'isbn10': '0977858278', 'title': 'Classical Electrodynamics (Physics)', 'authors': 'Hans Ohanian', 'publisher': 'Jones & Bartlett Learning'}, '2233')
	failed
({'isbn10': '0977858278', 'title': 'Classical Electrodynamics (Physics)', 'authors': 'Hans Ohanian', 'publisher': 'Jones & Bartlett Learning'}, None)
	failed
({'isbn10': '0977858278', 'title': 'Classical Electrodynamics (Physics)', 'authors': 'Hans Ohanian', 'publisher': 'Jones & Bartlett Learning'}, '2233')
	failed
	failed
	 NONE 	 0977858278
17 / 46
({'isbn10': '0805382917', 'title': 'Modern Quantum Mechanics (

	failed
({'isbn10': '0813350190', 'title': 'An Introduction To Quantum Field Theory, Student Economy Edition (Frontiers in Physics)', 'authors': 'Michael Peskin', 'publisher': 'CRC Press'}, None)
	failed
({'isbn10': '0813350190', 'title': 'An Introduction To Quantum Field Theory, Student Economy Edition (Frontiers in Physics)', 'authors': 'Michael Peskin', 'publisher': 'CRC Press'}, None)
	failed
({'isbn10': '0813350190', 'title': 'An Introduction To Quantum Field Theory, Student Economy Edition (Frontiers in Physics)', 'authors': 'Michael Peskin', 'publisher': 'CRC Press'}, None)
	failed
	 NONE 	 0813350190
35 / 46
	failed
({'isbn10': '0201380277', 'title': 'An Introduction to Thermal Physics', 'authors': 'Daniel V. Schroeder', 'publisher': 'Pearson'}, '4485')
	failed
({'isbn10': '0201380277', 'title': 'An Introduction to Thermal Physics', 'authors': 'Daniel V. Schroeder', 'publisher': 'Pearson'}, '4485')
	failed
	failed
	 NONE 	 0201380277
36 / 46
	failed
	failed
({'isbn10': '1108420

In [12]:
# Write results to file
with open(amazon_products_titles_output_path, 'w') as file_handle:
    writer = csv.writer(file_handle, delimiter = ',')
    for i in range(len(isbns)):
        writer.writerow([isbns[i], amazon_products_titles[i]])