In [6]:
import pandas as pd
import requests
import os
import re
import json

In [7]:
# The input file has no header row; its two columns are labelled here.
books_df = pd.read_csv(
    './Books.txt',
    header=None,
    names=['author_name', 'book_name'],
)

In [13]:
# Google Books volume-search URL template; search_books fills the two %s
# placeholders with (book title, author), in that order.
SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q=intitle:%s+inauthor:%s"
# Default directory into which save_result writes its JSON files.
LOCAL_FOLDER = "./google_books_data/"

In [14]:
def clean_name(name):
    """Normalise a title or author name so two spellings can be compared.

    Periods and commas become spaces, the text is lower-cased, surrounding
    whitespace is stripped, and runs of spaces collapse to a single space.
    """
    punctuation_to_space = str.maketrans({'.': ' ', ',': ' '})
    lowered = name.translate(punctuation_to_space).lower().strip()
    return re.sub(' +', ' ', lowered)


def filter_book(resp, name, author):
    """Pick the volumes in a Google Books search response that match a book.

    Args:
        resp: Decoded JSON search response (a dict). The hit count is read
            from 'totalItems' and the volumes from the 'items' list.
        name: Book title that was searched for.
        author: Author name that was searched for.

    Returns:
        A list of volume dicts. Volumes whose cleaned title equals the cleaned
        ``name`` are preferred; if there are none, the volumes that list the
        cleaned ``author`` among their authors are returned instead. An empty
        list is returned when the response has no hits or nothing matches.
    """
    if resp.get('totalItems', 0) == 0:
        print('%%%%%.  NO RESULT FOUND %%%%%%')
        return []

    # Normalise the search terms once instead of once per volume.
    wanted_title = clean_name(name)
    wanted_author = clean_name(author)

    title_matches = []
    author_matches = []
    # Guard against a response that reports hits but carries no 'items' key.
    for book in resp.get('items', []):
        info = book.get('volumeInfo', {})
        # A volume without a title simply cannot be a title match.
        title = info.get('title')
        if title is not None and clean_name(title) == wanted_title:
            title_matches.append(book)
        if any(clean_name(creator) == wanted_author for creator in info.get('authors', [])):
            author_matches.append(book)

    if title_matches:
        return title_matches
    if author_matches:
        return author_matches

    print('NO RESULTS MATCHED', resp)
    # Always hand back a list so callers can safely take len() of the result.
    return []

    
def search_books(name, author, search_url=SEARCH_URL):
    """Query the Google Books API for a title/author pair and filter the hits.

    Args:
        name: Book title to search for.
        author: Author name to search for.
        search_url: URL template with two ``%s`` placeholders, filled with the
            title and the author in that order.

    Returns:
        The matching volumes as returned by filter_book; an empty list when
        the request fails or nothing matches.
    """
    from urllib.parse import quote

    # Percent-encode the terms so characters such as '&', '#', '+' or '?' in a
    # title cannot break or truncate the query string.
    url = search_url % (quote(name, safe=''), quote(author, safe=''))
    # A timeout keeps one stalled request from hanging the whole run.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Report the failure instead of letting it look like "no result".
        print('Request failed with HTTP status', response.status_code)
        return []

    json_data = json.loads(response.content)
    # Normalise any falsy result (None or {}) to a list so len() is always safe.
    results = filter_book(json_data, name, author) or []
    print('Matches Found', len(results))
    return results

def save_result(book, name, author, local_folder=LOCAL_FOLDER):
    """Write a book's search result to a JSON file and return True.

    The file name is the words of ``name``, the marker '<>', and the words of
    ``author``, all joined by underscores, with a '.json' extension.

    Args:
        book: JSON-serialisable search result to store.
        name: Book title, used for the first part of the file name.
        author: Author name, used for the second part of the file name.
        local_folder: Directory that receives the file; created if missing.

    Returns:
        True once the file has been written.
    """
    # NOTE(review): '<' and '>' are not valid in Windows file names; the marker
    # is kept unchanged for compatibility with the current naming scheme.
    file_name = '_'.join(name.split(' ') + ['<>'] + author.split(' ')) + '.json'
    # A path separator inside a title or author would otherwise be read as a
    # sub-directory and make open() fail.
    for separator in filter(None, (os.sep, os.altsep)):
        file_name = file_name.replace(separator, '-')

    # Create the target directory on first use instead of failing when absent.
    if local_folder:
        os.makedirs(local_folder, exist_ok=True)
    local_path = os.path.join(local_folder, file_name)
    with open(local_path, 'w') as fp:
        json.dump(book, fp)
    return True
    

In [32]:
def get_attribute(book_data, key):
    """Return one 'volumeInfo' field of the first volume in ``book_data``.

    Args:
        book_data: Sequence of volume dicts; only the first element is read.
        key: Name of the 'volumeInfo' field to fetch, e.g. 'pageCount'.

    Returns:
        The field's value, or None when ``book_data`` is empty or None, or
        when the first volume lacks 'volumeInfo' or ``key``.
    """
    try:
        return book_data[0]['volumeInfo'][key]
    except (KeyError, IndexError, TypeError):
        # Only "the data is not there" errors mean a missing attribute;
        # anything else (e.g. KeyboardInterrupt) must propagate.
        return None
    
def get_book_attribtes(book_data):
    """Collect page count, genre and rating fields for the first volume.

    Every value is looked up through get_attribute, so a field that cannot
    be read is reported as None.
    """
    # (output key, 'volumeInfo' field name) pairs, in output order
    field_by_column = (
        ('page_count', 'pageCount'),
        ('genre', 'categories'),
        ('ratings_count', 'ratingsCount'),
        ('average_rating', 'averageRating'),
    )
    return {column: get_attribute(book_data, field) for column, field in field_by_column}

In [49]:
book_metadata = []
errors = []
for book in books_df.itertuples():
    # itertuples yields (index, author_name, book_name) for each row.
    row_index, author, name = book
    print(row_index, 'searching name: %s | author: %s' % (name, author))
    book_data = search_books(name, author)
    if not book_data:
        # Keep a placeholder entry so every input row is still represented.
        book_metadata.append([name, author, {}])
        errors.append(book)
        continue
    save_result(book_data, name, author)
    book_metadata.append([name, author, get_book_attribtes(book_data)])

print('Errors:', errors)

0 searching name: The Hitchhiker's Guide To The Galaxy | author: Douglas Adams
Matches Found 2
1 searching name: Watership Down | author: Richard Adams
Matches Found 1
2 searching name: The Five People You Meet in Heaven | author: Mitch Albom
Matches Found 1
3 searching name: Speak | author: Laurie Halse Anderson
Matches Found 1
4 searching name: I Know Why the Caged Bird Sings | author: Maya Angelou
Matches Found 3
5 searching name: Thirteen Reasons Why | author: Jay Asher
Matches Found 1
6 searching name: Foundation Series | author: Isaac Asimov
Matches Found 10
7 searching name: The Sisterhood of the Travelling Pants | author: Ann Brashares
Matches Found 10
8 searching name: A Great and Terrible Beauty | author: Libba Bray
Matches Found 1
9 searching name: The Da Vinci Code | author: Dan Brown
Matches Found 1
10 searching name: The Princess Diaries | author: Meg Cabot
Matches Found 1
11 searching name: Ender's Game | author: Orson Scott Card
Matches Found 4
12 searching name: The Hu

In [50]:
# One row per searched book; 'book_attribtes' holds the attribute dict
# (an empty dict when the search found nothing).
book_metadata_df = pd.DataFrame(
    book_metadata,
    columns=['name', 'author', 'book_attribtes'],
)

In [51]:
book_metadata_df.head()

Unnamed: 0,name,author,book_attribtes
0,The Hitchhiker's Guide To The Galaxy,Douglas Adams,"{'page_count': 764, 'genre': ['Dent, Arthur'],..."
1,Watership Down,Richard Adams,"{'page_count': 496, 'genre': ['Fiction'], 'rat..."
2,The Five People You Meet in Heaven,Mitch Albom,"{'page_count': 240, 'genre': ['Fiction'], 'rat..."
3,Speak,Laurie Halse Anderson,"{'page_count': 240, 'genre': ['Juvenile Fictio..."
4,I Know Why the Caged Bird Sings,Maya Angelou,"{'page_count': 288, 'genre': ['Biography & Aut..."


In [52]:
book_metadata_df.shape

(55, 3)

In [56]:
# Promote each entry of the 'book_attribtes' dict to its own column;
# rows whose dict lacks the key get None.
for attribute in ('genre', 'page_count', 'ratings_count', 'average_rating'):
    book_metadata_df[attribute] = book_metadata_df['book_attribtes'].apply(
        lambda attrs, attribute=attribute: attrs[attribute] if attribute in attrs else None
    )

In [57]:
book_metadata_df.shape

(55, 7)

In [58]:
book_metadata_df.describe()

Unnamed: 0,page_count,ratings_count,average_rating
count,49.0,47.0,47.0
mean,345.244898,484.425532,3.968085
std,206.165238,970.178149,0.482343
min,80.0,1.0,3.0
25%,216.0,15.0,3.5
50%,300.0,86.0,4.0
75%,400.0,258.5,4.25
max,1216.0,3649.0,5.0


In [65]:
# Nothing in the visible notebook creates the csv/ sub-folder, so make sure it
# exists before writing; to_csv does not create missing directories.
os.makedirs('./google_books_data/csv', exist_ok=True)
book_metadata_df.to_csv('./google_books_data/csv/book_metadata_df.csv', index=False, encoding='utf-8')

In [None]:
# Test Functions

In [60]:
book_data=search_books("Naruto", 'Masashi Kishimoto')

Matches Found 10


In [61]:
book_data[0]['volumeInfo']

{'title': 'Naruto, Vol. 66',
 'subtitle': 'The New Three',
 'authors': ['Masashi Kishimoto'],
 'publisher': 'VIZ Media LLC',
 'publishedDate': '2014-07-01',
 'description': 'Naruto will have to summon all of his Nine Tails power if he hopes to stand up against the devastating team of Obito and Madara. Luckily for the Allied Shinobi Forces, the greatest help they could ever wish for is on the way. And when Naruto, Sasuke and Sakura are finally reunited, the whole direction of the war may change dramatically! -- VIZ Media',
 'industryIdentifiers': [{'type': 'ISBN_13', 'identifier': '9781421578408'},
  {'type': 'ISBN_10', 'identifier': '1421578409'}],
 'readingModes': {'text': True, 'image': False},
 'pageCount': 187,
 'printType': 'BOOK',
 'categories': ['Comics & Graphic Novels'],
 'maturityRating': 'NOT_MATURE',
 'allowAnonLogging': True,
 'contentVersion': '1.1.2.0.preview.2',
 'panelizationSummary': {'containsEpubBubbles': True,
  'containsImageBubbles': True,
  'epubBubbleVersion': 

In [62]:
get_book_attribtes(book_data)

{'page_count': 187,
 'genre': ['Comics & Graphic Novels'],
 'ratings_count': None,
 'average_rating': None}

In [63]:
len(book_data)

10