In [92]:
!pip install isbnlib


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [93]:
import isbnlib
import pandas as pd
import requests

In [94]:
isbn_df = pd.read_csv('isbn_numbers_fixed.csv')

In [95]:
isbn_df.head()

Unnamed: 0,ISBN-10,ISBN-13
0,010836724X,9780108367243.0
1,0108368041,9780108368042.0
2,0108368661,9780108368660.0
3,0108623939,9780108623936.0
4,0761404147,


In [96]:
# check nan
isbn_df.isnull().sum()

ISBN-10    17374881
ISBN-13     6238034
dtype: int64

In [97]:
isbn_df.dropna(inplace=True)

Randomly select 100 rows from the dataframe

In [98]:
isbn_df = isbn_df.sample(n=100)
isbn_10 = isbn_df['ISBN-10'].tolist()

## Fetch Metadata Using ISBNLib
Use the `isbnlib.meta()` function to fetch metadata for each ISBN. The function returns a dictionary with metadata information. If the ISBN is not found, the function returns an empty dictionary.

In [99]:
def fetch_metadata(isbn, service="goob"):
    try:
        metadata = isbnlib.meta(isbn, service=service)
        return metadata if metadata else {}
    except Exception as e:
        print(f"Error fetching metadata for ISBN {isbn} using service {service}: {e}")
        return {}

In [100]:
def normalize_metadata(isbn_df, services):
    for service in services:
        isbn_df[f"{service}_metadata"] = isbn_df['ISBN-10'].apply(lambda x: fetch_metadata(x, service=service))
        normalized = pd.json_normalize(isbn_df[f"{service}_metadata"])
        normalized.columns = [f"{service}_{col}" for col in normalized.columns]
        isbn_df = pd.concat([isbn_df, normalized], axis=1)
        isbn_df.drop(columns=[f"{service}_metadata"], inplace=True)

    return isbn_df

In [102]:
services = ["goob", "openl","wiki"]
isbn_df = normalize_metadata(isbn_df, services)
isbn_df.head()


Error fetching metadata for ISBN 3642039359 using service goob: isbn request != isbn response (9783642039355 not in [{'type': 'ISBN_13', 'identifier': '9789402311051'}, {'type': 'ISBN_10', 'identifier': '940231105X'}])
Error fetching metadata for ISBN 1929345186 using service goob: isbn request != isbn response (9781929345182 not in [{'type': 'ISBN_13', 'identifier': '9781929345199'}, {'type': 'ISBN_10', 'identifier': '1929345194'}])
Error fetching metadata for ISBN 8884510430 using service goob: isbn request != isbn response (9788884510433 not in [{'type': 'ISBN_13', 'identifier': '9789460235726'}, {'type': 'ISBN_10', 'identifier': '9460235727'}])
Error fetching metadata for ISBN 9029006307 using service goob: isbn request != isbn response (9789029006309 not in [{'type': 'ISBN_13', 'identifier': '9789029542852'}, {'type': 'ISBN_10', 'identifier': '9029542853'}])
Error fetching metadata for ISBN 0979986206 using service openl: the service is down (try later) (service timeout)
Error fet

Unnamed: 0,ISBN-10,ISBN-13,goob_ISBN-13,goob_Title,goob_Authors,goob_Publisher,goob_Year,goob_Language,openl_ISBN-13,openl_Title,openl_Authors,openl_Publisher,openl_Year,openl_Language,wiki_ISBN-13,wiki_Title,wiki_Authors,wiki_Publisher,wiki_Year,wiki_Language
32013897,9876144219,9789876144216,,,,,,,,,,,,,,,,,,
18484108,673170519,9780673170514,,,,,,,,,,,,,,,,,,
8677937,471768499,9780471768494,,,,,,,,,,,,,,,,,,
22766966,9861341854,9789861341859,,,,,,,,,,,,,,,,,,
34555467,3642039359,"9783642039362, 9783642039355",,,,,,,,,,,,,,,,,,


it is a dictionary, so we need to convert it to a dataframe

## Google Books Data

In [103]:
def get_book_data(isbn):
    url = f"https://www.googleapis.com/books/v1/volumes?q=isbn:{isbn}"
    response = requests.get(url)
    data = response.json()

    if "items" in data:
        book = data["items"][0]["volumeInfo"]
        return {
            "ISBN": isbn,
            "Title": book.get("title", "N/A"),
            "Authors": ", ".join(book.get("authors", ["Unknown"])),
            "Published Date": book.get("publishedDate", "N/A"),
            "Description": book.get("description", "No description available"),
            "Page Count": book.get("pageCount", "N/A"),
            "Categories": ", ".join(book.get("categories", ["Unknown"])),
            "Thumbnail": book.get("imageLinks", {}).get("thumbnail", "")
        }
    else:
        return {"ISBN": isbn, "Title": "Not Found"}

In [104]:
books_data = [get_book_data(isbn) for isbn in isbn_10]

books_df = pd.DataFrame(books_data)

In [105]:
books_df

Unnamed: 0,ISBN,Title,Authors,Published Date,Description,Page Count,Categories,Thumbnail
0,9876144219,Not Found,,,,,,
1,0673170519,Not Found,,,,,,
2,0471768499,Life's Missing Instruction Manual,Joe Vitale,2006-02-24,Bestselling author and marketing guru Joe Vita...,0,Business & Economics,http://books.google.com/books/content?id=0hr8D...
3,9861341854,幾點鐘去看牙比較不會痛?,"Ranga Yogeshwar, 優哥希瓦, 姬健梅 (德語)",2012,No description available,311,Science,
4,3642039359,Not Found,,,,,,
...,...,...,...,...,...,...,...,...
95,0860235602,Not Found,,,,,,
96,8470755420,Roy Lichtenstein,"Roy Lichtenstein, Jack Cowart",2007,This book offers the first complete and unedit...,0,Art,
97,3659581070,Not Found,,,,,,
98,1345538456,Not Found,,,,,,


## Goodreads Data


In [106]:
!mkdir classic_book_metadata

mkdir: classic_book_metadata: File exists


In [107]:

isbn_df['ISBN-13'].to_csv('isbn_13.txt', index=False, header=False)


In [108]:

with open('isbn_13.txt', 'r') as f:
    lines = f.readlines()

# Remove invalid lines like "nan"
valid_lines = [line.strip() for line in lines if line.strip().isdigit()]

with open('isbn_13_cleaned.txt', 'w') as f:
    f.write("\n".join(valid_lines))

In [109]:
valid_lines

['9789876144216',
 '9780673170514',
 '9780471768494',
 '9789861341859',
 '9780982774960',
 '9780367635244',
 '9789500411226',
 '9781904777427',
 '9789994024261',
 '9780979986208',
 '9781892998019',
 '9780606257008',
 '9781931824705',
 '9783319969794',
 '9784805761267',
 '9788873991601',
 '9780003013719',
 '9780134869292',
 '9789693515121',
 '9787308093170',
 '9781902618012',
 '9780627000430',
 '9788484417255',
 '9781624141799',
 '9780538817554',
 '9780765396280',
 '9780971373440',
 '9780787952266',
 '9783869161099',
 '9785995002512',
 '9780300022971',
 '9786082040233',
 '9780439870290',
 '9780965742689',
 '9788416287093',
 '9780602303853',
 '9781583408926',
 '9781484263051',
 '9780404101015',
 '9789871021406',
 '9781425986650',
 '9781427299574',
 '9780860658931',
 '9781842170717',
 '9788497944311',
 '9786059691833',
 '9780875563046',
 '9789539888747',
 '9781591882077',
 '9788420534718',
 '9780108623950',
 '9780713665079',
 '9780994606723',
 '9788475740379',
 '9780340517451',
 '97809765

In [110]:
!python get_books.py --isbn_list_path isbn_13_cleaned.txt --output_directory_path classic_book_metadata --format csv

ISBNs to scrape: ['9789876144216', '9780673170514', '9780471768494', '9789861341859', '9780982774960', '9780367635244', '9789500411226', '9781904777427', '9789994024261', '9780979986208', '9781892998019', '9780606257008', '9781931824705', '9783319969794', '9784805761267', '9788873991601', '9780003013719', '9780134869292', '9789693515121', '9787308093170', '9781902618012', '9780627000430', '9788484417255', '9781624141799', '9780538817554', '9780765396280', '9780971373440', '9780787952266', '9783869161099', '9785995002512', '9780300022971', '9786082040233', '9780439870290', '9780965742689', '9788416287093', '9780602303853', '9781583408926', '9781484263051', '9780404101015', '9789871021406', '9781425986650', '9781427299574', '9780860658931', '9781842170717', '9788497944311', '9786059691833', '9780875563046', '9789539888747', '9781591882077', '9788420534718', '9780108623950', '9780713665079', '9780994606723', '9788475740379', '9780340517451', '9780976516200', '9784831864536', '978236914378

In [111]:
goodreads_df = pd.read_csv('classic_book_metadata/all_books.csv')
goodreads_df

Unnamed: 0,isbn,book_url,title,author,authorlink,average_rating,num_pages,genres,publication_info,format,rating_distribution,cover_image_uri,book_details
0,9789876144216,https://www.goodreads.com/search?q=9789876144216,Atlas De Las Minorias,Jean Sellier,https://www.goodreads.com/author/show/282157.J...,5.00,[None],[],"['Published January 1, 2014']",['Paperback'],"{'5': '1', '4': '0', '3': '0', '2': '0', '1': ...",https://images-na.ssl-images-amazon.com/images...,Professional Kingston 8GB MicroSDHC Card for i...
1,9780673170514,https://www.goodreads.com/search?q=9780673170514,Im Ess/Pers Finance 23773,unknown author,https://www.goodreads.com/author/show/22294257...,0.00,[None],[],"['Published March 1, 1998']",['Hardcover'],"{'5': '0', '4': '0', '3': '0', '2': '0', '1': ...",https://dryofg8nmyqjw.cloudfront.net/images/no...,
2,9780471768494,https://www.goodreads.com/search?q=9780471768494,Life's Missing Instruction Manual : The Guideb...,Joe Vitale,https://www.goodreads.com/author/show/8600.Joe...,3.50,['158'],[],"['First published January 1, 2006']","['158 pages, Hardcover']","{'5': '228', '4': '211', '3': '236', '2': '105...",https://images-na.ssl-images-amazon.com/images...,Bestselling author and marketing guru Joe Vita...
3,9789861341859,https://www.goodreads.com/search?q=9789861341859,幾點鐘去看牙比較不會痛？：德國最熱門科學節目為你知識充電,Ranga Yogeshwar,https://www.goodreads.com/author/show/2740605....,3.53,['320'],[],"['First published April 17, 2009']","['320 pages, Paperback']","{'5': '27', '4': '75', '3': '64', '2': '22', '...",https://images-na.ssl-images-amazon.com/images...,★令50萬德國人拍案叫絕！來自歐洲最強經濟體，激盪知性與想像的發現之旅！★108問，家事到天...
4,9780982774960,https://www.goodreads.com/search?q=9780982774960,The Greenest Building: How the Bullitt Center ...,Mary Adam Thomas,https://www.goodreads.com/author/show/4819325....,4.33,['184'],[],"['Published January 15, 2016']","['184 pages, Perfect Paperback']","{'5': '2', '4': '0', '3': '1', '2': '0', '1': ...",https://images-na.ssl-images-amazon.com/images...,The Greenest How the Bullitt Center Changes th...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,9781799784616,https://www.goodreads.com/search?q=9781799784616,Reckless,Aurora Rose Reynolds,https://www.goodreads.com/author/show/7215619....,4.10,[None],[],"['First published August 30, 2022']",['Audio CD'],"{'5': '1,888', '4': '1,779', '3': '985', '2': ...",https://images-na.ssl-images-amazon.com/images...,From New York Times and USA Today bestselling ...
71,9788470755422,https://www.goodreads.com/search?q=9788470755422,Roy Lichtenstein: Beginning to End,Avis Berman,https://www.goodreads.com/author/show/228395.A...,4.14,[None],[],"['First published May 1, 2007']",['Hardcover'],"{'5': '2', '4': '4', '3': '1', '2': '0', '1': ...",https://images-na.ssl-images-amazon.com/images...,The Fundacion Juan March (Madrid) presents a s...
72,9783659581076,https://www.goodreads.com/search?q=9783659581076,Pomegranate Processing Technology: Pomegranate...,Sangram Dhumal,https://www.goodreads.com/author/show/25579226...,0.00,['344'],[],"['Published July 13, 2015']","['344 pages, Paperback']","{'5': '0', '4': '0', '3': '0', '2': '0', '1': ...",https://images-na.ssl-images-amazon.com/images...,The book is a part of author's research for hi...
73,9781345538458,https://www.goodreads.com/search?q=9781345538458,Bulletin. History Series Volume 1,University of Wisconsin,https://www.goodreads.com/author/show/1048434....,0.00,['610'],[],"['Published October 27, 2015']","['610 pages, Hardcover']","{'5': '0', '4': '0', '3': '0', '2': '0', '1': ...",https://images-na.ssl-images-amazon.com/images...,This work has been selected by scholars as bei...


## ISBNDB Data

In [112]:
import os
from dotenv import load_dotenv
import requests
import pandas as pd
from pandas import json_normalize
import requests as req

In [113]:
load_dotenv()
API_KEY = os.getenv('API_KEY')
BASE_URL = "https://api2.isbndb.com/book/"

In [114]:
h = {
    'Authorization': API_KEY
}

isbndb = []


In [115]:
isbn_df.head()

Unnamed: 0,ISBN-10,ISBN-13,goob_ISBN-13,goob_Title,goob_Authors,goob_Publisher,goob_Year,goob_Language,openl_ISBN-13,openl_Title,openl_Authors,openl_Publisher,openl_Year,openl_Language,wiki_ISBN-13,wiki_Title,wiki_Authors,wiki_Publisher,wiki_Year,wiki_Language
32013897,9876144219,9789876144216,,,,,,,,,,,,,,,,,,
18484108,673170519,9780673170514,,,,,,,,,,,,,,,,,,
8677937,471768499,9780471768494,,,,,,,,,,,,,,,,,,
22766966,9861341854,9789861341859,,,,,,,,,,,,,,,,,,
34555467,3642039359,"9783642039362, 9783642039355",,,,,,,,,,,,,,,,,,


In [116]:
isbn_10

['9876144219',
 '0673170519',
 '0471768499',
 '9861341854',
 '3642039359',
 '0982774966',
 '0367635240',
 '3110266059',
 '9500411229',
 '1904777422',
 '9994024264',
 '0979986206',
 '1892998017',
 '0606257004',
 '1929345186',
 '1931824703',
 '331996979X',
 '4805761261',
 '8873991602',
 '0857090224',
 '0003013715',
 '013486929X',
 '9693515129',
 '7308093174',
 '1902618017',
 '0627000436',
 '8484417255',
 '162414179X',
 '0538817550',
 '0765396289',
 '0971373442',
 '0787952265',
 '3869161094',
 '8984337714',
 '5995002511',
 '0300022972',
 '6082040236',
 '0439870291',
 '0965742687',
 '8416287090',
 '0602303850',
 '1583408924',
 '1484263057',
 '0404101011',
 '9871021402',
 '142598665X',
 '1427299579',
 '0860658937',
 '1842170716',
 '8497944313',
 '6059691838',
 '087556304X',
 '9539888743',
 '1591882079',
 '8420534714',
 '0108623955',
 '0713665076',
 '0994606729',
 '8475740375',
 '034051745X',
 '0976516209',
 '4831864536',
 '2369143789',
 '6586130751',
 '1627120300',
 '1374917540',
 '01955826

In [117]:
for isbn in isbn_10:
    isbn_str = str(isbn)  # Convert ISBN to string
    resp = req.get("https://api2.isbndb.com/book/" + isbn_str, headers=h)
    if resp.status_code == 200:
        isbndb.append(resp.json())
    elif resp.status_code == 404:
        print(f"ISBN {isbn_str} not found. Error 404: {resp.text}")
    else:
        print(f"Error {resp.status_code}: {resp.text}")

ISBN 0673170519 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 9994024264 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 0606257004 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 0627000436 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 0765396289 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 8984337714 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 6082040236 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 1427299579 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 9539888743 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 0108623955 not found. Error 404: {"errorType":"string","errorMessage":"Not Found","trace":[]}
ISBN 65861

In [118]:
print(isbndb)



In [119]:
isbn_data = json_normalize(isbndb, sep='_')
isbn_data

Unnamed: 0,book_publisher,book_synopsis,book_language,book_image,book_title_long,book_dimensions,book_dimensions_structured_length_unit,book_dimensions_structured_length_value,book_dimensions_structured_width_unit,book_dimensions_structured_width_value,...,book_binding,book_isbn,book_isbn10,book_edition,book_pages,book_related_eBook,book_other_isbns,book_related_Page Fidelity,book_related_ePub,book_related_Kindle Edition
0,Kingston,Professional Kingston 8GB MicroSDHC Card for i...,es,https://images.isbndb.com/covers/1089600348570...,Atlas De Las Minorias,"Height: 0.5 Inches, Length: 1 Inches, Weight: ...",inches,1.00000,inches,0.100000,...,Paperback,9876144219,9876144219,,,,,,,
1,Wiley,Bestselling author and marketing guru Joe Vita...,en,https://images.isbndb.com/covers/2644949348235...,Life's Missing Instruction Manual : The Guideb...,"Height: 8.562975 Inches, Length: 5.70865 Inche...",inches,5.70865,inches,0.692912,...,Hardcover,0471768499,0471768499,1,176.0,1118046382,"[{'isbn': '9781118046388', 'binding': 'ebook'}]",,,
2,Prerequisite,Language:Chinese.Pub Date: 2012 02 Publisher: ...,zh,https://images.isbndb.com/covers/3301383485699...,What time to see teeth will not hurt? : German...,,,,,,...,Paperback,9861341854,9861341854,,311.0,,,,,
3,Springer,Der Mensch entwickelt sich vor allem in fruher...,de,https://images.isbndb.com/covers/2030119348348...,Entwicklungspsychologie des Kindes- und Jugend...,Weight: 1.5873282864 Pounds,,,,,...,Perfect Paperback,3642039359,3642039359,2010,297.0,3642039367,"[{'isbn': '9783642039362', 'binding': 'print'}]",,,
4,Ecotone Publishing,The Greenest Building: How the Bullitt Center ...,en,https://images.isbndb.com/covers/2490037348253...,The Greenest Building: How the Bullitt Center ...,Weight: 1.2 Pounds,,,,,...,Perfect Paperback,0982774966,0982774966,,184.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,Brilliance Audio,From New York Times and USA Today bestselling ...,en,https://images.isbndb.com/covers/2203204348282...,"Reckless (Adventures in Love, 3)","Height: 5.4 inches, Length: 6.3 inches, Weight...",inches,6.30000,inches,0.700000,...,Audio CD,1799784614,1799784614,Unabridged,,,,,,
75,Quotes,,en,https://images.isbndb.com/covers/7195803482494...,The Golden Years - Coventry Hippodrome (Art & ...,,,,,,...,Hardcover,0860235602,0860235602,,160.0,,,,,
76,Fundacion Juan March,This book offers the first complete and unedit...,en,https://images.isbndb.com/covers/2798885348520...,Roy Lichtenstein: Beginning to End,"Height: 11.31 Inches, Length: 9.72 Inches, Wei...",inches,9.72000,inches,0.970000,...,Hardcover,8470755420,8470755420,First Edition,174.0,,,,,
77,LAP LAMBERT Academic Publishing,The book is a part of author's research for hi...,en,https://images.isbndb.com/covers/2720366348349...,Pomegranate Processing Technology: Pomegranate...,"Height: 8.66 Inches, Length: 5.91 Inches, Weig...",inches,5.91000,inches,0.780000,...,Paperback,3659581070,3659581070,1,344.0,,,,,
