In [1]:
import requests
import time
import json
import pandas as pd
from tqdm import tqdm

In [2]:
# Load the raw book catalogue. The path uses a forward slash, which works on
# every OS and avoids the invalid "\B" escape sequence in the string literal.
books = pd.read_csv("data/Books.csv", low_memory=False)

In [3]:
# Discard the small and medium cover-image URLs and expose the large one
# under the shorter column name 'Image-URL'.
books = (
    books
    .drop(columns=["Image-URL-S", "Image-URL-M"])
    .rename(columns={'Image-URL-L': 'Image-URL'})
)

In [4]:
# Fetch book information from Google Books API
def get_book_info(isbn, max_attempts=3, initial_delay=5, timeout=10):
    """Look up one ISBN in the Google Books API and return its key details.

    The function pauses before every attempt (``initial_delay`` seconds at
    first, doubled after each failed attempt). An attempt counts as failed
    when the request raises, the status code is not 200, or the body is not
    a JSON object.

    Args:
        isbn: ISBN to search for; it is sent as the query ``isbn:<isbn>``.
        max_attempts: Maximum number of requests to make for this ISBN.
        initial_delay: Seconds to sleep before the first attempt.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot block the caller forever.

    Returns:
        A ``(description, page_count, categories)`` tuple taken from the
        first matching volume. Missing fields fall back to ``""``, ``0`` and
        ``[]``; the same fallback tuple is returned when nothing matches or
        every attempt fails.
    """
    fallback = ("", 0, [])
    headers = {
        'content-type': "application/json"
    }
    url = "https://www.googleapis.com/books/v1/volumes"
    # Let requests build the query string so unusual ISBN values get URL-encoded.
    params = {"q": f"isbn:{isbn}"}
    delay = initial_delay

    for _ in range(max_attempts):
        time.sleep(delay)
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
            payload = response.json() if response.status_code == 200 else None
        except (requests.RequestException, ValueError):
            # Network failure, timeout, or undecodable body: treat it like any
            # other failed attempt instead of aborting the caller's long loop.
            payload = None

        if not isinstance(payload, dict):
            delay *= 2
            continue

        items = payload.get("items")
        if not items:
            # A well-formed answer with no matches; retrying would not help.
            return fallback

        volume_info = items[0].get("volumeInfo", {})
        description = volume_info.get("description", "")
        page_count = volume_info.get("pageCount", 0)
        categories = volume_info.get("categories", [])
        return description, page_count, categories

    return fallback

In [5]:
# Three independent accumulators, one entry per ISBN in `books` order.
descriptions, page_counts, categories_list = [], [], []

In [6]:
# Resume from the first ISBN that does not yet have all three results, so that
# re-running this cell after an interruption neither restarts the crawl nor
# appends duplicates that would misalign the lists with `books`.
n_done = min(len(descriptions), len(page_counts), len(categories_list))
# An interrupt can land between the appends below; trim any partial row.
del descriptions[n_done:], page_counts[n_done:], categories_list[n_done:]

for isbn in tqdm(books['ISBN'].iloc[n_done:], total=len(books), initial=n_done,
                 desc='Fetching book details'):
    description, page_count, categories = get_book_info(isbn)
    descriptions.append(description)
    page_counts.append(page_count)
    categories_list.append(categories)

Fetching book details:   2%|▏         | 4758/271360 [9:45:36<546:53:20,  7.38s/it]     

KeyboardInterrupt



In [None]:
# Attach the fetched details as new columns; each list must hold exactly one
# entry per row of `books`, in row order.
books['Description'] = descriptions
books['Page-Count'] = page_counts
books['Categories'] = categories_list

# Write one JSON record per line. The path uses a forward slash because in a
# normal string literal "\b" is the backspace escape, which would corrupt the
# file name.
books.to_json('data/books_data.json', orient='records', lines=True)