In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from time import sleep

In [None]:
# Load the preprocessed book table into memory for a quick inspection.
data = pd.read_csv('preprocessed_book.csv')
# Bare expression at the end of the cell: previews the first rows of the table.
data.head()

In [None]:
# Dimensions of the loaded table as a (rows, columns) tuple.
data.shape

In [None]:
def get_book_description(isbn, timeout=10, max_retries=3, backoff=1.0):
    """Look up a book's description on the Google Books API by ISBN.

    Args:
        isbn: ISBN to search for; it is formatted into an ``isbn:<value>``
            query.
        timeout: Seconds to wait for the server before giving up on one
            attempt. Without a timeout a stalled connection would block the
            whole run.
        max_retries: How many extra attempts to make after a transient
            failure (network error, HTTP 429, or HTTP 5xx).
        backoff: Base delay in seconds between attempts; it doubles after
            every failed attempt.

    Returns:
        The description of the first matching volume, or the string
        "Description not found" when the lookup yields no usable result.

    Raises:
        requests.RequestException: If the request still fails at the network
            level after all retries.
    """
    not_found = "Description not found"
    url = 'https://www.googleapis.com/books/v1/volumes'
    # Passing the query through ``params`` lets requests URL-encode the ISBN.
    params = {'q': f'isbn:{isbn}'}

    for attempt in range(max_retries + 1):
        last_attempt = attempt == max_retries
        try:
            response = requests.get(url, params=params, timeout=timeout)
        except requests.RequestException:
            # Transient network problems are retried; a persistent one is
            # surfaced to the caller instead of being recorded as "not found".
            if last_attempt:
                raise
        else:
            status = response.status_code
            if status == 200:
                try:
                    payload = response.json()
                except ValueError:
                    # Body was not valid JSON.
                    return not_found
                items = payload.get('items') if isinstance(payload, dict) else None
                if not items:
                    return not_found
                return items[0].get('volumeInfo', {}).get('description', not_found)
            # Only rate limiting (429) and server errors (5xx) are worth
            # retrying; any other status is a definitive miss.
            if last_attempt or not (status == 429 or status >= 500):
                return not_found
        sleep(backoff * 2 ** attempt)

    return not_found

In [None]:
def process_chunk(chunk, chunk_index):
    """Add a 'description' column to one chunk and save it as its own CSV.

    Args:
        chunk: DataFrame with an 'isbn' column; it is modified in place.
        chunk_index: Number used in the name of the output file.
    """
    output_path = f'books_with_descriptions_chunk_{chunk_index}.csv'
    # One lookup per row of the 'isbn' column.
    descriptions = chunk['isbn'].apply(get_book_description)
    chunk['description'] = descriptions
    chunk.to_csv(output_path, index=False)

# Read the data in chunks so each batch of descriptions is saved to its own file
chunk_size = 10000  # Adjust the chunk size as needed
chunk_index = 0

for chunk in pd.read_csv('preprocessed_book.csv', chunksize=chunk_size):
    process_chunk(chunk, chunk_index)
    chunk_index += 1
    # NOTE: this pause runs once per chunk, not once per request; the
    # per-ISBN lookups inside process_chunk are not throttled by it.
    sleep(1)

# Combine all the chunk files into a single DataFrame; chunk_index now holds
# the number of files written above.
all_chunks = [
    pd.read_csv(f'books_with_descriptions_chunk_{i}.csv')
    for i in range(chunk_index)
]

combined_data = pd.concat(all_chunks, ignore_index=True)
combined_data.to_csv('books_with_descriptions.csv', index=False)