In [44]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of
# every file found (sub-directory names, the second tuple item, are ignored).
# NOTE: dirname, filenames and filename stay defined in the notebook
# namespace after this loop finishes.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [45]:
import requests
import csv
import time
from collections import defaultdict

In [46]:
def fetch_books(api_key, start_index=0, max_results=40, timeout=10):
    """Fetch one page of volumes from the Google Books API.

    Args:
        api_key: API key sent as the ``key`` query parameter.
        start_index: Zero-based offset of the first volume to return.
        max_results: Number of volumes requested for this page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The decoded JSON response, or ``None`` if the request failed, timed
        out, returned an HTTP error status, or the body was not valid JSON.
    """
    base_url = "https://www.googleapis.com/books/v1/volumes"
    params = {
        "q": "subject:*",
        "key": api_key,
        "maxResults": max_results,
        "startIndex": start_index
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error does not derive from RequestException.
        print(f"An error occurred: {e}")
        return None

In [47]:
def extract_book_data(item, max_description_length=500):
    """Flatten one Google Books volume item into a CSV-ready dict.

    Args:
        item: A single entry of the API response's ``items`` list (a dict).
        max_description_length: Maximum number of characters kept from the
            description; longer descriptions are truncated.

    Returns:
        A dict with the keys ``title``, ``authors``, ``publisher``,
        ``published_date``, ``description``, ``categories``,
        ``average_rating``, ``ratings_count`` and ``language``. Missing or
        ``None`` fields become ``''``; list fields are joined with ``', '``.
    """
    # `or {}` also covers an explicit null volumeInfo, not just a missing key.
    volume_info = item.get('volumeInfo') or {}

    def field(key, default=''):
        # Treat an explicit None like a missing key, but keep other falsy
        # values (e.g. a ratings count of 0) untouched.
        value = volume_info.get(key)
        return default if value is None else value

    return {
        'title': field('title'),
        'authors': ', '.join(field('authors', [])),
        'publisher': field('publisher'),
        'published_date': field('publishedDate'),
        'description': field('description')[:max_description_length],
        'categories': ', '.join(field('categories', [])),
        'average_rating': field('averageRating'),
        'ratings_count': field('ratingsCount'),
        'language': field('language')
    }

In [48]:
# Read the Google Books API key from the GOOGLE_BOOKS_API_KEY environment
# variable so the real key never has to be saved inside the notebook.
# Falls back to the 'blank' placeholder when the variable is not set.
books_api_key = os.environ.get('GOOGLE_BOOKS_API_KEY', 'blank')

In [49]:
def main():
    """Download Google Books volumes page by page into a CSV file.

    Pages of 40 volumes are requested through ``fetch_books`` starting at
    index 0. Every volume is flattened with ``extract_book_data`` and written
    to ``google_books_data.csv`` in the current working directory (the file is
    overwritten on each run). Paging stops when a request fails, a response
    carries no ``items`` key, or a page holds fewer volumes than requested.
    A summary line with the number of saved rows is printed at the end.
    """
    csv_filename = 'google_books_data.csv'
    fieldnames = ['title', 'authors', 'publisher', 'published_date', 'description',
                  'categories', 'average_rating', 'ratings_count', 'language']

    api_key = books_api_key  # module-level key defined in an earlier cell
    page_size = 40
    offset = 0
    saved = 0

    with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        while True:
            print(f"Fetching books starting from index {offset}...")
            page = fetch_books(api_key, offset, page_size)

            # A failed request (None) and a response without 'items' both
            # end the crawl.
            has_items = page is not None and 'items' in page
            if not has_items:
                print("No more books to fetch or an error occurred.")
                break

            volumes = page['items']
            # The generator keeps rows flowing to the file one at a time.
            writer.writerows(extract_book_data(volume) for volume in volumes)
            saved += len(volumes)

            # A short page means there is nothing left after it.
            if len(volumes) < page_size:
                print("Reached the end of available books.")
                break

            offset += page_size
            time.sleep(1)  # pause between pages to stay under API rate limits

    print(f"\nTotal books processed and saved to {csv_filename}: {saved}")

In [50]:
# Start the download only when this code is executed directly (as a notebook
# cell or script), not when it is imported as a module.
if __name__ == "__main__":
    main()

Fetching books starting from index 0...
An error occurred: 400 Client Error: Bad Request for url: https://www.googleapis.com/books/v1/volumes?q=subject%3A%2A&key=blank&maxResults=40&startIndex=0
No more books to fetch or an error occurred.

Total books processed and saved to google_books_data.csv: 0
