In [1]:
import pandas as pd 


In [2]:
# Load the raw Goodreads export. ISBNs are identifiers, not numbers: read them
# as strings so pandas cannot drop leading zeros or turn them into floats
# (e.g. "9780439785969.0") when a column contains missing values.
df = pd.read_csv(
    "Goodreads_books_with_genres.csv",
    dtype={"isbn": str, "isbn13": str},
)

In [3]:
# Keep only the bibliographic columns needed for the enrichment step.
required_columns = ['Title', 'Author', 'isbn', 'isbn13', 'publication_date', 'publisher']
# .copy() detaches the selection from the raw frame, so assigning to its
# columns afterwards does not trigger pandas' SettingWithCopyWarning.
df = df[required_columns].copy()
df.to_csv("test_dataset.csv", index=False)
df.head()

Unnamed: 0,Title,Author,isbn,isbn13,publication_date,publisher
0,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,0439785960,9780439785969,9/16/2006,Scholastic Inc.
1,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,0439358078,9780439358071,9/1/2004,Scholastic Inc.
2,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,0439554896,9780439554893,11/1/2003,Scholastic
3,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,043965548X,9780439655484,5/1/2004,Scholastic Inc.
4,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,0439682584,9780439682589,9/13/2004,Scholastic


In [4]:
# Collapse the 'isbn' and 'isbn13' columns into a single 'ISBN' column.
df = (
    df
    # 1. Prefer isbn13; where it is missing, fall back to the value in isbn.
    .assign(isbn13=lambda frame: frame['isbn13'].fillna(frame['isbn']))
    # 2. The separate isbn column is redundant after the merge.
    .drop(columns=['isbn'])
    # 3. Expose the merged column under the neutral name 'ISBN'.
    .rename(columns={'isbn13': 'ISBN'})
)


In [5]:
# Persist the cleaned frame. index=False keeps the row index out of the file;
# otherwise it is written as an unnamed first column and comes back as
# "Unnamed: 0" when the CSV is read again.
df.to_csv("test_dataset.csv", index=False)

In [4]:
import pandas as pd
import requests
import time
import os

def fetch_google_metadata(isbn):
    """Look up one book in the Google Books volumes API by ISBN.

    Args:
        isbn: ISBN as a string or number. Surrounding whitespace, hyphens,
            inner spaces and anything after a '.' (e.g. the ``.0`` of a
            float-typed value) are removed before the lookup.

    Returns:
        A dict with the keys ``"Title"``, ``"Author"``, ``"publisher"`` and
        ``"publication_date"`` built from the first returned volume, using
        ``"N/A"`` / ``"Unknown"`` for absent fields. ``None`` when the ISBN
        is blank or missing, the response has no items, or the request
        fails. Failures print a ``[WARN]`` line so they can be told apart
        from a genuine "no match".
    """
    if isbn is None:
        return None

    # Keep only the part before any '.', so 9780439785969.0 -> 9780439785969.
    isbn_clean = str(isbn).split('.')[0]
    # Hyphens and spaces are presentation only; query the bare identifier.
    isbn_clean = isbn_clean.replace('-', '').replace(' ', '').strip()
    if not isbn_clean or isbn_clean.lower() in ('nan', 'none', '<na>'):
        return None

    url = "https://www.googleapis.com/books/v1/volumes"
    try:
        # params= lets requests URL-encode the query; the 10s timeout keeps a
        # stalled connection from hanging the caller.
        response = requests.get(
            url, params={"q": f"isbn:{isbn_clean}"}, timeout=10
        )
    except requests.RequestException as exc:
        print(f"[WARN] Request failed for ISBN {isbn_clean}: {exc}")
        return None

    if response.status_code != 200:
        # A non-200 reply (e.g. rate limiting) is not the same as "not found".
        print(f"[WARN] HTTP {response.status_code} for ISBN {isbn_clean}")
        return None

    try:
        data = response.json()
        items = data.get("items") or []
        if not items:
            return None
        info = items[0].get("volumeInfo") or {}
        return {
            "Title": info.get("title", "N/A"),
            "Author": ", ".join(map(str, info.get("authors", ["Unknown"]))),
            "publisher": info.get("publisher", "N/A"),
            "publication_date": info.get("publishedDate", "N/A")
        }
    except (ValueError, AttributeError, TypeError, KeyError, IndexError) as exc:
        # Malformed JSON or an unexpected payload shape: stay best-effort,
        # but say so instead of failing silently.
        print(f"[WARN] Unexpected response for ISBN {isbn_clean}: {exc!r}")
        return None

def process_library_fast(input_csv, output_csv, save_every=25, delay=0.0):
    """Enrich a CSV of books with metadata fetched per ISBN.

    Reads ``input_csv``, calls ``fetch_google_metadata`` for every row and,
    when a result comes back, overwrites that row's ``Title``, ``Author``,
    ``publisher`` and ``publication_date``. The frame is written to
    ``output_csv`` periodically, at the end, and also when the loop is cut
    short by an exception or a keyboard interrupt, so finished rows are kept.

    Args:
        input_csv: Path of the CSV to read. It must contain an ``isbn13`` or
            ``ISBN`` column (``isbn13`` wins when both exist).
        output_csv: Path the enriched CSV is written to.
        save_every: Write a checkpoint after this many rows. A falsy value
            disables the intermediate checkpoints.
        delay: Seconds to sleep after each row; 0 disables throttling.

    Returns:
        None. If ``input_csv`` does not exist an error is printed and nothing
        is written.

    Raises:
        KeyError: If the input has neither an ``isbn13`` nor an ``ISBN`` column.
    """
    # 1. Load the input frame.
    if not os.path.exists(input_csv):
        print(f"Error: {input_csv} not found.")
        return

    df = pd.read_csv(input_csv)
    total_rows = len(df)

    # 2. Pick the lookup column, failing early with a clear message.
    search_col = next((col for col in ('isbn13', 'ISBN') if col in df.columns), None)
    if search_col is None:
        raise KeyError(
            f"{input_csv} has neither an 'isbn13' nor an 'ISBN' column"
        )

    print(f"Total rows: {total_rows}. Starting Enrichment using {search_col}...")

    target_cols = ('Title', 'Author', 'publisher', 'publication_date')

    # 3. Row-by-row enrichment. `position` counts rows from 1 regardless of
    #    the frame's index labels.
    try:
        for position, (index, row) in enumerate(df.iterrows(), start=1):
            isbn = row[search_col]
            metadata = fetch_google_metadata(isbn)

            if metadata:
                # Overwrite only the targeted columns.
                for col in target_cols:
                    df.at[index, col] = metadata[col]
                print(f"[OK] {position}/{total_rows}: {metadata['Title']}")
            else:
                print(f"[SKIP] {position}/{total_rows}: No data for {isbn}")

            # 4. Periodic checkpoint.
            if save_every and position % save_every == 0:
                df.to_csv(output_csv, index=False)
                print(f"--- Progress: {(position/total_rows)*100:.2f}% ---")

            if delay > 0:
                time.sleep(delay)
    finally:
        # 5. Final save; also runs on interruption so progress is not lost.
        df.to_csv(output_csv, index=False)

    print(f"\nGrid Enriched. System saved to {output_csv}")

# --- EXECUTION ---
# Run the enrichment on test_dataset.csv and write the result to a separate file.
process_library_fast(
    input_csv="test_dataset.csv",
    output_csv="library_enriched.csv",
)

Total rows: 11127. Starting Enrichment using ISBN...
[SKIP] 1/11127: No data for 9780439785969
[OK] 2/11127: Harry Potter
[OK] 3/11127: Harry Potter and the Chamber of Secrets
[OK] 4/11127: Harry Potter and the Prisoner of Azkaban
[OK] 5/11127: Harry Potter
[OK] 6/11127: Unauthorized Harry Potter and the Deathly Hallows News
[OK] 7/11127: Harry Potter and the Prisoner of Azkaban
[OK] 8/11127: The Ultimate Hitchhiker's Guide
[OK] 9/11127: The Ultimate Hitchhiker's Guide to the Galaxy
[OK] 10/11127: The Hitchhiker's Guide to the Galaxy 25th Anniversary Edition
[SKIP] 11/11127: No data for 9780739322208
[OK] 12/11127: The Ultimate Hitchhiker's Guide
[OK] 13/11127: A Short History of Nearly Everything
[OK] 14/11127: Bill Bryson's African Diary
[OK] 15/11127: Bryson's Dictionary of Troublesome Words
[OK] 16/11127: In a Sunburned Country
[OK] 17/11127: I'm a Stranger Here Myself
[OK] 18/11127: The Lost Continent
[OK] 19/11127: Neither Here Nor There:
[OK] 20/11127: Notes from a Small Island


KeyboardInterrupt: 