## This is a file to test methods on how to gather data using the "Original dataset" csv file that is in this Data folder.

In [40]:
import pandas as pd

In [41]:
df = pd.read_csv("Original dataset.csv")
df2 = df.head()
df2

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher,Unnamed: 12
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,4.57,439785960,9780440000000.0,eng,652,2095690,27591,9/16/2006,Scholastic Inc.,
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,4.49,439358078,9780440000000.0,eng,870,2153167,29221,9/1/2004,Scholastic Inc.,
2,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.42,439554896,9780440000000.0,eng,352,6333,244,11/1/2003,Scholastic,
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,9780440000000.0,eng,435,2339585,36325,5/1/2004,Scholastic Inc.,
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,4.78,439682584,9780440000000.0,eng,2690,41428,164,9/13/2004,Scholastic,


In [42]:
for isbn in df2['isbn']:
    print(isbn) #print each ISBN from each row

439785960
439358078
439554896
043965548X
439682584


## To this point I can read in a CSV and select a specific column and its contents

### Set up requests

In [43]:
import requests

In [80]:
# Define a function to call the Google Books API and extract the page count and description
def get_book_info(isbn):
    url = f"https://www.googleapis.com/books/v1/volumes?q=isbn:{isbn}"
    response = requests.get(url)
    data = response.json()
    if 'totalItems' in data and data['totalItems'] > 0:
        book_info = data['items'][0]['volumeInfo']
        if 'description' in book_info:
            description = book_info['description']
        else:
            description = None
        if 'mainCategory' in book_info:
            main_category = book_info['mainCategory']
        else:
            main_category = None
        if 'categories' in book_info:
            categories = ', '.join(book_info['categories'])
        else:
            categories = None
        return description, main_category, categories
    else:
        return None, None, None

# Create a new dataframe to store the book info
descriptions = []
main_categories = []
categories_list = []
for isbn in df2['isbn']:
    description, main_category, categories = get_book_info(isbn)
    descriptions.append(description)
    main_categories.append(main_category)
    categories_list.append(categories)

results_df = pd.DataFrame({
    'Main Category': main_categories,
    'Categories': categories_list,
    'Description': descriptions
}, index=df2['isbn'])

# Print the first few rows of the results dataframe
print(results_df.head())

           Main Category        Categories  \
isbn                                         
439785960           None              None   
439358078           None              None   
439554896           None              None   
043965548X          None  Juvenile Fiction   
439682584           None              None   

                                                  Description  
isbn                                                           
439785960                                                None  
439358078                                                None  
439554896                                                None  
043965548X  During his third year at Hogwarts School for W...  
439682584                                                None  


In [82]:
results_df

Unnamed: 0_level_0,Main Category,Categories,Description
isbn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
439785960,,,
439358078,,,
439554896,,,
043965548X,,Juvenile Fiction,During his third year at Hogwarts School for W...
439682584,,,


In [83]:
temp = results_df
temp

Unnamed: 0_level_0,Main Category,Categories,Description
isbn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
439785960,,,
439358078,,,
439554896,,,
043965548X,,Juvenile Fiction,During his third year at Hogwarts School for W...
439682584,,,


In [72]:
temp.dropna(subset=['Description'], inplace=True)
temp

Unnamed: 0,isbn,Page Count,Description
3,043965548X,547.0,During his third year at Hogwarts School for W...
9,1400052920,0.0,NEW YORK TIMES BESTSELLER • “Extremely funny ....
12,076790818X,546.0,One of the world’s most beloved writers and Ne...
16,076790382X,308.0,A classic from the New York Times bestselling ...
27,097669400X,0.0,Provides information on creating Web-based app...
...,...,...,...
11098,2070323285,640.0,Montaillou : un petit village de montagnards e...
11105,9681907191,0.0,"Había una vez un valle, y en el valle, tres gr..."
11109,8466318771,0.0,"""Siglo XII. Una campesina adolescente se viste..."
11111,8466302298,563.0,La relacion amorosa del joven escritor Varguit...


In [68]:
temp_df = pd.merge(df,
                 results_df,
                 on='isbn', how='right')
temp_df

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher,Unnamed: 12,Page Count,Description
0,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,9.78044E+12,eng,435,2339585,36325,5/1/2004,Scholastic Inc.,,547.0,During his third year at Hogwarts School for W...
1,14,The Hitchhiker's Guide to the Galaxy (Hitchhik...,Douglas Adams,4.22,1400052920,9.7814E+12,eng,215,4930,460,8/3/2004,Crown,,0.0,NEW YORK TIMES BESTSELLER • “Extremely funny ....
2,21,A Short History of Nearly Everything,Bill Bryson,4.21,076790818X,9.78077E+12,eng,544,248558,9396,9/14/2004,Broadway Books,,546.0,One of the world’s most beloved writers and Ne...
3,25,I'm a Stranger Here Myself: Notes on Returning...,Bill Bryson,3.9,076790382X,9.78077E+12,eng,304,49240,2211,6/28/2000,Broadway Books,,308.0,A classic from the New York Times bestselling ...
4,45,Agile Web Development with Rails: A Pragmatic ...,Dave Thomas/David Heinemeier Hansson/Leon Bree...,3.84,097669400X,9.78098E+12,eng,558,1430,59,7/28/2005,Pragmatic Bookshelf,,0.0,Provides information on creating Web-based app...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,45531,Montaillou village occitan de 1294 à 1324,Emmanuel Le Roy Ladurie/Emmanuel Le Roy-Ladurie,3.96,2070323285,9.78207E+12,fre,640,15,2,6/31/1982,Folio histoire,,640.0,Montaillou : un petit village de montagnards e...
597,45568,El Superzorro,Roald Dahl/Horacio Elena,4.05,9681907191,9.78968E+12,spa,96,174,20,6/1/2000,Alfaguara Infantil,,0.0,"Había una vez un valle, y en el valle, tres gr..."
598,45583,Historia del rey transparente,Rosa Montero,3.9,8466318771,9.78847E+12,spa,592,1266,90,9/1/2006,Punto de Lectura,,0.0,"""Siglo XII. Una campesina adolescente se viste..."
599,45592,La tía Julia y el escribidor,Mario Vargas Llosa,3.92,8466302298,9.78847E+12,spa,566,162,10,3/1/2001,Suma,,563.0,La relacion amorosa del joven escritor Varguit...


## testing other api

In [None]:
isbn = "9780441172719" # Example ISBN

# Send a request to the ISBNdb API to retrieve the book information
response = requests.get(f"https://api2.isbndb.com/book/{isbn}", headers={"Authorization": "YOUR_API_KEY"})

# Check if the request was successful
if response.status_code == 200:
    # Parse the response JSON to extract the book information
    book_data = response.json()["book"]
    
    # Extract the book description and categories
    description = book_data.get("summary")
    categories = book_data.get("subjects")
    
    # Print the book description and categories
    print("Description:", description)
    print("Categories:", categories)
else:
    # Print an error message if the request failed
    print("Error:", response.json()["error"])