# CSV + API

In this reboot, we are going to use:

- The [Goodreads books](https://www.kaggle.com/jealousleopard/goodreadsbooks) dataset from Kaggle.
- The [Open Library Books API](https://openlibrary.org/dev/docs/api/books)

The goal of this livecode is to load the data from a CSV + loop over rows to enrich each row with information such as:

- List of subjects (Science, Humor, Travel, etc.)
- The cover URL of the book
- Other information you'd find useful in the JSON API

First, download the CSV in the local folder:

In [1]:
!curl -L https://gist.githubusercontent.com/ssaunier/351b17f5a7a009808b60aeacd1f4a036/raw/books.csv > books.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1509k  100 1509k    0     0  2542k      0 --:--:-- --:--:-- --:--:-- 2695k


In [2]:
!ls -lh

total 4392
-rw-r--r--  1 josephgulay  staff   579B Mar 24 12:11 README.md
-rw-r--r--@ 1 josephgulay  staff    77K Apr  9 16:49 Recap.ipynb
-rw-r--r--  1 josephgulay  staff   1.5M Apr  9 16:49 books.csv


Then import the usual suspects!

# Import CSV and turn into a dataframe

In [3]:
import pandas as pd

books_df = pd.read_csv('books.csv')
books_df

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,# num_pages,ratings_count,text_reviews_count
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling-Mary GrandPré,4.56,0439785960,9780439785969,eng,652,1944099,26249
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling-Mary GrandPré,4.49,0439358078,9780439358071,eng,870,1996446,27613
2,3,Harry Potter and the Sorcerer's Stone (Harry P...,J.K. Rowling-Mary GrandPré,4.47,0439554934,9780439554930,eng,320,5629932,70390
3,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.41,0439554896,9780439554893,eng,352,6267,272
4,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling-Mary GrandPré,4.55,043965548X,9780439655484,eng,435,2149872,33964
...,...,...,...,...,...,...,...,...,...,...
13714,47699,M Is for Magic,Neil Gaiman-Teddy Kristiansen,3.82,0061186422,9780061186424,eng,260,11317,1060
13715,47700,Black Orchid,Neil Gaiman-Dave McKean,3.72,0930289552,9780930289553,eng,160,8710,361
13716,47701,InterWorld (InterWorld #1),Neil Gaiman-Michael Reaves,3.53,0061238961,9780061238963,en-US,239,14334,1485
13717,47708,The Faeries' Oracle,Brian Froud-Jessica Macbeth,4.43,0743201116,9780743201117,eng,224,1550,38


In [4]:
books_df = books_df[['title', 'authors', 'isbn']]
books_df

Unnamed: 0,title,authors,isbn
0,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling-Mary GrandPré,0439785960
1,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling-Mary GrandPré,0439358078
2,Harry Potter and the Sorcerer's Stone (Harry P...,J.K. Rowling-Mary GrandPré,0439554934
3,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,0439554896
4,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling-Mary GrandPré,043965548X
...,...,...,...
13714,M Is for Magic,Neil Gaiman-Teddy Kristiansen,0061186422
13715,Black Orchid,Neil Gaiman-Dave McKean,0930289552
13716,InterWorld (InterWorld #1),Neil Gaiman-Michael Reaves,0061238961
13717,The Faeries' Oracle,Brian Froud-Jessica Macbeth,0743201116


# Figure out how to interact with URL 


In [5]:
import requests

isbn = '0439785960'

url = "https://openlibrary.org/api/books"

params = {"bibkeys" : f"ISBN:{isbn}",
          "format" : 'json',
          "jscmd" : 'data'}

response = requests.get(url, params).json()

print(response)

{'ISBN:0439785960': {'url': 'https://openlibrary.org/books/OL24280830M/Harry_Potter_and_the_Half-Blood_Prince', 'key': '/books/OL24280830M', 'title': 'Harry Potter and the Half-Blood Prince', 'authors': [{'url': 'https://openlibrary.org/authors/OL23919A/J._K._Rowling', 'name': 'J. K. Rowling'}], 'identifiers': {'amazon': ['0439785960'], 'goodreads': ['53178655'], 'isbn_10': ['0439785960'], 'isbn_13': ['9780439785969'], 'oclc': ['70666878', '819153929'], 'openlibrary': ['OL24280830M']}, 'publishers': [{'name': 'Scholastic'}], 'publish_places': [{'name': 'New York, USA'}], 'publish_date': '2006-09', 'subjects': [{'name': 'orphans', 'url': 'https://openlibrary.org/subjects/orphans'}, {'name': 'foster homes', 'url': 'https://openlibrary.org/subjects/foster_homes'}, {'name': 'romans', 'url': 'https://openlibrary.org/subjects/romans'}, {'name': 'magie', 'url': 'https://openlibrary.org/subjects/magie'}, {'name': 'adolescence', 'url': 'https://openlibrary.org/subjects/adolescence'}, {'name': '

In [6]:
#How to get cover URL
response[f'ISBN:{isbn}']['cover']['small']

'https://covers.openlibrary.org/b/id/9326654-S.jpg'

In [7]:
books_small_df = books_df.head(10)
books_small_df

Unnamed: 0,title,authors,isbn
0,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling-Mary GrandPré,0439785960
1,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling-Mary GrandPré,0439358078
2,Harry Potter and the Sorcerer's Stone (Harry P...,J.K. Rowling-Mary GrandPré,0439554934
3,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,0439554896
4,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling-Mary GrandPré,043965548X
5,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling-Mary GrandPré,0439682584
6,"Unauthorized Harry Potter Book Seven News: ""Ha...",W. Frederick Zimmerman,0976540606
7,Harry Potter Collection (Harry Potter #1-6),J.K. Rowling,0439827604
8,The Ultimate Hitchhiker's Guide: Five Complete...,Douglas Adams,0517226952
9,The Ultimate Hitchhiker's Guide to the Galaxy,Douglas Adams,0345453743


In [23]:
import requests

# isbn_works = '0439785960'
# isbn_not_working = ''
def get_cover_url(isbn):
    url = "https://openlibrary.org/api/books"

    params = {"bibkeys" : f"ISBN:{isbn}",
              "format" : 'json',
              "jscmd" : 'data'}

    response = requests.get(url, params).json()
    #If cover is not in the dictionary, just pass
    try:
        response[f'ISBN:{isbn}']['cover']['small']
        return response[f'ISBN:{isbn}']['cover']['small']
    except:
        return None
    




In [25]:
print(get_cover_url('0345453743'))

None


In [14]:
isbn_not_working = '0345453743'
url = "https://openlibrary.org/api/books"

params = {"bibkeys" : f"ISBN:{isbn}",
          "format" : 'json',
          "jscmd" : 'data'}

response = requests.get(url, params).json()
response[f'ISBN:{isbn_not_working}']

{'url': "https://openlibrary.org/books/OL17044900M/The_ultimate_hitchhiker's_guide_to_the_galaxy",
 'key': '/books/OL17044900M',
 'title': "The ultimate hitchhiker's guide to the galaxy",
 'authors': [{'url': 'https://openlibrary.org/authors/OL272947A/Douglas_Adams',
   'name': 'Douglas Adams'}],
 'number_of_pages': 815,
 'pagination': 'xvi, 815 p. ;',
 'by_statement': 'Douglas Adams',
 'identifiers': {'goodreads': ['13'],
  'librarything': ['7015245'],
  'isbn_10': ['0345453743'],
  'openlibrary': ['OL17044900M']},
 'publishers': [{'name': 'Del Rey'}],
 'publish_places': [{'name': 'New York'}],
 'publish_date': '1996',
 'subjects': [{'name': 'comic science fiction',
   'url': 'https://openlibrary.org/subjects/comic_science_fiction'},
  {'name': 'Vogons', 'url': 'https://openlibrary.org/subjects/vogons'},
  {'name': 'Humorous fiction',
   'url': 'https://openlibrary.org/subjects/humorous_fiction'},
  {'name': 'Imaginary voyages',
   'url': 'https://openlibrary.org/subjects/imaginary_vo

In [10]:
books_small_df['name_url'] = None

for index, row in books_small_df.iterrows():
    isbn = row['isbn']
    print(isbn)
    books_small_df.loc[index, "name_url"] = get_cover_url(isbn)

books_small_df
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  books_small_df['name_url'] = None


0439785960
0439358078
0439554934
0439554896
043965548X
0439682584
0976540606
0439827604
0517226952
0345453743


KeyError: 'cover'

In [None]:
books_small_df['subjects'] = None

books_small_df

In [None]:
isbn = '0439785960'



def get_subject_list(isbn):
    url = "https://openlibrary.org/api/books"
    params = {"bibkeys" : f"ISBN:{isbn}",
              "format" : 'json',
              "jscmd" : 'data'}

    response = requests.get(url, params).json()

    subjects_list = []
    for item in response[f'ISBN:{isbn}']['subjects']:
        subjects_list.append(item['name'])
    
    return subjects_list[0:5]

In [None]:
isbn = '0439785960'
get_subject_list(isbn)

In [None]:
for index, row in books_small_df.iterrows():
    isbn = row['isbn']
    row['subjects'] = get_subject_list(isbn)
    
books_small_df

# Add one more column

In [None]:
import requests

isbn = '0439785960'

url = "https://openlibrary.org/api/books"

params = {"bibkeys" : f"ISBN:{isbn}",
          "format" : 'json',
          "jscmd" : 'data'}

response = requests.get(url, params).json()

print(response)

In [None]:
url = "https://openlibrary.org/api/books"
params = {"bibkeys" : f"ISBN:{isbn}",
              "format" : 'json',
              "jscmd" : 'data'}

response = requests.get(url, params).json()
response[f'ISBN:{isbn}']['publish_date']

In [None]:
def get_publish_date(isbn):
    url = "https://openlibrary.org/api/books"
    params = {"bibkeys" : f"ISBN:{isbn}",
              "format" : 'json',
              "jscmd" : 'data'}

    response = requests.get(url, params).json()

    return response[f'ISBN:{isbn}']['publish_date']



In [None]:
get_publish_date(isbn)

In [None]:
books_small_df['publish_date'] = None
books_small_df

In [None]:
for index, row in books_small_df.iterrows():
    isbn = row['isbn']
    row['publish_date'] = get_publish_date(isbn)
    
books_small_df

# Convert to date time

In [None]:
books_small_df['publish_date_2'] = pd.to_datetime(books_small_df['publish_date'])
books_small_df

In [None]:
# Apply to all rows

