In [19]:
import requests 
import time
import json
import os
import datetime

import pandas as pd

In [2]:
API_KEY = "bdnTOY3Wx7hYKcG7xjRo5ALgdcEYGiLv"

In [6]:

def retrieve_newest_monthly_lists():
    """
    Retrieve the 5 newest monthly lists from the NYTimes Books API and output them to a JSON file.
    
    Args:
        API_KEY (str): The NYTimes Books API key for authentication.
        
    Returns:
        list: A list of dictionaries containing the "list_name_encoded" and "oldest_published_date" keys 
            for the 5 newest monthly lists.
    """

     # Set up the API endpoint URL and parameters
    url = "https://api.nytimes.com/svc/books/v3/lists/names.json"
    params = {
        "api-key": f"{API_KEY}"
    }

    response = requests.get(url, params=params)
    data = response.json()
    
    # Filter for monthly lists
    data = [data for data in data["results"] if data["updated"]=="MONTHLY"]
    
    # Sort for finding the newest 4 lists
    data = sorted(data, key=lambda data: data['newest_published_date'], reverse=True)[:5]

    # Create a new list with only "list_name_encoded" and "oldest_publish_date" keys
    output_data = [{key:value for key,value in d.items() 
               if key in ['list_name_encoded', 'oldest_published_date']} 
              for d in data]
    
    # Write output to a JSON file
    with open("newest_monthly_lists.json", "w") as f:
        json.dump(output_data, f, indent=4)
    
    # Return the output data as the function of output
    return output_data



In [5]:

def retrieve_books(path_to_file, end_date=datetime.datetime.today().strftime('%Y-%m-%d')):
    """
    Retrieve the books for each monthly list specified in a JSON file.

    Args:
        path_to_file (str): The path to the input JSON file containing a list of monthly lists.
        end_date (str, optional): The last date for which to retrieve books. If None, all available books will be retrieved. Defaults to None.

    Returns:
        None
    """

    # Read the input data from the JSON file
    with open(path_to_file, 'r') as f:
        data = json.load(f)

    # Loop through each monthly list and retrieve the books for each month
    for lst in data:
        
        list_name_encoded = lst['list_name_encoded']
        oldest_published_date = lst['oldest_published_date']

        print(f"\t{list_name_encoded}")
        # Create a list to hold the API responses for each month
        responses = []

        # Initialize the API endpoint URL and parameters
        url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
        params = {
            'api-key': f'{API_KEY}'
        }

        # Loop until there is no more data available or we reach the end date
        while True:
            # Send a GET request to the API endpoint with the current parameters
            url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
            response = requests.get(url, params=params)

            # Check if the response was successful
            if response.status_code == 200:
                # Parse the response data as JSON
                data = response.json()

                # Append the response data to the list of responses
                responses.append(data)

                print(data["results"]['next_published_date'])
                # Check if there is more data available
                if data["results"]['next_published_date'] <= end_date:
                    # Update the parameters with the next_published_date
                    oldest_published_date = data["results"]['next_published_date']
                else:
                    break
                # Check if there is a timeout error
            elif response.status_code == 429:
                print('\t\t waiting ...')
                time.sleep(10)
        
            else:
                print(f'Error: {response.status_code} - {response.reason}')
                break

        # Write the list of responses to a NDJSON file
        output_filename = f'{list_name_encoded}.ndjson'
        with open(output_filename, 'w') as f:
            for response in responses:
                f.write(json.dumps(response) + '\n')

        # Print the path to the output file
        print(f'Output data saved to {os.path.abspath(output_filename)}')



In [7]:
# Execute names script
retrieve_newest_monthly_lists()

[{'list_name_encoded': 'audio-fiction', 'oldest_published_date': '2018-03-11'},
 {'list_name_encoded': 'audio-nonfiction',
  'oldest_published_date': '2018-03-11'},
 {'list_name_encoded': 'business-books',
  'oldest_published_date': '2013-11-03'},
 {'list_name_encoded': 'graphic-books-and-manga',
  'oldest_published_date': '2019-10-13'},
 {'list_name_encoded': 'mass-market-monthly',
  'oldest_published_date': '2019-10-13'}]

In [68]:
dax = [{'list_name_encoded': 'audio-fiction', 'oldest_published_date': '2018-03-11'},
 {'list_name_encoded': 'audio-nonfiction',
  'oldest_published_date': '2018-03-11'},
 {'list_name_encoded': 'business-books',
  'oldest_published_date': '2013-11-03'},
 {'list_name_encoded': 'graphic-books-and-manga',
  'oldest_published_date': '2019-10-13'},
 {'list_name_encoded': 'mass-market-monthly',
  'oldest_published_date': '2019-10-13'}]

In [73]:
num_res = {}

for d in dax:
    list_name_encoded = d['list_name_encoded'] #'audio-fiction'
    oldest_published_date = d['oldest_published_date'] #'2018-03-11'

    print(f"\t{list_name_encoded}")
    # Create a list to hold the API responses for each month
    responses = []
    offset = 0
    limit = 100

    # Initialize the API endpoint URL and parameters
    url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
    params = {
        'api-key': f'{API_KEY}',
    }
    
    url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
    response = requests.get(url, params=params)
    response = response.json()

    num_res[list_name_encoded] = response['num_results']

	audio-fiction
	audio-nonfiction
	business-books
	graphic-books-and-manga
	mass-market-monthly


In [98]:
params = {
    'api-key': f'{API_KEY}',
    'offset':20
}

url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
response = requests.get(url, params=params)
response = response.json()

#response = response.get("results", [])

In [99]:
response

{'status': 'OK',
 'copyright': 'Copyright (c) 2023 The New York Times Company.  All Rights Reserved.',
 'num_results': 0,
 'results': []}

In [86]:
url = f'https://api.nytimes.com/svc/books/v3/lists/best-sellers/history.json?api-key=bdnTOY3Wx7hYKcG7xjRo5ALgdcEYGiLv&offset=40'
response = requests.get(url)
response = response.get("results", [])

AttributeError: 'Response' object has no attribute 'get'

In [55]:
#pd.DataFrame.from_dict(response)#['results'], orient='index')

In [85]:
len(response['results'])

20

In [75]:
response

{'status': 'OK',
 'copyright': 'Copyright (c) 2023 The New York Times Company.  All Rights Reserved.',
 'num_results': 0,
 'results': []}

In [None]:
def main_request(url, endpoint, offset):
    

In [None]:
responses = []

# Initialize the API endpoint URL and parameters
url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
params = {
    'api-key': f'{API_KEY}'
    'offset':'0'
}
page = 1
# Loop until there is no more data available or we reach the end date
while True:
    # Send a GET request to the API endpoint with the current parameters
    url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
    response = requests.get(url, params=params)
    
    num_results = 

    # Parse the response data as JSON
    data = response.json()

    # Append the response data to the list of responses
    responses.append(data)

    print(data["results"]['next_published_date'])

    # Check to make sure the data contains all books
    # Books API only returns 20 books at a time
    while data['num_results'] > 20:

        # Use offset to get the next batch of results
        params['offset'] = page*20

        response = requests.get(url, params=params)
        response = response.json()
        response = response.get("results", [])

        # Go to the net page
        page += 1

    # Check if there is more data available
    if data["results"]['next_published_date'] <= end_date:
        # Update the parameters with the next_published_date
        oldest_published_date = data["results"]['next_published_date']
    else:
        break
        # Check if there is a timeout error
    elif response.status_code == 429:
        print('\t\t waiting ...')
        time.sleep(10)

    else:
        print(f'Error: {response.status_code} - {response.reason}')
        break

# Write the list of responses to a NDJSON file
output_filename = f'{list_name_encoded}.ndjson'
with open(output_filename, 'w') as f:
    for response in responses:
        f.write(json.dumps(response) + '\n')

# Print the path to the output file
print(f'Output data saved to {os.path.abspath(output_filename)}')

In [117]:

list_name_encoded = 'audio-fiction'
oldest_published_date = '2018-03-11'
end_date = datetime.datetime.today().strftime('%Y-%m-%d')

print(f"\t{list_name_encoded}")
# Create a list to hold the API responses for each month
responses = []

# Initialize the API endpoint URL and parameters
url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
params = {
    'api-key': f'{API_KEY}',
    'offset':0
}

# Loop until there is no more data available or we reach the end date
while True:
    # Send a GET request to the API endpoint with the current parameters
    url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
    response = requests.get(url, params=params)

    # Check if the response was successful
    if response.status_code == 200:
        # Parse the response data as JSON
        data = response.json()
        
        # Append the response data to the list of responses
        responses.append(data)
        
        num_results = data['num_results']
        page_num = 1

        
        # Go through all books
        while num_results >= 20:
            print("Nexxt page")
            # Use offset to get the next batch of results
            params['offset'] = page_num*20
            next_page = requests.get(url, params=params)
            
            if next_page.status_code == 200:
                next_page = response.json()
                next_page = response.get("results", [])
                data['results'].extend(new_results)
            
                # Iterate to the next page
                page_num += 1
            
            if next_page.status_code == 429:
                print('\t\t waiting ...')
                time.sleep(10)
                continue
            
        # Check if there is more data available
        if data["results"]['next_published_date'] <= end_date:
            # Update the parameters with the next_published_date
            oldest_published_date = data["results"]['next_published_date']
        else:
            break
    
    # Check if there is a timeout error
    elif response.status_code == 429:
        print('\t\t waiting ...')
        time.sleep(10)

    else:
        print(f'Error: {response.status_code} - {response.reason}')
        break

# Write the list of responses to a NDJSON file
output_filename = f'{list_name_encoded}.ndjson'
with open(output_filename, 'w') as f:
    for response in responses:
        f.write(json.dumps(response) + '\n')

# Print the path to the output file
print(f'Output data saved to {os.path.abspath(output_filename)}')

	audio-fiction
		 waiting ...
		 waiting ...
		 waiting ...
		 waiting ...


KeyboardInterrupt: 

In [109]:
list_name_encoded = 'audio-fiction'
oldest_published_date = '2018-03-11'

responses = []

# Initialize the API endpoint URL and parameters
url = f'https://api.nytimes.com/svc/books/v3/lists/{oldest_published_date}/{list_name_encoded}.json'
params = {
    'api-key': f'{API_KEY}',
    'offset':'0'
}
response = requests.get(url, params=params)
data = response.json()

num_results = data['num_results']

In [112]:
num_results % 20

15

In [115]:
22 % 20

2

In [108]:
response.json()

{'status': 'OK',
 'copyright': 'Copyright (c) 2023 The New York Times Company.  All Rights Reserved.',
 'num_results': 15,
 'last_modified': '2019-08-29T21:23:18-04:00',
 'results': {'list_name': 'Audio Fiction',
  'list_name_encoded': 'audio-fiction',
  'bestsellers_date': '2018-02-24',
  'published_date': '2018-03-11',
  'published_date_description': 'first',
  'next_published_date': '2018-04-01',
  'previous_published_date': '',
  'display_name': 'Audio Fiction',
  'normal_list_ends_at': 15,
  'updated': 'MONTHLY',
  'books': [{'rank': 1,
    'rank_last_week': 0,
    'weeks_on_list': 0,
    'asterisk': 0,
    'dagger': 0,
    'primary_isbn10': '',
    'primary_isbn13': '9780307913159',
    'publisher': 'Random House Audio',
    'description': 'It’s 2044, life on a resource-depleted Earth has grown increasingly grim, and the key to a vast fortune is hidden in a virtual-reality world. 15 hours, 46 minutes unabridged. Read by Wil Wheaton.',
    'price': '0.00',
    'title': 'READY PLAY

In [None]:

if __name__ == "__main__":

    # Execute names script
    retrieve_newest_monthly_lists()

    # Execute second part
    retrieve_books("newest_monthly_lists.json")