In [1]:
import dotenv
import json
import os
import psycopg2
import requests

# Load variables from a .env file (when one is present) into the process
# environment before any of the lookups below run.
dotenv.load_dotenv()

# New York Times API credential. None when the variable is not set.
NYT_API_KEY = os.environ.get("NYT_API_KEY")

# Postgres connection settings. Each is a string, or None when unset.
POSTGRES_DATABASE = os.environ.get("POSTGRES_DATABASE")
POSTGRES_HOST = os.environ.get("POSTGRES_HOST")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD")
POSTGRES_PORT = os.environ.get("POSTGRES_PORT")
POSTGRES_USER = os.environ.get("POSTGRES_USER")

def get_response(url, timeout=30):
    """Issue an HTTP GET for ``url`` and return the decoded JSON body.

    Args:
        url: Full request URL, including any query string.
        timeout: Seconds to wait on the server, forwarded to
            ``requests.get``. Without it a stalled connection would block
            the notebook cell indefinitely.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        requests.HTTPError: If the status code is anything other than 200.
            The offending response is attached as ``.response``.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Fail loudly instead of printing and returning None, which only
        # surfaced later as an unrelated TypeError in the caller. The URL is
        # deliberately left out of the message: callers embed API keys in it.
        raise requests.HTTPError(
            f"Request failed with status code: {response.status_code}",
            response=response,
        )
    return json.loads(response.text)


# Year is YYYY; month is 1-12 written without a leading zero (M or MM, never 0M).
def get_archive(year, month):
    """Fetch the article list for one month from the NYT archive endpoint.

    Args:
        year: Four-digit year, as an int or a numeric string.
        month: Month number 1-12, as an int or a numeric string. A
            zero-padded value such as ``"01"`` is normalised to ``1``.

    Returns:
        The value stored under ``["response"]["docs"]`` in the decoded
        response.

    Raises:
        ValueError: If ``year``/``month`` are not integers or ``month`` is
            outside 1-12.
        RuntimeError: If ``get_response`` produced no payload.
    """
    # int() strips leading zeros so the URL always carries the M / MM form.
    year = int(year)
    month = int(month)
    if not 1 <= month <= 12:
        raise ValueError(f"month must be between 1 and 12, got {month}")

    url = f"https://api.nytimes.com/svc/archive/v1/{year}/{month}.json?api-key={NYT_API_KEY}"
    payload = get_response(url)
    if payload is None:
        # Keep the URL out of the message: it contains the API key.
        raise RuntimeError(f"No archive data returned for {year}/{month}")
    return payload["response"]["docs"]


# psql postgresql://user:pw@host:port/db
def save_archive(archive):
    """Insert every article in ``archive`` into the ``archives_raw`` table.

    Each article is serialised once with ``json.dumps`` and the same string
    is written to both the ``article_json`` and ``article_jsonb`` columns.
    All inserts share one transaction: either every row is committed or,
    if any statement raises, none are.

    Args:
        archive: Iterable of JSON-serialisable article objects.

    Raises:
        psycopg2.Error: On connection or statement failure. The transaction
            is rolled back and the connection is still closed.
    """
    insert_query = "INSERT INTO archives_raw (article_json, article_jsonb) VALUES (%s, %s)"

    conn = psycopg2.connect(
        database=POSTGRES_DATABASE,
        host=POSTGRES_HOST,
        password=POSTGRES_PASSWORD,
        port=POSTGRES_PORT,
        user=POSTGRES_USER,
    )
    try:
        # `with conn` commits on success and rolls back on error, so no
        # explicit commit is needed. It does NOT close the connection.
        with conn:
            with conn.cursor() as cursor:
                for article in archive:
                    json_data = json.dumps(article)
                    cursor.execute(insert_query, (json_data, json_data))
    finally:
        # Without this, every call leaked an open server connection.
        conn.close()

In [2]:
# Download the January 2021 archive and persist it to Postgres.
# NOTE(review): save_archive issues plain INSERTs, so re-running this cell
# presumably appends the same articles again. Confirm against the table's
# constraints before re-executing.
archive = get_archive(year=2021, month=1)
save_archive(archive=archive)

In [4]:
# Archives
# ########
# abstract
# web_url
# uri
# ["headline"]["main"]
# pub_date
# section_name
# ["byline"]["original"]

# Material can be a news article or an editorial.
# - Keep the type field when processing, so editorials can be separated from news.