In [1]:
# Load the .env file to set the API Key
from dotenv import load_dotenv

# os allows us to check for system variables set by the .env file
import os


# Load the variables defined in the .env file so that os.getenv() can
# read the API key later in the notebook.
load_dotenv()

True

In [2]:
# allows us to request the data from the CourtListener API
import requests

# allows us to recognize and load the data in the format in which it is
#    encoded: JavaScript Object Notation (JSON)
import json

# allows us to work with the data in a variety of ways before sending
# it to the output
import pandas as pd

In [3]:
# Constants used throughout the notebook.

# Root address of the CourtListener REST API (v3); the endpoint string
# below is appended to it.
BASE_URL = 'https://www.courtlistener.com/api/rest/v3/'

# Endpoint path and query string for this exercise: the opinions
# endpoint with a court-id filter of "cafc". CourtListener offers MANY
# other ways to collect data.
CAFC_OPINIONS_URL_STRING = "opinions/?cluster__docket__court__id=cafc"

# Name of the file in which the data is meant to be saved.
# NOTE(review): no visible cell references this constant -- confirm
# whether it should be used when the CSV is written.
CAFC_OPINIONS_CSV = "cafc_opinions_data.csv"

# Headers passed along with every API request so that it carries our
# API key, which is read from the CL_API_KEY environment variable.
# os.getenv() returns None when the variable is unset, in which case the
# header value becomes the literal text "Token None".
HEADERS = dict(Authorization='Token {}'.format(os.getenv("CL_API_KEY")))

In [4]:
# Request the CAFC Opinion Data from CourtListener.
# The timeout (in seconds) keeps this cell from hanging indefinitely if
# the server stops responding.
cafc_opinions_response = requests.get(
    url     = BASE_URL + CAFC_OPINIONS_URL_STRING,
    headers = HEADERS,
    timeout = 30
)

# Fail right away with a clear HTTP error (for example, when the API key
# is rejected) instead of a confusing KeyError on "results" below.
cafc_opinions_response.raise_for_status()

# Keep only the records stored under the "results" key of the JSON reply.
# NOTE(review): only this single response is read; if the API splits the
# results across pages, later pages are not fetched -- confirm.
CAFC_OPINIONS_JSON = cafc_opinions_response.json()["results"]

In [5]:
# Build a pandas "DataFrame" out of the JSON results that came back from
# the API request
cafc_opinions_df = pd.DataFrame(data=CAFC_OPINIONS_JSON)

In [6]:
# Create an empty list that collects one case name per DataFrame row,
# in row order
case_names = []

# Iterate over the cluster URLs in the DataFrame
for value in cafc_opinions_df["cluster"]:

    # Send a request to CourtListener for the cluster; the timeout (in
    # seconds) keeps a stalled connection from hanging the loop
    cluster = requests.get(value, headers=HEADERS, timeout=30)

    # If the reply cannot be parsed as JSON (ValueError) or does not
    # provide a "case_name" entry (KeyError / TypeError), record "NA" so
    # the list stays aligned with the DataFrame rows. Only these errors
    # are caught, so interrupting the kernel still stops the loop.
    try:
        case_names.append(cluster.json()["case_name"])
    except (KeyError, TypeError, ValueError):
        case_names.append("NA")
        print(value + " HAS NO CASE NAME")

# Add the collected case names to the DataFrame as a new column
cafc_opinions_df["case_name"] = case_names

In [8]:
# Use this command if you want to view the first few rows of the DataFrame
# to make sure it looks correct

# cafc_opinions_df.head()

In [9]:
# Write the four selected columns to a CSV file, opening it in write
# mode and leaving out the DataFrame's row index.
# NOTE(review): the file name here differs from the CAFC_OPINIONS_CSV
# constant ("cafc_opinions_data.csv") -- confirm which one is intended.
cafc_opinions_df.to_csv(
    path_or_buf = "cafc_opinions.csv",
    index = False,
    columns = ['case_name', 'date_created', 'download_url', 'page_count'],
    mode = 'w',
)

In [10]:
# Here's a sneak peek of what we are going to start with in Part 2
# cafc_opinions_df.to_json(
#     "cafc_opinions.json", 
#     orient="records"
# )