In [1]:
# Load the .env file to set the API Key
from dotenv import load_dotenv

# os allows us to check for system variables set by the .env file
import os


# Read the .env file so that the API key it defines can be looked up
# through os.getenv() when the request headers are built.
load_dotenv()

True

In [2]:
# allows us to request the data from the CourtListener API
import requests

# allows us to recognize and load the data in the form that it is encoded: 
#    JavaScript Object Notation (JSON)
import json

# allows us to work with the data in a variety of ways before sending
# it to the output
import pandas as pd

In [3]:
# Constants shared by the API requests in this notebook.

# Root of the CourtListener REST API; every request URL starts with it.
BASE_URL = "https://www.courtlistener.com/api/rest/v3/"

# Path and query string appended to BASE_URL to select the opinions for
# this exercise (filtered on the court id "cafc"). CourtListener offers
# MANY other ways to collect data.
CAFC_OPINIONS_URL_STRING = "opinions/?cluster__docket__court__id=cafc"

# Headers sent with each API request so that it carries our API key,
# which is read from the CL_API_KEY environment variable.
HEADERS = {"Authorization": f"Token {os.getenv('CL_API_KEY')}"}

# File name for saving the data (currently disabled)
# CAFC_OPINIONS_CSV = "cafc_opinions_data.csv"

In [5]:
# Request the CAFC Opinion Data from CourtListener.
# A timeout (in seconds) keeps the cell from hanging forever if the server
# never answers.
cafc_opinions_response = requests.get(
    url=BASE_URL + CAFC_OPINIONS_URL_STRING,
    headers=HEADERS,
    timeout=30,
)

# Stop with a clear HTTP error (e.g. a missing or invalid API key) instead
# of an opaque KeyError on "results" further down.
cafc_opinions_response.raise_for_status()

# Keep only the list stored under the "results" key of the JSON payload.
# NOTE(review): only this single response is read; if the API paginates,
# later pages are not fetched -- confirm whether that is intended.
CAFC_OPINIONS_JSON = cafc_opinions_response.json()["results"]

In [7]:
# Build a pandas DataFrame from the list of results returned by the API
# request above.
cafc_opinions_df = pd.DataFrame(data=CAFC_OPINIONS_JSON)

Unnamed: 0,resource_uri,id,absolute_url,cluster,author,joined_by,date_created,date_modified,author_str,per_curiam,...,local_path,plain_text,html,html_lawbox,html_columbia,html_anon_2020,xml_harvard,html_with_citations,extracted_by_ocr,opinions_cited
0,https://www.courtlistener.com/api/rest/v3/opin...,4710003,/opinion/4906224/andra-group-lp-v-victorias-se...,https://www.courtlistener.com/api/rest/v3/clus...,,[],2021-08-09T08:03:00.255776-07:00,2021-08-09T08:03:00.723083-07:00,,False,...,pdf/2021/08/09/andra_group_lp_v._victorias_sec...,Case: 20-2009 Document: 43 Page: 1 Fil...,,,,,,,False,[]
1,https://www.courtlistener.com/api/rest/v3/opin...,4710002,/opinion/4906223/cajun-services-unlimited-llc-...,https://www.courtlistener.com/api/rest/v3/clus...,,[],2021-08-09T08:02:59.569827-07:00,2021-08-09T08:03:00.042026-07:00,,False,...,pdf/2021/08/09/cajun_services_unlimited_llc_v....,Case: 20-1997 Document: 57 Page: 1 Fil...,,,,,,,False,[]
2,https://www.courtlistener.com/api/rest/v3/opin...,4710001,/opinion/4906222/satco-products-inc-v-thread-g...,https://www.courtlistener.com/api/rest/v3/clus...,,[],2021-08-09T08:01:59.584743-07:00,2021-08-09T08:02:59.457074-07:00,,False,...,pdf/2021/08/09/satco_products_inc._v._thread_g...,Case: 21-1141 Document: 32 Page: 1 Fil...,,,,,,,False,[]
3,https://www.courtlistener.com/api/rest/v3/opin...,4709674,/opinion/4905895/buffington-v-mcdonough/,https://www.courtlistener.com/api/rest/v3/clus...,,[],2021-08-06T08:03:35.291823-07:00,2021-08-06T08:29:50.903748-07:00,,False,...,pdf/2021/08/06/buffington_v._mcdonough.pdf,Case: 20-1479 Document: 62 Page: 1 Fi...,,,,,,"<pre class=""inline"">Case: 20-1479 Document:...",False,[https://www.courtlistener.com/api/rest/v3/opi...
4,https://www.courtlistener.com/api/rest/v3/opin...,4709673,/opinion/4905894/ingevity-corporation-v-itc/,https://www.courtlistener.com/api/rest/v3/clus...,,[],2021-08-06T08:03:34.554076-07:00,2021-08-06T08:57:34.426563-07:00,,False,...,pdf/2021/08/06/ingevity_corporation_v._itc_1.pdf,Case: 20-1800 Document: 87-2 Page: 1 Fi...,,,,,,"<pre class=""inline"">Case: 20-1800 Document: ...",False,[https://www.courtlistener.com/api/rest/v3/opi...


In [8]:
# Look up the case name of every opinion by requesting its cluster record.
# One name (or the placeholder "NA") is collected per row, in row order.
case_names = []

# Iterate over the cluster URLs in the DataFrame
for cluster_url in cafc_opinions_df["cluster"]:

    # Send a request to CourtListener for the cluster. The timeout (in
    # seconds) keeps one unresponsive request from stalling the whole loop.
    cluster = requests.get(cluster_url, headers=HEADERS, timeout=30)

    # If the response has no usable case name, record "NA" and report the
    # cluster URL. Only the failures that mean "no case name" are caught:
    #   ValueError - the body is not valid JSON
    #   KeyError   - the JSON object has no "case_name" key
    #   TypeError  - the JSON payload is not an object
    # Anything else (including KeyboardInterrupt) is allowed to propagate.
    try:
        case_names.append(cluster.json()["case_name"])
    except (KeyError, TypeError, ValueError):
        case_names.append("NA")
        print(cluster_url + " HAS NO CASE NAME")

# Add the collected case names to the DataFrame as a new column
cafc_opinions_df["case_name"] = case_names

In [11]:
# Save the selected opinion columns to a CSV file, overwriting any existing
# file and leaving out the DataFrame's row index.
cafc_opinions_df.to_csv(
    path_or_buf="cafc_opinions.csv",
    columns=[
        "case_name",
        "date_created",
        "download_url",
        "page_count",
    ],
    index=False,
    mode="w",
)

In [16]:
# Save the full DataFrame as a JSON file, written as a list with one
# object per row ("records" orientation).
cafc_opinions_df.to_json(path_or_buf="cafc_opinions.json", orient="records")