Script that gets SCOTUS cases from Oyez.org

Pulled from https://github.com/smitp415/CSCI_544_Final_Project/tree/main

# Imports

In [1]:
from requests_futures.sessions import FuturesSession
import concurrent.futures as cf
import json

# Create an async session for get requests

In [2]:
# Shared session for every request in this notebook. Its get() calls return
# futures; the cells below collect responses via .result() / cf.as_completed().
session = FuturesSession()

In [3]:
# Smoke test: request a single case and show the URL that was fetched together
# with the decoded JSON body, to see what the listing endpoint returns.
x = [session.get('https://api.oyez.org/cases?per_page=1')]
for finished in cf.as_completed(x):
    reply = finished.result()
    print(reply.url)
    print(reply.json())

https://api.oyez.org/cases?per_page=1
[{'ID': 49051, 'name': 'American Trucking Assns., Inc. v. United States', 'href': 'https://api.oyez.org/cases/1966/510', 'view_count': 0, 'docket_number': '510', 'timeline': [{'event': 'Decided', 'dates': [-124826400], 'href': 'https://api.oyez.org/case_timeline/case_timeline/51665'}], 'question': None, 'citation': {'volume': '382', 'page': '372', 'year': '1966', 'href': 'https://api.oyez.org/case_citation/case_citation/12979'}, 'term': '1966', 'description': None, 'justia_url': 'https://supreme.justia.com/cases/federal/us/382/372/'}]


# Fetch case data from oyez.org and convert it into JSON

Note that this response does not include the full data for each case; instead, each entry has an 'href' field that points to the full case record.



In [4]:
# Request the case listing. NOTE(review): per_page=0 presumably disables
# pagination so that every case is returned -- confirm against the Oyez API.
future_data = session.get('https://api.oyez.org/cases?per_page=0')
listing_response = future_data.result()

# Fail fast with a clear HTTP error instead of a confusing JSON decode error
# (or an error payload being treated as the case list) further down.
listing_response.raise_for_status()
response_data = listing_response.json()

# Replace the 'href' fields in each case with the actual fetched data

In [5]:
# Maps case ID -> full case record; filled in as the per-case requests complete.
data = {}

# One async GET per case. response_data only contains brief info about each
# case, so the detailed record is fetched from the entry's 'href' field.
futures_cases = [session.get(element['href']) for element in response_data]

# Futures for the follow-up requests (one per decision / written opinion) and
# lookups from the requested href back to the owning case ID, so that the
# later cells can attach each response to the right case.
futures_decisions = []
futures_opinions = []
futures_decisions_lookup = {}
futures_opinions_lookup = {}

# Errors from case requests that could not be fetched or parsed. They are
# reported at the end of the cell instead of being silently discarded.
failed_cases = []


def _queue_detail_requests(items, case_id, futures, lookup):
    """Start an async GET for each item that carries an 'href'.

    items   -- entries taken from the case record; may be None or empty
    case_id -- ID of the case the entries belong to
    futures -- list that receives the newly started request futures
    lookup  -- dict updated with href -> case_id for every request started
    """
    for item in items or []:
        try:
            href = item['href']
        except (KeyError, TypeError):
            # Entry is not a mapping or has no link: nothing to fetch.
            continue
        if not href:
            # A null/empty link cannot be requested.
            continue
        futures.append(session.get(href))
        lookup[href] = case_id


# Handle the case responses as they arrive; completion order is arbitrary
# (first done, first served).
for f in cf.as_completed(futures_cases):
    try:
        case_resp = f.result().json()
        case_id = case_resp['ID']
    except Exception as exc:
        # Best effort: skip cases whose request failed, whose body is not
        # JSON, or that carry no ID -- but remember the failure.
        failed_cases.append(exc)
        continue

    data[case_id] = case_resp

    # Either field may be missing or null. Treating both as "no entries"
    # keeps a null 'decisions' value from aborting the opinions step.
    decisions = case_resp.get('decisions')
    opinions = case_resp.get('written_opinion')

    # Reset the decisions of the case; a later cell refills the list with the
    # detailed records fetched from each decision's 'href'.
    case_resp['decisions'] = []
    _queue_detail_requests(decisions, case_id,
                           futures_decisions, futures_decisions_lookup)

    # Same for the written opinions, which are collected under 'opinions'.
    case_resp['opinions'] = []
    _queue_detail_requests(opinions, case_id,
                           futures_opinions, futures_opinions_lookup)

if failed_cases:
    print(f'Warning: {len(failed_cases)} case requests failed or returned unusable data')


In [6]:
# Attach every fetched decision to its parent case. A failure on a single
# request is recorded and skipped so it cannot abort the whole collection.
failed_decisions = []
for f in cf.as_completed(futures_decisions):
    try:
        resp = f.result()
        # The lookup is keyed by the URL that was requested. After a redirect
        # resp.url is the final URL, so use the first hop in that case.
        href = resp.history[0].url if resp.history else resp.url
        case_id = futures_decisions_lookup[href]
        data[case_id]['decisions'].append(resp.json())
    except Exception as exc:
        failed_decisions.append(exc)

if failed_decisions:
    print(f'Warning: {len(failed_decisions)} decision requests could not be processed')

In [7]:
# Attach every fetched written opinion to its parent case. A failure on a
# single request is recorded and skipped so it cannot abort the collection.
failed_opinions = []
for f in cf.as_completed(futures_opinions):
    try:
        resp = f.result()
        # The lookup is keyed by the URL that was requested. After a redirect
        # resp.url is the final URL, so use the first hop in that case.
        href = resp.history[0].url if resp.history else resp.url
        case_id = futures_opinions_lookup[href]
        data[case_id]['opinions'].append(resp.json())
    except Exception as exc:
        failed_opinions.append(exc)

if failed_opinions:
    print(f'Warning: {len(failed_opinions)} opinion requests could not be processed')

In [8]:
# Report how many case records ended up in the data dictionary.
case_count = len(data)
print(f'Successfully downloaded {case_count} cases')

Successfully downloaded 8335 cases


# Export the data to JSON files

In [11]:
# Drop the case-ID keys and keep only the case records, in dictionary order.
values = [record for record in data.values()]

# Serialize the records as one JSON array in cases.json.
with open('cases.json', 'w') as out_file:
    json.dump(values, out_file)