In [None]:
import json
from datetime import date
import traceback

import requests
from ratelimit import limits, sleep_and_retry

YEARS_TO_GO_BACK = 35


@sleep_and_retry
@limits(calls=10, period=10)  # at most 10 calls per 10-second window (bursts are allowed)
def get_http_json(url, timeout=30):
    """Fetch ``url`` with a rate-limited GET and return the decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx status code.
        ValueError: If the response body is not valid JSON.
    """
    print(f"Getting {url}")
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of treating an error payload as data.
    response.raise_for_status()
    return response.json()


def get_case(term, docket):
    """Fetch the case record for one term/docket pair from the Oyez API.

    Only the case document itself is requested; no linked resources are
    followed here.

    Returns whatever ``get_http_json`` yields for the case URL.
    """
    return get_http_json(f"https://api.oyez.org/cases/{term}/{docket}")


def write_case(term, docket, docket_data, output_dir="case_briefs_up_to_2025"):
    """Write ``docket_data`` as JSON to ``<output_dir>/<term>.<docket>.json``.

    The file is overwritten on every call so that re-running the notebook
    never leaves two concatenated JSON documents in one file.

    Args:
        term: Term of the case, used as the first part of the file name.
        docket: Docket number, used as the second part of the file name.
        docket_data: JSON-serialisable case record to store.
        output_dir: Directory that receives the file; created if missing.
    """
    import os  # local import: the notebook only imports os in a later cell

    os.makedirs(output_dir, exist_ok=True)
    target = os.path.join(output_dir, f"{term}.{docket}.json")
    # "w" (not "a"): appending a second dump would produce invalid JSON.
    with open(target, "w") as docket_file:
        json.dump(docket_data, docket_file, indent=2)


def fetch_missing(cases):
    """
    Download and store every case listed in ``cases``.

    ``cases`` maps ``(term, docket)`` tuples to their summary. Each key is
    fetched with ``get_case`` and persisted with ``write_case``. A failure
    for one case is reported (traceback plus a message) and does not stop
    the remaining cases from being processed.

    Returns the set of ``(term, docket)`` keys that were fetched and written
    without raising.
    """
    total = len(cases)
    successful = set()
    for count, (term, docket) in enumerate(cases.keys(), start=1):
        print(f"Trying: {term}/{docket}\t\t{count}/{total}")
        try:
            write_case(term, docket, get_case(term, docket))
        except Exception:
            # Best effort: log the problem and move on to the next case.
            traceback.print_exc()
            print(f"Failed for {term}/{docket}, continuing anyways")
        else:
            successful.add((term, docket))
    return successful


def load_known_cases():
    """Return ``(known_summaries, known_map)`` for the locally known cases.

    The summary list currently always starts out empty, so the returned map
    is empty too. ``known_map`` is keyed by ``(term, docket_number)``.
    """
    known_summaries = []
    known_map = {}
    for summary in known_summaries:
        known_map[(summary["term"], summary["docket_number"])] = summary
    return known_summaries, known_map


def find_missing(known_map, years):
    """
    Collect the case summaries the server lists for ``years`` that are not
    present in ``known_map``.

    One listing request is made per year. The result maps
    ``(term, docket_number)`` to the summary returned by the server.
    """
    to_fetch = {}
    for year in years:
        listing = get_http_json(
            f"https://api.oyez.org/cases?per_page=0&filter=term:{year}"
        )
        for entry in listing:
            case_key = (entry["term"], entry["docket_number"])
            if case_key in known_map:
                continue
            to_fetch[case_key] = entry
    return to_fetch


def years_to_recheck():
    """
    Return the last ``YEARS_TO_GO_BACK`` calendar years in ascending order,
    ending with the current year.

    For example, with ``YEARS_TO_GO_BACK = 2`` in 2019 the result is
    ``[2018, 2019]``.
    """
    newest = date.today().year
    oldest = newest - YEARS_TO_GO_BACK + 1
    return [*range(oldest, newest + 1)]


def main():
    """
    Find cases the server lists that are not known locally, fetch and store
    each of them, and record the summaries of the successfully fetched cases
    in ``./case_summaries.json``.

    The summaries file is rewritten in full with the known summaries plus the
    newly fetched ones. It is only touched when at least one case succeeded.
    """
    known_summaries, known_map = load_known_cases()
    missing_summaries = find_missing(known_map, years_to_recheck())

    print(f"Missing {len(missing_summaries)} cases")
    print(missing_summaries.keys())

    successful = fetch_missing(missing_summaries)

    for term, docket in successful:
        known_summaries.append(missing_summaries[(term, docket)])

    print(f"Updated {len(successful)} records!")
    if successful:
        # "w" (not "a"): dumping the full list after an existing document
        # would leave two JSON values in the file, which no parser accepts.
        with open("./case_summaries.json", "w") as handle:
            json.dump(known_summaries, handle, indent=2)


# Run the scrape only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Getting https://api.oyez.org/cases?per_page=0&filter=term:1991
Getting https://api.oyez.org/cases?per_page=0&filter=term:1992
Getting https://api.oyez.org/cases?per_page=0&filter=term:1993
Getting https://api.oyez.org/cases?per_page=0&filter=term:1994
Getting https://api.oyez.org/cases?per_page=0&filter=term:1995
Getting https://api.oyez.org/cases?per_page=0&filter=term:1996
Getting https://api.oyez.org/cases?per_page=0&filter=term:1997
Getting https://api.oyez.org/cases?per_page=0&filter=term:1998
Getting https://api.oyez.org/cases?per_page=0&filter=term:1999
Getting https://api.oyez.org/cases?per_page=0&filter=term:2000
Getting https://api.oyez.org/cases?per_page=0&filter=term:2001
Getting https://api.oyez.org/cases?per_page=0&filter=term:2002
Getting https://api.oyez.org/cases?per_page=0&filter=term:2003
Getting https://api.oyez.org/cases?per_page=0&filter=term:2004
Getting https://api.oyez.org/cases?per_page=0&filter=term:2005
Getting https://api.oyez.org/cases?per_page=0&filter=te

In [15]:
import os
from os import listdir
from os.path import isfile, join

# Names of the regular files (sub-directories excluded) found directly
# inside the case-brief folder.
mypath = "./case_briefs_up_to_2025"
onlyfiles = list(filter(lambda name: isfile(join(mypath, name)), listdir(mypath)))

In [13]:
# Show the collected file names as this cell's output.
onlyfiles

['2009.08-1529-t01.json',
 '2006.05-547-t01.json',
 '2010.09-10245-t01.json',
 '2014.13-485-t01.json',
 '2009.08-7621-t01.json',
 '2002.01-1118-t01.json',
 '1993.92-1662-t01.json',
 '2008.105_orig-t01.json',
 '2012.11-697-t01.json',
 '2020.19-1414-t01.json',
 '2010.09-1298-t01.json',
 '1994.93-823-t01.json',
 '1999.8_orig-t01.json',
 '2000.00-292-t01.json',
 '2007.06-1463-t01.json',
 '2008.08-660-t01.json',
 '2015.14-1375-t01.json',
 '2023.22-807-t01.json',
 '1991.91-860-t01.json',
 '2007.06-11543-t01.json',
 '2011.10-1261-t01.json',
 '1992.92-602-t01.json',
 '1992.92-603-t01.json',
 '2000.00-201-t01.json',
 '2013.12-515-t01.json',
 '1996.96-5955-t01.json',
 '2008.08-495-t01.json',
 '1995.95-668-t01.json',
 '1992.92-357-t01.json',
 '2015.15-290-t01.json',
 '1998.98-470-t01.json',
 '2006.06-618-t01.json',
 '2009.08-1175-t01.json',
 '2004.03-855-t01.json',
 '1995.94-7427-t01.json',
 '2019.18-1109-t01.json',
 '2021.20-1472-t01.json',
 '2005.04-1067-t01.json',
 '2005.04-980-t01.json',
 '20

In [16]:
folder_path = "./transcripts_up_to_2024"

# Safety switch: while True the cell only reports what it would remove.
DRY_RUN = True

# Identifiers of the files listed in `onlyfiles`, with the 5-character
# ".json" extension stripped. Built once instead of once per transcript.
known_case_ids = {name[:-5] for name in onlyfiles}

# NOTE(review): the recorded dry-run output flags mostly "*_orig" dockets,
# which may point to a file-naming mismatch rather than genuinely missing
# case briefs. Verify before setting DRY_RUN to False.
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if not os.path.isfile(file_path):
        continue
    # Drop the trailing 9 characters (e.g. "-t01.json") to get the case id.
    if filename[:-9] in known_case_ids:
        continue
    if DRY_RUN:
        print(f"Would delete {filename}")
    else:
        os.remove(file_path)
        print(f"Deleted {filename}")
        

Deleted 2008.105_orig-t01.json
Deleted 1999.8_orig-t01.json
Deleted 1997.120_orig-t01.json
Deleted 2007.134_orig-t01.json
Deleted 2009.138_orig-t01.json
Deleted 2009.132_orig-t01.json
Deleted 1992.91_522-t01.json
Deleted 2003.129_orig-t01.json
Deleted 2000.130_orig-t01.json
Deleted 1991.118_orig-t01.json
Deleted 2019.18-1587-t01.json
Deleted 1992.111_orig-t01.json
Deleted 1994.108_orig-t01.json
Deleted 2010.137_orig-t01.json
Deleted 1996.84_orig-t01.json
Deleted 1995.121_orig-t01.json
Deleted 2004.128_orig-t01.json
Deleted 2000.105_orig-t01.json
Deleted 1992.108_orig-t01.json
Deleted 2004.105_orig-t01.json
Deleted 2016.15-1498-t01.json
Deleted 1994.105_orig-t01.json
Deleted 2016.15-1204-t01.json
Deleted 1991.112_orig-t01.json
