# Code from GitHub #
Run this if we want to obtain the data ourselves; otherwise, just download and open the JSON files.

In [131]:
#!/usr/bin/env python3
"""
Script to find new/missing cases and update
the repo with that
"""

import json
from datetime import date
import traceback

import requests
# from ratelimit import limits, sleep_and_retry

YEARS_TO_GO_BACK = 2


# @sleep_and_retry
# @limits(calls=10, period=10)  # no more than 1 call per second
def get_http_json(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before giving up; handed
            straight to ``requests.get``. Without it a stalled connection
            would block the caller indefinitely.

    Returns:
        Whatever ``response.json()`` produces for the response body.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    print(f"Getting {url}")
    response = requests.get(url, timeout=timeout)
    return response.json()


def get_case(term, docket):
    """Download a case record plus every transcript it references.

    Requests ``https://api.oyez.org/cases/<term>/<docket>`` and then follows
    the "href" of each entry stored under the record's
    "oral_argument_audio" key.

    Returns:
        A ``(docket_data, transcripts)`` pair. ``transcripts`` is an empty
        list when the key is absent or its value is falsy.
    """
    docket_data = get_http_json(f"https://api.oyez.org/cases/{term}/{docket}")

    audio_links = (
        docket_data["oral_argument_audio"]
        if "oral_argument_audio" in docket_data
        else None
    )
    if not audio_links:
        # Nothing to download yet: hand back an empty transcript list so the
        # caller can tell and try this docket again later.
        print(f"No oral arguments for docket {docket}")
        return docket_data, []

    transcripts = [get_http_json(entry["href"]) for entry in audio_links]
    return docket_data, transcripts


# def write_case(term, docket, docket_data, transcripts):
#     """
#     Writes term-docket.json file with docket_data
#     For each transcript, writes the term-docket-t##.json file
#     """
#     with open(f"oyez/cases/{term}.{docket}.json", "w") as docket_file:
#         json.dump(docket_data, docket_file, indent=2)

#     count = 0
#     for t in transcripts:
#         count += 1
#         t_filename = "oyez/cases/{}.{}-t{:0>2d}.json".format(term, docket, count)
#         with open(t_filename, "w") as t_file:
#             json.dump(t, t_file, indent=2)


# def fetch_missing(cases):
#     """
#     cases is a map of tuples to Summary (term, docket) : {SUMMARY}
#     For each case, fetch the docket and transcript data and write to a file
    
#     return set of cases that this was successful for
#     """
#     count = 0
#     total = len(cases)
#     succesful = set()
#     for term, docket in cases.keys():
#         ## pull the file
#         count += 1
#         print(f"Trying: {term}/{docket}\t\t{count}/{total}")
#         try:
#             docket_data, transcripts = get_case(term, docket)
#             if not transcripts:
#                 # No transcripts for this case yet
#                 continue

#             write_case(term, docket, docket_data, transcripts)
#             succesful.add((term, docket))
#         except Exception as exc:
#             traceback.print_exc()
#             print(f"Failed for {term}/{docket}, continuing anyways")
#     return succesful


# def load_known_cases():
#     with open("oyez/case_summaries.json") as handle:
#         known_summaries = json.load(handle)
#     known_map = {
#         (summary["term"], summary["docket_number"]): summary
#         for summary in known_summaries
#     }
#     return (known_summaries, known_map)


# def find_missing(known_map, years):
#     """
#     Fetch all summaries for given years and find any that are
#     missing in the local "known_map"
#     """
#     to_fetch = {}
#     for year in years:
#         summary_url = f"https://api.oyez.org/cases?per_page=0&filter=term:{year}"
#         summaries = get_http_json(summary_url)
#         for summary in summaries:
#             if (summary["term"], summary["docket_number"]) not in known_map:
#                 to_fetch[(summary["term"], summary["docket_number"])] = summary

#     return to_fetch


# def years_to_recheck():
#     """
#     Makes a list of years going back to
#     YEARS_TO_GO_BACK
#     e.g. [2018, 2019]
#     """
#     cur_year = date.today().year
#     return list(range(cur_year - YEARS_TO_GO_BACK + 1, cur_year + 1))


# def main():
#     """
#     Find any cases that the server is updated with but we don't have locally
#     and fetch the case info and transcripts for them.
#     For all cases this is successful for, also update case_summaries
#     """
#     (known_summaries, known_map) = load_known_cases()
#     missing_summaries = find_missing(known_map, years_to_recheck())

#     print(f"Missing {len(missing_summaries)} cases")
#     print(missing_summaries.keys())

#     succesful = fetch_missing(missing_summaries)

#     for term, docket in succesful:
#         known_summaries.append(missing_summaries[(term, docket)])

#     print(f"Updated {len(succesful)} records!")
#     if len(succesful) > 0:
#         with open("oyez/case_summaries.json", "w") as handle:
#             json.dump(known_summaries, handle, indent=2)


# if __name__ == "__main__":
#     main()

In [117]:
# Example: fetch one docket together with every transcript it links to.
term = '1965'
docket = '14_orig'
res = get_case(term, docket)
# get_case returns a (docket_data, transcripts) pair; unpack it so that the
# cells below, which read `transcripts`, have that name defined.
docket_data, transcripts = res

Getting https://api.oyez.org/cases/1965/14_orig
Getting https://api.oyez.org/case_media/oral_argument_audio/14026
Getting https://api.oyez.org/case_media/oral_argument_audio/15067
Getting https://api.oyez.org/case_media/oral_argument_audio/15465


In [114]:
def getTranscript(transcripts):
    """Flatten transcript records into one speaker-tagged string.

    Each turn contributes ``' <name> '`` followed by every one of its text
    blocks, each prefixed with a single space. Turns whose speaker name
    cannot be read are tagged ``' <UNK> '``.

    Args:
        transcripts: Iterable of dicts, each with a 'transcript' entry that
            holds 'sections' -> 'turns' -> 'text_blocks' -> 'text'.

    Returns:
        The concatenated text; ``''`` when there is nothing to concatenate.
    """
    pieces = []
    for t in transcripts:
        transcript = t['transcript']
        if not transcript:
            # NOTE(review): presumably some records carry no transcript
            # (None); skip them instead of failing on the subscript below.
            continue
        for section in transcript['sections']:
            for turn in section['turns']:
                try:
                    speaker = ' <' + turn['speaker']['name'] + '> '
                except (KeyError, TypeError):
                    # Missing 'speaker'/'name' key, or a None in their place.
                    speaker = ' <UNK> '
                pieces.append(speaker)
                pieces.extend(' ' + block['text'] for block in turn['text_blocks'])
    # Join once at the end rather than growing a string with += per block.
    return ''.join(pieces)

In [132]:
# Flatten every fetched transcript into a single speaker-tagged string.
# NOTE(review): `transcripts` must already exist in the session; presumably it
# is the second element of the pair returned by get_case. Confirm.
getTranscript(transcripts)

' <Earl Warren>  Number 14, Original, State of Louisiana, Plaintiff, versus the State of Mississippi. Mr. Madden, you may proceed with your arguments. <John L. Madden>  Mr. Chief Justice and Associate Justices of this Honorable Court. A first few words that I shall say will -- I will -- ameliorating to this Court, because I said substantially the same thing I gave before standing before this Court when I had the privilege of doing so I introduced the argument in this manner, my recollection of how we came into this Court and why. And we have a new justice on the Supreme Court in the meantime. And I\'ll just simply say that this controversy had its inception in a suit that was filed by certain Mississippi landowners and a District Court of the United States in Mississippi. It was directed solely against Humble Oil and Refining Company, Louisiana\'s mineral lessee. The Mississippi landowners alleged in their complaint, as they were the owners of an oil well which was drilled by Carter Oi

In [129]:
def getAudio(transcripts):
    """Collect the media-file links of the given transcript records.

    Returns:
        A two-item list ``[count, hrefs]``: the number of records passed in,
        and the 'href' of every entry found in each record's 'media_file'
        list, in order.
    """
    count = len(transcripts)
    # 'media_file' is itself a list, so flatten one level while collecting.
    hrefs = [
        entry['href']
        for record in transcripts
        for entry in record['media_file']
    ]
    return [count, hrefs]
            

In [130]:
# Show the transcript count followed by the media-file hrefs they reference.
getAudio(transcripts)

[3,
 ['https://s3.amazonaws.com/oyez.case-media.mp3/case_data/1963/14%20ORIG/19651116a_14orig.delivery.mp3',
  'https://s3.amazonaws.com/oyez.case-media.mp3/case_data/1965/14%20ORIG/19631210a_14orig_part1.delivery.mp3',
  'https://s3.amazonaws.com/oyez.case-media.mp3/case_data/1965/14%20ORIG/19631210a_14orig_part2.delivery.mp3']]