Downloaded "All court data" from https://www.courtlistener.com/api/bulk-data/courts/all.tar.gz and put it in the directory courts-all

In [12]:
import requests
from bs4 import BeautifulSoup

In [20]:
try:
    # getting list of federal court circuits and districts from file
    f = open('./html/USFedCircuitComposition.html')
    page = f.read()
    assert len(page) > 100
    soup = BeautifulSoup(page, 'html.parser')
    
except:
    print("List of federal court circuits and districts not found, retrieving from Wikipedia instead")
    r = requests.get('https://en.m.wikipedia.org/w/index.php?title=Template:USFedCircuitComposition&mobileaction=toggle_view_mobile')
    # saving the list to a file
    f = open('./html/USFedCircuitComposition.html', "w")
    f.write(r.text)
    soup = BeautifulSoup(r.text, 'html.parser')
    

list of federal court circuits and districts not found, retrieving from Wikipedia instead


In [21]:
fedTable = soup.table


In [22]:
import pprint

appealsTo = {}

for line in fedTable.find_all(['p', 'li']):
    if line.a.get('title') != None:
        title = line.a.get('title').strip()
        if title.startswith('United States Court of Appeals') and title.endswith('Circuit'):
            circuit = title
        if title == circuit:
            appealsTo[title] = "Supreme Court of the United States"
        else:
            appealsTo[title] = circuit
            
pprint.pprint(appealsTo)
                

{'Armed Services Board of Contract Appeals': 'United States Court of Appeals '
                                             'for the Federal Circuit',
 'Bureau of Justice Assistance': 'United States Court of Appeals for the '
                                 'Federal Circuit',
 'Civilian Board of Contract Appeals': 'United States Court of Appeals for the '
                                       'Federal Circuit',
 'District Court for the Northern Mariana Islands': 'United States Court of '
                                                    'Appeals for the Ninth '
                                                    'Circuit',
 'District Court of Guam': 'United States Court of Appeals for the Ninth '
                           'Circuit',
 'District Court of the Virgin Islands': 'United States Court of Appeals for '
                                         'the Third Circuit',
 'Government Accountability Office': 'United States Court of Appeals for the '
                                

In [26]:
# Turns the CourtListener format of federal court names into the format used by Wikipedia.

def longFedCircuitName(fullName):
    if fullName.startswith("Court of Appeals"):
        fullName = fullName.replace("Court of Appeals", "United States Court of Appeals", 1)
    return fullName

def longFedDistrictName(fullName):
    fullName = fullName.replace(",", " for the").replace("D.", "District of").replace("C.", "Central ").replace("M.", "Middle ")
    fullName = fullName.replace("E.", "Eastern ").replace("W.", "Western ").replace("S.", "Southern ").replace("N.", "Northern ")
    if not fullName.endswith(("Northern Mariana Islands","Guam","Virgin Islands")):
        if fullName.startswith("District Court"):
            fullName = fullName.replace("District Court", "United States District Court", 1)
    return fullName

def longFedCourtName(fullName, j):
    assert j in ('F', 'FD', 'FS')
    if j in ('FD', 'FS'):
        return longFedDistrictName(fullName)
    if j == 'F':
        return longFedCircuitName(fullName)

longFedCourtName('District Court, N.D. Mississippi', 'FD')

'United States District Court for the Northern District of Mississippi'

In [30]:
import json, os

foldername = "./courts-all/"

courts = []

for dirpath, dirnames, filenames in os.walk(foldername):
    for filename in filenames:
        source = foldername + filename
        markup = open(source)
        court = json.load(markup)
        if "jurisdiction" in court:
            if court['jurisdiction'] in ('F', 'FD', 'FS'): # will need to import other kinds of courts later
                courts.append(court)

for court in courts:
    court['unabbreviated_name'] = longFedCourtName(court['full_name'], court['jurisdiction'])


In [34]:
# This is failing because I don't have a way to handle the defunct court
# A comprehensive solution would capture the date range that every court appealed to every other one...
# Remember to add WikiData URIs for each court
# This is fun!
                
for court in courts:
    if not court['appeals_to']:
        if court['unabbreviated_name'] == 'United States Supreme Court':
            court['appeals_to'] = None
        else:
            court['appeals_to'] = appealsTo[court['unabbreviated_name']]

KeyError: 'appeals_to'

In [77]:
appealsTo[longFedCourtName('District Court, N.D. California')]

'United States Court of Appeals for the Ninth Circuit'

In [82]:
courts

[{'citation_string': 'W.D. Tex.',
  'date_modified': '2013-10-22T15:56:39.023000Z',
  'end_date': None,
  'full_name': 'District Court, W.D. Texas',
  'has_opinion_scraper': False,
  'has_oral_argument_scraper': False,
  'in_use': True,
  'jurisdiction': 'FD',
  'position': 251.0,
  'resource_uri': 'http://www.courtlistener.com/api/rest/v3/courts/txwd/',
  'short_name': 'W.D. Texas',
  'start_date': None,
  'unabbreviated_name': 'United States District Court for the Western District of Texas',
  'url': 'http://www.txwd.uscourts.gov/'},
 {'citation_string': 'N.D. Ind.',
  'date_modified': '2017-04-21T21:42:23.700554Z',
  'end_date': None,
  'full_name': 'District Court, N.D. Indiana',
  'has_opinion_scraper': False,
  'has_oral_argument_scraper': False,
  'in_use': True,
  'jurisdiction': 'FD',
  'position': 211.0,
  'resource_uri': 'http://www.courtlistener.com/api/rest/v3/courts/innd/',
  'short_name': 'N.D. Indiana',
  'start_date': '1928-04-21',
  'unabbreviated_name': 'United State

In [12]:
import pandas as pd

df = pd.DataFrame(courts)
df = df.set_index('position').sort_index()

In [13]:
df

Unnamed: 0_level_0,citation_string,date_modified,end_date,full_name,has_opinion_scraper,has_oral_argument_scraper,in_use,jurisdiction,resource_uri,short_name,start_date,url
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
140.0,FISA Ct. Rev.,2014-05-27T15:58:53.457000Z,,Foreign Intelligence Surveillance Court of Review,False,False,True,FS,http://www.courtlistener.com/api/rest/v3/court...,Foreign Intelligence Surveillance Court of Review,1978-10-25,http://www.fisc.uscourts.gov/
156.0,Bankr. N.D. Ala.,2013-08-15T01:21:42Z,,"United States Bankruptcy Court, N.D. Alabama",False,False,True,FB,http://www.courtlistener.com/api/rest/v3/court...,N.D. Alabama,,http://www.alnb.uscourts.gov/
187.0,Bankr. D. Kan.,2013-08-14T23:43:21Z,,"United States Bankruptcy Court, D. Kansas",False,False,True,FB,http://www.courtlistener.com/api/rest/v3/court...,D. Kansas,,http://www.ksb.uscourts.gov/
189.2,Bankr. W.D. Mich.,2013-08-15T00:48:40Z,,"United States Bankruptcy Court, W.D. Michigan",False,False,True,FB,http://www.courtlistener.com/api/rest/v3/court...,W.D. Michigan,,http://www.miwb.uscourts.gov/
189.35,Bankr. S.D. Miss.,2013-08-15T16:05:56Z,,"United States Bankruptcy Court, S.D. Mississippi",False,False,True,FB,http://www.courtlistener.com/api/rest/v3/court...,S.D. Mississippi,,http://www.mssb.uscourts.gov/
240.55,,2017-03-07T23:09:13.812273Z,,"District Court, D. Michigan",False,False,False,FD,http://www.courtlistener.com/api/rest/v3/court...,D. Michigan,,
246.0,W.D. Okla.,2013-08-14T22:23:53Z,,"District Court, W.D. Oklahoma",False,False,True,FD,http://www.courtlistener.com/api/rest/v3/court...,W.D. Oklahoma,,http://www.uscourts.gov/
299.4,N. Mar. I.,2013-08-15T19:41:12Z,,"District Court, Northern Mariana Islands",False,False,True,FD,http://www.courtlistener.com/api/rest/v3/court...,Northern Mariana Islands,,http://www.nmid.uscourts.gov/
322.0,Ariz.,2013-08-14T16:46:30Z,,Arizona Supreme Court,True,False,True,S,http://www.courtlistener.com/api/rest/v3/court...,Arizona Supreme Court,,http://www.azcourts.gov/azsupremecourt.aspx
359.1,Haw. App.,2013-08-14T22:42:16Z,,Hawaii Intermediate Court of Appeals,True,False,True,SA,http://www.courtlistener.com/api/rest/v3/court...,Hawaii Intermediate Court of Appeals,,http://www.courts.state.hi.us/


In [14]:
os.walk(foldername)

<generator object walk at 0x10d1e7308>

In [15]:
help(os.walk)

Help on function walk in module os:

walk(top, topdown=True, onerror=None, followlinks=False)
    Directory tree generator.
    
    For each directory in the directory tree rooted at top (including top
    itself, but excluding '.' and '..'), yields a 3-tuple
    
        dirpath, dirnames, filenames
    
    dirpath is a string, the path to the directory.  dirnames is a list of
    the names of the subdirectories in dirpath (excluding '.' and '..').
    filenames is a list of the names of the non-directory files in dirpath.
    Note that the names in the lists are just names, with no path components.
    To get a full path (which begins with top) to a file or directory in
    dirpath, do os.path.join(dirpath, name).
    
    If optional arg 'topdown' is true or not specified, the triple for a
    directory is generated before the triples for any of its subdirectories
    (directories are generated top down).  If topdown is false, the triple
    for a directory is generated after the 

In [5]:
import json

courts = json.loads(r.text)

In [6]:
courts

{'count': 419,
 'next': 'https://www.courtlistener.com/api/rest/v3/courts/?Authorization=Token+6844d0c4a5c70e28ec91668b51c60422db6250c3&page=2',
 'previous': None,
 'results': [{'citation_string': 'SCOTUS',
   'date_modified': '2014-10-31T01:59:15.952000Z',
   'end_date': None,
   'fjc_court_id': '',
   'full_name': 'Supreme Court of the United States',
   'has_opinion_scraper': True,
   'has_oral_argument_scraper': True,
   'in_use': True,
   'jurisdiction': 'F',
   'pacer_court_id': None,
   'position': 1.0,
   'resource_uri': 'https://www.courtlistener.com/api/rest/v3/courts/scotus/',
   'short_name': 'Supreme Court',
   'start_date': '1789-09-24',
   'url': 'http://supremecourt.gov/'},
  {'citation_string': '1st Cir.',
   'date_modified': '2014-10-31T01:57:47.787000Z',
   'end_date': None,
   'fjc_court_id': '1',
   'full_name': 'Court of Appeals for the First Circuit',
   'has_opinion_scraper': True,
   'has_oral_argument_scraper': True,
   'in_use': True,
   'jurisdiction': 'F',
