# Case Law API

In [1]:
import requests
import json
from bs4 import BeautifulSoup

In [2]:
# Place names matched (case-insensitively, as substrings) against court names.
# Note: the list carries 'New York City' rather than a plain 'New York' entry.
counties = [
    'Albany', 'Allegany', 'Bronx', 'Broome', 'Cattaraugus', 'Cayuga',
    'Chautauqua', 'Chemung', 'Chenango', 'Clinton', 'Columbia', 'Cortland',
    'Delaware', 'Dutchess', 'Erie', 'Essex', 'Franklin', 'Fulton',
    'Genesee', 'Greene', 'Hamilton', 'Herkimer', 'Jefferson', 'Kings',
    'Lewis', 'Livingston', 'Madison', 'Monroe', 'Montgomery', 'Nassau',
    'New York City', 'Niagara', 'Oneida', 'Onondaga', 'Ontario', 'Orange',
    'Orleans', 'Oswego', 'Otsego', 'Putnam', 'Queens', 'Rensselaer',
    'Richmond', 'Rockland', 'St. Lawrence', 'Saratoga', 'Schenectady',
    'Schoharie', 'Schuyler', 'Seneca', 'Steuben', 'Suffolk', 'Sullivan',
    'Tioga', 'Tompkins', 'Ulster', 'Warren', 'Washington', 'Wayne',
    'Westchester', 'Wyoming', 'Yates',
]

# Case Categorization
The `categorize_cases` function files each county-specific case under that county's dictionary entry, and also adds every case to the overarching New York State category (NYS).

In [3]:
def removeHTML(s):
    """Return the text content of ``s`` after parsing it as HTML.

    ``s`` is parsed with BeautifulSoup's 'html.parser' backend and the
    result of ``get_text()`` on the parsed document is returned.
    """
    return BeautifulSoup(s, features='html.parser').get_text()

# Files each case under every matching county and under the statewide 'NYS' key
def categorize_cases(all_cases, results, county_names=None):
    """Sort API case records into ``all_cases`` by county and statewide.

    Each record becomes a tuple
    ``(name, id, casebody_text, frontend_url, court_name)``. The tuple is
    appended to ``all_cases[county]`` for every county whose name occurs
    (case-insensitively, as a substring) in the record's court name, and is
    always appended to ``all_cases['NYS']``.

    Parameters
    ----------
    all_cases : dict
        Mapping of category name to list of case tuples; mutated in place.
        A missing 'NYS' entry is created rather than raising ``KeyError``.
    results : iterable of dict
        Case records; each must provide ``name``, ``id``, ``frontend_url``,
        ``court['name']`` and ``casebody['data']`` (which may be ``None``).
    county_names : iterable of str, optional
        Names to match against the court name. Defaults to the module-level
        ``counties`` list.

    Returns
    -------
    int
        The number of records processed.
    """
    if county_names is None:
        county_names = counties
    # Create the statewide bucket up front so a fresh dict works too.
    statewide = all_cases.setdefault('NYS', [])
    total_cases = 0
    for case in results:
        total_cases += 1
        casebody = case['casebody']['data']
        # A record without body data gets an empty string instead of text.
        casebody = removeHTML(casebody) if casebody is not None else ''
        case_county = case['court']['name']
        info = (case['name'], case['id'], casebody, case['frontend_url'], case_county)
        # Lower-case the court name once instead of once per county.
        court_lower = case_county.lower()
        for county in county_names:
            if county.lower() in court_lower:
                all_cases.setdefault(county, []).append(info)
        statewide.append(info)
    return total_cases

# How to collect the cases from the API
The code block below has already been set up by Lily to get any desired amount of case information, but only the first 500 cases (assuming you have 500 cases in your case allowance) will contain meaningful text data. 

If each of the 5 group members collects 500 cases per day for 2 days, we'd have a total of 5000 cases.

David: Collect cases from January and February of 2017 and earlier.  
Lily: Collect cases from March and April of 2017 and earlier.  
Kevin: Collect cases from May and June of 2017 and earlier.  
Keivan: Collect cases from July, August, and September of 2017 and earlier.  
Andrei: Collect cases from October, November, and December of 2017 and earlier.  

You can control the date range with `max_date` and `min_date` in the code block below; for instance, David's time period will be represented with  
`max_date = '&decision_date_max=2017-02-28'`  
`min_date = '&decision_date_min=2017-01-01'`  

The final change you have to make is to replace the dummy token `xyz123` in the code block below with your own authorization token, which you'll receive after registering on CaseLaw.

Make sure to run the code carefully since you only have 1 chance per day to retrieve all 500 cases.

In [4]:
#querying data from API

# Insert your own API key here in place of the dummy value (never commit a real key).
API_TOKEN = 'xyz123'
# Collect a maximum of 500 court cases due to access limits: 5 pages of 100.
MAX_PAGES = 5

max_date = '&decision_date_max=YYYY-MM-DD'
min_date = '&decision_date_min=YYYY-MM-DD'
data = {'next': 'https://api.case.law/v1/cases/?jurisdiction=ny&full_case=true&body_format=html&page_size=100&ordering=-decision_date'}
data['next'] += max_date + min_date
all_cases = {'NYS': []}
total_cases = 0
true_total = 0
iter_count = 0
# The last page still carries a 'next' key, but its value is null, so test the
# value rather than the key's presence; otherwise a result set shorter than
# MAX_PAGES would end with a request to the URL None.
while data.get('next') and iter_count < MAX_PAGES:
    r = requests.get(data['next'], headers={'Authorization': 'Token ' + API_TOKEN})
    # Surface auth/quota/server errors here instead of as a KeyError on 'count'.
    r.raise_for_status()
    data = r.json()
    # 'count' is compared with the number of cases actually fetched below.
    true_total = data['count']
    total_cases += categorize_cases(all_cases, data['results'])
    iter_count += 1

if true_total != total_cases:
    print('Did not get all NYS cases, but make sure you got at least some')

Did not get all NYS cases, but make sure you got at least some


# Resulting File
The resulting file will be placed in the same directory as this notebook; make sure it's not overwritten.

In [5]:
# Serialize the collected cases to a static JSON file beside this notebook.
# Opening with 'w' replaces any existing file of the same name.
with open('./tester_static.json', 'w') as out_file:
    out_file.write(json.dumps(all_cases))