In [1]:
#test using BlackstoneNLP

#import standard library modules
import sys
import json
from collections import Counter
from typing import List, Any

#modules from the community
import spacy
from dataclasses import dataclass #backported this module from 3.7

#import lxml

from spacy import displacy
from blackstone.displacy_palette import ner_displacy_options

#blackstone improved citations
from blackstone.pipeline.sentence_segmenter import SentenceSegmenter
from blackstone.rules import CITATION_PATTERNS

import en_core_web_sm


#BeautifulSoup modules

In [2]:
# Load the two pipelines used in this notebook.
nlp = spacy.load('en_blackstone_proto') #the blackstone model
# Second pipeline: the en_core_web_sm package imported above (used further down
# to re-parse the text of the entities labelled JUDGE).
nlp2 = en_core_web_sm.load()
#load sentence segmenter
# The segmenter is built from the Blackstone vocab plus Blackstone's citation patterns.
sentence_segmenter = SentenceSegmenter(nlp.vocab, CITATION_PATTERNS)
# Register the segmenter ahead of the "parser" component of the Blackstone pipeline.
# NOTE(review): re-running this cell on a live kernel presumably fails because the
# component has already been added -- confirm, and restart the kernel before re-running.
nlp.add_pipe(sentence_segmenter, before="parser")

#nlp=spacy.load('en_core_web_sm') #the default spacy model

In [5]:

def doc_from_file(filename: str, model: Any, encoding: str = 'utf-8'):
    '''Takes a specified already cleaned file, loads it, and produces a document object

    args:
        filename: path of the text file to read
        model: any callable that accepts the file's text (e.g. a loaded spaCy pipeline)
        encoding: text encoding used to decode the file; defaults to UTF-8 so that
            the result does not depend on the platform's locale settings
    returns:
        whatever ``model`` returns when called with the file's full text
    '''
    with open(filename, 'r', encoding=encoding) as in_file:
        data = in_file.read()
    # Run the model only after the file handle has been released; parsing can be
    # slow and does not need the file to stay open.
    return model(data)

# Parse the cleaned text file with the Blackstone pipeline; `bx` is the document
# object that the cells below analyse.
bx = doc_from_file(filename='cleaned_shreveport.txt', model=nlp)

<b>In the section above we can see the law's text displayed using displaCy, with the cases, provisions, laws, etc. highlighted in the text.</b>


In [6]:
#display_casename_citations_filtered(doc)
def get_casename_citations_filtered(doc):
    '''Takes a spacy doc object and returns a dictionary of cases using the blackstone nlp model
    args:
        doc: the spacy doc object
    returns:
        a dict mapping the text of each entity labelled CASENAME to the list of
        tokens that make up that entity; if the same text occurs more than once,
        the tokens of the last occurrence are kept
    '''
    return {
        ent.text: list(ent)
        for ent in doc.ents
        if ent.label_ == 'CASENAME'
    }

def get_actual_cases(case_list: dict) -> List:
    '''Takes the cases and removes some of the ones that are not cases
    like the ones without a v in them

    args:
        case_list: dict mapping a case name to the tokens of that name
    returns:
        the case names (dict keys, in dict order) whose tokens include a 'v.'
        token tagged as an adposition (pos_ 'ADP') with dependency 'prep';
        each name appears at most once, even if it has several such tokens
    '''
    return [
        name
        for name, tokens in case_list.items()
        # any() stops at the first matching token, so a name is never added twice
        if any(
            tok.text == 'v.' and tok.pos_ == 'ADP' and tok.dep_ == 'prep'
            for tok in tokens
        )
    ]

def get_cases_from_doc(doc):
    '''Returns the case names found in a spacy doc object.

    Collects the CASENAME entities of the doc and then keeps only the ones
    accepted by get_actual_cases.
    '''
    casename_entities = get_casename_citations_filtered(doc)
    return get_actual_cases(casename_entities)

In [7]:
# Extract the case names from the parsed document.
result_one = get_cases_from_doc(bx)

In [8]:
def clean_case_text(case_list: List[str]) -> List[str]:
    '''Takes a list of cases and removes the newline characters at the end

    args:
        case_list: the cases to clean; each item is converted with str() first,
            so objects that are not plain strings are accepted as well
    returns:
        a new list with the text of each case, trailing newlines stripped
    '''
    # Convert and strip in a single pass so the str() conversion is actually applied.
    return [str(case).rstrip('\n') for case in case_list]

In [9]:
# Strip trailing newlines from the case names and materialise the result as a list.
# Note that this overwrites `result_one` with its cleaned version.
result_one = list(clean_case_text(result_one))
print(result_one)

['Gibbons v. Ogden', 'Mobile County v. Kimball', 'Smith v. Alabama', 'Minnesota Rate Cases (Simpson v. Shepard)', 'Southern R. Co. v. United States,', 'Interstate Commerce Commission v. Goodrich Transit Co.', 'Illinois C. R. Co. v. Behrens', "Employers' Liability Cases (Howard v. Illinois C. R. Co.)", 'N. R. Co. v. Eubank']


In [10]:
#print(bx)
# Render the entities of `bx` with displaCy (style 'ent'), passing the options
# imported from blackstone.displacy_palette.
displacy.render(bx, style='ent', options=ner_displacy_options)

In [18]:
#collect every entity labelled "JUDGE", then echo each one with its label
judges = [ent for ent in bx.ents if ent.label_ == 'JUDGE']
for judge in judges:
    print(judge.text, judge.label_)

WEST TEXAS RAILWAY JUDGE
RACIFIC RAILWAY COMPANY JUDGE
James G. Wilson JUDGE
Thomas J. Freeman JUDGE
Mr. Justice Hughes JUDGE
Mr. Justice JUDGE


In [42]:
#Now that we've gotten the judges, let's remove the ones who aren't actually judges
# Start from `judges` (the JUDGE entities collected above): `cleaned_judges` does not
# exist yet on a fresh kernel, so reading it here would raise a NameError.
cleaned_judges = [str(i) for i in judges]
# Re-parse each candidate's text with the second pipeline (nlp2).
cleaned_judges2 = [nlp2(i) for i in cleaned_judges]

In [56]:
cleaned_judges3 = []

print(cleaned_judges2)
for i in cleaned_judges2:
    for token in i:
        #print(token.text,token.pos_, token.ent_type)
        # Compare against the readable label instead of its numeric ID (380).
        # The candidate is appended once per PERSON token, so names with several
        # PERSON tokens appear repeatedly; the duplicates are removed afterwards.
        if token.ent_type_ == 'PERSON':
            cleaned_judges3.append(i)
print(cleaned_judges3)

[WEST TEXAS RAILWAY, RACIFIC RAILWAY COMPANY, James G. Wilson, Thomas J. Freeman, Mr. Justice Hughes, Mr. Justice]
[James G. Wilson, James G. Wilson, James G. Wilson, Thomas J. Freeman, Thomas J. Freeman, Thomas J. Freeman, Mr. Justice Hughes, Mr. Justice]


In [59]:
# Remove the repeated entries (a candidate was appended once per matching token).
# Going through set() does not keep the original order of the list.
cleaned_judges3 = list(set(cleaned_judges3))
cleaned_judges3

[James G. Wilson, Mr. Justice Hughes, Mr. Justice, Thomas J. Freeman]

In [None]:
def remove_statutes(case_list: List):
    '''Removes statutes, each of which has a numeric element.

    TODO: not implemented yet -- the body is only a placeholder, so calling
    this function currently does nothing and returns None.
    '''
    pass

In [None]:
#how to get the cases
#get_actual_cases(get_casename_citations_filtered(doc))

In [29]:
#a = ''
#dir(a.ljust(20))

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']

In [None]:
# Use `bx`, the document parsed earlier in this notebook; `doc` is never defined here.
cases_found = get_cases_from_doc(bx)
# get_cases_from_doc returns plain case-name strings, which have no `label_`
# attribute, so print the names themselves.
print('Cases found: ', cases_found)

In [21]:
#print(dir(spacy.tokens.Token))
#for i in cases_found:
#    for token in i:
#        print(token.text, token.pos_, token.dep_, [i for i in token.lefts], [i for i in token.rights])

In [385]:
@dataclass
class CaseWithNLP:
    '''Record that groups a case name with spaCy tokens related to it.

    NOTE(review): nothing in the visible notebook constructs this class, so the
    meaning of the token fields below is inferred from their names -- confirm.
    '''
    case_name: str  # the case name as plain text
    name_token: spacy.tokens.Token  # presumably a token taken from the case name -- TODO confirm
    plaintiff: spacy.tokens.Token  # presumably the token for the party named before "v." -- TODO confirm
    # presumably the token for the party named after "v." -- TODO confirm.
    # The usual spelling is "respondent"; renaming the field would change the class's interface.
    respondant: spacy.tokens.Token
    

In [388]:
def get_case_list(doc):
    '''Returns the cases found in a spacy doc object (delegates to get_cases_from_doc).'''
    return get_cases_from_doc(doc)

In [390]:
# NOTE(review): neither `make_case_with_nlp` nor `doc` is defined anywhere in the
# visible part of this notebook, so this cell presumably relied on kernel state
# from cells that no longer exist -- confirm and restore the definitions before re-running.
res = make_case_with_nlp(doc)
print(res)

[United States v. Standard Oil Co., Dalehite v. United States,]


In [None]:
def 