In [1]:
#default things to import

#test using BlackstoneNLP

#import standard library modules
import sys
import json
from collections import Counter
from typing import List, Any

#modules from the community
import spacy
from dataclasses import dataclass #backported this module from 3.7

#import lxml

from spacy import displacy
from spacy.lang.en import English
from spacy.pipeline import EntityRuler



from blackstone.displacy_palette import ner_displacy_options

#blackstone improved citations
from blackstone.pipeline.sentence_segmenter import SentenceSegmenter
from blackstone.rules import CITATION_PATTERNS

import en_core_web_sm


In [2]:
# Load the spaCy pipeline registered under the name 'en_blackstone_proto'.
nlp = spacy.load('en_blackstone_proto')
# Blackstone sentence segmenter, constructed from the model's vocab and the citation patterns.
sentence_segmenter = SentenceSegmenter(nlp.vocab, CITATION_PATTERNS)

# EntityRuler that will hold the hand-written patterns added below.
ruler = EntityRuler(nlp)
#patterns = [{"label": "LEGISLATURE", "pattern": "Congress"}]
# Token pattern: one token whose lowercase form is "congress" gets the label (and id) LEGISLATURE.
patterns2 = [{"label": "LEGISLATURE", "pattern": [{"LOWER": "congress"}], "id": "LEGISLATURE"}]
            #{"label": "GPE", "pattern": [{"LOWER": "san"}, {"LOWER": "francisco"}]}]
ruler.add_patterns(patterns2)
# Pipeline order: the segmenter is inserted before the "parser" component and the ruler before "ner".
# NOTE(review): re-running this cell on a live kernel adds the components a second time — presumably
# spaCy rejects the duplicate names; restart the kernel before re-running.
nlp.add_pipe(sentence_segmenter, before="parser")
nlp.add_pipe(ruler, before='ner')

In [3]:
#get the text into a variable from the .txt file
def text_from_file(filename: str, encoding: str = 'utf-8') -> str:
    '''Read a whole text file and return its contents as one string.

    args:
        filename: path of the text file to read
        encoding: text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding
            (the case texts contain non-ASCII characters such as the section sign).
    returns:
        the complete contents of the file
    '''
    # The context manager closes the file even if reading raises.
    with open(filename, 'r', encoding=encoding) as in_file:
        return in_file.read()

# Raw case text to analyse. Despite the variable name, the file read here is
# 'quanta_case_text.txt', resolved relative to the notebook's working directory.
booker=text_from_file(filename='quanta_case_text.txt')
# The text is parsed in the next cell with nlp(booker); the line below is a leftover experiment.
#bx=spacy.load(booker)

In [4]:
# Run the full pipeline (custom segmenter and entity ruler included) over the case text.
bx = nlp(booker)

In [5]:
#for ent in bx.ents:
#    print(ent.text, ent.label_)
#dir(spacy.lang)

#booker_string = text_from_file('booker_train.txt')
#import os
#os.chdir(model_data_path)
#os.listdir()
#test_string = """There was before us no dispute as to the relevant statutory scheme or the law as the judge had to apply it. There was no dispute but that the judge had to consider in particular the circumstances in which the evidence came to be made (see section 114(2)(d)), the reliability of the witness Wilson (section 114(2)(e)) and how reliable the making of the statement appears to be (section 114(2)(f)). There was no dispute between the parties that the judge was bound to apply section 114(2) in considering the propriety of reading the transcripts pursuant to section 116 (see R v Cole & Ors [2008] 1 Cr App R No 5, paragraph 6, 7 and 21). Quite apart from those specific provisions the ultimate consideration had to be and remains the fairness of allowing that course to be adopted as Pitchford LJ said in R v Ibrahim [2010] EWCA Crim 1176"""

Supreme Court of United States COURT
Thomas G. Hungar JUDGE
J. Scott Ballenger JUDGE
Barry J. JUDGE
Maxwell A. Fox JUDGE
Jeffrey T. Green JUDGE
Jeffrey P. Kushan JUDGE
*2113 PROVISION
Court of Appeals COURT
I
 JUDGE
'641 PROVISION
'379 PROVISION
'733 PROVISION
Terms 334 PROVISION
Book 124 PROVISION
'641 PROVISION
'379 PROVISION
'379 PROVISION
C 01 PROVISION
Aug. 20 PROVISION
'733 PROVISION
*2114 PROVISION
-38 PROVISION
'" JUDGE
Petitioners 8 PROVISION
Petitioners 8 PROVISION
Respondent 9 PROVISION
App. PROVISION
Petitioners 9 PROVISION
District Court COURT
*2115 PROVISION
65 USPQ 2d 1589 CASENAME
United States v. Univis Lens Co. CASENAME
316 U.S. 241 CITATION
248 F.Supp.2d 912 CITATION
Court of Appeals COURT
L.Ed.2d 805 PROVISION
Bloomer v. Millinger CASENAME
Adams v. Burke CASENAME
Henry v. A.B. Dick Co. CASENAME
224 U.S. 1 CITATION
Bauer & Cie v. O'Donnell CASENAME
229 U.S. 1 CITATION
Motion Picture Patents Co. v. Universal Film Mfg. CASENAME
243 U.S. 502 CITATION
§ 8 PROVISION
" 243

In [6]:
# Render the entities found in bx inline, using Blackstone's displaCy options.
displacy.render(bx, style='ent', options=ner_displacy_options)
#bx = nlp()

In [67]:
# List every entity in bx as "<text> <label>".
# NOTE(review): the saved output under this cell looks like it came from a different input text
# than quanta_case_text.txt (execution counts are out of order) — re-run from a fresh kernel to confirm.
for ent in bx.ents:
    print(ent.text, ent.label_)

section 114(2)(d) PROVISION
section 114(2)(e) PROVISION
section 114(2)(f) PROVISION
section 114(2) PROVISION
section 116 PROVISION
R v Cole & Ors CASENAME
[2008] 1 Cr App R No 5 CITATION
Pitchford LJ JUDGE
R v Ibrahim CASENAME
[2010] EWCA Crim 1176 CITATION


In [62]:
#display_casename_citations_filtered(doc)
def get_casename_citations_filtered(doc):
    '''Collect the CASENAME entities of a parsed document.

    args:
        doc: object exposing an ``ents`` iterable; each entity must have
            ``label_`` and ``text`` attributes and be iterable over its tokens
            (e.g. a spaCy Doc produced by the Blackstone model)
    returns:
        dict mapping each CASENAME entity's text to a list of that entity's
        tokens. If the same text occurs more than once, the tokens of the
        last occurrence are kept.
    '''
    tokens_by_case = {}
    for entity in doc.ents:
        if entity.label_ != 'CASENAME':
            continue
        # Materialise the entity's tokens so callers can iterate them repeatedly.
        tokens_by_case[entity.text] = list(entity)
    return tokens_by_case

def get_actual_cases(case_list: dict) -> List[str]:
    '''Keep only the case names that contain a versus token ("v.").

    args:
        case_list: mapping of case-name text to the sequence of its tokens,
            as returned by get_casename_citations_filtered. Each token must
            expose ``text``, ``pos_`` and ``dep_`` attributes.
    returns:
        the case names (the dictionary keys, in dictionary order) for which at
        least one token has text 'v.', part-of-speech 'ADP' and dependency
        'prep'. Each name is listed at most once, even when several of its
        tokens match.
    '''
    def _is_versus(token) -> bool:
        # NOTE(review): names written with a bare 'v' (no period) are not matched — confirm intended.
        return token.text == 'v.' and token.pos_ == 'ADP' and token.dep_ == 'prep'

    # any() stops at the first matching token, so a name can never be appended twice.
    return [
        name
        for name, tokens in case_list.items()
        if any(_is_versus(token) for token in tokens)
    ]

def get_cases_from_doc(doc):
    '''Return the filtered case names found in a parsed document.

    Gathers the CASENAME entities with get_casename_citations_filtered and
    passes them through get_actual_cases.
    '''
    casename_tokens = get_casename_citations_filtered(doc)
    return get_actual_cases(casename_tokens)

In [68]:
# Case names from bx that pass the 'v.' filter applied by get_actual_cases.
result_one = get_cases_from_doc(bx)
# NOTE(review): clean_case_text is not defined in the visible cells; the lines below are disabled.
#result_one = list(clean_case_text(result_one))
#print(result_one)

In [71]:
# Same entity listing as the earlier In [67] cell: prints "<text> <label>" for every entity in bx.
for ent in bx.ents:
    print(ent.text, ent.label_)

section 114(2)(d) PROVISION
section 114(2)(e) PROVISION
section 114(2)(f) PROVISION
section 114(2) PROVISION
section 116 PROVISION
R v Cole & Ors CASENAME
[2008] 1 Cr App R No 5 CITATION
Pitchford LJ JUDGE
R v Ibrahim CASENAME
[2010] EWCA Crim 1176 CITATION
