In [1]:
import simplejson as json  
import os
import lzma
import re
import pandas as pd
import numpy as np

In [2]:
os.chdir("/Users/samk/Documents/GitHub/UNH_MS_Work/data/raw/New Hampshire-20180831-text/data/")


In [3]:
#Loading data
cases = []
 
with lzma.open('data.jsonl.xz') as infile:
    for line in infile:
        #decode the file into a convenient format
        record = json.loads(str(line, 'utf-8'))
        #if the decision date on the case matches one we're interested in, add to our list
        cases.append(record)

print("Number of Cases: {}".format(len(cases)))

Number of Cases: 21266


In [4]:
#Extracting meta data and text of cases
def parse_cases(case):
    
    elem_dict = {}
    try:
        elem_dict["id"] = case['id']
    except IndexError:
        elem_dict["id"] = np.nan
    try:
        elem_dict["name"] = case['name']
    except IndexError:
        elem_dict["name"] = np.nan
    try:   
        elem_dict["name_abbreviation"] = case['name_abbreviation']
    except IndexError:
        elem_dict["name_abbreviation"] = np.nan
    try:
        elem_dict["parties"] = case['casebody']['data']['parties']
    except IndexError:
         elem_dict["parties"] = np.nan
    try:
        elem_dict["plaintiff"] = case['name_abbreviation'].split('v.')[0].strip()
    except IndexError:
        elem_dict["plaintiff"] = np.nan
    try:
        elem_dict["defendant"] = case['name_abbreviation'].split('v.')[1].strip()
    except IndexError:
        elem_dict["defendant"] = np.nan
    try:
        elem_dict["decision_date"] = case['decision_date']
    except IndexError:
        elem_dict["decision_date"] = np.nan
    try:
        elem_dict["docket_number"] = case['docket_number']
    except IndexError:
        elem_dict["docket_number"] = np.nan
        
    citation = [citation for citation in case['citations'] if citation['type'] == 'official'][0]
    elem_dict["citation_type"] = citation['type']
    elem_dict["citation_num"] = citation['cite']
    try:
        elem_dict["reporter"] = case['reporter']['full_name']
    except IndexError:
        elem_dict["reporter"] = np.nan
    try:
        elem_dict["court_id"] = case['court']['id']
    except IndexError:
        elem_dict["court_id"] = np.nan
    elem_dict["court_name"] = case['court']['name']
    elem_dict["jurisdiction_id"] = case['jurisdiction']['id']
    elem_dict["jurisdiction_name"] = case['jurisdiction']['name_long']
    try:
        elem_dict["plaintiff_attorneys"] = case['casebody']['data']['attorneys'][0]
    except IndexError:
        elem_dict["plaintiff_attorneys"] = np.nan
    try:
        elem_dict["defendants_attorneys"] = case['casebody']['data']['attorneys'][1]
    except IndexError:
        elem_dict["defendants_attorneys"] = np.nan
    elem_dict["judges"] = case['casebody']['data']['judges']
    opinion_type = [opinion['type'] for opinion in case['casebody']['data']['opinions']]
    elem_dict["opinion_type"]= opinion_type
    opinion_author = [opinion['author'] for opinion in case['casebody']['data']['opinions']]
    elem_dict["opinion_author"] = opinion_author
    elem_dict["head_matter"] = case['casebody']['data']['head_matter']
    opinion_text = [opinion['text'] for opinion in case['casebody']['data']['opinions']]
    elem_dict["opinion_text"] = opinion_text
    
    return elem_dict

In [5]:
#parsing the cases
parsed_cases = []
for case in cases:
    doc = parse_cases(case)
    parsed_cases.append(doc)

In [6]:
#Dumping Cases in Pandas DF
cases_df = pd.DataFrame(parsed_cases)
cases_df.head()

Unnamed: 0,citation_num,citation_type,court_id,court_name,decision_date,defendant,defendants_attorneys,docket_number,head_matter,id,...,jurisdiction_name,name,name_abbreviation,opinion_author,opinion_text,opinion_type,parties,plaintiff,plaintiff_attorneys,reporter
0,144 N.H. 131,official,8797,New Hampshire Supreme Court,1999-08-30,,"Tober Law Offices, P.A., of Portsmouth (Stephe...",No. LD-97-009,"No. LD-97-009\nFeld’s Case\nAugust 30, 1999\nT...",105092,...,New Hampshire,Feld’s Case,Feld’s Case,"[JOHNSON, J.]","[JOHNSON, J.\nThe Supreme Court Committee on P...",[majority],[Feld’s Case],Feld’s Case,"Griffith & Associates, PLLC, of Wilton (John P...",New Hampshire Reports
1,144 N.H. 13,official,8797,New Hampshire Supreme Court,1999-07-13,Bennett,"Janice S. Peterson, public defender, of Keene,...",No. 97-387,No. 97-387\nThe State of New Hampshire v. Eric...,105095,...,New Hampshire,The State of New Hampshire v. Eric Bennett,State v. Bennett,"[BRODERICK, j., THAYER, J.,]","[BRODERICK, j.\nAfter a jury trial in Superior...","[majority, concurrence]",[The State of New Hampshire v. Eric Bennett],State,"Philip T. McLaughlin, attorney general (John C...",New Hampshire Reports
2,144 N.H. 138,official,8797,New Hampshire Supreme Court,1999-08-30,,"Shaines & McEachern, P.A., of Portsmouth (Paul...",No. LD-97-008,"No. LD-97-008\nRoberge’s Case\nAugust 30, 1999...",105099,...,New Hampshire,Roberge’s Case,Roberge’s Case,[PER CURIAM.],[MEMORANDUM OPINION\nPER CURIAM.\nThe Supreme ...,[majority],[Roberge’s Case],Roberge’s Case,"Griffith & Associates, PLLC, of Wilton (John P...",New Hampshire Reports
3,144 N.H. 107,official,8797,New Hampshire Supreme Court,1999-08-03,,"Wiggin & Nourie, P.A., of Manchester (Scott A....",No. 97-399,"No. 97-399\nAppeal of Donald E. Savage, Jr. (N...",105102,...,New Hampshire,"Appeal of Donald E. Savage, Jr. (New Hampshire...",Appeal of Savage,"[BRODERICK, J.]","[BRODERICK, J.\nThe petitioner, Donald E. Sava...",[majority],"[Appeal of Donald E. Savage, Jr. (New Hampshir...",Appeal of Savage,"Fitzgerald, & Sessler, P.A., of Laconia (Shawn...",New Hampshire Reports
4,144 N.H. 44,official,8797,New Hampshire Supreme Court,1999-07-21,,"Philip T. McLaughlin, attorney general (Dougla...",No. 97-522,"No. 97-522\nAppeal of William H. Morgan, R.PH....",105108,...,New Hampshire,"Appeal of William H. Morgan, R.PH. (New Hampsh...",Appeal of Morgan,"[HORTON, J.]","[HORTON, J.\nThe petitioner, William H. Morgan...",[majority],"[Appeal of William H. Morgan, R.PH. (New Hamps...",Appeal of Morgan,"William, II. Loftus, P.C., of Lebanon (William...",New Hampshire Reports


In [7]:
cases_df.to_csv('/Users/samk/Documents/GitHub/UNH_MS_Work/data/processed/New_Hampshire.csv', index = False)

In [11]:
list_of_citations=[]
for i in range(cases_df.shape[0]):
   # phrases starting with 'see', case-insensitive
   list_see_citations = (re.findall("see\s.*\d{4}\)", cases_df['opinion_text'][i][0], flags=re.IGNORECASE))

   # phrases with 'also', case-insensitive
   list_also_citations = (re.findall("[^s].also\s.*\d{4}\)", cases_df['opinion_text'][i][0], flags=re.IGNORECASE))

   # phrases that have the 'v.' and end in a year with format 'XXXX)', it also grabs up to 50 characters preceding the 'v.', or a period, whichever comes first
   list_versus_citations = (re.findall("\.+\s.{0,50}[v]\..{0,100}[0-9]{4}\)", cases_df['opinion_text'][i][0], flags=re.IGNORECASE|re.MULTILINE))

   if len(list_see_citations)!=0:
       print(i, list_see_citations)
       print(i, list_also_citations)
       print(i, list_versus_citations)
       print()
       print()
       list_of_citations.append([i,list_see_citations, list_also_citations])
   else:
       pass


0 ['See 12 V.S.A. § 4711 (courts have power “to declare rights, status and other legal relations”); Robtoy v. City of St. Albans, 132 Vt. 503, 504, 321 A.2d 45, 46 (1974)', 'See Morgan v. Kroupa, 167 Vt. 99, 104, 702 A.2d 630, 633 (1997). Findings are viewed in the light most favorable to the judgment, disregarding modifying evidence, and will not be disturbed merely because they are contradicted by substantial evidence; rather, an appellant must show that there is no credible evidence to support the finding. See Bianchi v. Lorenz, 166 Vt. 555, 562, 701 A.2d 1037, 1041 (1997)', 'See State v. Fisher, 167 Vt. 36, 43, 702 A.2d 41, 45-46 (1997)', 'See Jensvold v. Town & Country Motors, Inc., 162 Vt. 580, 584, 649 A.2d 1037, 1041 (1994)', 'See Bianchi, 166 Vt. at 562, 701 A.2d at 1041 (findings must be upheld if supported by any credible evidence); see also Refac Int’l, Ltd. v. Hitachi, Ltd., 921 F.2d 1247, 1255 (Fed. Cir. 1990) (extent of party’s noncompliance with discovery request suppor



206 ['see Samplid Enters., Inc. v. First Vermont Bank, 165 Vt. 22, 25, 676 A2d 774, 776 (1996)', 'See Massachusetts Mun. Wholesale Elec. Co. v. State, 161 Vt. 346, 363, 639 A.2d 995, 1006 (1994). The requirement of an actual or justiciable controversy means that the consequences of the dispute “must be so set forth that the court can see that they are not based upon fear or anticipation but are reasonably to be expected.” Id. (citation omitted). The concept, which we first expressed in Village of Bennington v. Hawks, 100 Vt. 37, 134 A. 638 (1926)']
206 []
206 ['. v. First Vermont Bank, 165 Vt. 22, 25, 676 A2d 774, 776 (1996)', '. See Massachusetts Mun. Wholesale Elec. Co. v. State, 161 Vt. 346, 363, 639 A.2d 995, 1006 (1994)']


207 ['See Thompson v. Dewey’s S. Royalton, Inc., 169 Vt. 274, 276, 733 A.2d 65, 67 (1999) (questions of law are reviewed on nondeferential and plenary basis); State v. Koch, 169 Vt. 109, 112, 730 A.2d 577, 580 (1999)', 'See 24 V.S.A. § 872; 24 V.S.A. § 1236(1

332 []
332 ['. See State v. Welch, 162 Vt. 635, 636, 650 A.2d 516, 517 (1994)']


333 ['See White Current Corp. v. Vermont Elec. Coop., 158 Vt. 216, 223, 609 A.2d 222, 226 (1992). Absent an abuse of discretion, a trial court’s imposition of discovery sanctions will not be disturbed. See id.; see also Agency of Natural Resources v. Glens Falls Ins. Co., 169 Vt. 426, 434, 736 A.2d 768, 773 (1999)', 'See State v. Bernier, 157 Vt. 265, 268, 597 A.2d 789, 791 (1991). Moreover, an erroneous evidentiary ruling is grounds for reversal only if it affects a substantial right of the party. See In re B.S., 163 Vt. 445, 454, 659 A.2d 1137, 1143 (1995)', 'See Everett v. Town of Bristol, 164 Vt. 638, 639, 674 A.2d 1275, 1277 (1996)', 'See Gallerani & Sons, Inc. v. State Highway Bd., 133 Vt. 485, 486, 346 A.2d 529, 530 (1975)', 'See Quirion v. Forcier, 161 Vt. 15, 21, 632 A.2d 365, 369 (1993)', 'See State v. Perry, 151 Vt. 637, 642, 563 A.2d 1007, 1011 (1989)', 'See Haynes v. Golub Corp., 166 Vt. 228,

494 ['See Langle v. Kurkul, 146 Vt. 513, 517, 510 A.2d 1301, 1304 (1986). Clearly, the first of these elements, duty, is central to a negligence claim, and its existence is primarily a question of law. Denis Bail Bonds, Inc. v. State, 159 Vt. 481, 487, 622 A.2d 495, 499 (1993). The imposition of a duty is “‘an expression of the sum total of those considerations of policy which lead the law to say that the plaintiff is entitled to protection.’” Id. (quoting W Prosser & W Keeton, The Law of Torts § 53, at 358 (5th ed. 1984)', 'See Restatement (Second) of Torts § 314A(3) (1965)', 'See Langle, 146 Vt. at 519-20, 510 A.2d at 1304-05. These factors may include the degree of certainty that plaintiff suffered injury, the closeness of the connection between defendant’s conduct and plaintiff’s injury, the moral blame attached to defendant’s conduct, the policy of preventing future harm, the burden to the defendant, the consequences to the community of finding a duty, and the availability and cos

953 []
953 []


954 ['See 13 V.S.A. §§ 4941-4968. The proceeding is not a criminal prosecution, but rather “a unique statutory procedure aimed at implementing the extradition provision of the federal constitution.” Lovejoy v. State, 148 Vt. 239, 242, 531 A.2d 921, 923 (1987)']
954 []
954 []


955 ['See State v. Ross, 152 Vt. 462, 465, 568 A.2d 335, 337 (1989). And children who are repeatedly sexually abused over several months or years have difficulty establishing the dates of particular assaults. See, e.g., People v. Naugle, 393 N.W.2d 592, 596 (Mich. Ct. App. 1986)', 'See State v. Gomes, 162 Vt. 319, 322, 648 A.2d 396, 399 (1994)', 'See id. at 581. In this case, the record indicates that the State made efforts to narrow the time frame, but the children were unable to be more specific than setting the date at a time during which they lived on Coventry Street. See id. (whether indictment is sufficiently specific must be decided case-by-case by considering all relevant facts; one factor

1941 ['See 1 Rev. Stat. of Indiana (1852)']
1941 []
1941 ['. See 1 Rev. Stat. of Indiana (1852)']


2254 ['See also Robbins v. Chicago, 4 Wall. 652. But I do not care to dwell on my personal views, further than to state that this part of the ruling was by a? divided court. The master finds that “ the orator’s care and services during that time (from July 27, 1874 to May 17, 1875)']
2254 ['e also Robbins v. Chicago, 4 Wall. 652. But I do not care to dwell on my personal views, further than to state that this part of the ruling was by a? divided court. The master finds that “ the orator’s care and services during that time (from July 27, 1874 to May 17, 1875)']
2254 []




IndexError: list index out of range

In [27]:
cases[0]

{'id': 174216,
 'name': 'Agency of Natural Resources, State of Vermont v. Glens Falls Insurance Co., Continental Insurance Co., Liberty Mutual Insurance Co. and Tamarack Services of South Burlington',
 'name_abbreviation': 'Agency of Natural Resources v. Glens Falls Insurance',
 'decision_date': '1999-06-25',
 'docket_number': 'No. 98-073',
 'first_page': '426',
 'last_page': '437',
 'citations': [{'type': 'parallel', 'cite': '736 A.2d 768'},
  {'type': 'official', 'cite': '169 Vt. 426'}],
 'volume': {'volume_number': '169'},
 'reporter': {'full_name': 'Vermont Reports'},
 'court': {'id': 8945,
  'name': 'Vermont Supreme Court',
  'name_abbreviation': 'Vt.',
  'jurisdiction_url': None,
  'slug': 'vt'},
 'jurisdiction': {'id': 17,
  'slug': 'vt',
  'name': 'Vt.',
  'name_long': 'Vermont',
  'whitelisted': False},
 'casebody': {'status': 'ok',
  'data': {'attorneys': ['William, H. Sorrell, Attorney General, and William, Griffin, Chief Assistant Attorney General, Montpelier, for Plaintiff

In [24]:
cases_df['opinion_text'][0]

["Johnson, J.\nThis case concerns a dispute among the parties and their insurers over the allocation of costs for the cleanup of a series of gasoline leaks. The superior court ruled that Liberty Mutual Insurance Company (Liberty) was responsible for approximately 92% of the costs, that the Continental/Glens Falls Insurance Company (Continental) was responsible for the balance, and that Liberty’s counterclaim against the State of Vermont for contribution was without merit. Liberty appeals, contending the court erred in: (1) dismissing its counterclaim against the State; (2) making certain findings concerning the amount of petroleum released and the allocation of remediation costs among the parties; (3) ordering a setoff against Liberty’s restitution award from Continental; and (4) declining to award prejudgment interest. Continental has cross-appealed, contending the court erred in rejecting its defenses of waiver, unclean hands, and laches against Liberty’s cross-claim for restitution.