# Dependencies

In [1]:
import pandas as pd
import pickle
import datetime
from dateutil.relativedelta import relativedelta
from datetime import date
from pprint import pprint
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
from keras.utils import to_categorical
from matplotlib.legend_handler import HandlerLine2D
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
pd.set_option("max_columns", None)

Using TensorFlow backend.


# Data Import and Cleaning

In [2]:
justicePath = "SCDB1901-justiceCentered.csv"
casePath = 'SCDB1901-caseCentered.csv'
chiefJusticesPath = 'chiefJustices.csv'
presidentsPath = 'presidents.csv'

In [3]:
justiceFile = pd.read_csv(justicePath, encoding = 'latin1')
caseFile = pd.read_csv(casePath, encoding = 'latin1')
chiefJusticesFile = pd.read_csv(chiefJusticesPath)
presidentsFile = pd.read_csv(presidentsPath)

In [4]:
caseDf = pd.DataFrame(caseFile)
justiceDf = pd.DataFrame(justiceFile)
chiefJusticeDf = pd.DataFrame(chiefJusticesFile)
presidentsDf = pd.DataFrame(presidentsFile)

In [5]:
chiefJusticeDf

Unnamed: 0,Name,State App't From,Appointed by President
0,"Vinson, Fred Moore",Kentucky,Truman
1,"Warren, Earl",California,Eisenhower
2,"Burger, Warren Earl",Virginia,Nixon
3,"Rehnquist, William H.",Virginia,Reagan
4,"Roberts, John G., Jr.",Maryland,"Bush, G. W."


In [6]:
chiefs = chiefJusticeDf['Name'].str.split(',', expand=True)

In [7]:
chiefJusticeDf['chief'] = chiefs[0]

In [8]:
chiefMergeDf = {'chief': chiefJusticeDf['chief'], 'chiefAppointedBy': chiefJusticeDf['Appointed by President']}
chiefMergeDf = pd.DataFrame(chiefMergeDf)
chiefMergeDf

Unnamed: 0,chief,chiefAppointedBy
0,Vinson,Truman
1,Warren,Eisenhower
2,Burger,Nixon
3,Rehnquist,Reagan
4,Roberts,"Bush, G. W."


In [9]:
caseDf = caseDf.merge(chiefMergeDf, on='chief')

In [10]:
caseDf = caseDf.merge(presidentsDf, on='chiefAppointedBy', how='inner')

In [11]:
caseDf

Unnamed: 0,caseId,docketId,caseIssuesId,voteId,dateDecision,decisionType,usCite,sctCite,ledCite,lexisCite,term,naturalCourt,chief,docket,caseName,dateArgument,dateRearg,petitioner,petitionerState,respondent,respondentState,jurisdiction,adminAction,adminActionState,threeJudgeFdc,caseOrigin,caseOriginState,caseSource,caseSourceState,lcDisagreement,certReason,lcDisposition,lcDispositionDirection,declarationUncon,caseDisposition,caseDispositionUnusual,partyWinning,precedentAlteration,voteUnclear,issue,issueArea,decisionDirection,decisionDirectionDissent,authorityDecision1,authorityDecision2,lawType,lawSupp,lawMinor,majOpinWriter,majOpinAssigner,splitVote,majVotes,minVotes,chiefAppointedBy,chiefAppointedParty
0,1946-001,1946-001-01,1946-001-01-01,1946-001-01-01-01,11/18/1946,1,329 U.S. 1,67 S. Ct. 6,91 L. Ed. 3,1946 U.S. LEXIS 1724,1946,1301,Vinson,24,HALLIBURTON OIL WELL CEMENTING CO. v. WALKER e...,1/9/1946,10/23/1946,198,,172.0,,6,,,0.0,51.0,6.0,29.0,,0.0,11.0,2.0,1.0,1.0,3.0,0.0,1.0,1.0,0.0,80180.0,8.0,2.0,0.0,4.0,,6.0,600.0,35 U.S.C. § 33,78.0,78.0,1,8,1,Truman,Democratic
1,1946-002,1946-002-01,1946-002-01-01,1946-002-01-01-01,11/18/1946,1,329 U.S. 14,67 S. Ct. 13,91 L. Ed. 12,1946 U.S. LEXIS 1725,1946,1301,Vinson,12,CLEVELAND v. UNITED STATES,10/10/1945,10/17/1946,100,,27.0,,1,,,0.0,123.0,52.0,30.0,,0.0,4.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10500.0,1.0,1.0,0.0,4.0,,6.0,600.0,18 U.S.C. § 398,81.0,87.0,1,6,3,Truman,Democratic
2,1946-003,1946-003-01,1946-003-01-01,1946-003-01-01-01,11/18/1946,1,329 U.S. 29,67 S. Ct. 1,91 L. Ed. 22,1946 U.S. LEXIS 3037,1946,1301,Vinson,21,CHAMPLIN REFINING CO. v. UNITED STATES ET AL.,11/8/1945,10/18/1946,209,,27.0,,2,66.0,,1.0,107.0,42.0,107.0,42.0,0.0,1.0,,2.0,1.0,2.0,0.0,0.0,0.0,0.0,80250.0,8.0,2.0,0.0,1.0,,2.0,207.0,,84.0,78.0,1,5,4,Truman,Democratic
3,1946-004,1946-004-01,1946-004-01-01,1946-004-01-01-01,11/25/1946,7,329 U.S. 40,67 S. Ct. 167,91 L. Ed. 29,1946 U.S. LEXIS 1696,1946,1301,Vinson,26,UNITED STATES v. ALCEA BAND OF TILLAMOOKS ET AL.,1/31/1946,10/25/1946,27,,170.0,,1,67.0,,0.0,3.0,,3.0,,0.0,10.0,,2.0,1.0,2.0,0.0,0.0,0.0,0.0,20150.0,2.0,2.0,0.0,4.0,,6.0,600.0,49 Stat. 801,87.0,87.0,1,5,3,Truman,Democratic
4,1946-005,1946-005-01,1946-005-01-01,1946-005-01-01-01,11/25/1946,1,329 U.S. 64,67 S. Ct. 154,91 L. Ed. 44,1946 U.S. LEXIS 2997,1946,1301,Vinson,50,"UNITED STATES v. HOWARD P. FOLEY CO., INC.",10/25/1946,,27,,176.0,,1,,,0.0,3.0,,3.0,,0.0,2.0,,2.0,1.0,3.0,0.0,1.0,0.0,0.0,80060.0,8.0,2.0,0.0,7.0,,,,,78.0,87.0,1,6,3,Truman,Democratic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8961,2018-073,2018-073-01,2018-073-01-01,2018-073-01-01-01,6/20/2019,1,,139 S. Ct. 2149,204 L. Ed. 2d 506,2019 U.S. LEXIS 4180,2018,1707,Roberts,18-485,MCDONOUGH v. SMITH,4/17/2019,,100,,19.0,37.0,1,,,0.0,95.0,,22.0,,0.0,2.0,2.0,1.0,1.0,4.0,0.0,1.0,0.0,0.0,20400.0,2.0,2.0,,4.0,,3.0,314.0,,113.0,111.0,1,6,3,"Bush, G. W.",Republican
8962,2018-074,2018-074-01,2018-074-01-01,2018-074-01-01-01,6/24/2019,1,,139 S. Ct. 2356,204 L. Ed. 2d 742,2019 U.S. LEXIS 4200,2018,1707,Roberts,18-481,FOOD MARKETING INSTITUTE v. ARGUS LEADER MEDIA,4/22/2019,,228,,190.0,,1,,,0.0,115.0,,28.0,,0.0,12.0,2.0,2.0,1.0,4.0,0.0,1.0,0.0,0.0,50040.0,5.0,1.0,0.0,4.0,,3.0,335.0,,115.0,111.0,1,6,3,"Bush, G. W.",Republican
8963,2018-075,2018-075-01,2018-075-01-01,2018-075-01-01-01,6/10/2019,1,,139 S. Ct. 1872,204 L. Ed. 2d 200,2019 U.S. LEXIS 4027,2018,1707,Roberts,17-778,QUARLES v. UNITED STATES,4/24/2019,,126,,27.0,,1,,,0.0,82.0,,26.0,,0.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10570.0,1.0,1.0,,4.0,,6.0,600.0,Armed Career Criminal Act,116.0,111.0,1,9,0,"Bush, G. W.",Republican
8964,2018-076,2018-076-01,2018-076-01-01,2018-076-01-01-01,6/3/2019,1,,139 S. Ct. 1795,204 L. Ed. 2d 129,2019 U.S. LEXIS 3890,2018,1707,Roberts,18-489,TAGGART v. LORENZEN,4/24/2019,,138,,135.0,,1,,,0.0,20.0,,29.0,,0.0,11.0,2.0,1.0,1.0,5.0,0.0,1.0,0.0,0.0,80030.0,8.0,2.0,,4.0,,3.0,307.0,,110.0,111.0,1,9,0,"Bush, G. W.",Republican


In [None]:
del justiceDf['docketId']
del justiceDf['caseIssuesId']
del justiceDf['voteId']
del justiceDf['usCite']
del justiceDf['sctCite']
del justiceDf['ledCite']
del justiceDf['lexisCite']
del justiceDf['docket']
del justiceDf['threeJudgeFdc']
del justiceDf['lawMinor']
del justiceDf['majOpinWriter']
del justiceDf['majOpinAssigner']
del justiceDf['authorityDecision1']
del justiceDf['authorityDecision2']

In [None]:
justiceDf

In [None]:
groupDf = justiceDf.groupby(['caseId', 'justiceName'])
groupDf = pd.DataFrame(groupDf)
del groupDf[1]

In [None]:
new = groupDf[0].astype('str').str.split(',', expand=True)
new1 = new[0].str.strip('()')
new1 = pd.DataFrame(new1)
new1 = new1[0].str.strip("''")
new2 = new[1].str.strip('()')
new2 = pd.DataFrame(new2)
new2 = new2[1].str.strip("'")
new2 = pd.DataFrame(new2)
new2 = new2[1].str.replace("'", '')
new2

In [None]:
justiceWithCase = {'caseID': new1, 'justice': new2}
justiceWithCase = pd.DataFrame(justiceWithCase)

In [None]:
cases = justiceWithCase['caseID'].unique()

In [None]:
cases

In [None]:
justices = justiceWithCase['justice']

In [None]:
expDf = {'caseId': cases}
expDf = pd.DataFrame(expDf)
expDf['justice1'] = ''
expDf['justice1AppointedBy'] = ''
expDf['justice1AppointedParty'] = ''
expDf['justice2'] = ''
expDf['justice2AppointedBy'] = ''
expDf['justice2AppointedParty'] = ''
expDf['justice3'] = ''
expDf['justice3AppointedBy'] = ''
expDf['justice3AppointedParty'] = ''
expDf['justice4'] = ''
expDf['justice4AppointedBy'] = ''
expDf['justice4AppointedParty'] = ''
expDf['justice5'] = ''
expDf['justice5AppointedBy'] = ''
expDf['justice5AppointedParty'] = ''
expDf['justice6'] = ''
expDf['justice6AppointedBy'] = ''
expDf['justice6AppointedParty'] = ''
expDf['justice7'] = ''
expDf['justice7AppointedBy'] = ''
expDf['justice7AppointedParty'] = ''
expDf['justice8'] = ''
expDf['justice8AppointedBy'] = ''
expDf['justice8AppointedParty'] = ''
expDf['justice9'] = ''
expDf['justice9AppointedBy'] = ''
expDf['justice9AppointedParty'] = ''
expDf

In [13]:
del caseDf['docketId']
del caseDf['caseIssuesId']
del caseDf['voteId']
del caseDf['usCite']
del caseDf['sctCite']
del caseDf['ledCite']
del caseDf['lexisCite']
del caseDf['docket']
del caseDf['threeJudgeFdc']
del caseDf['lawMinor']
del caseDf['majOpinWriter']
del caseDf['majOpinAssigner']
del caseDf['authorityDecision1']
del caseDf['authorityDecision2']

In [14]:
caseDf

Unnamed: 0,caseId,dateDecision,decisionType,term,naturalCourt,chief,caseName,dateArgument,dateRearg,petitioner,petitionerState,respondent,respondentState,jurisdiction,adminAction,adminActionState,caseOrigin,caseOriginState,caseSource,caseSourceState,lcDisagreement,certReason,lcDisposition,lcDispositionDirection,declarationUncon,caseDisposition,caseDispositionUnusual,partyWinning,precedentAlteration,voteUnclear,issue,issueArea,decisionDirection,decisionDirectionDissent,lawType,lawSupp,splitVote,majVotes,minVotes,chiefAppointedBy,chiefAppointedParty
0,1946-001,11/18/1946,1,1946,1301,Vinson,HALLIBURTON OIL WELL CEMENTING CO. v. WALKER e...,1/9/1946,10/23/1946,198,,172.0,,6,,,51.0,6.0,29.0,,0.0,11.0,2.0,1.0,1.0,3.0,0.0,1.0,1.0,0.0,80180.0,8.0,2.0,0.0,6.0,600.0,1,8,1,Truman,Democratic
1,1946-002,11/18/1946,1,1946,1301,Vinson,CLEVELAND v. UNITED STATES,10/10/1945,10/17/1946,100,,27.0,,1,,,123.0,52.0,30.0,,0.0,4.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10500.0,1.0,1.0,0.0,6.0,600.0,1,6,3,Truman,Democratic
2,1946-003,11/18/1946,1,1946,1301,Vinson,CHAMPLIN REFINING CO. v. UNITED STATES ET AL.,11/8/1945,10/18/1946,209,,27.0,,2,66.0,,107.0,42.0,107.0,42.0,0.0,1.0,,2.0,1.0,2.0,0.0,0.0,0.0,0.0,80250.0,8.0,2.0,0.0,2.0,207.0,1,5,4,Truman,Democratic
3,1946-004,11/25/1946,7,1946,1301,Vinson,UNITED STATES v. ALCEA BAND OF TILLAMOOKS ET AL.,1/31/1946,10/25/1946,27,,170.0,,1,67.0,,3.0,,3.0,,0.0,10.0,,2.0,1.0,2.0,0.0,0.0,0.0,0.0,20150.0,2.0,2.0,0.0,6.0,600.0,1,5,3,Truman,Democratic
4,1946-005,11/25/1946,1,1946,1301,Vinson,"UNITED STATES v. HOWARD P. FOLEY CO., INC.",10/25/1946,,27,,176.0,,1,,,3.0,,3.0,,0.0,2.0,,2.0,1.0,3.0,0.0,1.0,0.0,0.0,80060.0,8.0,2.0,0.0,,,1,6,3,Truman,Democratic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8961,2018-073,6/20/2019,1,2018,1707,Roberts,MCDONOUGH v. SMITH,4/17/2019,,100,,19.0,37.0,1,,,95.0,,22.0,,0.0,2.0,2.0,1.0,1.0,4.0,0.0,1.0,0.0,0.0,20400.0,2.0,2.0,,3.0,314.0,1,6,3,"Bush, G. W.",Republican
8962,2018-074,6/24/2019,1,2018,1707,Roberts,FOOD MARKETING INSTITUTE v. ARGUS LEADER MEDIA,4/22/2019,,228,,190.0,,1,,,115.0,,28.0,,0.0,12.0,2.0,2.0,1.0,4.0,0.0,1.0,0.0,0.0,50040.0,5.0,1.0,0.0,3.0,335.0,1,6,3,"Bush, G. W.",Republican
8963,2018-075,6/10/2019,1,2018,1707,Roberts,QUARLES v. UNITED STATES,4/24/2019,,126,,27.0,,1,,,82.0,,26.0,,0.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10570.0,1.0,1.0,,6.0,600.0,1,9,0,"Bush, G. W.",Republican
8964,2018-076,6/3/2019,1,2018,1707,Roberts,TAGGART v. LORENZEN,4/24/2019,,138,,135.0,,1,,,20.0,,29.0,,0.0,11.0,2.0,1.0,1.0,5.0,0.0,1.0,0.0,0.0,80030.0,8.0,2.0,,3.0,307.0,1,9,0,"Bush, G. W.",Republican


In [15]:
chief = caseDf['chief']
label_encoder = LabelEncoder()
label_encoder.fit(chief)
chiefEncoded = label_encoder.transform(chief)

In [16]:
chiefEncoded

array([3, 3, 3, ..., 2, 2, 2])

In [17]:
chiefAppointedBy = caseDf['chiefAppointedBy']
label_encoder = LabelEncoder()
label_encoder.fit(chiefAppointedBy)
chiefAppointedByEncoded = label_encoder.transform(chiefAppointedBy)

In [18]:
chiefAppointedByEncoded

array([4, 4, 4, ..., 0, 0, 0])

In [19]:
caseDf['chiefAppointedParty'].value_counts()

Republican    8154
Democratic     812
Name: chiefAppointedParty, dtype: int64

In [20]:
chiefAppointedParty = caseDf['chiefAppointedParty']
label_encoder = LabelEncoder()
label_encoder.fit(chiefAppointedParty)
chiefAppointedPartyEncoded = label_encoder.transform(chiefAppointedParty)

In [21]:
chiefAppointedPartyEncoded

array([0, 0, 0, ..., 1, 1, 1])

In [22]:
caseDf['reargued'] = caseDf['dateRearg'].fillna(0)
rearguedDates =[]
for cell in caseDf['reargued']:
    if cell != 0:
        rearguedDates.append(cell)
for date in rearguedDates:
    caseDf['reargued'] = caseDf['reargued'].replace(date, 1)

In [23]:
caseDf['reargued'].value_counts()

0    8788
1     178
Name: reargued, dtype: int64

In [24]:
caseDf = caseDf.fillna(0)

In [25]:
caseDf

Unnamed: 0,caseId,dateDecision,decisionType,term,naturalCourt,chief,caseName,dateArgument,dateRearg,petitioner,petitionerState,respondent,respondentState,jurisdiction,adminAction,adminActionState,caseOrigin,caseOriginState,caseSource,caseSourceState,lcDisagreement,certReason,lcDisposition,lcDispositionDirection,declarationUncon,caseDisposition,caseDispositionUnusual,partyWinning,precedentAlteration,voteUnclear,issue,issueArea,decisionDirection,decisionDirectionDissent,lawType,lawSupp,splitVote,majVotes,minVotes,chiefAppointedBy,chiefAppointedParty,reargued
0,1946-001,11/18/1946,1,1946,1301,Vinson,HALLIBURTON OIL WELL CEMENTING CO. v. WALKER e...,1/9/1946,10/23/1946,198,0.0,172.0,0.0,6,0.0,0.0,51.0,6.0,29.0,0.0,0.0,11.0,2.0,1.0,1.0,3.0,0.0,1.0,1.0,0.0,80180.0,8.0,2.0,0.0,6.0,600.0,1,8,1,Truman,Democratic,1
1,1946-002,11/18/1946,1,1946,1301,Vinson,CLEVELAND v. UNITED STATES,10/10/1945,10/17/1946,100,0.0,27.0,0.0,1,0.0,0.0,123.0,52.0,30.0,0.0,0.0,4.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10500.0,1.0,1.0,0.0,6.0,600.0,1,6,3,Truman,Democratic,1
2,1946-003,11/18/1946,1,1946,1301,Vinson,CHAMPLIN REFINING CO. v. UNITED STATES ET AL.,11/8/1945,10/18/1946,209,0.0,27.0,0.0,2,66.0,0.0,107.0,42.0,107.0,42.0,0.0,1.0,0.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,80250.0,8.0,2.0,0.0,2.0,207.0,1,5,4,Truman,Democratic,1
3,1946-004,11/25/1946,7,1946,1301,Vinson,UNITED STATES v. ALCEA BAND OF TILLAMOOKS ET AL.,1/31/1946,10/25/1946,27,0.0,170.0,0.0,1,67.0,0.0,3.0,0.0,3.0,0.0,0.0,10.0,0.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,20150.0,2.0,2.0,0.0,6.0,600.0,1,5,3,Truman,Democratic,1
4,1946-005,11/25/1946,1,1946,1301,Vinson,"UNITED STATES v. HOWARD P. FOLEY CO., INC.",10/25/1946,0,27,0.0,176.0,0.0,1,0.0,0.0,3.0,0.0,3.0,0.0,0.0,2.0,0.0,2.0,1.0,3.0,0.0,1.0,0.0,0.0,80060.0,8.0,2.0,0.0,0.0,0.0,1,6,3,Truman,Democratic,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8961,2018-073,6/20/2019,1,2018,1707,Roberts,MCDONOUGH v. SMITH,4/17/2019,0,100,0.0,19.0,37.0,1,0.0,0.0,95.0,0.0,22.0,0.0,0.0,2.0,2.0,1.0,1.0,4.0,0.0,1.0,0.0,0.0,20400.0,2.0,2.0,0.0,3.0,314.0,1,6,3,"Bush, G. W.",Republican,0
8962,2018-074,6/24/2019,1,2018,1707,Roberts,FOOD MARKETING INSTITUTE v. ARGUS LEADER MEDIA,4/22/2019,0,228,0.0,190.0,0.0,1,0.0,0.0,115.0,0.0,28.0,0.0,0.0,12.0,2.0,2.0,1.0,4.0,0.0,1.0,0.0,0.0,50040.0,5.0,1.0,0.0,3.0,335.0,1,6,3,"Bush, G. W.",Republican,0
8963,2018-075,6/10/2019,1,2018,1707,Roberts,QUARLES v. UNITED STATES,4/24/2019,0,126,0.0,27.0,0.0,1,0.0,0.0,82.0,0.0,26.0,0.0,0.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10570.0,1.0,1.0,0.0,6.0,600.0,1,9,0,"Bush, G. W.",Republican,0
8964,2018-076,6/3/2019,1,2018,1707,Roberts,TAGGART v. LORENZEN,4/24/2019,0,138,0.0,135.0,0.0,1,0.0,0.0,20.0,0.0,29.0,0.0,0.0,11.0,2.0,1.0,1.0,5.0,0.0,1.0,0.0,0.0,80030.0,8.0,2.0,0.0,3.0,307.0,1,9,0,"Bush, G. W.",Republican,0


In [26]:
caseDf['dateDecision'] = pd.to_datetime(caseDf['dateDecision'])
caseDf['dateArgument'] = pd.to_datetime(caseDf['dateArgument'])
caseDf['deliberation'] = caseDf['dateDecision'] - caseDf['dateArgument']
caseDf['deliberation'] = caseDf['deliberation']/np.timedelta64(1,'D')

In [27]:
caseDf

Unnamed: 0,caseId,dateDecision,decisionType,term,naturalCourt,chief,caseName,dateArgument,dateRearg,petitioner,petitionerState,respondent,respondentState,jurisdiction,adminAction,adminActionState,caseOrigin,caseOriginState,caseSource,caseSourceState,lcDisagreement,certReason,lcDisposition,lcDispositionDirection,declarationUncon,caseDisposition,caseDispositionUnusual,partyWinning,precedentAlteration,voteUnclear,issue,issueArea,decisionDirection,decisionDirectionDissent,lawType,lawSupp,splitVote,majVotes,minVotes,chiefAppointedBy,chiefAppointedParty,reargued,deliberation
0,1946-001,1946-11-18,1,1946,1301,Vinson,HALLIBURTON OIL WELL CEMENTING CO. v. WALKER e...,1946-01-09,10/23/1946,198,0.0,172.0,0.0,6,0.0,0.0,51.0,6.0,29.0,0.0,0.0,11.0,2.0,1.0,1.0,3.0,0.0,1.0,1.0,0.0,80180.0,8.0,2.0,0.0,6.0,600.0,1,8,1,Truman,Democratic,1,313.0
1,1946-002,1946-11-18,1,1946,1301,Vinson,CLEVELAND v. UNITED STATES,1945-10-10,10/17/1946,100,0.0,27.0,0.0,1,0.0,0.0,123.0,52.0,30.0,0.0,0.0,4.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10500.0,1.0,1.0,0.0,6.0,600.0,1,6,3,Truman,Democratic,1,404.0
2,1946-003,1946-11-18,1,1946,1301,Vinson,CHAMPLIN REFINING CO. v. UNITED STATES ET AL.,1945-11-08,10/18/1946,209,0.0,27.0,0.0,2,66.0,0.0,107.0,42.0,107.0,42.0,0.0,1.0,0.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,80250.0,8.0,2.0,0.0,2.0,207.0,1,5,4,Truman,Democratic,1,375.0
3,1946-004,1946-11-25,7,1946,1301,Vinson,UNITED STATES v. ALCEA BAND OF TILLAMOOKS ET AL.,1946-01-31,10/25/1946,27,0.0,170.0,0.0,1,67.0,0.0,3.0,0.0,3.0,0.0,0.0,10.0,0.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,20150.0,2.0,2.0,0.0,6.0,600.0,1,5,3,Truman,Democratic,1,298.0
4,1946-005,1946-11-25,1,1946,1301,Vinson,"UNITED STATES v. HOWARD P. FOLEY CO., INC.",1946-10-25,0,27,0.0,176.0,0.0,1,0.0,0.0,3.0,0.0,3.0,0.0,0.0,2.0,0.0,2.0,1.0,3.0,0.0,1.0,0.0,0.0,80060.0,8.0,2.0,0.0,0.0,0.0,1,6,3,Truman,Democratic,0,31.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8961,2018-073,2019-06-20,1,2018,1707,Roberts,MCDONOUGH v. SMITH,2019-04-17,0,100,0.0,19.0,37.0,1,0.0,0.0,95.0,0.0,22.0,0.0,0.0,2.0,2.0,1.0,1.0,4.0,0.0,1.0,0.0,0.0,20400.0,2.0,2.0,0.0,3.0,314.0,1,6,3,"Bush, G. W.",Republican,0,64.0
8962,2018-074,2019-06-24,1,2018,1707,Roberts,FOOD MARKETING INSTITUTE v. ARGUS LEADER MEDIA,2019-04-22,0,228,0.0,190.0,0.0,1,0.0,0.0,115.0,0.0,28.0,0.0,0.0,12.0,2.0,2.0,1.0,4.0,0.0,1.0,0.0,0.0,50040.0,5.0,1.0,0.0,3.0,335.0,1,6,3,"Bush, G. W.",Republican,0,63.0
8963,2018-075,2019-06-10,1,2018,1707,Roberts,QUARLES v. UNITED STATES,2019-04-24,0,126,0.0,27.0,0.0,1,0.0,0.0,82.0,0.0,26.0,0.0,0.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10570.0,1.0,1.0,0.0,6.0,600.0,1,9,0,"Bush, G. W.",Republican,0,47.0
8964,2018-076,2019-06-03,1,2018,1707,Roberts,TAGGART v. LORENZEN,2019-04-24,0,138,0.0,135.0,0.0,1,0.0,0.0,20.0,0.0,29.0,0.0,0.0,11.0,2.0,1.0,1.0,5.0,0.0,1.0,0.0,0.0,80030.0,8.0,2.0,0.0,3.0,307.0,1,9,0,"Bush, G. W.",Republican,0,40.0


In [30]:
preprocessDf = pd.DataFrame({'caseName': caseDf['caseName'], 'chief': chiefEncoded, 'chiefAppointedBy': chiefAppointedByEncoded, 'chiefAppointedParty': chiefAppointedPartyEncoded, 'term': caseDf['term'], 'deliberation': caseDf['deliberation'], 'petitioner': caseDf['petitioner'], 'petitionerState': caseDf['petitionerState'], 'respondent': caseDf['respondent'], 'respondentState': caseDf['respondentState'], 'caseOrigin': caseDf['caseOrigin'], 'caseOriginState': caseDf['caseOriginState'], 'caseSource': caseDf['caseSource'], 'lcDisposition': caseDf['lcDisposition'], 'lcDispositionDirection': caseDf['lcDispositionDirection'], 'lcDisagreement': caseDf['lcDisagreement'], 'issue': caseDf['issue'], 'issueArea': caseDf['issueArea'], 'adminAction': caseDf['adminAction'], 'certReason': caseDf['certReason'], 'lawType': caseDf['lawType'], 'decisionType': caseDf['decisionType'], 'caseDisposition': caseDf['caseDisposition'], 'partyWinning': caseDf['partyWinning'], 'decisionDirection': caseDf['decisionDirection'], 'declarationUncon': caseDf['declarationUncon'], 'precedentAlteration': caseDf['precedentAlteration']})


In [31]:
preprocessDf

Unnamed: 0,caseName,chief,chiefAppointedBy,chiefAppointedParty,term,deliberation,petitioner,petitionerState,respondent,respondentState,caseOrigin,caseOriginState,caseSource,lcDisposition,lcDispositionDirection,lcDisagreement,issue,issueArea,adminAction,certReason,lawType,decisionType,caseDisposition,partyWinning,decisionDirection,declarationUncon,precedentAlteration
0,HALLIBURTON OIL WELL CEMENTING CO. v. WALKER e...,3,4,0,1946,313.0,198,0.0,172.0,0.0,51.0,6.0,29.0,2.0,1.0,0.0,80180.0,8.0,0.0,11.0,6.0,1,3.0,1.0,2.0,1.0,1.0
1,CLEVELAND v. UNITED STATES,3,4,0,1946,404.0,100,0.0,27.0,0.0,123.0,52.0,30.0,2.0,1.0,0.0,10500.0,1.0,0.0,4.0,6.0,1,2.0,0.0,1.0,1.0,0.0
2,CHAMPLIN REFINING CO. v. UNITED STATES ET AL.,3,4,0,1946,375.0,209,0.0,27.0,0.0,107.0,42.0,107.0,0.0,2.0,0.0,80250.0,8.0,66.0,1.0,2.0,1,2.0,0.0,2.0,1.0,0.0
3,UNITED STATES v. ALCEA BAND OF TILLAMOOKS ET AL.,3,4,0,1946,298.0,27,0.0,170.0,0.0,3.0,0.0,3.0,0.0,2.0,0.0,20150.0,2.0,67.0,10.0,6.0,7,2.0,0.0,2.0,1.0,0.0
4,"UNITED STATES v. HOWARD P. FOLEY CO., INC.",3,4,0,1946,31.0,27,0.0,176.0,0.0,3.0,0.0,3.0,0.0,2.0,0.0,80060.0,8.0,0.0,2.0,0.0,1,3.0,1.0,2.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8961,MCDONOUGH v. SMITH,2,0,1,2018,64.0,100,0.0,19.0,37.0,95.0,0.0,22.0,2.0,1.0,0.0,20400.0,2.0,0.0,2.0,3.0,1,4.0,1.0,2.0,1.0,0.0
8962,FOOD MARKETING INSTITUTE v. ARGUS LEADER MEDIA,2,0,1,2018,63.0,228,0.0,190.0,0.0,115.0,0.0,28.0,2.0,2.0,0.0,50040.0,5.0,0.0,12.0,3.0,1,4.0,1.0,1.0,1.0,0.0
8963,QUARLES v. UNITED STATES,2,0,1,2018,47.0,126,0.0,27.0,0.0,82.0,0.0,26.0,2.0,1.0,0.0,10570.0,1.0,0.0,2.0,6.0,1,2.0,0.0,1.0,1.0,0.0
8964,TAGGART v. LORENZEN,2,0,1,2018,40.0,138,0.0,135.0,0.0,20.0,0.0,29.0,2.0,1.0,0.0,80030.0,8.0,0.0,11.0,3.0,1,5.0,1.0,2.0,1.0,0.0


In [35]:
data = preprocessDf.values
X = data[:, 1:21]
y = data[:, 24]
y = y.astype('float64')
features = preprocessDf.columns.drop(['caseName', 'decisionType', 'caseDisposition', 'partyWinning', 'declarationUncon', 'precedentAlteration'])
target = preprocessDf['decisionDirection']
print(X.shape, y.shape)

(8966, 20) (8966,)


In [47]:
preprocessDf['decisionDirection'].value_counts()

2.0    4503
1.0    4273
3.0     151
0.0      39
Name: decisionDirection, dtype: int64

In [36]:
target

0       2.0
1       1.0
2       2.0
3       2.0
4       2.0
       ... 
8961    2.0
8962    1.0
8963    1.0
8964    2.0
8965    1.0
Name: decisionDirection, Length: 8966, dtype: float64

In [37]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)
print(f' X_train shape: {X_train.shape}')
print(f' X_test shape: {X_test.shape}')
print(f' y_train shape: {y_train.shape}')
print(f' y_test shape: {y_test.shape}')


 X_train shape: (6724, 20)
 X_test shape: (2242, 20)
 y_train shape: (6724,)
 y_test shape: (2242,)


In [38]:
X_scaler = StandardScaler().fit(X_train, )
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
print(X_test_scaled.shape)

(2242, 20)


In [39]:
print(f' Length of scaled X_train: {len(X_train_scaled)}, length of encoded y_train: {len(y_train)}')
print(f' Length of scaled X_test: {len(X_test_scaled)}, length of encoded y_test: {len(y_test)}')
print(f' Shape of scaled X_train: {X_train_scaled.shape}, shape of encoded y_train: {y_train.shape}')
print(f' Shape of scaled X_test: {X_test_scaled.shape}, shape of encoded y_test: {y_test.shape}')

 Length of scaled X_train: 6724, length of encoded y_train: 6724
 Length of scaled X_test: 2242, length of encoded y_test: 2242
 Shape of scaled X_train: (6724, 20), shape of encoded y_train: (6724,)
 Shape of scaled X_test: (2242, 20), shape of encoded y_test: (2242,)


# sklearn LogisticRegression

In [40]:
clf = LogisticRegression()
clf.fit(X_train_scaled, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [41]:
print(f"Training Data Score: {clf.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {clf.score(X_test_scaled, y_test)}")

Training Data Score: 0.6250743604997026
Testing Data Score: 0.6115075825156111


# sklearn DecisionTree

In [42]:
dtc = tree.DecisionTreeClassifier()
dtc = dtc.fit(X_train_scaled, y_train)

In [43]:
print(f"Training Data Score: {dtc.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {dtc.score(X_test_scaled, y_test)}")

Training Data Score: 0.9997025580011898
Testing Data Score: 0.6008028545941124


# sklearn RandomForest

In [44]:
rf = RandomForestClassifier(
bootstrap = True,
max_depth = 15,
max_features = 3,
min_samples_leaf = 3,
min_samples_split = 10,
n_estimators = 250)
rf = rf.fit(X_train_scaled, y_train)

In [45]:
print(f"Training Data Score: {rf.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {rf.score(X_test_scaled, y_test)}")

Training Data Score: 0.872992266508031
Testing Data Score: 0.6619090098126673


In [46]:
 sorted(zip(rf.feature_importances_, features), reverse=True)

[(0.11411095571603753, 'issue'),
 (0.09935687143851345, 'lcDispositionDirection'),
 (0.08869713400087166, 'deliberation'),
 (0.07519951010258494, 'term'),
 (0.07512968874691492, 'petitioner'),
 (0.06952209823645328, 'respondent'),
 (0.0685334845684894, 'issueArea'),
 (0.06071996618980234, 'caseOrigin'),
 (0.05758014909467415, 'caseSource'),
 (0.04207860939327919, 'certReason'),
 (0.0374078243373101, 'lawType'),
 (0.0373380644256645, 'chief'),
 (0.03296399605229431, 'lcDisposition'),
 (0.02873374735053662, 'respondentState'),
 (0.027032284327799968, 'petitionerState'),
 (0.024064647115635454, 'adminAction'),
 (0.02359130326314301, 'caseOriginState'),
 (0.02248047823676489, 'chiefAppointedBy'),
 (0.012389369049234811, 'lcDisagreement'),
 (0.003069818353995429, 'chiefAppointedParty')]

# Hyperparameter tuning

In [None]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 50, stop = 1000, num = 20)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(5, 100, num = 20)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

In [None]:
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

In [None]:
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 5, verbose=2, random_state=42, n_jobs = -1)
# Fit the random search model
rf_random.fit(X_train_scaled, y_train)

In [None]:
rf_random.best_params_

In [None]:
param_grid = {
    'bootstrap': [True],
    'max_depth': [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 , 20],
    'max_features': [2, 3],
    'min_samples_leaf': [1, 2, 3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 1000]
}

In [None]:
rf = RandomForestClassifier()
# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, 
                          cv = 5, n_jobs = -1, verbose = 2)

In [None]:
grid_search.fit(X_train_scaled, y_train)

In [None]:
grid_search.best_params_