# Dependencies

In [1]:
import pandas as pd
import datetime
import statistics 
from statistics import mode 
from dateutil.relativedelta import relativedelta
from datetime import date
from pprint import pprint
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
import numpy as np
pd.set_option("max_columns", None)

# Data Import and Cleaning

In [2]:
casePath = 'SCDB1901-caseCentered.csv'
justicePath = 'SCDB1901-justiceCentered.csv'
chiefJusticesPath = 'chiefJustices.csv'
assocJusticesPath = 'assocJustices.csv'
presidentsPath = 'presidents.csv'

In [3]:
caseFile = pd.read_csv(casePath, encoding = 'latin1')
justiceFile = pd.read_csv(justicePath, encoding = 'latin1')
chiefJusticesFile = pd.read_csv(chiefJusticesPath)
assocJusticeFile = pd.read_csv(assocJusticesPath)
presidentsFile = pd.read_csv(presidentsPath)

In [4]:
caseDf = pd.DataFrame(caseFile)
justiceCDf = pd.DataFrame(justiceFile)
chiefJusticeDf = pd.DataFrame(chiefJusticesFile)
assocJusticeDf = pd.DataFrame(assocJusticeFile)
presidentsDf = pd.DataFrame(presidentsFile)

In [5]:
chiefJusticeDf

Unnamed: 0,Name,State App't From,Appointed by President
0,"Vinson, Fred Moore",Kentucky,Truman
1,"Warren, Earl",California,Eisenhower
2,"Burger, Warren Earl",Virginia,Nixon
3,"Rehnquist, William H.",Virginia,Reagan
4,"Roberts, John G., Jr.",Maryland,"Bush, G. W."


In [6]:
presidentsDf

Unnamed: 0,appointedBy,appointedParty
0,"Roosevelt, F.",Democratic
1,Truman,Democratic
2,Eisenhower,Republican
3,Kennedy,Democratic
4,Johnson,Democratic
5,Nixon,Republican
6,Ford,Republican
7,Carter,Democratic
8,Reagan,Republican
9,"Bush, G. H. W.",Republican


In [7]:
presidentsDf['appointedParty'].value_counts()

Republican    7
Democratic    7
Name: appointedParty, dtype: int64

In [8]:
chiefs = chiefJusticeDf['Name'].str.split(',', expand=True)

In [9]:
chiefJusticeDf['chief'] = chiefs[0]

In [10]:
chiefMergeDf = {'chief': chiefJusticeDf['chief'], 'appointedBy': chiefJusticeDf['Appointed by President']}
chiefMergeDf = pd.DataFrame(chiefMergeDf)

In [11]:
caseDf = caseDf.merge(chiefMergeDf, on='chief')

In [12]:
caseDf = caseDf.merge(presidentsDf, on='appointedBy', how='left')

In [13]:
caseDf

Unnamed: 0,caseId,docketId,caseIssuesId,voteId,dateDecision,decisionType,usCite,sctCite,ledCite,lexisCite,term,naturalCourt,chief,docket,caseName,dateArgument,dateRearg,petitioner,petitionerState,respondent,respondentState,jurisdiction,adminAction,adminActionState,threeJudgeFdc,caseOrigin,caseOriginState,caseSource,caseSourceState,lcDisagreement,certReason,lcDisposition,lcDispositionDirection,declarationUncon,caseDisposition,caseDispositionUnusual,partyWinning,precedentAlteration,voteUnclear,issue,issueArea,decisionDirection,decisionDirectionDissent,authorityDecision1,authorityDecision2,lawType,lawSupp,lawMinor,majOpinWriter,majOpinAssigner,splitVote,majVotes,minVotes,appointedBy,appointedParty
0,1946-001,1946-001-01,1946-001-01-01,1946-001-01-01-01,11/18/1946,1,329 U.S. 1,67 S. Ct. 6,91 L. Ed. 3,1946 U.S. LEXIS 1724,1946,1301,Vinson,24,HALLIBURTON OIL WELL CEMENTING CO. v. WALKER e...,1/9/1946,10/23/1946,198,,172.0,,6,,,0.0,51.0,6.0,29.0,,0.0,11.0,2.0,1.0,1.0,3.0,0.0,1.0,1.0,0.0,80180.0,8.0,2.0,0.0,4.0,,6.0,600.0,35 U.S.C. § 33,78.0,78.0,1,8,1,Truman,Democratic
1,1946-002,1946-002-01,1946-002-01-01,1946-002-01-01-01,11/18/1946,1,329 U.S. 14,67 S. Ct. 13,91 L. Ed. 12,1946 U.S. LEXIS 1725,1946,1301,Vinson,12,CLEVELAND v. UNITED STATES,10/10/1945,10/17/1946,100,,27.0,,1,,,0.0,123.0,52.0,30.0,,0.0,4.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10500.0,1.0,1.0,0.0,4.0,,6.0,600.0,18 U.S.C. § 398,81.0,87.0,1,6,3,Truman,Democratic
2,1946-003,1946-003-01,1946-003-01-01,1946-003-01-01-01,11/18/1946,1,329 U.S. 29,67 S. Ct. 1,91 L. Ed. 22,1946 U.S. LEXIS 3037,1946,1301,Vinson,21,CHAMPLIN REFINING CO. v. UNITED STATES ET AL.,11/8/1945,10/18/1946,209,,27.0,,2,66.0,,1.0,107.0,42.0,107.0,42.0,0.0,1.0,,2.0,1.0,2.0,0.0,0.0,0.0,0.0,80250.0,8.0,2.0,0.0,1.0,,2.0,207.0,,84.0,78.0,1,5,4,Truman,Democratic
3,1946-004,1946-004-01,1946-004-01-01,1946-004-01-01-01,11/25/1946,7,329 U.S. 40,67 S. Ct. 167,91 L. Ed. 29,1946 U.S. LEXIS 1696,1946,1301,Vinson,26,UNITED STATES v. ALCEA BAND OF TILLAMOOKS ET AL.,1/31/1946,10/25/1946,27,,170.0,,1,67.0,,0.0,3.0,,3.0,,0.0,10.0,,2.0,1.0,2.0,0.0,0.0,0.0,0.0,20150.0,2.0,2.0,0.0,4.0,,6.0,600.0,49 Stat. 801,87.0,87.0,1,5,3,Truman,Democratic
4,1946-005,1946-005-01,1946-005-01-01,1946-005-01-01-01,11/25/1946,1,329 U.S. 64,67 S. Ct. 154,91 L. Ed. 44,1946 U.S. LEXIS 2997,1946,1301,Vinson,50,"UNITED STATES v. HOWARD P. FOLEY CO., INC.",10/25/1946,,27,,176.0,,1,,,0.0,3.0,,3.0,,0.0,2.0,,2.0,1.0,3.0,0.0,1.0,0.0,0.0,80060.0,8.0,2.0,0.0,7.0,,,,,78.0,87.0,1,6,3,Truman,Democratic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8961,2018-073,2018-073-01,2018-073-01-01,2018-073-01-01-01,6/20/2019,1,,139 S. Ct. 2149,204 L. Ed. 2d 506,2019 U.S. LEXIS 4180,2018,1707,Roberts,18-485,MCDONOUGH v. SMITH,4/17/2019,,100,,19.0,37.0,1,,,0.0,95.0,,22.0,,0.0,2.0,2.0,1.0,1.0,4.0,0.0,1.0,0.0,0.0,20400.0,2.0,2.0,,4.0,,3.0,314.0,,113.0,111.0,1,6,3,"Bush, G. W.",Republican
8962,2018-074,2018-074-01,2018-074-01-01,2018-074-01-01-01,6/24/2019,1,,139 S. Ct. 2356,204 L. Ed. 2d 742,2019 U.S. LEXIS 4200,2018,1707,Roberts,18-481,FOOD MARKETING INSTITUTE v. ARGUS LEADER MEDIA,4/22/2019,,228,,190.0,,1,,,0.0,115.0,,28.0,,0.0,12.0,2.0,2.0,1.0,4.0,0.0,1.0,0.0,0.0,50040.0,5.0,1.0,0.0,4.0,,3.0,335.0,,115.0,111.0,1,6,3,"Bush, G. W.",Republican
8963,2018-075,2018-075-01,2018-075-01-01,2018-075-01-01-01,6/10/2019,1,,139 S. Ct. 1872,204 L. Ed. 2d 200,2019 U.S. LEXIS 4027,2018,1707,Roberts,17-778,QUARLES v. UNITED STATES,4/24/2019,,126,,27.0,,1,,,0.0,82.0,,26.0,,0.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,10570.0,1.0,1.0,,4.0,,6.0,600.0,Armed Career Criminal Act,116.0,111.0,1,9,0,"Bush, G. W.",Republican
8964,2018-076,2018-076-01,2018-076-01-01,2018-076-01-01-01,6/3/2019,1,,139 S. Ct. 1795,204 L. Ed. 2d 129,2019 U.S. LEXIS 3890,2018,1707,Roberts,18-489,TAGGART v. LORENZEN,4/24/2019,,138,,135.0,,1,,,0.0,20.0,,29.0,,0.0,11.0,2.0,1.0,1.0,5.0,0.0,1.0,0.0,0.0,80030.0,8.0,2.0,,4.0,,3.0,307.0,,110.0,111.0,1,9,0,"Bush, G. W.",Republican


In [14]:
del caseDf['docketId']
del caseDf['caseIssuesId']
del caseDf['voteId']
del caseDf['usCite']
del caseDf['sctCite']
del caseDf['ledCite']
del caseDf['lexisCite']
del caseDf['docket']
del caseDf['threeJudgeFdc']
del caseDf['lawMinor']
del caseDf['majOpinWriter']
del caseDf['majOpinAssigner']
del caseDf['authorityDecision1']
del caseDf['authorityDecision2']

In [15]:
chief = caseDf['chief']
label_encoder = LabelEncoder()
label_encoder.fit(chief)
chiefEncoded = label_encoder.transform(chief)

In [16]:
chiefAppointedBy = caseDf['appointedBy']
label_encoder = LabelEncoder()
label_encoder.fit(chiefAppointedBy)
chiefAppointedByEncoded = label_encoder.transform(chiefAppointedBy)

In [17]:
chiefAppointedParty = caseDf['appointedParty']
label_encoder = LabelEncoder()
label_encoder.fit(chiefAppointedParty)
chiefAppointedPartyEncoded = label_encoder.transform(chiefAppointedParty)

In [18]:
caseDf['reargued'] = caseDf['dateRearg'].fillna(0)
rearguedDates =[]
for cell in caseDf['reargued']:
    if cell != 0:
        rearguedDates.append(cell)
for date in rearguedDates:
    caseDf['reargued'] = caseDf['reargued'].replace(date, 1)

In [19]:
caseDf['reargued'].value_counts()

0    8788
1     178
Name: reargued, dtype: int64

In [20]:
caseDf = caseDf.fillna(0)

In [21]:
caseDf['dateDecision'] = pd.to_datetime(caseDf['dateDecision'])
caseDf['dateArgument'] = pd.to_datetime(caseDf['dateArgument'])
caseDf['deliberation'] = caseDf['dateDecision'] - caseDf['dateArgument']
caseDf['deliberation'] = caseDf['deliberation']/np.timedelta64(1,'D')

In [22]:
terms = caseDf['term'].unique()
justiceDf = {'term': terms}
justiceArrayList = []
for term in terms:
    justiceArrayList.append(justiceCDf['justiceName'].loc[justiceCDf['term'] == term].unique())

In [23]:
justiceDf['justices'] = justiceArrayList

In [24]:
justiceDf = pd.DataFrame(justiceDf)

In [25]:
justiceDf['justices'] = justiceDf['justices'].astype('str')

In [26]:
justiceDf = justiceDf['justices'].str.split("'", expand=True)

In [27]:
del justiceDf[0]
del justiceDf[2]
del justiceDf[4]
del justiceDf[6]
del justiceDf[8]
del justiceDf[10]
del justiceDf[12]
del justiceDf[14]
del justiceDf[16]
del justiceDf[18]
del justiceDf[19]
del justiceDf[20]
del justiceDf[21]
del justiceDf[22]

In [28]:
justiceDf['term'] = terms

In [29]:
justiceDf = justiceDf.rename(columns={'term': 'term', 1: 'justice1', 3: 'justice2', 5: 'justice3', 7: 'justice4', 9: 'justice5', 11: 'justice6', 13: 'justice7', 15: 'justice8', 17: 'justice9'})


In [30]:
justiceDf = justiceDf[['term', 'justice1', 'justice2', 'justice3', 'justice4', 'justice5', 'justice6', 'justice7', 'justice8', 'justice9']]


In [31]:
justiceDf

Unnamed: 0,term,justice1,justice2,justice3,justice4,justice5,justice6,justice7,justice8,justice9
0,1946,HHBurton,RHJackson,WODouglas,FFrankfurter,SFReed,HLBlack,WBRutledge,FMurphy,FMVinson
1,1947,HHBurton,RHJackson,WODouglas,FFrankfurter,SFReed,HLBlack,WBRutledge,FMurphy,FMVinson
2,1948,HHBurton,RHJackson,WODouglas,FFrankfurter,SFReed,HLBlack,WBRutledge,FMurphy,FMVinson
3,1949,FMVinson,HLBlack,SFReed,FFrankfurter,WODouglas,RHJackson,HHBurton,TCClark,SMinton
4,1950,SMinton,TCClark,HHBurton,RHJackson,WODouglas,FFrankfurter,SFReed,HLBlack,FMVinson
...,...,...,...,...,...,...,...,...,...,...
68,2014,JGRoberts,AScalia,AMKennedy,CThomas,RBGinsburg,SGBreyer,SAAlito,SSotomayor,EKagan
69,2015,JGRoberts,AScalia,AMKennedy,CThomas,RBGinsburg,SGBreyer,SAAlito,SSotomayor,EKagan
70,2016,JGRoberts,AMKennedy,CThomas,RBGinsburg,SGBreyer,SAAlito,SSotomayor,EKagan,NMGorsuch
71,2017,JGRoberts,AMKennedy,CThomas,RBGinsburg,SGBreyer,SAAlito,SSotomayor,EKagan,NMGorsuch


In [32]:
assocJusticeDf

Unnamed: 0,Name,AppointedbyPresident
0,HLBlack,"Roosevelt, F."
1,SFReed,"Roosevelt, F."
2,FFrankfurter,"Roosevelt, F."
3,WODouglas,"Roosevelt, F."
4,FMurphy,"Roosevelt, F."
5,JFByrnes,"Roosevelt, F."
6,RHJackson,"Roosevelt, F."
7,WBRutledge,"Roosevelt, F."
8,HHBurton,Truman
9,TCClark,Truman


In [33]:
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice1', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice1AppointedBy'})
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice2', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice2AppointedBy'})
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice3', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice3AppointedBy'})
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice4', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice4AppointedBy'})
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice5', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice5AppointedBy'})
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice6', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice6AppointedBy'})
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice7', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice7AppointedBy'})
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice8', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice8AppointedBy'})
justiceDf = justiceDf.merge(assocJusticeDf, left_on='justice9', right_on='Name', how='left')
justiceDf = justiceDf.rename(columns = {'AppointedbyPresident': 'justice9AppointedBy'})

In [34]:
del justiceDf['Name']
del justiceDf['Name_x']
del justiceDf['Name_y']

In [35]:
justiceDf

Unnamed: 0,term,justice1,justice2,justice3,justice4,justice5,justice6,justice7,justice8,justice9,justice1AppointedBy,justice2AppointedBy,justice3AppointedBy,justice4AppointedBy,justice5AppointedBy,justice6AppointedBy,justice7AppointedBy,justice8AppointedBy,justice9AppointedBy
0,1946,HHBurton,RHJackson,WODouglas,FFrankfurter,SFReed,HLBlack,WBRutledge,FMurphy,FMVinson,Truman,"Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.",Truman
1,1947,HHBurton,RHJackson,WODouglas,FFrankfurter,SFReed,HLBlack,WBRutledge,FMurphy,FMVinson,Truman,"Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.",Truman
2,1948,HHBurton,RHJackson,WODouglas,FFrankfurter,SFReed,HLBlack,WBRutledge,FMurphy,FMVinson,Truman,"Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.",Truman
3,1949,FMVinson,HLBlack,SFReed,FFrankfurter,WODouglas,RHJackson,HHBurton,TCClark,SMinton,Truman,"Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.",Truman,Truman,Truman
4,1950,SMinton,TCClark,HHBurton,RHJackson,WODouglas,FFrankfurter,SFReed,HLBlack,FMVinson,Truman,Truman,Truman,"Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.","Roosevelt, F.",Truman
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,2014,JGRoberts,AScalia,AMKennedy,CThomas,RBGinsburg,SGBreyer,SAAlito,SSotomayor,EKagan,"Bush, G. W.",Reagan,Reagan,"Bush, G. H. W.",Clinton,Clinton,"Bush, G. W.",Obama,Obama
69,2015,JGRoberts,AScalia,AMKennedy,CThomas,RBGinsburg,SGBreyer,SAAlito,SSotomayor,EKagan,"Bush, G. W.",Reagan,Reagan,"Bush, G. H. W.",Clinton,Clinton,"Bush, G. W.",Obama,Obama
70,2016,JGRoberts,AMKennedy,CThomas,RBGinsburg,SGBreyer,SAAlito,SSotomayor,EKagan,NMGorsuch,"Bush, G. W.",Reagan,"Bush, G. H. W.",Clinton,Clinton,"Bush, G. W.",Obama,Obama,Trump
71,2017,JGRoberts,AMKennedy,CThomas,RBGinsburg,SGBreyer,SAAlito,SSotomayor,EKagan,NMGorsuch,"Bush, G. W.",Reagan,"Bush, G. H. W.",Clinton,Clinton,"Bush, G. W.",Obama,Obama,Trump


In [36]:
justiceDf = justiceDf.merge(presidentsDf, left_on='justice1AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice1AppointedParty'})
justiceDf = justiceDf.merge(presidentsDf, left_on='justice2AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice2AppointedParty'})
justiceDf = justiceDf.merge(presidentsDf, left_on='justice3AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice3AppointedParty'})
justiceDf = justiceDf.merge(presidentsDf, left_on='justice4AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice4AppointedParty'})
justiceDf = justiceDf.merge(presidentsDf, left_on='justice5AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice5AppointedParty'})
justiceDf = justiceDf.merge(presidentsDf, left_on='justice6AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice6AppointedParty'})
justiceDf = justiceDf.merge(presidentsDf, left_on='justice7AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice7AppointedParty'})
justiceDf = justiceDf.merge(presidentsDf, left_on='justice8AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice8AppointedParty'})
justiceDf = justiceDf.merge(presidentsDf, left_on='justice9AppointedBy', right_on='appointedBy', how='left')
justiceDf = justiceDf.rename(columns = {'appointedParty': 'justice9AppointedParty'})

In [37]:
del justiceDf['appointedBy']
del justiceDf['appointedBy_x']
del justiceDf['appointedBy_y']

In [38]:
justiceDf = justiceDf[['term', 'justice1', 'justice1AppointedBy', 'justice1AppointedParty', 'justice2', 'justice2AppointedBy', 'justice2AppointedParty', 'justice3', 'justice3AppointedBy', 'justice3AppointedParty', 'justice4', 'justice4AppointedBy', 'justice4AppointedParty', 'justice5', 'justice5AppointedBy', 'justice5AppointedParty', 'justice6', 'justice6AppointedBy', 'justice6AppointedParty', 'justice7', 'justice7AppointedBy', 'justice7AppointedParty', 'justice8', 'justice8AppointedBy', 'justice8AppointedParty', 'justice9', 'justice9AppointedBy', 'justice9AppointedParty']]
justiceDf

Unnamed: 0,term,justice1,justice1AppointedBy,justice1AppointedParty,justice2,justice2AppointedBy,justice2AppointedParty,justice3,justice3AppointedBy,justice3AppointedParty,justice4,justice4AppointedBy,justice4AppointedParty,justice5,justice5AppointedBy,justice5AppointedParty,justice6,justice6AppointedBy,justice6AppointedParty,justice7,justice7AppointedBy,justice7AppointedParty,justice8,justice8AppointedBy,justice8AppointedParty,justice9,justice9AppointedBy,justice9AppointedParty
0,1946,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,WBRutledge,"Roosevelt, F.",Democratic,FMurphy,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic
1,1947,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,WBRutledge,"Roosevelt, F.",Democratic,FMurphy,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic
2,1948,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,WBRutledge,"Roosevelt, F.",Democratic,FMurphy,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic
3,1949,FMVinson,Truman,Democratic,HLBlack,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,RHJackson,"Roosevelt, F.",Democratic,HHBurton,Truman,Democratic,TCClark,Truman,Democratic,SMinton,Truman,Democratic
4,1950,SMinton,Truman,Democratic,TCClark,Truman,Democratic,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,2014,JGRoberts,"Bush, G. W.",Republican,AScalia,Reagan,Republican,AMKennedy,Reagan,Republican,CThomas,"Bush, G. H. W.",Republican,RBGinsburg,Clinton,Democratic,SGBreyer,Clinton,Democratic,SAAlito,"Bush, G. W.",Republican,SSotomayor,Obama,Democratic,EKagan,Obama,Democratic
69,2015,JGRoberts,"Bush, G. W.",Republican,AScalia,Reagan,Republican,AMKennedy,Reagan,Republican,CThomas,"Bush, G. H. W.",Republican,RBGinsburg,Clinton,Democratic,SGBreyer,Clinton,Democratic,SAAlito,"Bush, G. W.",Republican,SSotomayor,Obama,Democratic,EKagan,Obama,Democratic
70,2016,JGRoberts,"Bush, G. W.",Republican,AMKennedy,Reagan,Republican,CThomas,"Bush, G. H. W.",Republican,RBGinsburg,Clinton,Democratic,SGBreyer,Clinton,Democratic,SAAlito,"Bush, G. W.",Republican,SSotomayor,Obama,Democratic,EKagan,Obama,Democratic,NMGorsuch,Trump,Republican
71,2017,JGRoberts,"Bush, G. W.",Republican,AMKennedy,Reagan,Republican,CThomas,"Bush, G. H. W.",Republican,RBGinsburg,Clinton,Democratic,SGBreyer,Clinton,Democratic,SAAlito,"Bush, G. W.",Republican,SSotomayor,Obama,Democratic,EKagan,Obama,Democratic,NMGorsuch,Trump,Republican


In [39]:
def most_common(List): 
    return(mode(List)) 

In [40]:
majorityParty = []
for x in range(len(justiceDf)):
    iterateParty = []
    iterateParty.append(justiceDf.iloc[x, 3])
    iterateParty.append(justiceDf.iloc[x, 6])
    iterateParty.append(justiceDf.iloc[x, 9])
    iterateParty.append(justiceDf.iloc[x, 12])
    iterateParty.append(justiceDf.iloc[x, 15])
    iterateParty.append(justiceDf.iloc[x, 18])
    iterateParty.append(justiceDf.iloc[x, 21])
    iterateParty.append(justiceDf.iloc[x, 24])
    iterateParty.append(justiceDf.iloc[x, 27])
    majorityParty.append(most_common(iterateParty))



In [41]:
justiceDf['majorityParty'] = majorityParty

In [42]:
del caseDf['caseId']
del caseDf['dateDecision']
del caseDf['naturalCourt']
del caseDf['dateArgument']
del caseDf['dateRearg']
del caseDf['caseSourceState']
del caseDf['adminActionState']
del caseDf['minVotes']
del caseDf['majVotes']
del caseDf['splitVote']
del caseDf['lawSupp']
del caseDf['decisionDirectionDissent']
del caseDf['voteUnclear']
del caseDf['caseDispositionUnusual']

In [43]:
preprocessDf = caseDf.merge(justiceDf, on='term', how='left')

In [44]:
preprocessDf

Unnamed: 0,decisionType,term,chief,caseName,petitioner,petitionerState,respondent,respondentState,jurisdiction,adminAction,caseOrigin,caseOriginState,caseSource,lcDisagreement,certReason,lcDisposition,lcDispositionDirection,declarationUncon,caseDisposition,partyWinning,precedentAlteration,issue,issueArea,decisionDirection,lawType,appointedBy,appointedParty,reargued,deliberation,justice1,justice1AppointedBy,justice1AppointedParty,justice2,justice2AppointedBy,justice2AppointedParty,justice3,justice3AppointedBy,justice3AppointedParty,justice4,justice4AppointedBy,justice4AppointedParty,justice5,justice5AppointedBy,justice5AppointedParty,justice6,justice6AppointedBy,justice6AppointedParty,justice7,justice7AppointedBy,justice7AppointedParty,justice8,justice8AppointedBy,justice8AppointedParty,justice9,justice9AppointedBy,justice9AppointedParty,majorityParty
0,1,1946,Vinson,HALLIBURTON OIL WELL CEMENTING CO. v. WALKER e...,198,0.0,172.0,0.0,6,0.0,51.0,6.0,29.0,0.0,11.0,2.0,1.0,1.0,3.0,1.0,1.0,80180.0,8.0,2.0,6.0,Truman,Democratic,1,313.0,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,WBRutledge,"Roosevelt, F.",Democratic,FMurphy,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic,Democratic
1,1,1946,Vinson,CLEVELAND v. UNITED STATES,100,0.0,27.0,0.0,1,0.0,123.0,52.0,30.0,0.0,4.0,2.0,1.0,1.0,2.0,0.0,0.0,10500.0,1.0,1.0,6.0,Truman,Democratic,1,404.0,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,WBRutledge,"Roosevelt, F.",Democratic,FMurphy,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic,Democratic
2,1,1946,Vinson,CHAMPLIN REFINING CO. v. UNITED STATES ET AL.,209,0.0,27.0,0.0,2,66.0,107.0,42.0,107.0,0.0,1.0,0.0,2.0,1.0,2.0,0.0,0.0,80250.0,8.0,2.0,2.0,Truman,Democratic,1,375.0,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,WBRutledge,"Roosevelt, F.",Democratic,FMurphy,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic,Democratic
3,7,1946,Vinson,UNITED STATES v. ALCEA BAND OF TILLAMOOKS ET AL.,27,0.0,170.0,0.0,1,67.0,3.0,0.0,3.0,0.0,10.0,0.0,2.0,1.0,2.0,0.0,0.0,20150.0,2.0,2.0,6.0,Truman,Democratic,1,298.0,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,WBRutledge,"Roosevelt, F.",Democratic,FMurphy,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic,Democratic
4,1,1946,Vinson,"UNITED STATES v. HOWARD P. FOLEY CO., INC.",27,0.0,176.0,0.0,1,0.0,3.0,0.0,3.0,0.0,2.0,0.0,2.0,1.0,3.0,1.0,0.0,80060.0,8.0,2.0,0.0,Truman,Democratic,0,31.0,HHBurton,Truman,Democratic,RHJackson,"Roosevelt, F.",Democratic,WODouglas,"Roosevelt, F.",Democratic,FFrankfurter,"Roosevelt, F.",Democratic,SFReed,"Roosevelt, F.",Democratic,HLBlack,"Roosevelt, F.",Democratic,WBRutledge,"Roosevelt, F.",Democratic,FMurphy,"Roosevelt, F.",Democratic,FMVinson,Truman,Democratic,Democratic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8961,1,2018,Roberts,MCDONOUGH v. SMITH,100,0.0,19.0,37.0,1,0.0,95.0,0.0,22.0,0.0,2.0,2.0,1.0,1.0,4.0,1.0,0.0,20400.0,2.0,2.0,3.0,"Bush, G. W.",Republican,0,64.0,JGRoberts,"Bush, G. W.",Republican,CThomas,"Bush, G. H. W.",Republican,RBGinsburg,Clinton,Democratic,SGBreyer,Clinton,Democratic,SAAlito,"Bush, G. W.",Republican,SSotomayor,Obama,Democratic,EKagan,Obama,Democratic,NMGorsuch,Trump,Republican,BMKavanaugh,Trump,Republican,Republican
8962,1,2018,Roberts,FOOD MARKETING INSTITUTE v. ARGUS LEADER MEDIA,228,0.0,190.0,0.0,1,0.0,115.0,0.0,28.0,0.0,12.0,2.0,2.0,1.0,4.0,1.0,0.0,50040.0,5.0,1.0,3.0,"Bush, G. W.",Republican,0,63.0,JGRoberts,"Bush, G. W.",Republican,CThomas,"Bush, G. H. W.",Republican,RBGinsburg,Clinton,Democratic,SGBreyer,Clinton,Democratic,SAAlito,"Bush, G. W.",Republican,SSotomayor,Obama,Democratic,EKagan,Obama,Democratic,NMGorsuch,Trump,Republican,BMKavanaugh,Trump,Republican,Republican
8963,1,2018,Roberts,QUARLES v. UNITED STATES,126,0.0,27.0,0.0,1,0.0,82.0,0.0,26.0,0.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,10570.0,1.0,1.0,6.0,"Bush, G. W.",Republican,0,47.0,JGRoberts,"Bush, G. W.",Republican,CThomas,"Bush, G. H. W.",Republican,RBGinsburg,Clinton,Democratic,SGBreyer,Clinton,Democratic,SAAlito,"Bush, G. W.",Republican,SSotomayor,Obama,Democratic,EKagan,Obama,Democratic,NMGorsuch,Trump,Republican,BMKavanaugh,Trump,Republican,Republican
8964,1,2018,Roberts,TAGGART v. LORENZEN,138,0.0,135.0,0.0,1,0.0,20.0,0.0,29.0,0.0,11.0,2.0,1.0,1.0,5.0,1.0,0.0,80030.0,8.0,2.0,3.0,"Bush, G. W.",Republican,0,40.0,JGRoberts,"Bush, G. W.",Republican,CThomas,"Bush, G. H. W.",Republican,RBGinsburg,Clinton,Democratic,SGBreyer,Clinton,Democratic,SAAlito,"Bush, G. W.",Republican,SSotomayor,Obama,Democratic,EKagan,Obama,Democratic,NMGorsuch,Trump,Republican,BMKavanaugh,Trump,Republican,Republican


In [45]:
majorityPartyMerged = preprocessDf['majorityParty']
label_encoder = LabelEncoder()
label_encoder.fit(majorityPartyMerged)
majorityPartyEncoded = label_encoder.transform(majorityPartyMerged)

In [46]:
majorityPartyEncoded

array([0, 0, 0, ..., 1, 1, 1])

In [47]:
processedDf = pd.DataFrame({'caseName': caseDf['caseName'], 'chief': chiefEncoded, 'chiefAppointedBy': chiefAppointedByEncoded, 'chiefAppointedParty': chiefAppointedPartyEncoded, 'majorityParty': majorityPartyEncoded, 'term': caseDf['term'], 'deliberation': caseDf['deliberation'], 'petitioner': caseDf['petitioner'], 'petitionerState': caseDf['petitionerState'], 'respondent': caseDf['respondent'], 'respondentState': caseDf['respondentState'], 'caseOrigin': caseDf['caseOrigin'], 'caseOriginState': caseDf['caseOriginState'], 'caseSource': caseDf['caseSource'], 'lcDisposition': caseDf['lcDisposition'], 'lcDispositionDirection': caseDf['lcDispositionDirection'], 'lcDisagreement': caseDf['lcDisagreement'], 'issue': caseDf['issue'], 'issueArea': caseDf['issueArea'], 'adminAction': caseDf['adminAction'], 'certReason': caseDf['certReason'], 'jurisdiction': preprocessDf['jurisdiction'], 'lawType': caseDf['lawType'], 'decisionType': caseDf['decisionType'], 'caseDisposition': caseDf['caseDisposition'], 'partyWinning': caseDf['partyWinning'], 'decisionDirection': caseDf['decisionDirection'], 'declarationUncon': caseDf['declarationUncon'], 'precedentAlteration': caseDf['precedentAlteration']})


In [48]:
processedDf

Unnamed: 0,caseName,chief,chiefAppointedBy,chiefAppointedParty,majorityParty,term,deliberation,petitioner,petitionerState,respondent,respondentState,caseOrigin,caseOriginState,caseSource,lcDisposition,lcDispositionDirection,lcDisagreement,issue,issueArea,adminAction,certReason,jurisdiction,lawType,decisionType,caseDisposition,partyWinning,decisionDirection,declarationUncon,precedentAlteration
0,HALLIBURTON OIL WELL CEMENTING CO. v. WALKER e...,3,4,0,0,1946,313.0,198,0.0,172.0,0.0,51.0,6.0,29.0,2.0,1.0,0.0,80180.0,8.0,0.0,11.0,6,6.0,1,3.0,1.0,2.0,1.0,1.0
1,CLEVELAND v. UNITED STATES,3,4,0,0,1946,404.0,100,0.0,27.0,0.0,123.0,52.0,30.0,2.0,1.0,0.0,10500.0,1.0,0.0,4.0,1,6.0,1,2.0,0.0,1.0,1.0,0.0
2,CHAMPLIN REFINING CO. v. UNITED STATES ET AL.,3,4,0,0,1946,375.0,209,0.0,27.0,0.0,107.0,42.0,107.0,0.0,2.0,0.0,80250.0,8.0,66.0,1.0,2,2.0,1,2.0,0.0,2.0,1.0,0.0
3,UNITED STATES v. ALCEA BAND OF TILLAMOOKS ET AL.,3,4,0,0,1946,298.0,27,0.0,170.0,0.0,3.0,0.0,3.0,0.0,2.0,0.0,20150.0,2.0,67.0,10.0,1,6.0,7,2.0,0.0,2.0,1.0,0.0
4,"UNITED STATES v. HOWARD P. FOLEY CO., INC.",3,4,0,0,1946,31.0,27,0.0,176.0,0.0,3.0,0.0,3.0,0.0,2.0,0.0,80060.0,8.0,0.0,2.0,1,0.0,1,3.0,1.0,2.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8961,MCDONOUGH v. SMITH,2,0,1,1,2018,64.0,100,0.0,19.0,37.0,95.0,0.0,22.0,2.0,1.0,0.0,20400.0,2.0,0.0,2.0,1,3.0,1,4.0,1.0,2.0,1.0,0.0
8962,FOOD MARKETING INSTITUTE v. ARGUS LEADER MEDIA,2,0,1,1,2018,63.0,228,0.0,190.0,0.0,115.0,0.0,28.0,2.0,2.0,0.0,50040.0,5.0,0.0,12.0,1,3.0,1,4.0,1.0,1.0,1.0,0.0
8963,QUARLES v. UNITED STATES,2,0,1,1,2018,47.0,126,0.0,27.0,0.0,82.0,0.0,26.0,2.0,1.0,0.0,10570.0,1.0,0.0,2.0,1,6.0,1,2.0,0.0,1.0,1.0,0.0
8964,TAGGART v. LORENZEN,2,0,1,1,2018,40.0,138,0.0,135.0,0.0,20.0,0.0,29.0,2.0,1.0,0.0,80030.0,8.0,0.0,11.0,1,3.0,1,5.0,1.0,2.0,1.0,0.0


In [49]:
#needs to be checked
data = processedDf.values
X = data[:, 1:23]
y = data[:, 26]
y = y.astype('float64')
features = processedDf.columns.drop(['caseName', 'decisionType', 'caseDisposition', 'partyWinning', 'declarationUncon', 'precedentAlteration'])
target = processedDf['decisionDirection']
print(X.shape, y.shape)

(8966, 22) (8966,)


In [50]:
processedDf['decisionDirection'].value_counts()

2.0    4503
1.0    4273
3.0     151
0.0      39
Name: decisionDirection, dtype: int64

In [52]:
y

array([2., 1., 2., ..., 1., 2., 1.])

In [53]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)
print(f' X_train shape: {X_train.shape}')
print(f' X_test shape: {X_test.shape}')
print(f' y_train shape: {y_train.shape}')
print(f' y_test shape: {y_test.shape}')


 X_train shape: (6724, 22)
 X_test shape: (2242, 22)
 y_train shape: (6724,)
 y_test shape: (2242,)


In [54]:
X_scaler = StandardScaler().fit(X_train, )
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
print(X_test_scaled.shape)

(2242, 22)


In [55]:
print(f' Length of scaled X_train: {len(X_train_scaled)}, length of encoded y_train: {len(y_train)}')
print(f' Length of scaled X_test: {len(X_test_scaled)}, length of encoded y_test: {len(y_test)}')
print(f' Shape of scaled X_train: {X_train_scaled.shape}, shape of encoded y_train: {y_train.shape}')
print(f' Shape of scaled X_test: {X_test_scaled.shape}, shape of encoded y_test: {y_test.shape}')

 Length of scaled X_train: 6724, length of encoded y_train: 6724
 Length of scaled X_test: 2242, length of encoded y_test: 2242
 Shape of scaled X_train: (6724, 22), shape of encoded y_train: (6724,)
 Shape of scaled X_test: (2242, 22), shape of encoded y_test: (2242,)


# sklearn RandomForest

In [56]:
rf = RandomForestClassifier(
bootstrap = True,
max_depth = 15,
max_features = 3,
min_samples_leaf = 3,
min_samples_split = 10,
n_estimators = 400)
rf = rf.fit(X_train_scaled, y_train)

In [57]:
print(f"Training Data Score: {rf.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {rf.score(X_test_scaled, y_test)}")

Training Data Score: 0.8701665675193337
Testing Data Score: 0.6628010704727921


In [58]:
 sorted(zip(rf.feature_importances_, features), reverse=True)

[(0.1098222235819017, 'issue'),
 (0.09925196491971887, 'lcDispositionDirection'),
 (0.08459875165579529, 'deliberation'),
 (0.07452640128143327, 'term'),
 (0.07428268412445575, 'petitioner'),
 (0.07027600693503254, 'issueArea'),
 (0.0669402548915916, 'respondent'),
 (0.05930149303651689, 'caseOrigin'),
 (0.053529776615862834, 'caseSource'),
 (0.038284471115090736, 'certReason'),
 (0.037940551289395164, 'lawType'),
 (0.03250499217188374, 'lcDisposition'),
 (0.02951052626524996, 'chief'),
 (0.02794283485620996, 'respondentState'),
 (0.02703838151181605, 'petitionerState'),
 (0.024175219888047814, 'adminAction'),
 (0.023960943655930978, 'caseOriginState'),
 (0.02157420578067271, 'chiefAppointedBy'),
 (0.0163344689203367, 'jurisdiction'),
 (0.01266524613531927, 'lcDisagreement'),
 (0.012241230818479958, 'majorityParty'),
 (0.0032973705492581424, 'chiefAppointedParty')]

# sklearn GradientBoostingClassifier

In [59]:
gbc = GradientBoostingClassifier()
gbc.fit(X_train_scaled, y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='auto',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [60]:
print(f"Training Data Score: {gbc.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {gbc.score(X_test_scaled, y_test)}")

Training Data Score: 0.7279892920880429
Testing Data Score: 0.66057091882248


In [61]:
 sorted(zip(gbc.feature_importances_, features), reverse=True)

[(0.27917004656692324, 'lcDispositionDirection'),
 (0.1870297946239951, 'issue'),
 (0.08245311229360187, 'deliberation'),
 (0.06008495824167475, 'respondent'),
 (0.04406909302354108, 'chief'),
 (0.03829376909253448, 'issueArea'),
 (0.03761634159432231, 'certReason'),
 (0.03662927920317031, 'term'),
 (0.035330752681495, 'petitioner'),
 (0.035142928018433384, 'caseSource'),
 (0.02935524382471932, 'lawType'),
 (0.02756642176471431, 'jurisdiction'),
 (0.022695728087511944, 'caseOrigin'),
 (0.017639270009432106, 'lcDisposition'),
 (0.015230196816382003, 'chiefAppointedBy'),
 (0.011438780534352381, 'lcDisagreement'),
 (0.010510586839083309, 'adminAction'),
 (0.01018768729313261, 'petitionerState'),
 (0.008407846438189885, 'respondentState'),
 (0.005738393725979818, 'majorityParty'),
 (0.005395006376216264, 'caseOriginState'),
 (1.4762950594282624e-05, 'chiefAppointedParty')]

# sklearn LogisticRegression

In [62]:
clf = LogisticRegression()
clf.fit(X_train_scaled, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [63]:
print(f"Training Data Score: {clf.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {clf.score(X_test_scaled, y_test)}")

Training Data Score: 0.6356335514574658
Testing Data Score: 0.6155218554861731


# sklearn DecisionTree

In [64]:
dtc = tree.DecisionTreeClassifier()
dtc = dtc.fit(X_train_scaled, y_train)

In [65]:
print(f"Training Data Score: {dtc.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {dtc.score(X_test_scaled, y_test)}")

Training Data Score: 0.9997025580011898
Testing Data Score: 0.6003568242640499


# Hyperparameter tuning

## RandomSearch

In [None]:
n_estimators = [int(x) for x in np.linspace(start = 50, stop = 1000, num = 20)]
max_features = ['auto', 'sqrt']
max_depth = [int(x) for x in np.linspace(5, 100, num = 20)]
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

In [None]:
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

In [None]:
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 5, verbose=2, random_state=42, n_jobs = -1)
# Fit the random search model
rf_random.fit(X_train_scaled, y_train)

In [None]:
rf_random.best_params_

## GridSearch

In [None]:
param_grid = {
    'bootstrap': [True],
    'max_depth': [10, 11, 12, 13, 14, 15, 16, 17, 18, 19 , 20],
    'max_features': [2, 3],
    'min_samples_leaf': [1, 2, 3],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750]
}

In [None]:
rf = RandomForestClassifier()
# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, 
                          cv = 5, n_jobs = -1, verbose = 2)

In [None]:
grid_search.fit(X_train_scaled, y_train)

In [None]:
grid_search.best_params_

# Future Goals

## Incorporation of justice-specific data

In [None]:
justiceDf