Writing a parser to access presidential pardon statistics published here by the Department of Justice here: https://www.justice.gov/pardon/clemency-statistics. I'm interested in how Trump's early pardons have diverged from recent presidents' clemency patterns. Thanks to https://medium.com/@ageitgey/quick-tip-the-easiest-way-to-grab-data-out-of-a-web-page-in-python-7153cecfca58 for the help.

In [79]:
# import libraries
import pandas as pd

In [80]:
# get all tables
tables = pd.read_html("https://www.justice.gov/pardon/clemency-statistics")

In [81]:
# CARTER
carter = tables[14]
carter = carter[2:] # take out first two rows with dirty column names
carter = carter[:5] # take out totals -- we'll re-calculate later
print(carter)

                0    1    2    3    4    5   6  7    8    9    10     11
2  1977 (8.5 mos.)  368  106  292  271    0   1  0    1    0  118   49.0
3             1978  541  327  379  262  162   3  0  307  409   74   46.0
4             1979  377  131  436  274  143  10  0  138  123   55  132.0
5             1980  477  140  355  168  155   8  3  150  106  161   81.0
6  1981 (3.5 mos.)  366  108  119   71   74   7  0   42   35   11   18.0


In [82]:
# add clean column names
carter.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
print(carter)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1977 (8.5 mos.)      368      106       292       271        0        1   
3             1978      541      327       379       262      162        3   
4             1979      377      131       436       274      143       10   
5             1980      477      140       355       168      155        8   
6  1981 (3.5 mos.)      366      108       119        71       74        7   

  grantedR deniedP deniedC closedP  closedC  
2        0       1       0     118     49.0  
3        0     307     409      74     46.0  
4        0     138     123      55    132.0  
5        3     150     106     161     81.0  
6        0      42      35      11     18.0  


In [83]:
# add president name so we can merge all tables together
carter['president'] = 'carter'
print(carter)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1977 (8.5 mos.)      368      106       292       271        0        1   
3             1978      541      327       379       262      162        3   
4             1979      377      131       436       274      143       10   
5             1980      477      140       355       168      155        8   
6  1981 (3.5 mos.)      366      108       119        71       74        7   

  grantedR deniedP deniedC closedP  closedC president  
2        0       1       0     118     49.0    carter  
3        0     307     409      74     46.0    carter  
4        0     138     123      55    132.0    carter  
5        3     150     106     161     81.0    carter  
6        0      42      35      11     18.0    carter  


In [87]:
# REAGAN
reagan = tables[15]
reagan = reagan[2:] # take out first two rows with dirty column names
reagan = reagan[:9] # take out totals -- we'll re-calculate later
reagan.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
reagan['president'] = 'reagan'
print(reagan)

               year pendingP pendingC receivedP receivedC grantedP grantedC  \
2   1981 (8.5 mos.)      358      119       220       137        2        0   
3              1982      510      169       283       179       83        3   
4              1983      371      137       298       149       91        2   
5              1984      409      147       289       158       37        5   
6              1985      467      168       256       151       32        3   
7              1986      540      188       222       140       55        0   
8              1987      548      197       227       183       23        0   
9              1988      588      236       236       148       38        0   
10  1989 (3.5 mos.)      513      160        68        60       32        0   

   grantedR deniedP deniedC closedP  closedC president  
2         0       0       0      66     87.0    reagan  
3         0     258     123      81     85.0    reagan  
4         0      74      33      96   

In [88]:
# H.W. BUSH
hwbush = tables[16]
hwbush = hwbush[2:] # take out first two rows with dirty column names
hwbush = hwbush[:5] # take out totals -- we'll re-calculate later
hwbush.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
hwbush['president'] = 'hwbush'
print(hwbush)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1989 (8.5 mos.)      488      186       115       130        9        1   
3             1990      432      184       206       148        0        0   
4             1991      485      196       172       146       29        0   
5             1992      180      109       174       205        0        0   
6  1993 (3.5 mos.)      269      207        64       106       36        2   

  grantedR deniedP deniedC closedP  closedC president  
2        0     122      22      41    112.0    hwbush  
3        0      94      22      59    114.0    hwbush  
4        0     390     198      62     31.0    hwbush  
5        0      45      76      40     31.0    hwbush  
6        0      25     111      18      8.0    hwbush  


In [91]:
# CLINTON
clinton = tables[17]
clinton = clinton[2:9] # take out first two rows with dirty column names
clinton = clinton[:9] # take out totals -- we'll re-calculate later
clinton.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
clinton['president'] = 'clinton'
print(clinton)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1993 (8.5 mos.)      260      192       172       526        0        0   
3             1994      392      656       228       580        0        0   
4             1995      371      700       209       403       53        3   
5             1996      330      709       204       308        0        0   
6             1997      438      736       209       476        0        0   
7             1998      540      764       201       407       21        0   
8             1999      628      884       261       748       34       12   

  grantedR deniedP deniedC closedP  closedC president  
2        0       1       2      33     53.0   clinton  
3        0     175     400      74    136.0   clinton  
4        0     158     258      39    133.0   clinton  
5        0      72     139      23    137.0   clinton  
6        0      69     325      38    123.0   clinton  
7        0      54     126      37    1

In [92]:
# W. BUSH
bush = tables[18]
bush = bush[2:] # take out first two rows with dirty column names
bush = bush[:9] # take out totals -- we'll re-calculate later
bush.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
bush['president'] = 'bush'
print(bush)

               year pendingP pendingC receivedP receivedC grantedP grantedC  \
2   2001 (8.5 mos.)      923     2063       110       548        0        0   
3              2002      988     2332       152      1096        0        0   
4              2003      565     1715       172       851        7        0   
5              2004      659     1588       235       951       12        2   
6              2005      733     1728       252       807       39        0   
7              2006      822     1829       254       759       39        0   
8              2007      729     1495       334       925       16        2   
9              2008      972     2083       555      1770       44        2   
10  2009 (3.5 mos.)      864     1309       434       869       32        5   

   grantedR deniedP deniedC closedP  closedC president  
2         0       0       1      45    277.0      bush  
3         0     519    1466      53    240.0      bush  
4         0      51     819      21   

In [93]:
# OBAMA
obama = tables[19]
obama = obama[2:] # take out first two rows with dirty column names
obama = obama[:9] # take out totals -- we'll re-calculate later
obama.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
obama['president'] = 'obama'
print(obama)

               year pendingP pendingC receivedP receivedC grantedP grantedC  \
2   2009 (8.5 mos.)     1040      903       232      1086        0        0   
3              2010     1140     1869       262      1902        0        0   
4              2011     1285     3431       331      1585       17        0   
5              2012      643     1523       383      1547        5        1   
6              2013      826     2232       303      2370       17        0   
7              2014      754     2785       273      6561       13        9   
8              2015      824     7889       294      2999       12       79   
9              2016      958     9115       997     11028        6      583   
10  2017 (3.5 mos.)     1920    11355       320      4071      142     1043   

   grantedR deniedP deniedC closedP  closedC president  
2         0       0       0     132    120.0     obama  
3         0       0       0     116    340.0     obama  
4         0     872    3104      84   

In [102]:
# TRUMP
trump = tables[20]
trump = trump[2:] # take out first two rows with dirty column names
trump = trump[:2] # take out totals -- we'll re-calculate later
trump = trump.drop([3,4], axis=1) # drop extra columns in only trump's table
trump.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
trump['president'] = 'trump'
print(trump)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  2017 (8.5 mos.)     2010     9361       422      1729        1        0   
3    2018 (9 mos.)     2271     8932       253       993        6        4   

  grantedR deniedP deniedC closedP  closedC president  
2        0       0       0     161   2158.0     trump  
3        0      82      98     294    847.0     trump  


In [103]:
pardons = pd.concat([carter,reagan,hwbush,clinton,bush,obama,trump])
print(pardons)

               year pendingP pendingC receivedP receivedC grantedP grantedC  \
2   1977 (8.5 mos.)      368      106       292       271        0        1   
3              1978      541      327       379       262      162        3   
4              1979      377      131       436       274      143       10   
5              1980      477      140       355       168      155        8   
6   1981 (3.5 mos.)      366      108       119        71       74        7   
2   1981 (8.5 mos.)      358      119       220       137        2        0   
3              1982      510      169       283       179       83        3   
4              1983      371      137       298       149       91        2   
5              1984      409      147       289       158       37        5   
6              1985      467      168       256       151       32        3   
7              1986      540      188       222       140       55        0   
8              1987      548      197       227     