Writing a parser to access presidential pardon statistics published here by the Department of Justice here: https://www.justice.gov/pardon/clemency-statistics. I'm interested in how Trump's early pardons have diverged from recent presidents' clemency patterns. Thanks to https://medium.com/@ageitgey/quick-tip-the-easiest-way-to-grab-data-out-of-a-web-page-in-python-7153cecfca58 for the help.

In [2]:
# import libraries
import pandas as pd

In [3]:
# get all tables
tables = pd.read_html("https://www.justice.gov/pardon/clemency-statistics")

In [4]:
# PARDONS YEAR 1
# CARTER
carter = tables[14]
carter = carter[2:] # take out first two rows with dirty column names
carter = carter[:1] # take out all but first year, for comparison to trump
print(carter)

                0    1    2    3    4  5  6  7  8  9    10    11
2  1977 (8.5 mos.)  368  106  292  271  0  1  0  1  0  118  49.0


In [5]:
# add clean column names
carter.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
print(carter)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1977 (8.5 mos.)      368      106       292       271        0        1   

  grantedR deniedP deniedC closedP  closedC  
2        0       1       0     118     49.0  


In [6]:
# add president name so we can merge all tables together
carter['president'] = 'carter'
print(carter)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1977 (8.5 mos.)      368      106       292       271        0        1   

  grantedR deniedP deniedC closedP  closedC president  
2        0       1       0     118     49.0    carter  


In [7]:
# REAGAN
reagan = tables[15]
reagan = reagan[2:] # take out first two rows with dirty column names
reagan = reagan[:1] # take out all but first year, for comparison to trump
reagan.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
reagan['president'] = 'reagan'
print(reagan)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1981 (8.5 mos.)      358      119       220       137        2        0   

  grantedR deniedP deniedC closedP  closedC president  
2        0       0       0      66     87.0    reagan  


In [8]:
# H.W. BUSH
hwbush = tables[16]
hwbush = hwbush[2:] # take out first two rows with dirty column names
hwbush = hwbush[:1] # take out all but first year, for comparison to trump
hwbush.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
hwbush['president'] = 'hwbush'
print(hwbush)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1989 (8.5 mos.)      488      186       115       130        9        1   

  grantedR deniedP deniedC closedP  closedC president  
2        0     122      22      41    112.0    hwbush  


In [9]:
# CLINTON
clinton = tables[17]
clinton = clinton[2:] # take out first two rows with dirty column names
clinton = clinton[:1] # take out all but first year, for comparison to trump
clinton.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
clinton['president'] = 'clinton'
print(clinton)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1993 (8.5 mos.)      260      192       172       526        0        0   

  grantedR deniedP deniedC closedP  closedC president  
2        0       1       2      33     53.0   clinton  


In [10]:
# W. BUSH
bush = tables[18]
bush = bush[2:] # take out first two rows with dirty column names
bush = bush[:1] # take out all but first year, for comparison to trump
bush.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
bush['president'] = 'bush'
print(bush)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  2001 (8.5 mos.)      923     2063       110       548        0        0   

  grantedR deniedP deniedC closedP  closedC president  
2        0       0       1      45    277.0      bush  


In [11]:
# OBAMA
obama = tables[19]
obama = obama[2:] # take out first two rows with dirty column names
obama = obama[:1] # take out all but first year, for comparison to trump
obama.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
obama['president'] = 'obama'
print(obama)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  2009 (8.5 mos.)     1040      903       232      1086        0        0   

  grantedR deniedP deniedC closedP  closedC president  
2        0       0       0     132    120.0     obama  


In [12]:
# TRUMP
trump = tables[20]
trump = trump[2:] # take out first two rows with dirty column names
trump = trump[:1] # take out all but first year, for comparison to trump
trump = trump.drop([3,4], axis=1) # drop extra columns in only trump's table
trump.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']
trump['president'] = 'trump'
print(trump)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  2017 (8.5 mos.)     2010     9361       422      1729        1        0   

  grantedR deniedP deniedC closedP  closedC president  
2        0       0       0     161   2158.0     trump  


In [13]:
pardons_yr1 = pd.concat([carter,reagan,hwbush,clinton,bush,obama,trump])
print(pardons_yr1)

              year pendingP pendingC receivedP receivedC grantedP grantedC  \
2  1977 (8.5 mos.)      368      106       292       271        0        1   
2  1981 (8.5 mos.)      358      119       220       137        2        0   
2  1989 (8.5 mos.)      488      186       115       130        9        1   
2  1993 (8.5 mos.)      260      192       172       526        0        0   
2  2001 (8.5 mos.)      923     2063       110       548        0        0   
2  2009 (8.5 mos.)     1040      903       232      1086        0        0   
2  2017 (8.5 mos.)     2010     9361       422      1729        1        0   

  grantedR deniedP deniedC closedP  closedC president  
2        0       1       0     118     49.0    carter  
2        0       0       0      66     87.0    reagan  
2        0     122      22      41    112.0    hwbush  
2        0       1       2      33     53.0   clinton  
2        0       0       1      45    277.0      bush  
2        0       0       0     132    1

In [32]:
# PARDONS YEAR 1 ANALYSIS
# 1. How many total petitions (P, C, R) did each president grant in his first year in office?
granted_yr1 = pardons_yr1.drop(['year','pendingP','pendingC','receivedP','receivedC','deniedP','deniedC','closedP','closedC'], axis=1)
granted_yr1[['grantedP','grantedC','grantedR']] = granted_yr1[['grantedP','grantedC','grantedR']].apply(pd.to_numeric)

granted_yr1['granted_all'] = granted_yr1['grantedP'] + granted_yr1['grantedC'] + granted_yr1['grantedR']
print(granted_yr1)

   grantedP  grantedC  grantedR president  granted_all
2         0         1         0    carter            1
2         2         0         0    reagan            2
2         9         1         0    hwbush           10
2         0         0         0   clinton            0
2         0         0         0      bush            0
2         0         0         0     obama            0
2         1         0         0     trump            1


In [38]:
# What percent of petitions pending + received were in the first year were granted?
pctgranted_yr1 = pardons_yr1.drop(['deniedP','deniedC','closedP','closedC'], axis=1)
pctgranted_yr1[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR']] = pctgranted_yr1[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR']].apply(pd.to_numeric)

pctgranted_yr1['pend_rec'] = pctgranted_yr1['pendingP'] + pctgranted_yr1['pendingC'] + pctgranted_yr1['receivedP'] + pctgranted_yr1['receivedC']
pctgranted_yr1['granted_all'] = pctgranted_yr1['grantedP'] + pctgranted_yr1['grantedC'] + pctgranted_yr1['grantedR']
pctgranted_yr1['pctgranted'] = (pctgranted_yr1['granted_all'] / pctgranted_yr1['pend_rec']) * 100
print(pctgranted_yr1)

              year  pendingP  pendingC  receivedP  receivedC  grantedP  \
2  1977 (8.5 mos.)       368       106        292        271         0   
2  1981 (8.5 mos.)       358       119        220        137         2   
2  1989 (8.5 mos.)       488       186        115        130         9   
2  1993 (8.5 mos.)       260       192        172        526         0   
2  2001 (8.5 mos.)       923      2063        110        548         0   
2  2009 (8.5 mos.)      1040       903        232       1086         0   
2  2017 (8.5 mos.)      2010      9361        422       1729         1   

   grantedC  grantedR president  pend_rec  granted_all  pctgranted  
2         1         0    carter      1037            1    0.096432  
2         0         0    reagan       834            2    0.239808  
2         1         0    hwbush       919           10    1.088139  
2         0         0   clinton      1150            0    0.000000  
2         0         0      bush      3644            0    0.00

In [16]:
# PARDONS YEARS 1 + 2
# CARTER
carter2 = tables[14]
carter2 = carter2[2:] # take out first two rows with dirty column names
carter2 = carter2[:2]

carter2.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']

carter2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']] = carter2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']].apply(pd.to_numeric)
carter2 = carter2.append(carter2.sum(numeric_only=True), ignore_index=True)
carter2.at[2, 'year'] = 'total'

carter2['president'] = 'carter'

carter_sum = carter2[2:]

print(carter_sum)

    year  pendingP  pendingC  receivedP  receivedC  grantedP  grantedC  \
2  total     909.0     433.0      671.0      533.0     162.0       4.0   

   grantedR  deniedP  deniedC  closedP  closedC president  
2       0.0    308.0    409.0    192.0     95.0    carter  


In [17]:
# REAGAN
reagan2 = tables[15]
reagan2 = reagan2[2:] # take out first two rows with dirty column names
reagan2 = reagan2[:2]

reagan2.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']

reagan2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']] = reagan2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']].apply(pd.to_numeric)
reagan2 = reagan2.append(reagan2.sum(numeric_only=True), ignore_index=True)
reagan2.at[2, 'year'] = 'total'

reagan2['president'] = 'reagan'

reagan_sum = reagan2[2:]

print(reagan_sum)

    year  pendingP  pendingC  receivedP  receivedC  grantedP  grantedC  \
2  total     868.0     288.0      503.0      316.0      85.0       3.0   

   grantedR  deniedP  deniedC  closedP  closedC president  
2       0.0    258.0    123.0    147.0    172.0    reagan  


In [18]:
# H.W. BUSH
hwbush2 = tables[16]
hwbush2 = hwbush2[2:] # take out first two rows with dirty column names
hwbush2 = hwbush2[:2]

hwbush2.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']

hwbush2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']] = hwbush2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']].apply(pd.to_numeric)
hwbush2 = hwbush2.append(hwbush2.sum(), ignore_index=True)
hwbush2.at[2, 'year'] = 'total'

hwbush2['president'] = 'hwbush'

hwbush_sum = hwbush2[2:]

print(hwbush_sum)

    year  pendingP  pendingC  receivedP  receivedC  grantedP  grantedC  \
2  total       920       370        321        278         9         1   

   grantedR  deniedP  deniedC  closedP  closedC president  
2         0      216       44      100    226.0    hwbush  


In [19]:
# CLINTON
clinton2 = tables[17]
clinton2 = clinton2[2:] # take out first two rows with dirty column names
clinton2 = clinton2[:2]

clinton2.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']

clinton2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']] = clinton2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']].apply(pd.to_numeric)
clinton2 = clinton2.append(clinton2.sum(), ignore_index=True)
clinton2.at[2, 'year'] = 'total'

clinton2['president'] = 'clinton'

clinton_sum = clinton2[2:]

print(clinton_sum)

    year  pendingP  pendingC  receivedP  receivedC  grantedP  grantedC  \
2  total       652       848        400       1106         0         0   

   grantedR  deniedP  deniedC  closedP  closedC president  
2         0      176      402      107    189.0   clinton  


In [20]:
# W. BUSH
bush2 = tables[18]
bush2 = bush2[2:] # take out first two rows with dirty column names
bush2 = bush2[:2]

bush2.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']

bush2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']] = bush2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']].apply(pd.to_numeric)
bush2 = bush2.append(bush2.sum(), ignore_index=True)
bush2.at[2, 'year'] = 'total'

bush2['president'] = 'bush'

bush_sum = bush2[2:]

print(bush_sum)

    year  pendingP  pendingC  receivedP  receivedC  grantedP  grantedC  \
2  total      1911      4395        262       1644         0         0   

   grantedR  deniedP  deniedC  closedP  closedC president  
2         0      519     1467       98    517.0      bush  


In [21]:
# OBAMA
obama2 = tables[19]
obama2 = obama2[2:] # take out first two rows with dirty column names
obama2 = obama2[:2]

obama2.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']

obama2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']] = obama2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']].apply(pd.to_numeric)
obama2 = obama2.append(obama2.sum(), ignore_index=True)
obama2.at[2, 'year'] = 'total'

obama2['president'] = 'obama'

obama_sum = obama2[2:]

print(obama_sum)

    year  pendingP  pendingC  receivedP  receivedC  grantedP  grantedC  \
2  total      2180      2772        494       2988         0         0   

   grantedR  deniedP  deniedC  closedP  closedC president  
2         0        0        0      248    460.0     obama  


In [22]:
# TRUMP
trump2 = tables[20]
trump2 = trump2[2:] # take out first two rows with dirty column names
trump2 = trump2[:2] # take out all but first year, for comparison to trump
trump2 = trump2.drop([3,4], axis=1) # drop extra columns in only trump's table

trump2.columns = ['year','pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']

trump2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']] = trump2[['pendingP','pendingC','receivedP','receivedC','grantedP','grantedC','grantedR','deniedP','deniedC','closedP','closedC']].apply(pd.to_numeric)
trump2 = trump2.append(trump2.sum(), ignore_index=True)
trump2.at[2, 'year'] = 'total'

trump2['president'] = 'trump'

trump_sum = trump2[2:]

print(trump_sum)

    year  pendingP  pendingC  receivedP  receivedC  grantedP  grantedC  \
2  total      4281     18293        675       2722         7         4   

   grantedR  deniedP  deniedC  closedP  closedC president  
2         0       82       98      455   3005.0     trump  


In [23]:
pardons_yr12 = pd.concat([carter_sum,reagan_sum,hwbush_sum,clinton_sum,bush_sum,obama_sum,trump_sum])
print(pardons_yr12)

    year  pendingP  pendingC  receivedP  receivedC  grantedP  grantedC  \
2  total     909.0     433.0      671.0      533.0     162.0       4.0   
2  total     868.0     288.0      503.0      316.0      85.0       3.0   
2  total     920.0     370.0      321.0      278.0       9.0       1.0   
2  total     652.0     848.0      400.0     1106.0       0.0       0.0   
2  total    1911.0    4395.0      262.0     1644.0       0.0       0.0   
2  total    2180.0    2772.0      494.0     2988.0       0.0       0.0   
2  total    4281.0   18293.0      675.0     2722.0       7.0       4.0   

   grantedR  deniedP  deniedC  closedP  closedC president  
2       0.0    308.0    409.0    192.0     95.0    carter  
2       0.0    258.0    123.0    147.0    172.0    reagan  
2       0.0    216.0     44.0    100.0    226.0    hwbush  
2       0.0    176.0    402.0    107.0    189.0   clinton  
2       0.0    519.0   1467.0     98.0    517.0      bush  
2       0.0      0.0      0.0    248.0    460.0

In [33]:
# PARDONS YEARS 1+2 ANALYSIS
# 1. How many total petitions (P, C, R) did each president grant in his first two years in office?
granted_yr12 = pardons_yr12.drop(['year','pendingP','pendingC','receivedP','receivedC','deniedP','deniedC','closedP','closedC'], axis=1)
granted_yr12[['grantedP','grantedC','grantedR']] = granted_yr12[['grantedP','grantedC','grantedR']].apply(pd.to_numeric)

granted_yr12['granted_all'] = granted_yr12['grantedP'] + granted_yr12['grantedC'] + granted_yr12['grantedR']
print(granted_yr12)

   grantedP  grantedC  grantedR president  granted_all
2     162.0       4.0       0.0    carter        166.0
2      85.0       3.0       0.0    reagan         88.0
2       9.0       1.0       0.0    hwbush         10.0
2       0.0       0.0       0.0   clinton          0.0
2       0.0       0.0       0.0      bush          0.0
2       0.0       0.0       0.0     obama          0.0
2       7.0       4.0       0.0     trump         11.0
