In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
import os
import seaborn as sns
import scipy
import geopandas as gpd

# Prison Populations

In [2]:
fn = os.path.join('../../../data/shapefiles/Prison_Boundaries/Prison_Boundaries.shp')

In [3]:
gdf = gpd.read_file(fn)
gdf['VAL_METHOD']

0       IMAGERY/OTHER
1       IMAGERY/OTHER
2       IMAGERY/OTHER
3       IMAGERY/OTHER
4       IMAGERY/OTHER
            ...      
6733    IMAGERY/OTHER
6734    IMAGERY/OTHER
6735    IMAGERY/OTHER
6736    IMAGERY/OTHER
6737             None
Name: VAL_METHOD, Length: 6738, dtype: object

In [4]:
print(gdf.columns)

Index(['FID', 'FACILITYID', 'NAME', 'ADDRESS', 'CITY', 'STATE', 'ZIP', 'ZIP4',
       'TELEPHONE', 'TYPE', 'STATUS', 'POPULATION', 'COUNTY', 'COUNTYFIPS',
       'COUNTRY', 'NAICS_CODE', 'NAICS_DESC', 'SOURCE', 'SOURCEDATE',
       'VAL_METHOD', 'VAL_DATE', 'WEBSITE', 'SECURELVL', 'CAPACITY',
       'SHAPE_Leng', 'GlobalID', 'CreationDa', 'Creator', 'EditDate', 'Editor',
       'SHAPE_Le_1', 'SHAPE_Area', 'geometry'],
      dtype='object')


In [5]:
gdf['VAL_DATE']

0       2020-02-27
1       2020-02-11
2       2020-03-04
3       2020-01-02
4       2020-01-02
           ...    
6733    2020-02-25
6734    2020-02-25
6735    2020-02-25
6736    2020-02-25
6737          None
Name: VAL_DATE, Length: 6738, dtype: object

In [6]:
print('how many in tx + fl out of total', (145240+98941)/2032647)

how many in tx + fl out of total 0.12012956504498813


# Figure 1a

In [7]:
# Check out data for figure 1
fn = os.path.join('../../../figures/wbgtmax/Figure_1a.csv')
fig1 = pd.read_csv(fn)
fig1.head()

Unnamed: 0,STATE,STATEFP,Type,wbgt_28_pop_mean
0,AL,1,County,187974
1,AL,1,Federal,114837
2,AL,1,Local,9914
3,AL,1,State,597932
4,AL,1,Total,910657


In [8]:
ans = fig1[fig1['Type'] == 'Total']['wbgt_28_pop_mean'].sum()
print(round(ans / 10**6,2),)

41.25


In [9]:
total = fig1[fig1['Type'] == 'Total']['wbgt_28_pop_mean'].sum()
tx_t = fig1[(fig1['STATE'] == 'TX') & (fig1['Type'] == 'Total')]['wbgt_28_pop_mean'].sum()
fl_t = fig1[(fig1['STATE'] == 'FL') & (fig1['Type'] == 'Total')]['wbgt_28_pop_mean'].sum()
tx_s = fig1[(fig1['STATE'] == 'TX') & (fig1['Type'] == 'State')]['wbgt_28_pop_mean'].sum()
fl_s = fig1[(fig1['STATE'] == 'FL') & (fig1['Type'] == 'State')]['wbgt_28_pop_mean'].sum()
print('TX & FL, as pct of Total US exposure', round(tx_t/total*100,2), round(fl_t/total*100,2))
print('TX & FL state prison, as pct of total state exposure', round(tx_s/tx_t*100,2), round(fl_s/tx_t*100,2))

TX & FL, as pct of Total US exposure 27.52 24.48
TX & FL state prison, as pct of total state exposure 62.49 51.5


In [10]:
state_t =  fig1[fig1['Type'] == 'State']['wbgt_28_pop_mean'].sum()
print('state, as pct of Total US exposure', round(state_t/total*100,2), round(state_t/10**6,2))

state, as pct of Total US exposure 60.77 25.07


In [11]:
state_t =  fig1[fig1['Type'] == 'County']['wbgt_28_pop_mean'].sum()
print('County, as pct of Total US exposure', round(state_t/total*100,2), state_t)

County, as pct of Total US exposure 26.88 11087330


# Figure 1b

In [12]:
# Check out data for figure 1
fn = os.path.join('../../../figures/wbgtmax/Figure_1b.csv')
fig1 = pd.read_csv(fn)
fig1.head()

Unnamed: 0,prison_id,wbgt_28_2016_2020,NAME,STATE,STATEFP,COUNTY,CITY,TYPE,POPULATION,CAPACITY,VAL_DATE
0,1,12.8,MIDLAND COUNTY CENTRAL DETENTION CENTER,TX,48,MIDLAND,MIDLAND,COUNTY,438,498,2020/02/27
1,3,54.4,SAN JACINTO COUNTY JAIL,TX,48,SAN JACINTO,COLDSPRING,COUNTY,96,144,2020/03/04
2,4,4.8,YELLOW MEDICINE COUNTY JAIL,MN,27,YELLOW MEDICINE,GRANITE FALLS,COUNTY,27,36,2020/01/02
3,5,0.2,NMJC THIEF RIVER FALLS SATELLITE HOME,MN,27,PENNINGTON,THIEF RIVER FALLS,LOCAL,2,5,2020/01/02
4,6,0.2,PREBLE COUNTY JAIL,OH,39,PREBLE,EATON,COUNTY,79,70,2020/02/20


In [13]:
ans = len(fig1[fig1['wbgt_28_2016_2020'] >= 75])
print('How many prisons had 75 days or more?', ans)

How many prisons had 75 days or more? 118


In [14]:
fig1[fig1['wbgt_28_2016_2020'] >= 75].sort_values('wbgt_28_2016_2020', ascending = False)

Unnamed: 0,prison_id,wbgt_28_2016_2020,NAME,STATE,STATEFP,COUNTY,CITY,TYPE,POPULATION,CAPACITY,VAL_DATE
309,566,126.2,STARR COUNTY JAIL,TX,48,STARR,RIO GRANDE CITY,COUNTY,249,275,2020/03/04
2161,3725,126.0,EDNA TAMAYO HOUSE,TX,48,CAMERON,HARLINGEN,STATE,21,24,2020/02/14
164,316,118.6,WEBB COUNTY JAIL,TX,48,WEBB,LAREDO,COUNTY,485,570,2020/03/04
1415,2526,117.8,WEBB COUNTY DETENTION CENTER,TX,48,WEBB,LAREDO,MULTI,2,480,2020/03/04
804,1473,116.6,CAMERON COUNTY DETENTION CENTER 2,TX,48,CAMERON,BROWNSVILLE,COUNTY,348,-999,2020/02/11
...,...,...,...,...,...,...,...,...,...,...,...
2083,3604,75.6,USP COLEMAN II,FL,12,SUMTER,SUMTERVILLE,FEDERAL,1374,956,2019/12/12
2081,3602,75.6,FCI COLEMAN MEDIUM,FL,12,SUMTER,SUMTERVILLE,FEDERAL,1486,1056,2019/12/12
1930,3351,75.6,FCI COLEMAN LOW CAMP,FL,12,SUMTER,SUMTERVILLE,FEDERAL,382,512,2019/12/12
980,1751,75.4,"HILLSBOROUGH COUNTY JAIL, ORIENT ROAD",FL,12,HILLSBOROUGH,TAMPA,COUNTY,670,1711,2019/12/12


# Figure 2

In [15]:
# Check out data for figure 1
fn = os.path.join('../../../figures/wbgtmax/Figure_2a.csv')
fig2 = pd.read_csv(fn)
fig2.head()

Unnamed: 0,STATE,state,year,wbgt_28_diff
0,AL,1,1982,1.1
1,AL,1,1983,2.0
2,AL,1,1984,1.8
3,AL,1,1985,2.1
4,AL,1,1986,2.2


### USA

In [16]:
metric = 'wbgt_28'
geo = 'USA'

In [17]:
avg = fig2[fig2['STATE'] == geo][metric+'_diff'].mean()
print('Avg. dif 1981 - 2020 USA:', round(avg, 2))

Avg. dif 1981 - 2020 USA: 5.45


In [None]:
std = fig2[fig2['STATE'] == geo][metric+'_diff'].std()
print('std. dif 1981 - 2020 USA:', round(std,2))

In [None]:
cv = std / avg
print('coef of var. 1981 - 2020 USA:', round(cv,1))

In [None]:
fig2[fig2['STATE'] == geo].sort_values('wbgt_28_diff', ascending=False)[['year', 'wbgt_28_state', metric+'_diff']].head(5)

### State Level

In [None]:
fig2.sort_values('wbgt_28_diff', ascending=False)[['STATE','year', metric+'_diff']].head(50);

In [None]:
fig2.groupby('STATE')['wbgt_28_diff'].mean().sort_values(ascending=False).head(10)

In [None]:
geo = 'AZ'

In [None]:
fig2[fig2['STATE'] == geo]['wbgt_28_state'].mean()

In [None]:
fig2[fig2['STATE'] == geo]['wbgt_28_prison'].mean()

In [None]:
# dif on avg az
top = 
bottom =


In [None]:
# GA Prison
geo = 'AZ'
years = list(range(1981, 2019 + 1, 1))
prison = fig2[fig2['STATE'] == geo][metric+'_prison']
data = pd.DataFrame({'x': years, 'y': prison})
plt.bar(data['x'], data['y'])
plt.ylim(0,100)
print(scipy.stats.linregress(data['x'], data['y']))

# GA Average
geo = 'GA'
years = list(range(1981, 2019 + 1, 1))
prison = fig2[fig2['STATE'] == geo][metric+'_state']
data = pd.DataFrame({'x': years, 'y': prison})
plt.bar(data['x'], data['y'])
plt.ylim(0,100)
print(scipy.stats.linregress(data['x'], data['y']))

# prison = fig2[fig2['STATE'] == geo][metric+'_state']
# plt.bar(years, prison)

# plt.ylim(0, 70)
plt.xlabel('Year')
plt.ylabel('Days WBGTmax > 28°C')
plt.legend(labels = ['GA Incarcerated', 'GA Non-incarcerated'])

# Old code

In [None]:
for geo in np.unique(fig2['STATE']):
    print(geo, len(fig2[fig2['STATE'] == geo]))

In [None]:
geo = 'AL'
years = list(range(1981, 2019 + 1, 1))
prison = fig2[fig2['STATE'] == geo][metric+'_prison']
data = pd.DataFrame({'x': years, 'y': prison})

sns.regplot(data, x = 'x', y = 'y', fit_reg = True)
plt.ylim(0,100)
print(scipy.stats.linregress(data['x'], data['y']))
c
# prison = fig2[fig2['STATE'] == geo][metric+'_state']
# plt.bar(years, prison)

# plt.ylim(0, 70)