In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
import os
import seaborn as sns
import scipy
import geopandas as gpd

# Prison Populations

In [2]:
fn = os.path.join('../../../data/shapefiles/Prison_Boundaries/Prison_Boundaries.shp')

In [3]:
gdf = gpd.read_file(fn)
gdf['VAL_METHOD']

0       IMAGERY/OTHER
1       IMAGERY/OTHER
2       IMAGERY/OTHER
3       IMAGERY/OTHER
4       IMAGERY/OTHER
            ...      
6733    IMAGERY/OTHER
6734    IMAGERY/OTHER
6735    IMAGERY/OTHER
6736    IMAGERY/OTHER
6737             None
Name: VAL_METHOD, Length: 6738, dtype: object

In [4]:
print(gdf.columns)

Index(['FID', 'FACILITYID', 'NAME', 'ADDRESS', 'CITY', 'STATE', 'ZIP', 'ZIP4',
       'TELEPHONE', 'TYPE', 'STATUS', 'POPULATION', 'COUNTY', 'COUNTYFIPS',
       'COUNTRY', 'NAICS_CODE', 'NAICS_DESC', 'SOURCE', 'SOURCEDATE',
       'VAL_METHOD', 'VAL_DATE', 'WEBSITE', 'SECURELVL', 'CAPACITY',
       'SHAPE_Leng', 'GlobalID', 'CreationDa', 'Creator', 'EditDate', 'Editor',
       'SHAPE_Le_1', 'SHAPE_Area', 'geometry'],
      dtype='object')


In [5]:
gdf['VAL_DATE']

0       2020-02-27
1       2020-02-11
2       2020-03-04
3       2020-01-02
4       2020-01-02
           ...    
6733    2020-02-25
6734    2020-02-25
6735    2020-02-25
6736    2020-02-25
6737          None
Name: VAL_DATE, Length: 6738, dtype: object

In [6]:
print('how many in tx + fl out of total', (145240+98941)/2032647)

how many in tx + fl out of total 0.12012956504498813


# Figure 1

In [8]:
# Check out data for figure 1
fn = os.path.join('../../../figures/wbgtmax/Figure_1a.csv')
fig1 = pd.read_csv(fn)
fig1.head()

Unnamed: 0,STATE,STATEFP,Type,wbgt_28_pop_mean
0,AL,1,County,187974
1,AL,1,Federal,114837
2,AL,1,Local,9914
3,AL,1,State,597932
4,AL,1,Total,910657


In [14]:
ans = fig1[fig1['Type'] == 'Total']['wbgt_28_pop_mean'].sum()
print(round(ans / 10**6,2),)

41.25


In [21]:
total = fig1[fig1['Type'] == 'Total']['wbgt_28_pop_mean'].sum()
tx_t = fig1[(fig1['STATE'] == 'TX') & (fig1['Type'] == 'Total')]['wbgt_28_pop_mean'].sum()
fl_t = fig1[(fig1['STATE'] == 'FL') & (fig1['Type'] == 'Total')]['wbgt_28_pop_mean'].sum()
tx_s = fig1[(fig1['STATE'] == 'TX') & (fig1['Type'] == 'State')]['wbgt_28_pop_mean'].sum()
fl_s = fig1[(fig1['STATE'] == 'FL') & (fig1['Type'] == 'State')]['wbgt_28_pop_mean'].sum()
print('TX & FL, as pct of Total US exposure', round(tx_t/total*100,2), round(fl_t/total*100,2))
print('TX & FL state prison, as pct of total state exposure', round(tx_s/tx_t*100,2), round(fl_s/tx_t*100,2))

TX & FL, as pct of Total US exposure 27.52 24.48
TX & FL state prison, as pct of total state exposure 62.49 51.5


In [18]:
state_t =  fig1[fig1['Type'] == 'State']['wbgt_28_pop_mean'].sum()
print('state, as pct of Total US exposure', round(state_t/total*100,2), round(state_t/10**6,2))

state, as pct of Total US exposure 60.77 25.07


In [20]:
state_t =  fig1[fig1['Type'] == 'County']['wbgt_28_pop_mean'].sum()
print('County, as pct of Total US exposure', round(state_t/total*100,2), state_t)

County, as pct of Total US exposure 26.88 11087330


# Figure 2

In [38]:
# Check out data for figure 1
fn = os.path.join('../../../figures/wbgtmax/Figure_2a.csv')
fig2 = pd.read_csv(fn)
fig2.head()

Unnamed: 0,STATE,state,year,wbgt_28_diff
0,AL,1,1982,1.1
1,AL,1,1983,2.0
2,AL,1,1984,1.8
3,AL,1,1985,2.1
4,AL,1,1986,2.2


### USA

In [24]:
metric = 'wbgt_28'
geo = 'USA'

In [26]:
avg = fig2[fig2['STATE'] == geo][metric+'_diff'].mean()
print('Avg. dif 1981 - 2020 USA:', round(avg, 2))

Avg. dif 1981 - 2020 USA: 5.45


In [28]:
std = fig2[fig2['STATE'] == geo][metric+'_diff'].std()
print('std. dif 1981 - 2020 USA:', round(std,2))

std. dif 1981 - 2020 USA: 1.28


In [29]:
cv = std / avg
print('coef of var. 1981 - 2020 USA:', round(cv,1))

coef of var. 1981 - 2020 USA: 0.2


In [30]:
fig2[fig2['STATE'] == geo].sort_values('wbgt_28_diff', ascending=False)[['year', 'wbgt_28_state', metric+'_diff']].head(5)

KeyError: "['wbgt_28_state'] not in index"

### State Level

In [31]:
fig2.sort_values('wbgt_28_diff', ascending=False)[['STATE','year', metric+'_diff']].head(50);

In [32]:
fig2.groupby('STATE')['wbgt_28_diff'].mean().sort_values(ascending=False).head(10)

STATE
AZ     13.105128
CA      7.838462
NV      7.748718
GA      7.607692
USA     5.453846
AL      3.264103
IL      3.192308
AR      3.156410
NE      2.635897
NC      2.610256
Name: wbgt_28_diff, dtype: float64

In [33]:
geo = 'AZ'

In [34]:
fig2[fig2['STATE'] == geo]['wbgt_28_state'].mean()

KeyError: 'wbgt_28_state'

In [None]:
fig2[fig2['STATE'] == geo]['wbgt_28_prison'].mean()

In [None]:
# dif on avg az
top = 
bottom =


In [None]:
# GA Prison
geo = 'AZ'
years = list(range(1981, 2019 + 1, 1))
prison = fig2[fig2['STATE'] == geo][metric+'_prison']
data = pd.DataFrame({'x': years, 'y': prison})
plt.bar(data['x'], data['y'])
plt.ylim(0,100)
print(scipy.stats.linregress(data['x'], data['y']))

# GA Average
geo = 'GA'
years = list(range(1981, 2019 + 1, 1))
prison = fig2[fig2['STATE'] == geo][metric+'_state']
data = pd.DataFrame({'x': years, 'y': prison})
plt.bar(data['x'], data['y'])
plt.ylim(0,100)
print(scipy.stats.linregress(data['x'], data['y']))

# prison = fig2[fig2['STATE'] == geo][metric+'_state']
# plt.bar(years, prison)

# plt.ylim(0, 70)
plt.xlabel('Year')
plt.ylabel('Days WBGTmax > 28°C')
plt.legend(labels = ['GA Incarcerated', 'GA Non-incarcerated'])

# Old code

In [None]:
for geo in np.unique(fig2['STATE']):
    print(geo, len(fig2[fig2['STATE'] == geo]))

In [None]:
geo = 'AL'
years = list(range(1981, 2019 + 1, 1))
prison = fig2[fig2['STATE'] == geo][metric+'_prison']
data = pd.DataFrame({'x': years, 'y': prison})

sns.regplot(data, x = 'x', y = 'y', fit_reg = True)
plt.ylim(0,100)
print(scipy.stats.linregress(data['x'], data['y']))
c
# prison = fig2[fig2['STATE'] == geo][metric+'_state']
# plt.bar(years, prison)

# plt.ylim(0, 70)