In [1]:
# installs
# !pip install ipdb dateparser

In [2]:
# imports
import csv, requests, json, dateutil, datetime, dateparser
import pandas as pd

In [3]:
# configs

# Inputs read by the cells below: two local CSVs and the remote ccme JSON feed.
ccvi_file_path = 'ccvi.csv'
vax_file_path = 'vax_by_CA_112021.csv'
ccme_file_url = 'https://raw.githubusercontent.com/beamalsky/medical-examiner-data/master/src/data/final/cases.json'

# Defined for completeness; not read by any of the visible cells.
vax_supply_path = "doses_by_community.csv"

# Destination of the joined, relabeled table.
output_file_path = 'combined_ca_data.csv'

# Deaths dated strictly after this moment count as "since spring".
spring_wave_start_date = datetime.datetime(year=2021, month=3, day=28)

In [4]:
# get names, ccvi
# Read every row of the CCVI file into a list of dicts keyed by the CSV header.
# The with-block closes the file handle as soon as the rows are materialised
# (previously it was left open), and newline='' lets the csv module do its own
# line-ending handling, as its documentation recommends.
with open(ccvi_file_path, newline='') as ccvi_file:
    ccvi = list(csv.DictReader(ccvi_file))

In [5]:
# get mortality
# Download the medical-examiner case list and group the records by each
# decedent's 'community' value (records whose community is None end up under
# the None key).
# A timeout keeps the cell from hanging forever on a stalled connection.
ccme_response = requests.get(ccme_file_url, timeout=60)
# Fail with a clear HTTP error rather than trying to JSON-decode an error page.
ccme_response.raise_for_status()
mortality_by_ca = json.loads(ccme_response.content)

ca_decedents = dict()
for decedent in mortality_by_ca:
    # setdefault creates the per-community list the first time a key is seen
    ca_decedents.setdefault(decedent['community'], []).append(decedent)

In [6]:
# get vax rate
# Read every row of the vaccination file into a list of dicts keyed by the
# CSV header. The with-block closes the file handle once the rows are loaded
# (previously it was left open); newline='' follows the csv module's guidance.
with open(vax_file_path, newline='') as vax_file:
    ca_rates = list(csv.DictReader(vax_file))

In [7]:
# protect chicago
# Names of the community areas flagged as Protect Chicago Plus further down;
# spelled exactly as they are matched against the joined table's keys.
pcps = [
    'Archer Heights',
    'Austin',
    'Belmont Cragin',
    'Chicago Lawn',
    'Englewood',
    'Gage Park',
    'Humboldt Park',
    'Montclare',
    'New City',
    'North Lawndale',
    'Roseland',
    'South Deering',
    'Little Village',
    'Washington Heights',
    'West Englewood',
]
pcps

['Archer Heights',
 'Austin',
 'Belmont Cragin',
 'Chicago Lawn',
 'Englewood',
 'Gage Park',
 'Humboldt Park',
 'Montclare',
 'New City',
 'North Lawndale',
 'Roseland',
 'South Deering',
 'Little Village',
 'Washington Heights',
 'West Englewood']

In [8]:
# transform keys

# fixes inconsistent/misnamed community areas that are keyed in various ways
# ... every step below is idempotent, so this cell can be re-run on its own
#     without raising or appending duplicate rows


def _rename_key(mapping, old_key, new_key):
    """Move ``mapping[old_key]`` to ``mapping[new_key]``.

    Does nothing when the rename has already happened (``old_key`` is gone and
    ``new_key`` is present). Raises KeyError when neither key exists, because
    that means the expected community is missing from the data altogether.
    """
    if old_key in mapping:
        mapping[new_key] = mapping.pop(old_key)
    elif new_key not in mapping:
        raise KeyError(old_key)


def _rename_community_area(rows, old_name, new_name):
    """Relabel the first row whose 'Community Area' equals ``old_name``.

    Does nothing when no row carries ``old_name`` but one already carries
    ``new_name``. Raises IndexError when neither name is found, matching the
    failure the original ``[...][0]`` lookup produced for a missing area.
    """
    names = [row['Community Area'] for row in rows]
    if old_name in names:
        rows[names.index(old_name)]['Community Area'] = new_name
    elif new_name not in names:
        raise IndexError(f'no row with Community Area {old_name!r}')


# medical-examiner data is keyed by upper-case names with the alias in parentheses
_rename_key(ca_decedents, 'SOUTH LAWNDALE (LITTLE VILLAGE)', 'Little Village')
_rename_key(ca_decedents, 'LOWER WEST SIDE (PILSEN)', 'Pilsen')

# CCVI rows use the official community-area names
_rename_community_area(ccvi, 'South Lawndale', 'Little Village')
_rename_community_area(ccvi, 'Lower West Side', 'Pilsen')

# vaccination rows use upper-case names; OHARE is also missing its apostrophe
_rename_community_area(ca_rates, 'SOUTH LAWNDALE', 'Little Village')
_rename_community_area(ca_rates, 'LOWER WEST SIDE', 'Pilsen')
_rename_community_area(ca_rates, 'OHARE', "O'HARE")

# missing from CCVI
_missing_from_ccvi = [
    {'Community Area':'Burnside','CCVI': None, 'Population': 2254},
    {'Community Area':'Fuller Park','CCVI': None, 'Population': 2399}, # TODO verify
]
for _missing_row in _missing_from_ccvi:
    # only append when absent, so a re-run does not add the row twice
    if not any(row['Community Area'] == _missing_row['Community Area'] for row in ccvi):
        ccvi.append(_missing_row)

In [9]:
# join ca data
# Builds ca_data: one dict per community area, keyed by the title-cased name,
# combining CCVI score, population, death counts, vaccination rate and PCP flag.
ca_data = dict()

# ccvi
for ccvi_row in ccvi:
    ca_data[ccvi_row['Community Area'].title()] = {
        'CCVI': ccvi_row['CCVI'],
        'pop': int(ccvi_row['Population']),
        # Start the death counts at zero so an area with no ccme records still
        # has these keys (the derivations cell would otherwise hit a KeyError).
        'deaths': 0,
        'deaths_since_spring': 0,
    }

# ccme
for ca, decedents in ca_decedents.items():
    # `ca` can be None for records without a community, hence the truthiness check
    if ca and ca.title() in ca_data:
        ca_name = ca.title()
        # all deaths
        ca_data[ca_name]['deaths'] = len(decedents)
        # all deaths since spring wave
        ca_data[ca_name]['deaths_since_spring'] = sum(
            1 for x in decedents
            if dateparser.parse(x['death_date']) > spring_wave_start_date
        )
    else:
        print(ca,'found in ccme but not ca_data')

# vax
for rate_row in ca_rates:
    ca_name = rate_row['Community Area'].title()
    if ca_name and ca_name in ca_data:
        ca_data[ca_name]['complete_vax'] = rate_row['% Complete Series']
    else:
        print(ca_name,'found in vax but not ca_data')

# pcps
for ca_name in ca_data:
    # membership test already yields the True/False flag
    ca_data[ca_name]['PCP'] = ca_name in pcps

None found in ccme but not ca_data


In [10]:
# sanity checks

# the joined table should hold 77 community areas
ca_total = len(ca_data)
print('number of CAs:', ca_total)

# and 15 of them should carry the PCP flag
pcp_total = sum(1 for info in ca_data.values() if info['PCP'])
print('number of PCPs:', pcp_total)

number of CAs: 77
number of PCPs: 15


In [11]:
# derivations

# Adds two per-100,000-resident rates to every community area's record.
for stats in ca_data.values():
    population = stats['pop']
    # deaths per 100K
    stats['deaths_per_100k'] = stats['deaths'] / population * 100000
    # deaths since spring per 100K
    stats['deaths_since_spring_per_100k'] = stats['deaths_since_spring'] / population * 100000


In [12]:
# listify rows & re-label columns

# One output dict per community area, using the human-readable column labels.
# The vaccination rate is truncated to a whole number and suffixed with '%';
# both death rates are rounded to one decimal place.
ca_rows = [
    {
        'Community Area': name,
        'Protect Chicago Plus?': '✶' if stats['PCP'] else None,
        'Vulnerability Index': stats['CCVI'],
        'Vaccination Rate': f"{int(float(stats['complete_vax']))}%",
        'Deaths per 100,000': round(stats['deaths_per_100k'], 1),
        'Deaths per 100,000 since spring': round(stats['deaths_since_spring_per_100k'], 1),
    }
    for name, stats in ca_data.items()
]

In [13]:
# display the first relabeled row as a quick visual check of the output format
ca_rows[0]

{'Community Area': 'West Englewood',
 'Protect Chicago Plus?': '✶',
 'Vulnerability Index': '64',
 'Vaccination Rate': '54%',
 'Deaths per 100,000': 337.8,
 'Deaths per 100,000 since spring': 56.9}

In [14]:
# write out
# Column order of the output CSV; each name matches a key of the ca_rows dicts.
headers = ['Community Area','Protect Chicago Plus?','Vulnerability Index',
           'Vaccination Rate','Deaths per 100,000','Deaths per 100,000 since spring']
# - the with-block closes the file even if writing raises part-way through
# - newline='' leaves line endings to the csv module (no blank rows on Windows)
# - utf-8 is set explicitly because the '✶' marker cannot be encoded by every
#   platform's default encoding
with open(output_file_path, 'w', newline='', encoding='utf-8') as output_file:
    output_csv = csv.DictWriter(output_file, headers)
    output_csv.writeheader()
    output_csv.writerows(ca_rows)