In [1]:
## looking into public services accessibility by zip-code, alderman district
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import matplotlib
from matplotlib.pyplot import plot

# Milwaukee data sets: EMS calls for service and liquor licenses.
calls_service = pd.read_csv('../data/mfdems.csv')
liq_lic = pd.read_csv('../data/liquorlicenses.csv')

# IRS individual income tax statistics by ZIP code, 2016. Source:
# https://www.irs.gov/statistics/soi-tax-stats-individual-income-tax-statistics-zip-code-data-soi
# The `zipcode` column is cast to float64 as part of the load.
nat_irs_16 = pd.read_csv('../data/16zpallagi.csv').astype({'zipcode': np.float64})

FileNotFoundError: File b'mfdems.csv' does not exist

In [None]:
# Print a summary of the EMS call frame (columns, non-null counts, dtypes).
calls_service.info()

In [None]:
# Distinct ZIP codes and aldermanic districts that occur in the EMS call data.
# Missing values are removed with dropna() instead of a per-element
# `~np.isnan(x)` test: bitwise-not only acts as a logical "not" on NumPy
# booleans (for a plain Python bool, ~True == -2, which is truthy), and
# np.isnan raises TypeError on non-numeric values.
mke_zipcodes = calls_service['ZIP Code'].dropna().unique().tolist()
mke_alderdists = calls_service['Aldermanic District'].dropna().unique().tolist()

# Keep only the IRS rows whose ZIP code appears in the call data.
mke_irs_16 = nat_irs_16[nat_irs_16['zipcode'].isin(mke_zipcodes)]
mke_irs_16.head()

In [None]:
# Per-ZIP totals of the selected IRS columns, relabelled with readable names.
irs_totals_per_zip = mke_irs_16.groupby('zipcode')[['N1', 'N2', 'NUMDEP', 'ELDERLY']].sum()
mke_zip_irs = irs_totals_per_zip.rename(columns={
    'N1': 'returns',
    'N2': 'exemptions',
    'NUMDEP': 'dependents',
    'ELDERLY': 'elderly',
})
mke_zip_irs.head()

In [None]:
# Same IRS totals, now broken out by (zipcode, agi_stub).
mke_zip_agi_irs = (
    mke_irs_16
    .groupby(['zipcode', 'agi_stub'])[['N1', 'N2', 'NUMDEP', 'ELDERLY']]
    .sum()
    .rename(columns={
        'N1': 'returns',
        'N2': 'exemptions',
        'NUMDEP': 'dependents',
        'ELDERLY': 'elderly',
    })
)

# Each row's share (in percent) of all returns in its ZIP code; the ZIP code
# is the first index level.
returns_per_zip = mke_zip_agi_irs.groupby(level=[0])['returns'].transform('sum')
mke_zip_agi_irs['returns_pct_zip'] = 100 * mke_zip_agi_irs['returns'] / returns_per_zip
mke_zip_agi_irs.head()

In [None]:
# N1 laid out as a table: one row per agi_stub, one column per ZIP code.
agi_foreach_zip = mke_irs_16.pivot(
    index='agi_stub',
    columns='zipcode',
    values='N1',
)
agi_foreach_zip

In [None]:
# N1 per ZIP code: one column per agi_stub ...
agi_by_zip = mke_irs_16.pivot(index='zipcode', columns='agi_stub', values='N1')
# ... and the total across all agi_stub values, in a single column named 'sum'.
pop_by_zip = mke_irs_16.groupby('zipcode')['N1'].agg(['sum'])

# Combined table, indexed by ZIP code: the total followed by the per-stub columns.
errythang = pop_by_zip.join(agi_by_zip)
errythang.head()

In [None]:
# NOTE: `df` is an alias of `calls_service`, not a copy, so the column
# overwrite below is also visible through `calls_service`.
df = calls_service

# Reduce every incident date to a 'YYYY-MM' string.
df['Incident Date'] = pd.to_datetime(df['Incident Date']).dt.strftime('%Y-%m')

# Distinct final call types in sorted order. Missing values are dropped
# first: a NaN (float) mixed in with the string labels would make the sort
# raise TypeError.
call_types = sorted(df['Final Call for Service Type'].dropna().unique())
print(call_types)

In [None]:
## Calls per original call type (rows with a non-null Incident Date),
## most frequent first; show both ends of the ranking.
original_type_counts = (
    df.groupby('Original Call for Service Type')['Incident Date']
    .agg(['count'])
    .sort_values(by='count', ascending=False)
)
print(original_type_counts.head())
print(original_type_counts.tail())

In [None]:
## Calls per final call type (rows with a non-null Incident Date),
## most frequent first; show both ends of the ranking.
final_type_counts = (
    df.groupby('Final Call for Service Type')['Incident Date']
    .agg(['count'])
    .sort_values(by='count', ascending=False)
)
print(final_type_counts.head())
print(final_type_counts.tail())

In [None]:
## The original and final call-type listings clearly differ; count every
## (original, final) pairing to see how.
call_type_pair = ['Original Call for Service Type', 'Final Call for Service Type']
df.groupby(call_type_pair).agg(['count'])

In [None]:
# Calls per ZIP code, counted over rows with a non-null original call type.
# groupby() already returns its keys in ascending order, so no extra sort
# is needed.
all_calls_by_zip = df.groupby('ZIP Code')['Original Call for Service Type'].agg(['count'])
all_calls_by_zip.columns = ['ServiceCalls']

# The same count restricted to "Sick Person" calls.
sick_calls = df[df['Original Call for Service Type'] == 'Sick Person (Specific Diagnosis)']
sick_byZip = sick_calls.groupby('ZIP Code')['Original Call for Service Type'].agg(['count'])
sick_byZip.columns = ['SickPerson']

service_byZip = all_calls_by_zip.join(sick_byZip)

# Attach the call counts to the IRS table (both are indexed by ZIP code).
# Columns left over from a previous run of this cell are dropped first, so
# re-running does not produce ServiceCalls_x / ServiceCalls_y duplicates and
# break the sort below.
# (The earlier bare `sort_values` calls discarded their results and had no
# effect, so they are removed.)
errythang = (
    errythang
    .drop(columns=service_byZip.columns, errors='ignore')
    .merge(service_byZip, left_index=True, right_index=True, how='left')
)
errythang.sort_values(by=['ServiceCalls', 'SickPerson'], ascending=False)

In [None]:
def _count_calls_by_district(calls, label, call_type=None):
    """Count service calls per aldermanic district.

    Parameters
    ----------
    calls : pandas.DataFrame
        Call records with 'Aldermanic District' and
        'Final Call for Service Type' columns.
    label : str
        Name given to the single count column of the result.
    call_type : str, optional
        If given, only rows whose final call type equals this value are
        counted; otherwise every row with a non-null final call type is.

    Returns
    -------
    pandas.DataFrame
        One row per district (the district is the index) and one column
        named `label`.
    """
    if call_type is not None:
        calls = calls[calls['Final Call for Service Type'] == call_type]
    counts = calls.groupby('Aldermanic District')['Final Call for Service Type'].agg(['count'])
    counts.columns = [label]
    return counts


# MKE Service Call data
# Every frame below is indexed by district so that the later joins align on
# it. `downgraded` used to be built with as_index=False, unlike its siblings;
# where pandas honours that flag the frame gets a 0..n-1 index and an index
# join would pair counts with the wrong districts.
cardiac = _count_calls_by_district(
    df, 'CardiacOrRespiratoryArrest/Death', 'Cardiac or Respiratory Arrest/Death')
downgraded = _count_calls_by_district(df, 'DowngradedResponse', 'Downgraded Response')
#### Assault is only organized by Aldermanic District; can't get it by ZIP Code
assault_byAldDist = _count_calls_by_district(
    df, 'Assault/SexualAssault', 'Assault/Sexual Assault')
any_service_call = _count_calls_by_district(df, 'ServiceCalls')

## MKE Liquor License data
# Licenses per district, counted over rows with a non-null EXP_DATE.
liq_avail = liq_lic.groupby('ALDERMANIC_DISTRICT')['EXP_DATE'].agg(['count'])
liq_avail.columns = ['LiquorLicenses']

In [None]:
# Build the per-district table. The downgraded counts are joined onto the
# cardiac counts first, and that pair is then joined onto the overall call
# counts, followed by the assault counts.
cardiac_and_downgraded = cardiac.join(downgraded)
any_card_down = (
    any_service_call
    .join(cardiac_and_downgraded)
    .join(assault_byAldDist)
)

# Add liquor-license counts, keeping every district present in the call data.
all_byAldDist = any_card_down.merge(
    liq_avail,
    how='left',
    left_index=True,
    right_index=True,
)

In [None]:
# Rank districts from highest to lowest; later columns break ties in earlier ones.
district_sort_keys = [
    'ServiceCalls',
    'Assault/SexualAssault',
    'CardiacOrRespiratoryArrest/Death',
    'DowngradedResponse',
    'LiquorLicenses',
]
all_byAldDist.sort_values(by=district_sort_keys, ascending=False)

In [None]:
## Notice that District 4 is in the top five of all three lists!
## That's not a good thing

In [None]:
# Scatter of liquor-license count against call counts, one point per district.
# The cardiac column is named 'CardiacOrRespiratoryArrest/Death' where it is
# created; the previous spelling 'CardiacRespiratoryArrestDeath' is not a
# column of the table and raised KeyError.
all_byAldDist.plot(x='LiquorLicenses', y='CardiacOrRespiratoryArrest/Death', style='o')
all_byAldDist.plot(x='LiquorLicenses', y='DowngradedResponse', style='o')