## Chicago Census Data

#### Data Source: Chicago Community Data Snapshot (2013-2017)
https://datahub.cmap.illinois.gov/dataset/community-data-snapshots-raw-data/resource/50aa644b-0405-4fd5-a9b5-9012fda46943?inner_span=True

In [2]:
import pandas as pd

In [3]:
census = pd.read_csv("resources/census.csv")
census_key = pd.read_csv("resources/census_key.csv")

In [4]:
census.head()

Unnamed: 0,GEOG,2000_POP,2010_POP,TOT_POP,UND19,A20_34,A35_49,A50_64,A65_74,A75_84,...,ARABIC,KOREAN,OTHER_ASIAN,OTHER_EURO,OTHER_UNSPEC,2000_WHITE,2000_HISP,2000_BLACK,2000_ASIAN,2000_OTHER
0,Albany Park,57655,51542,51575,13354,13738,11975,8232,2329,1343,...,621,650,1801,3294,563,15866,26741,1907,10178,2963
1,Archer Heights,12644,13393,13233,4016,2933,2879,1897,907,369,...,24,0,0,64,0,6752,5485,74,48,285
2,Armour Square,12032,13391,13699,2997,2824,2539,2541,1371,1043,...,1,6,138,218,0,2062,448,2046,7305,171
3,Ashburn,39584,41081,43283,12983,8368,8998,8477,2855,977,...,444,9,91,309,258,14546,6674,17045,408,911
4,Auburn Gresham,55928,48743,45770,11432,9224,7941,9587,4041,2700,...,0,11,61,202,80,237,347,54862,45,437


In [5]:
#Select columns of interest and rename columns
census_df = census[["GEOG", "UND19","A20_34","A35_49","A50_64","A65_74","A75_84","OV85","MED_AGE","WHITE","HISP","BLACK","ASIAN","OTHER","IN_LBFRC","EMP","UNEMP","NOT_IN_LBFRC","LT_HS","HS","SOME_COLL","ASSOC","BACH","GRAD_PROF",
                   "INC_LT_25K","INC_25_50K","INC_50_75K","INC_75_100K","INC_100_150K","INC_GT_150","MEDINC"]]
census_df = census_df.rename(columns={"GEOG":"Area",
                                      "UND19":"<19y",
                                      "A20_34":"20-34y",
                                      "A35_49":"35-59y",
                                      "A50_64":"50-64y",
                                      "A65_74":"65-74y",
                                      "A75_84":"75-84y",
                                      "OV85":"85y<",
                                      "MED_AGE":"Median Age",
                                     "MEDINC":"Median Income"})
census_df.head()

Unnamed: 0,Area,<19y,20-34y,35-59y,50-64y,65-74y,75-84y,85y<,Median Age,WHITE,...,ASSOC,BACH,GRAD_PROF,INC_LT_25K,INC_25_50K,INC_50_75K,INC_75_100K,INC_100_150K,INC_GT_150,Median Income
0,Albany Park,13354,13738,11975,8232,2329,1343,604,33.6,14932,...,2047,7339,4041,3627,3835,2967,2091,2259,1632,55561
1,Archer Heights,4016,2933,2879,1897,907,369,231,33.0,2477,...,278,663,195,946,1149,787,384,493,148,44949
2,Armour Square,2997,2824,2539,2541,1371,1043,385,42.0,1599,...,505,1313,884,2483,1136,648,324,425,149,26543
3,Ashburn,12983,8368,8998,8477,2855,977,625,35.5,5044,...,2239,3595,1906,2021,2673,2653,2166,2215,1105,65499
4,Auburn Gresham,11432,9224,7941,9587,4041,2700,845,39.6,257,...,2113,2771,1805,7497,4448,2661,1297,1054,322,29821


In [6]:
#Load census key data
census_key.head()

Unnamed: 0,Community Area Number,COMMUNITY AREA NAME,PERCENT OF HOUSING CROWDED,PERCENT HOUSEHOLDS BELOW POVERTY,PERCENT AGED 16+ UNEMPLOYED,PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA,PERCENT AGED UNDER 18 OR OVER 64,PER CAPITA INCOME,HARDSHIP INDEX
0,1.0,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
1,2.0,West Ridge,7.8,17.2,8.8,20.8,38.5,23040,46.0
2,3.0,Uptown,3.8,24.0,8.9,11.8,22.2,35787,20.0
3,4.0,Lincoln Square,3.4,10.9,8.2,13.4,25.5,37524,17.0
4,5.0,North Center,0.3,7.5,5.2,4.5,26.2,57123,6.0


In [7]:
#Clean up dataframe to extract only community area number and area name
census_key_df = census_key.rename(columns={"COMMUNITY AREA NAME":"Area"})
census_key_df = census_key_df[["Community Area Number","Area"]]
census_key_df.head()

Unnamed: 0,Community Area Number,Area
0,1.0,Rogers Park
1,2.0,West Ridge
2,3.0,Uptown
3,4.0,Lincoln Square
4,5.0,North Center


In [11]:
#Combine with census data, so that census dataframe will have the community area number

combined_census = pd.merge(census_key_df, census_df,on="Area")
combined_census = combined_census.rename(columns={"Community Area Number":"community_area_number",
                                                 "Media Age":"median_age","Median Income":"median_income"})
combined_census.head()

Unnamed: 0,community_area_number,Area,<19y,20-34y,35-59y,50-64y,65-74y,75-84y,85y<,Median Age,...,ASSOC,BACH,GRAD_PROF,INC_LT_25K,INC_25_50K,INC_50_75K,INC_75_100K,INC_100_150K,INC_GT_150,median_income
0,1.0,Rogers Park,12092,16967,12526,9130,2718,1268,799,33.8,...,1825,9693,6627,8558,6268,4048,2167,1944,1273,37549
1,2.0,West Ridge,19951,16264,14581,13704,5138,2765,1320,35.6,...,2830,12247,7161,6180,6501,3965,2652,3302,2182,48701
2,3.0,Uptown,7030,18725,13253,10706,3333,1977,1272,37.5,...,2329,15315,9176,9629,5794,4321,3120,3293,3079,45644
3,4.0,Lincoln Square,7306,13696,10180,6588,2228,976,509,34.7,...,1527,11589,7831,3263,3987,3043,2512,2745,2882,66393
4,5.0,North Center,8266,10302,9454,4024,1602,808,486,33.8,...,925,10509,7017,1416,1870,2185,1986,2677,4415,97703


In [12]:
#Export to CSV file
combined_census.to_csv("cleaned_census_df.csv")

In [13]:
combined_census.columns

Index(['community_area_number', 'Area', '<19y', '20-34y', '35-59y', '50-64y',
       '65-74y', '75-84y', '85y<', 'Median Age', 'WHITE', 'HISP', 'BLACK',
       'ASIAN', 'OTHER', 'IN_LBFRC', 'EMP', 'UNEMP', 'NOT_IN_LBFRC', 'LT_HS',
       'HS', 'SOME_COLL', 'ASSOC', 'BACH', 'GRAD_PROF', 'INC_LT_25K',
       'INC_25_50K', 'INC_50_75K', 'INC_75_100K', 'INC_100_150K', 'INC_GT_150',
       'median_income'],
      dtype='object')

## Flu Shot Clinic Availibility Data 

#### Data source: Department of Public Health (2014-2019)
#### Flu seasons are identified by the years they span (ie. 2017-18) instead of the year they are given. Data shown are the number of flu clinics in a given community area in Chicago.
https://catalog.data.gov/dataset/flu-shot-locations-2014-present

In [15]:
#Load flushot data
flushot = pd.read_csv("resources/flushot.csv")
flushot.head()

Unnamed: 0,Season,Facility ID,Latitude,Longitude,Street1,Street2,City,State,Postal Code,Country,...,Eligibility,Cost,Notes,Record ID,Location,Boundaries - ZIP Codes,Community Areas,Zip Codes,Census Tracts,Wards
0,2017-2018,267,41.9685,-87.72876,4010 W LAWRENCE AVE,,CHICAGO,IL,60630,United States of America,...,,Call for details.,,2017-2018_267,"(41.9685, -87.72876)",48.0,14.0,21869,751.0,38.0
1,2015-2016,202,41.981429,-87.668555,5440 N CLARK ST,,CHICAGO,IL,60640,United States of America,...,,Call for details.,,2015-2016_202,"(41.981429, -87.668555)",15.0,76.0,22616,564.0,24.0
2,2015-2016,208,41.884543,-87.627803,151 N STATE ST,,CHICAGO,IL,60601,United States of America,...,,Call for details.,,2015-2016_208,"(41.8845429, -87.6278027)",42.0,38.0,14309,580.0,36.0
3,2015-2016,213,41.844305,-87.707719,3303 W 26TH ST,,CHICAGO,IL,60623,United States of America,...,,Call for details.,,2015-2016_213,"(41.8443048, -87.7077192)",57.0,32.0,21569,146.0,1.0
4,2016-2017,60,41.9683,-87.738086,4404 W. Lawrence Ave.,,Chicago,IL,60630,United States of America,...,,"No cost to individual. If insured, insurance w...",Free to individual. Insurance will be billed.,2016-2017_60,"(41.9683, -87.7380857)",48.0,14.0,21869,751.0,38.0


In [16]:
#Select columns of interest
flushot_df = flushot[["Season","Latitude","Longitude","Postal Code","Community Areas"]]
flushot_df=flushot_df.rename(columns={"Community Areas":"community_area_number"})
flushot_df.head()

Unnamed: 0,Season,Latitude,Longitude,Postal Code,community_area_number
0,2017-2018,41.9685,-87.72876,60630,14.0
1,2015-2016,41.981429,-87.668555,60640,76.0
2,2015-2016,41.884543,-87.627803,60601,38.0
3,2015-2016,41.844305,-87.707719,60623,32.0
4,2016-2017,41.9683,-87.738086,60630,14.0


In [17]:
#Export to CSV file
flushot_df.to_csv("cleaned_flu_df.csv")

In [20]:
flushot_df.columns

Index(['Season', 'Latitude', 'Longitude', 'Postal Code',
       'community_area_number'],
      dtype='object')

In [19]:
#Calculate total number of clinic available based on year and community area code
flu_total = flushot_df.groupby(["community_area_number","Season"]).count()["Postal Code"]

flu_total.head()

community_area_number  Season   
1.0                    2014-2015    2
                       2015-2016    2
                       2016-2017    2
                       2017-2018    2
                       2018-2019    2
Name: Postal Code, dtype: int64