In [1]:
import pandas as pd
import censusdata

# Variables

## Total Population

In [2]:
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B01003'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B01003_001E  | TOTAL POPULATION               | !! Estimate Total                                        | int  
-------------------------------------------------------------------------------------------------------------------


In [3]:
total_population = 'B01003_001E'

In [4]:
variables = {'total_population':total_population}
variables

{'total_population': 'B01003_001E'}

## Median Income

In [5]:
censusdata.search('acs5', 2018, 'concept', 'median income')

[('B06011PR_001E',
  'MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2018 INFLATION-ADJUSTED DOLLARS) BY PLACE OF BIRTH IN PUERTO RICO',
  'Estimate!!Median income in the past 12 months --!!Total'),
 ('B06011PR_002E',
  'MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2018 INFLATION-ADJUSTED DOLLARS) BY PLACE OF BIRTH IN PUERTO RICO',
  'Estimate!!Median income in the past 12 months --!!Total!!Born in Puerto Rico'),
 ('B06011PR_003E',
  'MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2018 INFLATION-ADJUSTED DOLLARS) BY PLACE OF BIRTH IN PUERTO RICO',
  'Estimate!!Median income in the past 12 months --!!Total!!Born in other state of the United States'),
 ('B06011PR_004E',
  'MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2018 INFLATION-ADJUSTED DOLLARS) BY PLACE OF BIRTH IN PUERTO RICO',
  'Estimate!!Median income in the past 12 months --!!Total!!Native; born elsewhere'),
 ('B06011PR_005E',
  'MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2018 INFLATION-ADJUSTED DOLLARS) BY PLACE OF BIRTH IN PUERTO RICO',
  'Estimate!!Me

In [6]:
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B06011'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B06011_001E  | MEDIAN INCOME IN THE PAST 12 M | !! !! Estimate Median income in the past 12 months -- To | int  
B06011_002E  | MEDIAN INCOME IN THE PAST 12 M | !! !! !! Estimate Median income in the past 12 months -- | int  
B06011_003E  | MEDIAN INCOME IN THE PAST 12 M | !! !! !! Estimate Median income in the past 12 months -- | int  
B06011_004E  | MEDIAN INCOME IN THE PAST 12 M | !! !! !! Estimate Median income in the past 12 months -- | int  
B06011_005E  | MEDIAN INCOME IN THE PAST 12 M | !! !! !! Estimate Median income in the past 12 months -- | int  
-------------------------------------------------------------------------------------------------------------------


In [7]:
median_income = 'B06011_001E'
variables['median_income'] = median_income
variables

{'total_population': 'B01003_001E', 'median_income': 'B06011_001E'}

## Minority Representation

In [8]:
censusdata.search('acs5', 2018, 'concept', 'hispanic or latino ')

[('B03001_001E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN',
  'Estimate!!Total'),
 ('B03001_002E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN',
  'Estimate!!Total!!Not Hispanic or Latino'),
 ('B03001_003E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN',
  'Estimate!!Total!!Hispanic or Latino'),
 ('B03001_004E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN',
  'Estimate!!Total!!Hispanic or Latino!!Mexican'),
 ('B03001_005E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN',
  'Estimate!!Total!!Hispanic or Latino!!Puerto Rican'),
 ('B03001_006E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN',
  'Estimate!!Total!!Hispanic or Latino!!Cuban'),
 ('B03001_007E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN',
  'Estimate!!Total!!Hispanic or Latino!!Dominican (Dominican Republic)'),
 ('B03001_008E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN',
  'Estimate!!Total!!Hispanic or Latino!!Central American'),
 ('B03001_009E',
  'HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN

In [9]:
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B02001'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B02001_001E  | RACE                           | !! Estimate Total                                        | int  
B02001_002E  | RACE                           | !! !! Estimate Total White alone                         | int  
B02001_003E  | RACE                           | !! !! Estimate Total Black or African American alone     | int  
B02001_004E  | RACE                           | !! !! Estimate Total American Indian and Alaska Native a | int  
B02001_005E  | RACE                           | !! !! Estimate Total Asian alone                         | int  
B02001_006E  | RACE                           | !! !! Estimate Total Native Hawaiian and Other Pacific I | int  
B02001_007E  | RACE                           | !! !! Estimate Total Some other race alone   

In [10]:
total_white = 'B02001_002E'
variables['total_white'] = total_white
variables

{'total_population': 'B01003_001E',
 'median_income': 'B06011_001E',
 'total_white': 'B02001_002E'}

In [11]:
# SEX BY AGE (WHITE ALONE, NOT HISPANIC OR LATINO
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B01001H'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B01001H_001E | SEX BY AGE (WHITE ALONE, NOT H | !! Estimate Total                                        | int  
B01001H_002E | SEX BY AGE (WHITE ALONE, NOT H | !! !! Estimate Total Male                                | int  
B01001H_003E | SEX BY AGE (WHITE ALONE, NOT H | !! !! !! Estimate Total Male Under 5 years               | int  
B01001H_004E | SEX BY AGE (WHITE ALONE, NOT H | !! !! !! Estimate Total Male 5 to 9 years                | int  
B01001H_005E | SEX BY AGE (WHITE ALONE, NOT H | !! !! !! Estimate Total Male 10 to 14 years              | int  
B01001H_006E | SEX BY AGE (WHITE ALONE, NOT H | !! !! !! Estimate Total Male 15 to 17 years              | int  
B01001H_007E | SEX BY AGE (WHITE ALONE, NOT H | !! !! !! Estimate Total Male 18 and 19 years 

In [12]:
total_white_nonhispanic = 'B01001H_001E'

variables['total_white_nonhispanic'] = total_white_nonhispanic

variables

{'total_population': 'B01003_001E',
 'median_income': 'B06011_001E',
 'total_white': 'B02001_002E',
 'total_white_nonhispanic': 'B01001H_001E'}

In [13]:
# censusdata.printtable(censusdata.censustable('acs5', 2018, 'B01001I'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B01001I_001E | SEX BY AGE (HISPANIC OR LATINO | !! Estimate Total                                        | int  
B01001I_002E | SEX BY AGE (HISPANIC OR LATINO | !! !! Estimate Total Male                                | int  
B01001I_003E | SEX BY AGE (HISPANIC OR LATINO | !! !! !! Estimate Total Male Under 5 years               | int  
B01001I_004E | SEX BY AGE (HISPANIC OR LATINO | !! !! !! Estimate Total Male 5 to 9 years                | int  
B01001I_005E | SEX BY AGE (HISPANIC OR LATINO | !! !! !! Estimate Total Male 10 to 14 years              | int  
B01001I_006E | SEX BY AGE (HISPANIC OR LATINO | !! !! !! Estimate Total Male 15 to 17 years              | int  
B01001I_007E | SEX BY AGE (HISPANIC OR LATINO | !! !! !! Estimate Total Male 18 and 19 years 

In [14]:
# total_hispanic = 'B01001I_001E'
# variables['total_hispanic'] = total_hispanic
# variables

{'total_population': 'B01003_001E',
 'median_income': 'B06011_001E',
 'total_white': 'B02001_002E',
 'total_white_nonhispanic': 'B01001H_001E',
 'total_hispanic': 'B01001I_001E'}

## Education

In [15]:
censusdata.search('acs5', 2018, 'concept', 'EDUCATIONAL ATTAINMENT FOR THE')

[('B15001_001E',
  'SEX BY AGE BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 18 YEARS AND OVER',
  'Estimate!!Total'),
 ('B15001_002E',
  'SEX BY AGE BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 18 YEARS AND OVER',
  'Estimate!!Total!!Male'),
 ('B15001_003E',
  'SEX BY AGE BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 18 YEARS AND OVER',
  'Estimate!!Total!!Male!!18 to 24 years'),
 ('B15001_004E',
  'SEX BY AGE BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 18 YEARS AND OVER',
  'Estimate!!Total!!Male!!18 to 24 years!!Less than 9th grade'),
 ('B15001_005E',
  'SEX BY AGE BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 18 YEARS AND OVER',
  'Estimate!!Total!!Male!!18 to 24 years!!9th to 12th grade, no diploma'),
 ('B15001_006E',
  'SEX BY AGE BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 18 YEARS AND OVER',
  'Estimate!!Total!!Male!!18 to 24 years!!High school graduate (includes equivalency)'),
 ('B15001_007E',
  'SEX BY AGE BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 18 YEARS AND OVER',
  'Es

In [16]:
# EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B15003'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B15003_001E  | EDUCATIONAL ATTAINMENT FOR THE | !! Estimate Total                                        | int  
B15003_002E  | EDUCATIONAL ATTAINMENT FOR THE | !! !! Estimate Total No schooling completed              | int  
B15003_003E  | EDUCATIONAL ATTAINMENT FOR THE | !! !! Estimate Total Nursery school                      | int  
B15003_004E  | EDUCATIONAL ATTAINMENT FOR THE | !! !! Estimate Total Kindergarten                        | int  
B15003_005E  | EDUCATIONAL ATTAINMENT FOR THE | !! !! Estimate Total 1st grade                           | int  
B15003_006E  | EDUCATIONAL ATTAINMENT FOR THE | !! !! Estimate Total 2nd grade                           | int  
B15003_007E  | EDUCATIONAL ATTAINMENT FOR THE | !! !! Estimate Total 3rd grade               

In [17]:
total_education = 'B15003_001E'
high_school_grad = 'B15003_017E'
ged = 'B15003_018E'
some_college_1 = 'B15003_019E'
some_college_2 = 'B15003_020E'
assoc_deg = 'B15003_021E'
bach_deg = 'B15003_022E'
master_deg = 'B15003_023E'
prof_deg = 'B15003_024E'
doc_deg = 'B15003_025E'

In [18]:
variables['total_education'] = total_education
variables['high_school_grad'] = high_school_grad
variables['ged'] = ged
variables['some_college_1'] = some_college_1
variables['some_college_2'] = some_college_2
variables['assoc_deg'] = assoc_deg
variables['bach_deg'] = bach_deg
variables['master_deg'] = master_deg
variables['prof_deg'] = prof_deg
variables['doc_deg'] = doc_deg

variables

{'total_population': 'B01003_001E',
 'median_income': 'B06011_001E',
 'total_white': 'B02001_002E',
 'total_white_nonhispanic': 'B01001H_001E',
 'total_hispanic': 'B01001I_001E',
 'total_education': 'B15003_001E',
 'high_school_grad': 'B15003_017E',
 'ged': 'B15003_018E',
 'some_college_1': 'B15003_019E',
 'some_college_2': 'B15003_020E',
 'assoc_deg': 'B15003_021E',
 'bach_deg': 'B15003_022E',
 'master_deg': 'B15003_023E',
 'prof_deg': 'B15003_024E',
 'doc_deg': 'B15003_025E'}

## Poverty

In [19]:
censusdata.search('acs5', 2018, 'concept', 'POVERTY STATUS IN THE PAST 12')

[('B06012PR_001E',
  'PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 12 MONTHS IN PUERTO RICO',
  'Estimate!!Total'),
 ('B06012PR_002E',
  'PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 12 MONTHS IN PUERTO RICO',
  'Estimate!!Total!!Below 100 percent of the poverty level'),
 ('B06012PR_003E',
  'PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 12 MONTHS IN PUERTO RICO',
  'Estimate!!Total!!100 to 149 percent of the poverty level'),
 ('B06012PR_004E',
  'PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 12 MONTHS IN PUERTO RICO',
  'Estimate!!Total!!At or above 150 percent of the poverty level'),
 ('B06012PR_005E',
  'PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 12 MONTHS IN PUERTO RICO',
  'Estimate!!Total!!Born in Puerto Rico'),
 ('B06012PR_006E',
  'PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 12 MONTHS IN PUERTO RICO',
  'Estimate!!Total!!Born in Puerto Rico!!Below 100 percent of the poverty level'),
 ('B06012PR_007E',
  'PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 12 MONTHS IN PUERTO RICO',
 

In [20]:
# POVERTY STATUS IN THE PAST 12 MONTHS OF INDIVIDUALS BY SEX BY EMPLOYMENT STATUS
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B17005'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B17005_001E  | POVERTY STATUS IN THE PAST 12  | !! Estimate Total                                        | int  
B17005_002E  | POVERTY STATUS IN THE PAST 12  | !! !! Estimate Total Income in the past 12 months below  | int  
B17005_003E  | POVERTY STATUS IN THE PAST 12  | !! !! !! Estimate Total Income in the past 12 months bel | int  
B17005_004E  | POVERTY STATUS IN THE PAST 12  | !! !! !! !! Estimate Total Income in the past 12 months  | int  
B17005_005E  | POVERTY STATUS IN THE PAST 12  | !! !! !! !! !! Estimate Total Income in the past 12 mont | int  
B17005_006E  | POVERTY STATUS IN THE PAST 12  | !! !! !! !! !! Estimate Total Income in the past 12 mont | int  
B17005_007E  | POVERTY STATUS IN THE PAST 12  | !! !! !! !! Estimate Total Income in the past

In [21]:
total_poverty = 'B17005_001E'
# Estimate!!Total!!Income in the past 12 months below poverty level
below_pov_level = 'B17005_002E'

variables['total_poverty'] = total_poverty
variables['below_pov_level'] = below_pov_level

variables

{'total_population': 'B01003_001E',
 'median_income': 'B06011_001E',
 'total_white': 'B02001_002E',
 'total_white_nonhispanic': 'B01001H_001E',
 'total_hispanic': 'B01001I_001E',
 'total_education': 'B15003_001E',
 'high_school_grad': 'B15003_017E',
 'ged': 'B15003_018E',
 'some_college_1': 'B15003_019E',
 'some_college_2': 'B15003_020E',
 'assoc_deg': 'B15003_021E',
 'bach_deg': 'B15003_022E',
 'master_deg': 'B15003_023E',
 'prof_deg': 'B15003_024E',
 'doc_deg': 'B15003_025E',
 'total_poverty': 'B17005_001E',
 'below_pov_level': 'B17005_002E'}

## Unemployment

In [22]:
censusdata.search('acs5', 2018, 'concept', 'EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER')

[('B23001_001E',
  'SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER',
  'Estimate!!Total'),
 ('B23001_002E',
  'SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER',
  'Estimate!!Total!!Male'),
 ('B23001_003E',
  'SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER',
  'Estimate!!Total!!Male!!16 to 19 years'),
 ('B23001_004E',
  'SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER',
  'Estimate!!Total!!Male!!16 to 19 years!!In labor force'),
 ('B23001_005E',
  'SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER',
  'Estimate!!Total!!Male!!16 to 19 years!!In labor force!!In Armed Forces'),
 ('B23001_006E',
  'SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER',
  'Estimate!!Total!!Male!!16 to 19 years!!In labor force!!Civilian'),
 ('B23001_007E',
  'SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER',
  'Estimate!!Total!!Male!!16 to 19 years!!In labor force!!Civi

In [23]:
# EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B23025'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B23025_001E  | EMPLOYMENT STATUS FOR THE POPU | !! Estimate Total                                        | int  
B23025_002E  | EMPLOYMENT STATUS FOR THE POPU | !! !! Estimate Total In labor force                      | int  
B23025_003E  | EMPLOYMENT STATUS FOR THE POPU | !! !! !! Estimate Total In labor force Civilian labor fo | int  
B23025_004E  | EMPLOYMENT STATUS FOR THE POPU | !! !! !! !! Estimate Total In labor force Civilian labor | int  
B23025_005E  | EMPLOYMENT STATUS FOR THE POPU | !! !! !! !! Estimate Total In labor force Civilian labor | int  
B23025_006E  | EMPLOYMENT STATUS FOR THE POPU | !! !! !! Estimate Total In labor force Armed Forces      | int  
B23025_007E  | EMPLOYMENT STATUS FOR THE POPU | !! !! Estimate Total Not in labor force      

In [24]:
# Estimate!!Total!!In labor force!!Civilian labor force
civ_labor_force = 'B23025_003E'
# Estimate!!Total!!In labor force!!Civilian labor force!!Unemployed
unemployed = 'B23025_005E'

variables['civ_labor_force'] = civ_labor_force
variables['unemployed'] = unemployed

variables

{'total_population': 'B01003_001E',
 'median_income': 'B06011_001E',
 'total_white': 'B02001_002E',
 'total_white_nonhispanic': 'B01001H_001E',
 'total_hispanic': 'B01001I_001E',
 'total_education': 'B15003_001E',
 'high_school_grad': 'B15003_017E',
 'ged': 'B15003_018E',
 'some_college_1': 'B15003_019E',
 'some_college_2': 'B15003_020E',
 'assoc_deg': 'B15003_021E',
 'bach_deg': 'B15003_022E',
 'master_deg': 'B15003_023E',
 'prof_deg': 'B15003_024E',
 'doc_deg': 'B15003_025E',
 'total_poverty': 'B17005_001E',
 'below_pov_level': 'B17005_002E',
 'civ_labor_force': 'B23025_003E',
 'unemployed': 'B23025_005E'}

## Median Age

In [25]:
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B01002'))

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
B01002_001E  | MEDIAN AGE BY SEX              | !! !! Estimate Median age -- Total                       | float
B01002_002E  | MEDIAN AGE BY SEX              | !! !! Estimate Median age -- Male                        | float
B01002_003E  | MEDIAN AGE BY SEX              | !! !! Estimate Median age -- Female                      | float
-------------------------------------------------------------------------------------------------------------------


In [26]:
median_age = 'B01002_001E'

variables['median_age'] = median_age
variables

{'total_population': 'B01003_001E',
 'median_income': 'B06011_001E',
 'total_white': 'B02001_002E',
 'total_white_nonhispanic': 'B01001H_001E',
 'total_hispanic': 'B01001I_001E',
 'total_education': 'B15003_001E',
 'high_school_grad': 'B15003_017E',
 'ged': 'B15003_018E',
 'some_college_1': 'B15003_019E',
 'some_college_2': 'B15003_020E',
 'assoc_deg': 'B15003_021E',
 'bach_deg': 'B15003_022E',
 'master_deg': 'B15003_023E',
 'prof_deg': 'B15003_024E',
 'doc_deg': 'B15003_025E',
 'total_poverty': 'B17005_001E',
 'below_pov_level': 'B17005_002E',
 'civ_labor_force': 'B23025_003E',
 'unemployed': 'B23025_005E',
 'median_age': 'B01002_001E'}

In [27]:
# median_age = 'B01002_001E'
# total_population = 'B01003_001E'
# total_ethnicity = 'B02001_001E'
# total_education = 'B15003_001E'
# total_poverty = 'B17005_001E'
# total_labor_force_part = 'B12006_001E'
# median_income = 'B06011_001E'

# variables = [total_population, median_age, median_income, ethnicity, 
#              education, poverty, labor_force_part]

# NYC Zips 

In [28]:
zip_codes = pd.read_csv('data/nyc_zips.csv')
zip_codes.nyc_zips = zip_codes.nyc_zips.astype('str')
zip_codes = list(zip_codes.nyc_zips.values)

In [29]:
separator = ','
nyc_zips = separator.join(zip_codes)
nyc_zips

'11105,11423,10048,10104,11691,10103,11420,11230,10128,10006,10172,11209,11235,10155,10031,10044,10120,10065,10177,83,11219,10121,10119,10129,11226,11220,10055,11104,11364,10026,10474,10025,11427,11243,11365,10464,11216,10310,10112,11369,11004,11412,11426,10471,11106,11373,11374,10305,10041,10029,10004,11377,10301,11695,11102,11436,11428,10010,10452,11204,11435,10013,11416,10033,10307,10036,10475,11385,10171,11241,10314,11414,10106,11218,10000,10454,11234,10045,11421,11231,11367,11692,11214,11434,10303,11379,10461,11238,10462,10019,10281,11210,10022,11419,10176,10020,10167,11215,10279,11239,10473,10468,10458,10009,11228,12345,10115,10457,11366,10030,10005,11242,10455,11101,10179,11378,11001,10162,10158,10028,10011,11359,10007,10166,10803,11203,11411,10154,10040,10017,10021,11372,11211,10463,11362,11237,10002,10153,11223,11417,10174,10271,11207,11233,10467,10075,10014,11429,10151,11694,10038,10308,11229,10018,11217,10023,10302,10001,10118,11251,11418,10024,11212,10178,10016,11368,10034,

In [30]:
censusdata.geographies(censusdata.censusgeo([('zip code tabulation area', nyc_zips)]), 'acs5', 2018)

{'ZCTA5 11222': censusgeo((('zip code tabulation area', '11222'),)),
 'ZCTA5 11233': censusgeo((('zip code tabulation area', '11233'),)),
 'ZCTA5 11235': censusgeo((('zip code tabulation area', '11235'),)),
 'ZCTA5 11426': censusgeo((('zip code tabulation area', '11426'),)),
 'ZCTA5 11427': censusgeo((('zip code tabulation area', '11427'),)),
 'ZCTA5 11101': censusgeo((('zip code tabulation area', '11101'),)),
 'ZCTA5 11205': censusgeo((('zip code tabulation area', '11205'),)),
 'ZCTA5 11215': censusgeo((('zip code tabulation area', '11215'),)),
 'ZCTA5 11219': censusgeo((('zip code tabulation area', '11219'),)),
 'ZCTA5 11226': censusgeo((('zip code tabulation area', '11226'),)),
 'ZCTA5 11228': censusgeo((('zip code tabulation area', '11228'),)),
 'ZCTA5 11236': censusgeo((('zip code tabulation area', '11236'),)),
 'ZCTA5 11237': censusgeo((('zip code tabulation area', '11237'),)),
 'ZCTA5 11354': censusgeo((('zip code tabulation area', '11354'),)),
 'ZCTA5 11356': censusgeo((('zip c

In [31]:
nyc_zips = censusdata.censusgeo([('zip code tabulation area', nyc_zips)])

# Pull Data

In [32]:
variable_vals = list(variables.values())

In [33]:
data = censusdata.download('acs5', 2018, nyc_zips, variable_vals)
data

Unnamed: 0,B01003_001E,B06011_001E,B02001_002E,B01001H_001E,B01001I_001E,B15003_001E,B15003_017E,B15003_018E,B15003_019E,B15003_020E,B15003_021E,B15003_022E,B15003_023E,B15003_024E,B15003_025E,B17005_001E,B17005_002E,B23025_003E,B23025_005E,B01002_001E
"ZCTA5 11222: Summary level: 860, zip code tabulation area:11222",36492,52004,29893,26794,5197,30177,3966,642,1128,2744,1282,11797,4846,814,271,32725,3567,24761,1195,34.8
"ZCTA5 11233: Summary level: 860, zip code tabulation area:11233",76819,28652,6731,5320,12276,49821,13737,2235,1374,5981,3803,8449,3310,430,321,59586,14986,35722,2219,33.0
"ZCTA5 11235: Summary level: 860, zip code tabulation area:11235",78128,27444,58754,56160,6491,59442,11484,1656,1411,4407,4521,16573,8694,2083,1053,64405,12099,37785,2228,45.1
"ZCTA5 11426: Summary level: 860, zip code tabulation area:11426",20801,40693,8083,5834,3938,14569,3732,659,523,1547,1019,3192,1711,455,143,16729,1201,10966,481,38.6
"ZCTA5 11427: Summary level: 860, zip code tabulation area:11427",24037,32027,7047,4762,4676,17320,4005,646,596,2449,1407,3395,1651,372,153,18830,1467,11836,616,42.0
"ZCTA5 11101: Summary level: 860, zip code tabulation area:11101",30043,40222,12915,9746,7908,22131,4470,855,508,1744,1129,6342,2746,1145,394,24681,4388,17739,1323,34.1
"ZCTA5 11205: Summary level: 860, zip code tabulation area:11205",46064,29349,22570,19470,8153,27491,4572,951,811,2865,1195,7130,3987,931,353,33114,8580,23338,1969,29.8
"ZCTA5 11215: Summary level: 860, zip code tabulation area:11215",70156,63578,53421,46961,11428,52178,3804,509,832,2671,1528,18903,13771,4565,2130,56304,4397,43071,1700,36.0
"ZCTA5 11219: Summary level: 860, zip code tabulation area:11219",90036,19006,60521,55723,10913,49091,14714,1332,1365,3595,1982,5918,2483,761,261,59700,17270,35258,2037,27.8
"ZCTA5 11226: Summary level: 860, zip code tabulation area:11226",100277,29366,15288,12161,17145,68636,20097,2858,1711,9778,3772,12809,5224,1074,367,79722,12903,52670,3787,34.7


In [34]:
data.columns = variables.keys()

In [35]:
data.head()

Unnamed: 0,total_population,median_income,total_white,total_white_nonhispanic,total_hispanic,total_education,high_school_grad,ged,some_college_1,some_college_2,assoc_deg,bach_deg,master_deg,prof_deg,doc_deg,total_poverty,below_pov_level,civ_labor_force,unemployed,median_age
"ZCTA5 11222: Summary level: 860, zip code tabulation area:11222",36492,52004,29893,26794,5197,30177,3966,642,1128,2744,1282,11797,4846,814,271,32725,3567,24761,1195,34.8
"ZCTA5 11233: Summary level: 860, zip code tabulation area:11233",76819,28652,6731,5320,12276,49821,13737,2235,1374,5981,3803,8449,3310,430,321,59586,14986,35722,2219,33.0
"ZCTA5 11235: Summary level: 860, zip code tabulation area:11235",78128,27444,58754,56160,6491,59442,11484,1656,1411,4407,4521,16573,8694,2083,1053,64405,12099,37785,2228,45.1
"ZCTA5 11426: Summary level: 860, zip code tabulation area:11426",20801,40693,8083,5834,3938,14569,3732,659,523,1547,1019,3192,1711,455,143,16729,1201,10966,481,38.6
"ZCTA5 11427: Summary level: 860, zip code tabulation area:11427",24037,32027,7047,4762,4676,17320,4005,646,596,2449,1407,3395,1651,372,153,18830,1467,11836,616,42.0


In [36]:
data.total_population.sum()

8519038

In [37]:
indicies = list(data.index)
indicies

[censusgeo((('zip code tabulation area', '11222'),), 'ZCTA5 11222'),
 censusgeo((('zip code tabulation area', '11233'),), 'ZCTA5 11233'),
 censusgeo((('zip code tabulation area', '11235'),), 'ZCTA5 11235'),
 censusgeo((('zip code tabulation area', '11426'),), 'ZCTA5 11426'),
 censusgeo((('zip code tabulation area', '11427'),), 'ZCTA5 11427'),
 censusgeo((('zip code tabulation area', '11101'),), 'ZCTA5 11101'),
 censusgeo((('zip code tabulation area', '11205'),), 'ZCTA5 11205'),
 censusgeo((('zip code tabulation area', '11215'),), 'ZCTA5 11215'),
 censusgeo((('zip code tabulation area', '11219'),), 'ZCTA5 11219'),
 censusgeo((('zip code tabulation area', '11226'),), 'ZCTA5 11226'),
 censusgeo((('zip code tabulation area', '11228'),), 'ZCTA5 11228'),
 censusgeo((('zip code tabulation area', '11236'),), 'ZCTA5 11236'),
 censusgeo((('zip code tabulation area', '11237'),), 'ZCTA5 11237'),
 censusgeo((('zip code tabulation area', '11354'),), 'ZCTA5 11354'),
 censusgeo((('zip code tabulation 

In [38]:
new_zips = [] 
for i in indicies:
    i = str(i)[-5:]
    new_zips.append(i)
new_zips

['11222',
 '11233',
 '11235',
 '11426',
 '11427',
 '11101',
 '11205',
 '11215',
 '11219',
 '11226',
 '11228',
 '11236',
 '11237',
 '11354',
 '11356',
 '11360',
 '11362',
 '11366',
 '11370',
 '11371',
 '11374',
 '11379',
 '11419',
 '11422',
 '11428',
 '11433',
 '11004',
 '11005',
 '11206',
 '11363',
 '11211',
 '11372',
 '11217',
 '11109',
 '11359',
 '11201',
 '11231',
 '11417',
 '11203',
 '11208',
 '11210',
 '11212',
 '11221',
 '11239',
 '11361',
 '10457',
 '10464',
 '10472',
 '10309',
 '10467',
 '10456',
 '10065',
 '10075',
 '10110',
 '10168',
 '10174',
 '10468',
 '10001',
 '10035',
 '10003',
 '10009',
 '10011',
 '10022',
 '10028',
 '10119',
 '10044',
 '10162',
 '10279',
 '10460',
 '10282',
 '10301',
 '10307',
 '10465',
 '10470',
 '10471',
 '10006',
 '10012',
 '10013',
 '10014',
 '10023',
 '10024',
 '10032',
 '10112',
 '10152',
 '10177',
 '10306',
 '10461',
 '10018',
 '10027',
 '10036',
 '10170',
 '10171',
 '10304',
 '10308',
 '10459',
 '10466',
 '10474',
 '10803',
 '10004',
 '10025',


In [39]:
data.columns

Index(['total_population', 'median_income', 'total_white',
       'total_white_nonhispanic', 'total_hispanic', 'total_education',
       'high_school_grad', 'ged', 'some_college_1', 'some_college_2',
       'assoc_deg', 'bach_deg', 'master_deg', 'prof_deg', 'doc_deg',
       'total_poverty', 'below_pov_level', 'civ_labor_force', 'unemployed',
       'median_age'],
      dtype='object')

In [40]:
data['zip_code'] = new_zips

column_names = ['zip_code','total_population', 'median_income', 'total_white',
                'total_white_nonhispanic','total_hispanic','total_education', 
                'high_school_grad', 'ged','some_college_1','some_college_2', 
                'assoc_deg', 'bach_deg','master_deg', 'prof_deg','doc_deg', 
                'total_poverty','below_pov_level','civ_labor_force',
                'unemployed', 'median_age'
]

data = data.reindex(columns=column_names)

data.head()

Unnamed: 0,zip_code,total_population,median_income,total_white,total_white_nonhispanic,total_hispanic,total_education,high_school_grad,ged,some_college_1,...,assoc_deg,bach_deg,master_deg,prof_deg,doc_deg,total_poverty,below_pov_level,civ_labor_force,unemployed,median_age
"ZCTA5 11222: Summary level: 860, zip code tabulation area:11222",11222,36492,52004,29893,26794,5197,30177,3966,642,1128,...,1282,11797,4846,814,271,32725,3567,24761,1195,34.8
"ZCTA5 11233: Summary level: 860, zip code tabulation area:11233",11233,76819,28652,6731,5320,12276,49821,13737,2235,1374,...,3803,8449,3310,430,321,59586,14986,35722,2219,33.0
"ZCTA5 11235: Summary level: 860, zip code tabulation area:11235",11235,78128,27444,58754,56160,6491,59442,11484,1656,1411,...,4521,16573,8694,2083,1053,64405,12099,37785,2228,45.1
"ZCTA5 11426: Summary level: 860, zip code tabulation area:11426",11426,20801,40693,8083,5834,3938,14569,3732,659,523,...,1019,3192,1711,455,143,16729,1201,10966,481,38.6
"ZCTA5 11427: Summary level: 860, zip code tabulation area:11427",11427,24037,32027,7047,4762,4676,17320,4005,646,596,...,1407,3395,1651,372,153,18830,1467,11836,616,42.0


In [41]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 208 entries, ZCTA5 11222: Summary level: 860, zip code tabulation area:11222 to ZCTA5 11693: Summary level: 860, zip code tabulation area:11693
Data columns (total 21 columns):
zip_code                   208 non-null object
total_population           208 non-null int64
median_income              208 non-null int64
total_white                208 non-null int64
total_white_nonhispanic    208 non-null int64
total_hispanic             208 non-null int64
total_education            208 non-null int64
high_school_grad           208 non-null int64
ged                        208 non-null int64
some_college_1             208 non-null int64
some_college_2             208 non-null int64
assoc_deg                  208 non-null int64
bach_deg                   208 non-null int64
master_deg                 208 non-null int64
prof_deg                   208 non-null int64
doc_deg                    208 non-null int64
total_poverty              208 non-null i

## Calculate Variables

## Minority Representation

In [46]:
# create a column for 'minority representation'
# total population - total_white + total_hispanic / total_population
data['minority_rep'] = (data['total_population'] - data['total_white_nonhispanic']) / data['total_population']

data.head()

Unnamed: 0,zip_code,total_population,median_income,total_white,total_white_nonhispanic,total_hispanic,total_education,high_school_grad,ged,some_college_1,...,bach_deg,master_deg,prof_deg,doc_deg,total_poverty,below_pov_level,civ_labor_force,unemployed,median_age,minority_rep
"ZCTA5 11222: Summary level: 860, zip code tabulation area:11222",11222,36492,52004,29893,26794,5197,30177,3966,642,1128,...,11797,4846,814,271,32725,3567,24761,1195,34.8,0.265757
"ZCTA5 11233: Summary level: 860, zip code tabulation area:11233",11233,76819,28652,6731,5320,12276,49821,13737,2235,1374,...,8449,3310,430,321,59586,14986,35722,2219,33.0,0.930746
"ZCTA5 11235: Summary level: 860, zip code tabulation area:11235",11235,78128,27444,58754,56160,6491,59442,11484,1656,1411,...,16573,8694,2083,1053,64405,12099,37785,2228,45.1,0.28118
"ZCTA5 11426: Summary level: 860, zip code tabulation area:11426",11426,20801,40693,8083,5834,3938,14569,3732,659,523,...,3192,1711,455,143,16729,1201,10966,481,38.6,0.719533
"ZCTA5 11427: Summary level: 860, zip code tabulation area:11427",11427,24037,32027,7047,4762,4676,17320,4005,646,596,...,3395,1651,372,153,18830,1467,11836,616,42.0,0.801889


## Education Variables

In [48]:
data['hs_or_above'] = (data.high_school_grad + data.ged + data.some_college_1 +
                        data.some_college_2 + data.bach_deg + data.master_deg + 
                        data.prof_deg + data.doc_deg) / data.total_education

data['bach_or_above'] = (data.bach_deg + data.master_deg + 
                        data.prof_deg + data.doc_deg) / data.total_education

data.head()

Unnamed: 0,zip_code,total_population,median_income,total_white,total_white_nonhispanic,total_hispanic,total_education,high_school_grad,ged,some_college_1,...,doc_deg,total_poverty,below_pov_level,civ_labor_force,unemployed,median_age,minority_rep,hs_and_above,hs_or_above,bach_or_above
"ZCTA5 11222: Summary level: 860, zip code tabulation area:11222",11222,36492,52004,29893,26794,5197,30177,3966,642,1128,...,271,32725,3567,24761,1195,34.8,0.265757,0.868476,0.868476,0.587467
"ZCTA5 11233: Summary level: 860, zip code tabulation area:11233",11233,76819,28652,6731,5320,12276,49821,13737,2235,1374,...,321,59586,14986,35722,2219,33.0,0.930746,0.719315,0.719315,0.251099
"ZCTA5 11235: Summary level: 860, zip code tabulation area:11235",11235,78128,27444,58754,56160,6491,59442,11484,1656,1411,...,1053,64405,12099,37785,2228,45.1,0.28118,0.79676,0.79676,0.477827
"ZCTA5 11426: Summary level: 860, zip code tabulation area:11426",11426,20801,40693,8083,5834,3938,14569,3732,659,523,...,143,16729,1201,10966,481,38.6,0.719533,0.821058,0.821058,0.377583
"ZCTA5 11427: Summary level: 860, zip code tabulation area:11427",11427,24037,32027,7047,4762,4676,17320,4005,646,596,...,153,18830,1467,11836,616,42.0,0.801889,0.765993,0.765993,0.321651


## Poverty Variable

In [49]:
data['poverty_rate'] = data.below_pov_level / data.total_poverty
data.head(2)

Unnamed: 0,zip_code,total_population,median_income,total_white,total_white_nonhispanic,total_hispanic,total_education,high_school_grad,ged,some_college_1,...,total_poverty,below_pov_level,civ_labor_force,unemployed,median_age,minority_rep,hs_and_above,hs_or_above,bach_or_above,poverty_rate
"ZCTA5 11222: Summary level: 860, zip code tabulation area:11222",11222,36492,52004,29893,26794,5197,30177,3966,642,1128,...,32725,3567,24761,1195,34.8,0.265757,0.868476,0.868476,0.587467,0.108999
"ZCTA5 11233: Summary level: 860, zip code tabulation area:11233",11233,76819,28652,6731,5320,12276,49821,13737,2235,1374,...,59586,14986,35722,2219,33.0,0.930746,0.719315,0.719315,0.251099,0.251502


## Unemployment Variable

In [50]:
data['unemployment_rate'] = data.unemployed / data.civ_labor_force
data.head(2)

Unnamed: 0,zip_code,total_population,median_income,total_white,total_white_nonhispanic,total_hispanic,total_education,high_school_grad,ged,some_college_1,...,below_pov_level,civ_labor_force,unemployed,median_age,minority_rep,hs_and_above,hs_or_above,bach_or_above,poverty_rate,unemployment_rate
"ZCTA5 11222: Summary level: 860, zip code tabulation area:11222",11222,36492,52004,29893,26794,5197,30177,3966,642,1128,...,3567,24761,1195,34.8,0.265757,0.868476,0.868476,0.587467,0.108999,0.048261
"ZCTA5 11233: Summary level: 860, zip code tabulation area:11233",11233,76819,28652,6731,5320,12276,49821,13737,2235,1374,...,14986,35722,2219,33.0,0.930746,0.719315,0.719315,0.251099,0.251502,0.062119


# Final Data

In [52]:
nyc_census_data = data[['zip_code','total_population','median_income',
                        'median_age','minority_rep','hs_and_above',
                        'bach_or_above','poverty_rate','unemployment_rate']]

nyc_census_data.head(2)

Unnamed: 0,zip_code,total_population,median_income,median_age,minority_rep,hs_and_above,bach_or_above,poverty_rate,unemployment_rate
"ZCTA5 11222: Summary level: 860, zip code tabulation area:11222",11222,36492,52004,34.8,0.265757,0.868476,0.587467,0.108999,0.048261
"ZCTA5 11233: Summary level: 860, zip code tabulation area:11233",11233,76819,28652,33.0,0.930746,0.719315,0.251099,0.251502,0.062119


In [53]:
nyc_census_data.describe()

Unnamed: 0,total_population,median_income,median_age,minority_rep,hs_and_above,bach_or_above,poverty_rate,unemployment_rate
count,208.0,208.0,208.0,184.0,184.0,184.0,184.0,184.0
mean,40956.913462,-80092460.0,-80128170.0,0.626508,0.773884,0.408822,0.153844,0.067103
std,29572.844465,217327100.0,217313900.0,0.271139,0.108885,0.218274,0.096224,0.040993
min,0.0,-666666700.0,-666666700.0,0.0,0.494408,0.065934,0.0,0.0
25%,18877.0,22164.0,32.95,0.389522,0.698424,0.25753,0.088351,0.044675
50%,37848.0,30841.0,36.35,0.623163,0.77558,0.343129,0.128726,0.060984
75%,62148.5,42153.75,40.5,0.887118,0.850084,0.508622,0.199116,0.07871
max,112425.0,147538.0,81.5,0.992365,1.0,0.927911,0.742268,0.45


In [54]:
nyc_census_data.corr()

Unnamed: 0,total_population,median_income,median_age,minority_rep,hs_and_above,bach_or_above,poverty_rate,unemployment_rate
total_population,1.0,0.513069,0.513106,0.315989,-0.426281,-0.3612,0.32897,0.147638
median_income,0.513069,1.0,1.0,0.170957,-0.153538,-0.002893,0.118234,0.121089
median_age,0.513106,1.0,1.0,0.171274,-0.153929,-0.003322,0.11851,0.121335
minority_rep,0.315989,0.170957,0.171274,1.0,-0.773414,-0.744684,0.556089,0.566733
hs_and_above,-0.426281,-0.153538,-0.153929,-0.773414,1.0,0.87519,-0.692287,-0.543794
bach_or_above,-0.3612,-0.002893,-0.003322,-0.744684,0.87519,1.0,-0.540511,-0.531541
poverty_rate,0.32897,0.118234,0.11851,0.556089,-0.692287,-0.540511,1.0,0.755886
unemployment_rate,0.147638,0.121089,0.121335,0.566733,-0.543794,-0.531541,0.755886,1.0
