## Demographic Data In San Bernardino + Riverside

### Race & Housing (Rent/Own)

In [1]:
import requests


In [2]:
import geopandas as gpd
import pandas as pd

### Get 2009 Data on Race from Table B02001

In [3]:
#for SB County
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=group(B02001),geometry,&for=tract:*&in=state:06&in=county:071')
censusdata = r.json()
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
df_SB_race = pd.DataFrame(data_rows, columns=column_names)


#get it for Riverside
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=group(B02001),geometry,&for=tract:*&in=state:06&in=county:065')
censusdata = r.json()
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
#create dataframe
df_R_race =  pd.DataFrame(data_rows, columns=column_names)
df_R_race.head(2)


Unnamed: 0,B02001_001E,B02001_001EA,B02001_001M,B02001_001MA,B02001_002E,B02001_002EA,B02001_002M,B02001_002MA,B02001_003E,B02001_003EA,...,B02001_009MA,B02001_010E,B02001_010EA,B02001_010M,B02001_010MA,GEO_ID,NAME,state,county,tract
0,9277,,707,,6729,,754,,274,,...,,619,,274,,1400000US06065043223,"Census Tract 432.23, Riverside County, California",6,65,43223
1,12791,,674,,11321,,680,,237,,...,,204,,139,,1400000US06065043224,"Census Tract 432.24, Riverside County, California",6,65,43224


### Use API to get the 2009 Housing Data (variables 'B19013_001E':'Median HH Income','B25003_001E':'Total Occupied','B25003_002E':'Owner Occupied','B25003_003E':'Renter Occupied')

In [4]:
#get 2009 housing columns
#SB
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=B19013_001E,B25003_001E,B25003_002E,B25003_003E&for=tract:*&in=state:06&in=county:071')
censusdata = r.json()
#df_SB_house = pd.DataFrame(censusdata)
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
#create dataframe
df_SB_house = pd.DataFrame(data_rows, columns=column_names)
df_SB_house.info



<bound method DataFrame.info of     B19013_001E B25003_001E B25003_002E B25003_003E state county   tract
0         77134        1563        1135         428    06    071  000103
1         99359        1808        1476         332    06    071  000104
2        109018        1782        1449         333    06    071  000105
3        118598        3323        3035         288    06    071  000106
4         93355         893         747         146    06    071  000107
..          ...         ...         ...         ...   ...    ...     ...
239       52159        1282         849         433    06    071  011900
240       53201        3951        2242        1709    06    071  012000
241       47091        4920        3703        1217    06    071  012100
242       30000         710         570         140    06    071  940100
243      126250           4           2           2    06    071  940500

[244 rows x 7 columns]>

In [5]:
#Riverside
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=B19013_001E,B25003_001E,B25003_002E,B25003_003E&for=tract:*&in=state:06&in=county:065')
censusdata = r.json()
#df_R_house = pd.DataFrame(censusdata)
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
#create dataframe
df_R_house = pd.DataFrame(data_rows, columns=column_names)
df_R_house.info

<bound method DataFrame.info of     B19013_001E B25003_001E B25003_002E B25003_003E state county   tract
0         71775        2809        2174         635    06    065  043223
1        108708        3853        3602         251    06    065  043224
2         69173        2675        2145         530    06    065  043225
3         67813        2182        1986         196    06    065  043304
4         53998        3089        2584         505    06    065  043305
..          ...         ...         ...         ...   ...    ...     ...
338       95990        1621        1381         240    06    065  043218
339       74347        3569        2695         874    06    065  043219
340       50665        1561         526        1035    06    065  043220
341       90652        5712        4635        1077    06    065  043221
342      106098        1234        1169          65    06    065  043222

[343 rows x 7 columns]>

In [6]:
#Join the riverside race and housing data
riverside = pd.merge(df_R_house, df_R_race, on='tract')

In [7]:
#Join the SB race and housing data
SB = pd.merge(df_SB_house, df_SB_race, on='tract')

In [8]:
SB.info

<bound method DataFrame.info of     B19013_001E B25003_001E B25003_002E B25003_003E state_x county_x   tract  \
0         77134        1563        1135         428      06      071  000103   
1         99359        1808        1476         332      06      071  000104   
2        109018        1782        1449         333      06      071  000105   
3        118598        3323        3035         288      06      071  000106   
4         93355         893         747         146      06      071  000107   
..          ...         ...         ...         ...     ...      ...     ...   
239       52159        1282         849         433      06      071  011900   
240       53201        3951        2242        1709      06      071  012000   
241       47091        4920        3703        1217      06      071  012100   
242       30000         710         570         140      06      071  940100   
243      126250           4           2           2      06      071  940500   

    B02

In [9]:
#create a single df for SB & Riverside --> calling it GDF to avoid having to change all the code below, but need to add in geometry column 
gdf = pd.concat([riverside,SB], ignore_index=True)

In [10]:
gdf.sample(1)

Unnamed: 0,B19013_001E,B25003_001E,B25003_002E,B25003_003E,state_x,county_x,tract,B02001_001E,B02001_001EA,B02001_001M,...,B02001_009M,B02001_009MA,B02001_010E,B02001_010EA,B02001_010M,B02001_010MA,GEO_ID,NAME,state_y,county_y
237,89068,4687,4020,667,6,65,41908,14048,,770,...,157,,339,,173,,1400000US06065041908,"Census Tract 419.08, Riverside County, California",6,65


In [11]:
gdf.rename(columns={
                'B19013_001E':'Median HH Income', 'B25003_001E':'Total Occupied','B25003_002E':'Owner Occupied','B25003_003E':'Renter Occupied',
            'B02001_001E': 'Total Pop',
                'B02001_002E':'White alone',
                "B02001_003E": "Black or African American alone", 
                "B02001_004E": "American Indian and Alaska Native alone",
                "B02001_005E": "Asian alone",
                "B02001_006E": "Native Hawaiian and Other Pacific Islander alone",
                "B02001_007E": "Some other race alone",
                "B02001_008E": "Two or more races:",
                "B02001_009E": "Two races including Some other race",
                "B02001_010E": "Two races excluding Some other race, and three or more races"}, inplace=True)

In [12]:
gdf = gdf[['Median HH Income',
 'Total Occupied',
 'Owner Occupied',
 'Renter Occupied',
 'state_x',
 'county_x',
 'tract',
 'Total Pop',
 'White alone',
 'Black or African American alone',
 'American Indian and Alaska Native alone',
 'Asian alone',
 'Native Hawaiian and Other Pacific Islander alone',
 'Some other race alone',
 'Two or more races:',
 'Two races including Some other race',
 'Two races excluding Some other race, and three or more races',
 'GEO_ID',
 'NAME']]

In [13]:
gdf.columns.values.tolist()

['Median HH Income',
 'Total Occupied',
 'Owner Occupied',
 'Renter Occupied',
 'state_x',
 'county_x',
 'tract',
 'Total Pop',
 'White alone',
 'Black or African American alone',
 'American Indian and Alaska Native alone',
 'Asian alone',
 'Native Hawaiian and Other Pacific Islander alone',
 'Some other race alone',
 'Two or more races:',
 'Two races including Some other race',
 'Two races excluding Some other race, and three or more races',
 'GEO_ID',
 'NAME']

In [14]:
int_cols = ['Median HH Income',
 'Total Occupied',
 'Owner Occupied',
 'Renter Occupied',
 'Total Pop',
 'White alone',
 'Black or African American alone',
 'American Indian and Alaska Native alone',
 'Asian alone',
 'Native Hawaiian and Other Pacific Islander alone',
 'Some other race alone',
 'Two or more races:',
 'Two races including Some other race',
 'Two races excluding Some other race, and three or more races']

In [15]:
# Convert columns to integer data type
for column in int_cols:
    gdf[column] = gdf[column].astype(int)

In [16]:
race_cols = [ 'White alone',
 'Black or African American alone',
 'American Indian and Alaska Native alone',
 'Asian alone',
 'Native Hawaiian and Other Pacific Islander alone',
 'Some other race alone',
 'Two or more races:',
 'Two races including Some other race',
 'Two races excluding Some other race, and three or more races']

In [17]:
for col in race_cols:
    gdf["Percent " + col] = (gdf[col]/gdf['Total Pop']) * 100

In [18]:
gdf = gdf.drop(race_cols, axis=1)

In [19]:
gdf['Percent Owner Occupied'] = (gdf['Owner Occupied']/gdf['Total Occupied']) * 100
gdf['Percent Renter Occupied'] = (gdf['Renter Occupied']/gdf['Total Occupied']) * 100

In [20]:
gdf = gdf.drop(['Total Occupied',
 'Owner Occupied',
 'Renter Occupied',], axis=1)

In [21]:
gdf.head(3)

Unnamed: 0,Median HH Income,state_x,county_x,tract,Total Pop,GEO_ID,NAME,Percent White alone,Percent Black or African American alone,Percent American Indian and Alaska Native alone,Percent Asian alone,Percent Native Hawaiian and Other Pacific Islander alone,Percent Some other race alone,Percent Two or more races:,Percent Two races including Some other race,"Percent Two races excluding Some other race, and three or more races",Percent Owner Occupied,Percent Renter Occupied
0,71775,6,65,43223,9277,1400000US06065043223,"Census Tract 432.23, Riverside County, California",72.534224,2.953541,3.859006,1.735475,0.797672,9.604398,8.515684,1.843268,6.672416,77.39409,22.60591
1,108708,6,65,43224,12791,1400000US06065043224,"Census Tract 432.24, Riverside County, California",88.507544,1.852865,0.50817,4.503166,0.0,1.798139,2.830115,1.235244,1.594871,93.485596,6.514404
2,69173,6,65,43225,8749,1400000US06065043225,"Census Tract 432.25, Riverside County, California",69.1279,4.069036,0.0,3.897588,0.0,21.362441,1.543033,0.89153,0.651503,80.186916,19.813084


## Income

In [22]:
#Household income = table B19001

#for SB County
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=group(B19001),geometry,&for=tract:*&in=state:06&in=county:071')
censusdata = r.json()
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
SB_HI = pd.DataFrame(data_rows, columns=column_names)
SB_HI.head(3)

Unnamed: 0,B19001_001E,B19001_001EA,B19001_001M,B19001_001MA,B19001_002E,B19001_002EA,B19001_002M,B19001_002MA,B19001_003E,B19001_003EA,...,B19001_016MA,B19001_017E,B19001_017EA,B19001_017M,B19001_017MA,GEO_ID,NAME,state,county,tract
0,1563,,101,,52,,48,,29,,...,,142,,72,,1400000US06071000103,"Census Tract 1.03, San Bernardino County, Cali...",6,71,103
1,1808,,51,,39,,47,,15,,...,,233,,84,,1400000US06071000104,"Census Tract 1.04, San Bernardino County, Cali...",6,71,104
2,1782,,76,,93,,74,,0,,...,,258,,73,,1400000US06071000105,"Census Tract 1.05, San Bernardino County, Cali...",6,71,105


In [23]:
#get it for Riverside
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=group(B19001),geometry,&for=tract:*&in=state:06&in=county:065')
censusdata = r.json()
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
#create dataframe
riverside_HI =  pd.DataFrame(data_rows, columns=column_names)
riverside_HI.head(2)

Unnamed: 0,B19001_001E,B19001_001EA,B19001_001M,B19001_001MA,B19001_002E,B19001_002EA,B19001_002M,B19001_002MA,B19001_003E,B19001_003EA,...,B19001_016MA,B19001_017E,B19001_017EA,B19001_017M,B19001_017MA,GEO_ID,NAME,state,county,tract
0,2809,,155,,62,,48,,36,,...,,170,,82,,1400000US06065043223,"Census Tract 432.23, Riverside County, California",6,65,43223
1,3853,,189,,13,,20,,176,,...,,704,,183,,1400000US06065043224,"Census Tract 432.24, Riverside County, California",6,65,43224


In [24]:


#Join the SB and Riverside income data
#HI_gdf = pd.merge(riverside_HI,SB_HI)

HI_gdf = pd.concat([riverside_HI,SB_HI], ignore_index=True)

In [25]:
HI_gdf.head(5)

Unnamed: 0,B19001_001E,B19001_001EA,B19001_001M,B19001_001MA,B19001_002E,B19001_002EA,B19001_002M,B19001_002MA,B19001_003E,B19001_003EA,...,B19001_016MA,B19001_017E,B19001_017EA,B19001_017M,B19001_017MA,GEO_ID,NAME,state,county,tract
0,2809,,155,,62,,48,,36,,...,,170,,82,,1400000US06065043223,"Census Tract 432.23, Riverside County, California",6,65,43223
1,3853,,189,,13,,20,,176,,...,,704,,183,,1400000US06065043224,"Census Tract 432.24, Riverside County, California",6,65,43224
2,2675,,168,,53,,52,,155,,...,,105,,74,,1400000US06065043225,"Census Tract 432.25, Riverside County, California",6,65,43225
3,2182,,138,,110,,58,,100,,...,,211,,110,,1400000US06065043304,"Census Tract 433.04, Riverside County, California",6,65,43304
4,3089,,215,,119,,72,,166,,...,,0,,132,,1400000US06065043305,"Census Tract 433.05, Riverside County, California",6,65,43305


In [26]:
#drop extra columns
HI_gdf = HI_gdf[["B19001_001E", "B19001_002E"    ,
                "B19001_003E"     ,
                "B19001_004E"    ,
                "B19001_005E"    ,
                "B19001_006E"     ,
                "B19001_007E"     , 
                "B19001_008E"    ,
                "B19001_009E"   ,
                "B19001_010E" ,
                "B19001_011E" ,
                "B19001_012E",
                "B19001_013E"  ,
                "B19001_014E",
                "B19001_015E"   ,
                "B19001_016E",
                "B19001_017E",
                'tract','GEO_ID']]

In [27]:
 #rename columns
HI_gdf.rename(columns={"B19001_001E":"Total:",       
                "B19001_002E":"<$10,000",       
                "B19001_003E": '$10,000-$14,999',     
                "B19001_004E":'$15,000 - $19,999',     
                "B19001_005E": '$20,000 - $24,999',     
                "B19001_006E":"$25,000 - $29,999",      
                "B19001_007E":"$30,000 - $34,999",       
                "B19001_008E":"$35,000 - $39,999",      
                "B19001_009E":  "$40,000 - $44,999",    
                "B19001_010E":  "$45,000 - $49,999",   
                "B19001_011E":  "$50,000 - $59,999",  
                "B19001_012E":  "$60,000 - $74,999",  
                "B19001_013E":  "$75,000 - $99,999",  
                "B19001_014E": "$100,000 - $124,999",    
                "B19001_015E":"$125,000 - $149,999",     
                "B19001_016E":"$150,000 - $199,999",   
                "B19001_017E": "$200,000 or more"}, inplace=True)

In [28]:
#get col names
#HI_gdf.columns.values.tolist()

In [29]:
#make list of income related cols
cols = [ '<$10,000',
 '$10,000-$14,999',
 '$15,000 - $19,999',
 '$20,000 - $24,999',
 '$25,000 - $29,999',
 '$30,000 - $34,999',
 '$35,000 - $39,999',
 '$40,000 - $44,999',
 '$45,000 - $49,999',
 '$50,000 - $59,999',
 '$60,000 - $74,999',
 '$75,000 - $99,999',
 '$100,000 - $124,999',
 '$125,000 - $149,999',
 '$150,000 - $199,999',
 '$200,000 or more']

In [30]:
#convert to integer
int_cols = ['Total:',
 '<$10,000',
 '$10,000-$14,999',
 '$15,000 - $19,999',
 '$20,000 - $24,999',
 '$25,000 - $29,999',
 '$30,000 - $34,999',
 '$35,000 - $39,999',
 '$40,000 - $44,999',
 '$45,000 - $49,999',
 '$50,000 - $59,999',
 '$60,000 - $74,999',
 '$75,000 - $99,999',
 '$100,000 - $124,999',
 '$125,000 - $149,999',
 '$150,000 - $199,999',
 '$200,000 or more']

for column in int_cols:
    HI_gdf[column] = HI_gdf[column].astype(int)

In [31]:
#get household income percentages for tracts
for col in cols:
    HI_gdf["Percent " + col] = (HI_gdf[col]/HI_gdf['Total:']) * 100

In [32]:
#drop old columns
HI_gdf =  HI_gdf.drop(cols, axis=1)

In [33]:
HI_gdf.head(1)
                

Unnamed: 0,Total:,tract,GEO_ID,"Percent <$10,000","Percent $10,000-$14,999","Percent $15,000 - $19,999","Percent $20,000 - $24,999","Percent $25,000 - $29,999","Percent $30,000 - $34,999","Percent $35,000 - $39,999","Percent $40,000 - $44,999","Percent $45,000 - $49,999","Percent $50,000 - $59,999","Percent $60,000 - $74,999","Percent $75,000 - $99,999","Percent $100,000 - $124,999","Percent $125,000 - $149,999","Percent $150,000 - $199,999","Percent $200,000 or more"
0,2809,43223,1400000US06065043223,2.207191,1.281595,8.650765,1.423994,8.223567,3.275187,1.423994,3.951584,5.339979,5.553578,10.11036,13.492346,14.097544,10.466358,4.449982,6.051976


## Education Level

In [34]:
#education level (sex by educational attainment pop 25 years and older)  = B15002 (now B15003 in more recent surveys)

In [35]:
#for SB County
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=group(B15002),geometry,&for=tract:*&in=state:06&in=county:071')
censusdata = r.json()
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
SB_educ = pd.DataFrame(data_rows, columns=column_names)
SB_educ.head()

Unnamed: 0,B15002_001E,B15002_001EA,B15002_001M,B15002_001MA,B15002_002E,B15002_002EA,B15002_002M,B15002_002MA,B15002_003E,B15002_003EA,...,B15002_034MA,B15002_035E,B15002_035EA,B15002_035M,B15002_035MA,GEO_ID,NAME,state,county,tract
0,3091,,273,,1493,,205,,30,,...,,10,,15,,1400000US06071005300,"Census Tract 53, San Bernardino County, Califo...",6,71,5300
1,2752,,280,,1267,,186,,0,,...,,48,,59,,1400000US06071007800,"Census Tract 78, San Bernardino County, Califo...",6,71,7800
2,2584,,167,,1264,,114,,12,,...,,0,,132,,1400000US06071010022,"Census Tract 100.22, San Bernardino County, Ca...",6,71,10022
3,6614,,593,,3616,,426,,58,,...,,3,,5,,1400000US06071010300,"Census Tract 103, San Bernardino County, Calif...",6,71,10300
4,6291,,432,,3117,,303,,208,,...,,0,,132,,1400000US06071001301,"Census Tract 13.01, San Bernardino County, Cal...",6,71,1301


In [36]:
#get it for Riverside
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=group(B15002),geometry,&for=tract:*&in=state:06&in=county:065')
censusdata = r.json()
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
#create dataframe
riverside_educ =  pd.DataFrame(data_rows, columns=column_names)
riverside_educ.head(2)

Unnamed: 0,B15002_001E,B15002_001EA,B15002_001M,B15002_001MA,B15002_002E,B15002_002EA,B15002_002M,B15002_002MA,B15002_003E,B15002_003EA,...,B15002_034MA,B15002_035E,B15002_035EA,B15002_035M,B15002_035MA,GEO_ID,NAME,state,county,tract
0,1469,,162,,669,,131,,55,,...,,0,,132,,1400000US06065043403,"Census Tract 434.03, Riverside County, California",6,65,43403
1,2760,,289,,1534,,200,,8,,...,,17,,26,,1400000US06065046402,"Census Tract 464.02, Riverside County, California",6,65,46402


In [37]:
educ_gdf = pd.concat([riverside_educ,SB_educ], ignore_index=True)

In [38]:
cols_to_keep = ['B15002_001E',
'B15002_003E',
'B15002_004E',
'B15002_005E',
'B15002_006E',
'B15002_007E',
'B15002_008E',
'B15002_009E',
'B15002_010E',
'B15002_011E',
'B15002_012E',
'B15002_013E',
'B15002_014E',
'B15002_015E',
'B15002_016E',
'B15002_017E',
'B15002_018E',
'B15002_020E',
'B15002_021E',
'B15002_022E',
'B15002_023E',
'B15002_024E',
'B15002_025E',
'B15002_026E',
'B15002_027E',
'B15002_028E',
'B15002_029E',
'B15002_030E',
'B15002_031E',
'B15002_032E',
'B15002_033E',
'B15002_034E',
'B15002_035E','GEO_ID', 'NAME','state','county','tract']

In [39]:
educ_gdf = educ_gdf.loc[:, cols_to_keep]

In [40]:
educ_gdf.head()

Unnamed: 0,B15002_001E,B15002_003E,B15002_004E,B15002_005E,B15002_006E,B15002_007E,B15002_008E,B15002_009E,B15002_010E,B15002_011E,...,B15002_031E,B15002_032E,B15002_033E,B15002_034E,B15002_035E,GEO_ID,NAME,state,county,tract
0,1469,55,23,39,68,17,0,63,60,190,...,103,55,46,0,0,1400000US06065043403,"Census Tract 434.03, Riverside County, California",6,65,43403
1,2760,8,50,38,90,18,61,119,100,401,...,121,40,29,0,17,1400000US06065046402,"Census Tract 464.02, Riverside County, California",6,65,46402
2,1583,31,0,14,0,0,0,74,0,230,...,75,138,34,0,0,1400000US06065040903,"Census Tract 409.03, Riverside County, California",6,65,40903
3,1886,6,4,172,51,72,0,61,68,256,...,48,43,0,8,0,1400000US06065041203,"Census Tract 412.03, Riverside County, California",6,65,41203
4,5795,18,10,20,21,29,0,67,23,628,...,188,447,333,27,13,1400000US06065043808,"Census Tract 438.08, Riverside County, California",6,65,43808


In [41]:
#get male and female columns to sum
male_cols = educ_gdf[[
 'B15002_003E',
'B15002_004E',
'B15002_005E',
'B15002_006E',
'B15002_007E',
'B15002_008E',
'B15002_009E',
'B15002_010E',
'B15002_011E',
'B15002_012E',
'B15002_013E',
'B15002_014E',
'B15002_015E',
'B15002_016E',
'B15002_017E',
'B15002_018E']]
female_cols = educ_gdf[[
'B15002_020E',
'B15002_021E',
'B15002_022E',
'B15002_023E',
'B15002_024E',
'B15002_025E',
'B15002_026E',
'B15002_027E',
'B15002_028E',
'B15002_029E',
'B15002_030E',
'B15002_031E',
'B15002_032E',
'B15002_033E',
'B15002_034E',
'B15002_035E',
]]


In [42]:
#rename columns to match
male_cols.rename(columns={
    'B15002_003E':'B15002_020E',
'B15002_004E':'B15002_021E',
'B15002_005E':'B15002_022E',
'B15002_006E':'B15002_023E',
'B15002_007E':'B15002_024E',
'B15002_008E':'B15002_025E',
'B15002_009E':'B15002_026E',
'B15002_010E':'B15002_027E',
'B15002_011E':'B15002_028E',
'B15002_012E':'B15002_029E',
'B15002_013E':'B15002_030E',
'B15002_014E':'B15002_031E',
'B15002_015E':'B15002_032E',
'B15002_016E':'B15002_033E',
'B15002_017E':'B15002_034E',
'B15002_018E':'B15002_035E'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  male_cols.rename(columns={


In [43]:
#convert datatype to int
female_cols = female_cols.astype(int)
#convert datatype to int
male_cols = male_cols.astype(int)

In [44]:
df_sum = male_cols.add(female_cols, fill_value=0)

In [45]:
#add back tract column
df_sum['tract'] = educ_gdf['tract']

In [46]:
df_sum['Total_Educ'] = educ_gdf['B15002_001E']

In [47]:
df_sum.head()

Unnamed: 0,B15002_020E,B15002_021E,B15002_022E,B15002_023E,B15002_024E,B15002_025E,B15002_026E,B15002_027E,B15002_028E,B15002_029E,B15002_030E,B15002_031E,B15002_032E,B15002_033E,B15002_034E,B15002_035E,tract,Total_Educ
0,63,53,58,99,17,30,132,81,486,114,67,113,84,72,0,0,43403,1469
1,35,99,91,126,36,97,148,138,790,232,524,215,142,43,0,44,46402,2760
2,31,25,84,29,42,86,74,21,366,99,261,128,246,79,12,0,40903,1583
3,12,59,321,51,113,37,131,98,524,56,303,48,107,0,26,0,41203,1886
4,27,22,39,41,65,21,140,94,1676,602,1046,527,890,455,80,70,43808,5795


In [48]:
df_sum.rename(columns={
      'B15002_020E':'No schooling completed',
'B15002_028E':'high school diploma',
'B15002_031E':'Associates degree',
'B15002_032E':'Bachelors degree',
'B15002_033E':'Masters degree',
'B15002_034E':'Professional school degree',
'B15002_035E':'Doctorate degree'
                }, inplace=True)

In [49]:
df_sum.head()

Unnamed: 0,No schooling completed,B15002_021E,B15002_022E,B15002_023E,B15002_024E,B15002_025E,B15002_026E,B15002_027E,high school diploma,B15002_029E,B15002_030E,Associates degree,Bachelors degree,Masters degree,Professional school degree,Doctorate degree,tract,Total_Educ
0,63,53,58,99,17,30,132,81,486,114,67,113,84,72,0,0,43403,1469
1,35,99,91,126,36,97,148,138,790,232,524,215,142,43,0,44,46402,2760
2,31,25,84,29,42,86,74,21,366,99,261,128,246,79,12,0,40903,1583
3,12,59,321,51,113,37,131,98,524,56,303,48,107,0,26,0,41203,1886
4,27,22,39,41,65,21,140,94,1676,602,1046,527,890,455,80,70,43808,5795


In [50]:
#create col for some school but less than highschool degree
df_sum['less_highschool'] = df_sum.iloc[:,2:8].sum(axis=1)
#create col for some school post highschool but less than secondary degree
df_sum['some_college'] = df_sum.B15002_029E + df_sum.B15002_030E

In [51]:
df_sum.head()

Unnamed: 0,No schooling completed,B15002_021E,B15002_022E,B15002_023E,B15002_024E,B15002_025E,B15002_026E,B15002_027E,high school diploma,B15002_029E,B15002_030E,Associates degree,Bachelors degree,Masters degree,Professional school degree,Doctorate degree,tract,Total_Educ,less_highschool,some_college
0,63,53,58,99,17,30,132,81,486,114,67,113,84,72,0,0,43403,1469,417,181
1,35,99,91,126,36,97,148,138,790,232,524,215,142,43,0,44,46402,2760,636,756
2,31,25,84,29,42,86,74,21,366,99,261,128,246,79,12,0,40903,1583,336,360
3,12,59,321,51,113,37,131,98,524,56,303,48,107,0,26,0,41203,1886,751,359
4,27,22,39,41,65,21,140,94,1676,602,1046,527,890,455,80,70,43808,5795,400,1648


In [52]:
educ_gdf = df_sum

In [53]:
#educ_gdf.columns.values.tolist()

In [54]:
#drop columns we don't need
educ_gdf = educ_gdf[[
 'Total_Educ',
 'No schooling completed',
 'high school diploma',
 'Associates degree',
 'Bachelors degree',
 'Masters degree',
 'Professional school degree',
 'Doctorate degree',
 'tract',
 'less_highschool',
 'some_college']]

In [55]:
cols =  ['No schooling completed',
 'high school diploma',
 'Associates degree',
 'Bachelors degree',
 'Masters degree',
 'Professional school degree',
 'Doctorate degree',
 'less_highschool',
 'some_college']

In [56]:
int_cols = [ 'Total_Educ','No schooling completed',
 'high school diploma',
 'Associates degree',
 'Bachelors degree',
 'Masters degree',
 'Professional school degree',
 'Doctorate degree',
 'less_highschool',
 'some_college']


In [57]:

# Convert columns to integer data type
for column in int_cols:
    educ_gdf[column] = educ_gdf[column].astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  educ_gdf[column] = educ_gdf[column].astype('int64')


In [58]:
for col in cols:
    educ_gdf["Percent " + col] = (educ_gdf[col]/educ_gdf['Total_Educ']) * 100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  educ_gdf["Percent " + col] = (educ_gdf[col]/educ_gdf['Total_Educ']) * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  educ_gdf["Percent " + col] = (educ_gdf[col]/educ_gdf['Total_Educ']) * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  educ_gdf["Percent " + col] = (educ_gdf[col]/educ_gdf['Tot

In [59]:
educ_gdf = educ_gdf.drop(cols, axis=1)

In [60]:
educ_gdf.head()

Unnamed: 0,Total_Educ,tract,Percent No schooling completed,Percent high school diploma,Percent Associates degree,Percent Bachelors degree,Percent Masters degree,Percent Professional school degree,Percent Doctorate degree,Percent less_highschool,Percent some_college
0,1469,43403,4.288632,33.08373,7.692308,5.718176,4.901293,0.0,0.0,28.386658,12.321307
1,2760,46402,1.268116,28.623188,7.789855,5.144928,1.557971,0.0,1.594203,23.043478,27.391304
2,1583,40903,1.958307,23.120657,8.085913,15.540114,4.990524,0.758054,0.0,21.225521,22.74163
3,1886,41203,0.636267,27.783669,2.545069,5.673383,0.0,1.378579,0.0,39.819724,19.034995
4,5795,43808,0.465919,28.921484,9.094047,15.358067,7.851596,1.3805,1.207938,6.902502,28.438309


## Occupation

In [61]:
#Table B24080: Sex by Class of Worker for the Civilian Population).


In [62]:
#for SB County
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=group(B24080),geometry,&for=tract:*&in=state:06&in=county:071')
censusdata = r.json()
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
SB_oc = pd.DataFrame(data_rows, columns=column_names)
SB_oc.head()

Unnamed: 0,B24080_001E,B24080_001EA,B24080_001M,B24080_001MA,B24080_002E,B24080_002EA,B24080_002M,B24080_002MA,B24080_003E,B24080_003EA,...,B24080_020MA,B24080_021E,B24080_021EA,B24080_021M,B24080_021MA,GEO_ID,NAME,state,county,tract
0,2446,,243,,1295,,180,,831,,...,,0,,132,,1400000US06071000103,"Census Tract 1.03, San Bernardino County, Cali...",6,71,103
1,3286,,337,,1819,,204,,1346,,...,,11,,17,,1400000US06071000104,"Census Tract 1.04, San Bernardino County, Cali...",6,71,104
2,3159,,356,,1785,,167,,1392,,...,,0,,132,,1400000US06071000105,"Census Tract 1.05, San Bernardino County, Cali...",6,71,105
3,6005,,354,,3560,,298,,2592,,...,,28,,47,,1400000US06071000106,"Census Tract 1.06, San Bernardino County, Cali...",6,71,106
4,1476,,165,,819,,89,,605,,...,,0,,132,,1400000US06071000107,"Census Tract 1.07, San Bernardino County, Cali...",6,71,107


In [63]:
#get it for Riverside
r = requests.get('https://api.census.gov/data/2009/acs/acs5?get=group(B24080),geometry,&for=tract:*&in=state:06&in=county:065')
censusdata = r.json()
#specify column names
column_names = censusdata[0]
data_rows = censusdata[1:]
#create dataframe
riverside_oc =  pd.DataFrame(data_rows, columns=column_names)
riverside_oc.head(2)

Unnamed: 0,B24080_001E,B24080_001EA,B24080_001M,B24080_001MA,B24080_002E,B24080_002EA,B24080_002M,B24080_002MA,B24080_003E,B24080_003EA,...,B24080_020MA,B24080_021E,B24080_021EA,B24080_021M,B24080_021MA,GEO_ID,NAME,state,county,tract
0,4060,,337,,2205,,257,,1540,,...,,0,,132,,1400000US06065043223,"Census Tract 432.23, Riverside County, California",6,65,43223
1,5284,,347,,3099,,242,,2023,,...,,15,,24,,1400000US06065043224,"Census Tract 432.24, Riverside County, California",6,65,43224


In [64]:
oc_gdf = pd.concat([riverside_oc,SB_oc], ignore_index=True)

In [65]:
#get male and female columns to sum
male_cols = oc_gdf[[
 'B24080_003E',
 'B24080_004E',
 'B24080_005E',
 'B24080_006E',
 'B24080_007E',
 'B24080_008E',
 'B24080_009E',
 'B24080_010E',
 'B24080_011E']]
female_cols = oc_gdf[[
 'B24080_013E',
 'B24080_014E',
 'B24080_015E',
 'B24080_016E',
 'B24080_017E',
 'B24080_018E',
 'B24080_019E',
 'B24080_020E',
 'B24080_021E',
]]


In [66]:
#rename columns to match
male_cols.rename(columns={
                "B24080_003E":  "Private for-profit wage and salary workers:",
                "B24080_004E": "Employee of private company workers",
                "B24080_005E":"Self-employed in own incorporated business workers",
                "B24080_006E": "Private not-for-profit wage and salary workers",
                "B24080_007E": "Local government workers",
                "B24080_008E": "State government workers",
                "B24080_009E": "Federal government workers",
                "B24080_010E":  "Self-employed in own not incorporated business workers",
                "B24080_011E": "Unpaid family workers"}, inplace=True)
female_cols.rename(columns={     
                "B24080_013E": "Private for-profit wage and salary workers:", 
                "B24080_014E": "Employee of private company workers", 
                "B24080_015E":"Self-employed in own incorporated business workers",
                "B24080_016E": "Private not-for-profit wage and salary workers",
                "B24080_017E": "Local government workers",
                "B24080_018E": "State government workers", 
                "B24080_019E": "Federal government workers",
                "B24080_020E":"Self-employed in own not incorporated business workers",  
                "B24080_021E": "Unpaid family workers"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  male_cols.rename(columns={
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  female_cols.rename(columns={


In [67]:
male_cols.head()

Unnamed: 0,Private for-profit wage and salary workers:,Employee of private company workers,Self-employed in own incorporated business workers,Private not-for-profit wage and salary workers,Local government workers,State government workers,Federal government workers,Self-employed in own not incorporated business workers,Unpaid family workers
0,1540,1475,65,95,154,38,53,325,0
1,2023,1654,369,83,456,52,60,408,17
2,1810,1743,67,67,202,61,39,164,0
3,839,729,110,69,144,41,0,306,0
4,1001,993,8,34,155,14,80,35,0


In [68]:
female_cols.head()

Unnamed: 0,Private for-profit wage and salary workers:,Employee of private company workers,Self-employed in own incorporated business workers,Private not-for-profit wage and salary workers,Local government workers,State government workers,Federal government workers,Self-employed in own not incorporated business workers,Unpaid family workers
0,1233,1173,60,32,260,156,0,174,0
1,1395,1191,204,85,277,147,30,236,15
2,1077,1048,29,44,317,54,28,77,0
3,672,657,15,90,167,46,31,110,0
4,686,667,19,110,178,98,25,34,0


In [69]:
#convert datatype to int
female_cols = female_cols.astype(int)

In [70]:
#convert to int
male_cols= male_cols.astype(int)

In [71]:
#sum to get total
df_sum = male_cols.add(female_cols, fill_value=0)

In [72]:
df_sum.head(4)

Unnamed: 0,Private for-profit wage and salary workers:,Employee of private company workers,Self-employed in own incorporated business workers,Private not-for-profit wage and salary workers,Local government workers,State government workers,Federal government workers,Self-employed in own not incorporated business workers,Unpaid family workers
0,2773,2648,125,127,414,194,53,499,0
1,3418,2845,573,168,733,199,90,644,32
2,2887,2791,96,111,519,115,67,241,0
3,1511,1386,125,159,311,87,31,416,0


In [73]:
#add back tract column
df_sum['tract'] = oc_gdf['tract']

In [74]:
df_sum.head(1)

Unnamed: 0,Private for-profit wage and salary workers:,Employee of private company workers,Self-employed in own incorporated business workers,Private not-for-profit wage and salary workers,Local government workers,State government workers,Federal government workers,Self-employed in own not incorporated business workers,Unpaid family workers,tract
0,2773,2648,125,127,414,194,53,499,0,43223


In [75]:
#merge to get back geometries
oc_gdf = pd.merge(df_sum,oc_gdf,on='tract')

In [76]:
oc_gdf.head(1)

Unnamed: 0,Private for-profit wage and salary workers:,Employee of private company workers,Self-employed in own incorporated business workers,Private not-for-profit wage and salary workers,Local government workers,State government workers,Federal government workers,Self-employed in own not incorporated business workers,Unpaid family workers,tract,...,B24080_020M,B24080_020MA,B24080_021E,B24080_021EA,B24080_021M,B24080_021MA,GEO_ID,NAME,state,county
0,2773,2648,125,127,414,194,53,499,0,43223,...,108,,0,,132,,1400000US06065043223,"Census Tract 432.23, Riverside County, California",6,65


In [77]:
#rename total workers column
oc_gdf.rename(columns={"B24080_001E": "Total Workers:"}, inplace=True)

In [78]:
#oc_gdf.columns.values.tolist()

In [79]:
#drop columns
oc_gdf = oc_gdf[['Private for-profit wage and salary workers:',
 'Employee of private company workers',
 'Self-employed in own incorporated business workers',
 'Private not-for-profit wage and salary workers',
 'Local government workers',
 'State government workers',
 'Federal government workers',
 'Self-employed in own not incorporated business workers',
 'Unpaid family workers',
                 "Total Workers:",
 'GEO_ID','tract']]

In [80]:

# Convert columns to integer data type

int_cols = [  "Total Workers:",'Private for-profit wage and salary workers:',
 'Employee of private company workers',
 'Self-employed in own incorporated business workers',
 'Private not-for-profit wage and salary workers',
 'Local government workers',
 'State government workers',
 'Federal government workers',
 'Self-employed in own not incorporated business workers',
 'Unpaid family workers']

for column in int_cols:
    oc_gdf[column] = oc_gdf[column].astype('int64')

In [81]:
oc_gdf.head(3)

Unnamed: 0,Private for-profit wage and salary workers:,Employee of private company workers,Self-employed in own incorporated business workers,Private not-for-profit wage and salary workers,Local government workers,State government workers,Federal government workers,Self-employed in own not incorporated business workers,Unpaid family workers,Total Workers:,GEO_ID,tract
0,2773,2648,125,127,414,194,53,499,0,4060,1400000US06065043223,43223
1,3418,2845,573,168,733,199,90,644,32,5284,1400000US06065043224,43224
2,2887,2791,96,111,519,115,67,241,0,3940,1400000US06065043225,43225


In [82]:
#get percentages
cols = [ 'Private for-profit wage and salary workers:',
 'Employee of private company workers',
 'Self-employed in own incorporated business workers',
 'Private not-for-profit wage and salary workers',
 'Local government workers',
 'State government workers',
 'Federal government workers',
 'Self-employed in own not incorporated business workers',
 'Unpaid family workers']

for col in cols:
    oc_gdf["Percent " + col] = (oc_gdf[col]/oc_gdf['Total Workers:']) * 100

In [83]:
#drop columns
oc_gdf = oc_gdf.drop(cols, axis=1)



In [84]:
##Percent Private for-profit wage and salary workers = employee of private company + self-employed  - dropping
oc_gdf = oc_gdf.drop(columns=['Percent Private for-profit wage and salary workers:'], axis=1)

In [85]:
oc_gdf.head()

Unnamed: 0,Total Workers:,GEO_ID,tract,Percent Employee of private company workers,Percent Self-employed in own incorporated business workers,Percent Private not-for-profit wage and salary workers,Percent Local government workers,Percent State government workers,Percent Federal government workers,Percent Self-employed in own not incorporated business workers,Percent Unpaid family workers
0,4060,1400000US06065043223,43223,65.221675,3.078818,3.128079,10.197044,4.778325,1.305419,12.29064,0.0
1,5284,1400000US06065043224,43224,53.841787,10.844058,3.17941,13.872067,3.766086,1.703255,12.187737,0.605602
2,3940,1400000US06065043225,43225,70.837563,2.436548,2.817259,13.172589,2.918782,1.700508,6.116751,0.0
3,2515,1400000US06065043304,43304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0
4,2450,1400000US06065043305,43305,67.755102,1.102041,5.877551,13.591837,4.571429,4.285714,2.816327,0.0


## Joining all the dataframes

In [86]:
#oc_gdf + educ_gdf + HI_gdf + gdf

census_df = pd.merge(gdf,HI_gdf, on = 'tract')



In [87]:
census_df.head()

Unnamed: 0,Median HH Income,state_x,county_x,tract,Total Pop,GEO_ID_x,NAME,Percent White alone,Percent Black or African American alone,Percent American Indian and Alaska Native alone,...,"Percent $35,000 - $39,999","Percent $40,000 - $44,999","Percent $45,000 - $49,999","Percent $50,000 - $59,999","Percent $60,000 - $74,999","Percent $75,000 - $99,999","Percent $100,000 - $124,999","Percent $125,000 - $149,999","Percent $150,000 - $199,999","Percent $200,000 or more"
0,71775,6,65,43223,9277,1400000US06065043223,"Census Tract 432.23, Riverside County, California",72.534224,2.953541,3.859006,...,1.423994,3.951584,5.339979,5.553578,10.11036,13.492346,14.097544,10.466358,4.449982,6.051976
1,108708,6,65,43224,12791,1400000US06065043224,"Census Tract 432.24, Riverside County, California",88.507544,1.852865,0.50817,...,2.180119,1.323644,1.998443,3.607579,8.590709,16.688295,8.642616,9.317415,18.375292,18.271477
2,69173,6,65,43225,8749,1400000US06065043225,"Census Tract 432.25, Riverside County, California",69.1279,4.069036,0.0,...,4.224299,4.224299,1.607477,10.46729,14.579439,17.009346,14.878505,4.785047,5.383178,3.925234
3,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,2.428964,3.345555,1.604033,7.51604,6.416132,11.824015,9.807516,7.974335,9.624198,9.670027
4,53998,6,65,43305,8872,1400000US06065043305,"Census Tract 433.05, Riverside County, California",72.576646,5.09468,1.048242,...,7.154419,5.212043,3.52865,10.877307,16.704435,14.535448,7.542894,3.431531,2.622208,0.0


In [88]:
census_df = pd.merge(census_df,educ_gdf,on = 'tract')

In [89]:
census_df = pd.merge(census_df,oc_gdf,on = 'tract')

In [90]:
#census_gdf.rename(columns={"Total Workers:":"Total Workers","state_left":"state","county_left":"county","tract_left":"tract","NAME_left":"NAME"}, inplace=True)

In [91]:
census_df_2009 = census_df

### Import geometries



In [93]:
census_df_2009.head()

Unnamed: 0,Median HH Income,state_x,county_x,tract,Total Pop,GEO_ID_x,NAME,Percent White alone,Percent Black or African American alone,Percent American Indian and Alaska Native alone,...,Total Workers:,GEO_ID,Percent Employee of private company workers,Percent Self-employed in own incorporated business workers,Percent Private not-for-profit wage and salary workers,Percent Local government workers,Percent State government workers,Percent Federal government workers,Percent Self-employed in own not incorporated business workers,Percent Unpaid family workers
0,71775,6,65,43223,9277,1400000US06065043223,"Census Tract 432.23, Riverside County, California",72.534224,2.953541,3.859006,...,4060,1400000US06065043223,65.221675,3.078818,3.128079,10.197044,4.778325,1.305419,12.29064,0.0
1,108708,6,65,43224,12791,1400000US06065043224,"Census Tract 432.24, Riverside County, California",88.507544,1.852865,0.50817,...,5284,1400000US06065043224,53.841787,10.844058,3.17941,13.872067,3.766086,1.703255,12.187737,0.605602
2,69173,6,65,43225,8749,1400000US06065043225,"Census Tract 432.25, Riverside County, California",69.1279,4.069036,0.0,...,3940,1400000US06065043225,70.837563,2.436548,2.817259,13.172589,2.918782,1.700508,6.116751,0.0
3,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,2515,1400000US06065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0
4,53998,6,65,43305,8872,1400000US06065043305,"Census Tract 433.05, Riverside County, California",72.576646,5.09468,1.048242,...,2450,1400000US06065043305,67.755102,1.102041,5.877551,13.591837,4.571429,4.285714,2.816327,0.0


In [97]:
#remove the first 9 characters from GEOID
census_df_2009['GEO_ID'] = census_df_2009['GEO_ID'].str.slice(9)

In [98]:
census_df_2009.head()

Unnamed: 0,Median HH Income,state_x,county_x,tract,Total Pop,GEO_ID_x,NAME,Percent White alone,Percent Black or African American alone,Percent American Indian and Alaska Native alone,...,Total Workers:,GEO_ID,Percent Employee of private company workers,Percent Self-employed in own incorporated business workers,Percent Private not-for-profit wage and salary workers,Percent Local government workers,Percent State government workers,Percent Federal government workers,Percent Self-employed in own not incorporated business workers,Percent Unpaid family workers
0,71775,6,65,43223,9277,1400000US06065043223,"Census Tract 432.23, Riverside County, California",72.534224,2.953541,3.859006,...,4060,6065043223,65.221675,3.078818,3.128079,10.197044,4.778325,1.305419,12.29064,0.0
1,108708,6,65,43224,12791,1400000US06065043224,"Census Tract 432.24, Riverside County, California",88.507544,1.852865,0.50817,...,5284,6065043224,53.841787,10.844058,3.17941,13.872067,3.766086,1.703255,12.187737,0.605602
2,69173,6,65,43225,8749,1400000US06065043225,"Census Tract 432.25, Riverside County, California",69.1279,4.069036,0.0,...,3940,6065043225,70.837563,2.436548,2.817259,13.172589,2.918782,1.700508,6.116751,0.0
3,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,2515,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0
4,53998,6,65,43305,8872,1400000US06065043305,"Census Tract 433.05, Riverside County, California",72.576646,5.09468,1.048242,...,2450,6065043305,67.755102,1.102041,5.877551,13.591837,4.571429,4.285714,2.816327,0.0


In [94]:
tract_geometries = gpd.read_file('tract_geometries.GPKG')
tract_geometries.head()


Unnamed: 0,GEO_ID,geometry
0,6065041904,"POLYGON ((-13099233.990 4011396.270, -13099207..."
1,6065041804,"POLYGON ((-13092259.370 4008841.160, -13092241..."
2,6065041910,"POLYGON ((-13082039.130 4004790.610, -13081965..."
3,6065041913,"POLYGON ((-13095193.760 4011880.940, -13095193..."
4,6065041912,"POLYGON ((-13096842.060 4012549.340, -13096833..."


In [99]:
census_df_2009 = pd.merge(census_df_2009,tract_geometries,on='GEO_ID')

In [100]:
census_df_2009.head()

Unnamed: 0,Median HH Income,state_x,county_x,tract,Total Pop,GEO_ID_x,NAME,Percent White alone,Percent Black or African American alone,Percent American Indian and Alaska Native alone,...,GEO_ID,Percent Employee of private company workers,Percent Self-employed in own incorporated business workers,Percent Private not-for-profit wage and salary workers,Percent Local government workers,Percent State government workers,Percent Federal government workers,Percent Self-employed in own not incorporated business workers,Percent Unpaid family workers,geometry
0,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."
1,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."
2,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."
3,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."
4,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."


In [102]:
gdf = gpd.GeoDataFrame(census_df_2009,geometry='geometry',crs=tract_geometries.crs)

In [103]:
gdf.head()

Unnamed: 0,Median HH Income,state_x,county_x,tract,Total Pop,GEO_ID_x,NAME,Percent White alone,Percent Black or African American alone,Percent American Indian and Alaska Native alone,...,GEO_ID,Percent Employee of private company workers,Percent Self-employed in own incorporated business workers,Percent Private not-for-profit wage and salary workers,Percent Local government workers,Percent State government workers,Percent Federal government workers,Percent Self-employed in own not incorporated business workers,Percent Unpaid family workers,geometry
0,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."
1,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."
2,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."
3,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."
4,67813,6,65,43304,6088,1400000US06065043304,"Census Tract 433.04, Riverside County, California",83.47569,0.344941,0.558476,...,6065043304,55.109344,4.970179,6.322068,12.365805,3.459245,1.232604,16.540755,0.0,"POLYGON ((-13021206.040 3990732.690, -13021206..."


In [104]:
gdf.to_file('census_2009.GPKG', driver='GPKG')