In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
%matplotlib inline

*Loading the Data:*

**ACS 1 Year Estimates** - the American Community Survey (ACS) provides tract-level population estimates for the 2011-2015 period. ACS estimates were downloaded from the Census Bureau's FactFinder application and from the National Historic GIS website.

**FCC** - Tract-level data on household Internet connectivity is from the Federal Communications Commission's (FCC) Form 477 Census Tract Data on Internet Access Services (current as of June 30, 2016).


**IPUMS** - The Integrated Public Use Microdata Series (IPUMS-USA) provides user level socioeconomic data from as early as 1950-2016 organized by Public Use Microdata Area. Socioeconomic data includes touchpoints on: Rural / Urban Settings, Household Demographics, Gender, Age, Marriage, Race & Ethnicity, Education, Work, Income

**census_tract_converter** - code lookup to convert PUMA to census tract data

**Dropbox for Data** - https://www.dropbox.com/sh/v3viqffuphbpvee/AACm5N4_c7jGzTPxiYbeNo1sa?dl=0

In [7]:
#pdb 

pdb = pd.read_csv("pdb2016trv8_us.csv", sep = ",", encoding = "latin-1",dtype={'tract':object})

#acs 1 year estimate
#need to add new acs data from the acs notebook
# acs = pd.read_csv("ACS_16_5YR_B01003_with_all_states.csv", sep = ",", encoding = "latin-1", header = 1, dtype={'tractcode': object})

#fcc data
fcc = pd.read_csv("tract_map_jun_2016.csv", sep=",", encoding = "latin-1", dtype={'tractcode': object})

# IPUMs data broken out by PUMA Codes with Data Dictionary (DDI)
# https://usa.ipums.org/usa-action/extract_requests/download, login: robert.y.deng@gmail.com // w209
ipums = pd.read_csv("usa_00005.csv", sep=",", encoding="latin-1")

# Puma to census tract converter, https://www.census.gov/geo/maps-data/data/centract_rel.html
ptc = pd.read_csv("2010_Census_Tract_to_2010_PUMA.txt", sep=',', encoding='latin-1')

#shapefiles

census_shp = gpd.read_file("Tract_2010Census_DP1.shp")



In [None]:
census_shp

In [10]:
z=census_shp["GEOID10"].unique()

In [11]:
sorted(z)

['01001020100',
 '01001020200',
 '01001020300',
 '01001020400',
 '01001020500',
 '01001020600',
 '01001020700',
 '01001020801',
 '01001020802',
 '01001020900',
 '01001021000',
 '01001021100',
 '01003010100',
 '01003010200',
 '01003010300',
 '01003010400',
 '01003010500',
 '01003010600',
 '01003010701',
 '01003010703',
 '01003010704',
 '01003010705',
 '01003010800',
 '01003010903',
 '01003010904',
 '01003010905',
 '01003010906',
 '01003011000',
 '01003011101',
 '01003011102',
 '01003011201',
 '01003011202',
 '01003011300',
 '01003011401',
 '01003011403',
 '01003011405',
 '01003011406',
 '01003011407',
 '01003011408',
 '01003011501',
 '01003011502',
 '01003011601',
 '01003011602',
 '01003990000',
 '01005950100',
 '01005950200',
 '01005950300',
 '01005950400',
 '01005950500',
 '01005950600',
 '01005950700',
 '01005950800',
 '01005950900',
 '01007010001',
 '01007010002',
 '01007010003',
 '01007010004',
 '01009050101',
 '01009050102',
 '01009050200',
 '01009050300',
 '01009050400',
 '010090

In [None]:
"01001020100"

In [3]:
print("ACS Shape", acs.shape, "\nFCC Shape", fcc.shape, 
      "\nIPUMS Shape", ipums.shape, "\nPUMA Code Converter", ptc.shape)

ACS Shape (74001, 5) 
FCC Shape (73767, 3) 
IPUMS Shape (3156487, 24) 
PUMA Code Converter (74091, 4)


In [4]:
# acs.head()

Unnamed: 0,Id,Id2,Geography,Estimate; Total,Margin of Error; Total
0,1400000US01001020100,1001020100,"Census Tract 201, Autauga County, Alabama",2010,231
1,1400000US01001020200,1001020200,"Census Tract 202, Autauga County, Alabama",2196,281
2,1400000US01001020300,1001020300,"Census Tract 203, Autauga County, Alabama",3136,355
3,1400000US01001020400,1001020400,"Census Tract 204, Autauga County, Alabama",4563,464
4,1400000US01001020500,1001020500,"Census Tract 205, Autauga County, Alabama",10529,655


In [5]:
ipums.sample(5)

Unnamed: 0,YEAR,DATANUM,SERIAL,HHWT,PUMA,GQ,FARM,PERNUM,PERWT,FAMSIZE,...,YRSUSA1,SPEAKENG,HCOVANY,GRADEATT,GRADEATTD,DEGFIELD,DEGFIELDD,EMPSTAT,EMPSTATD,INCWAGE
645830,2016,1,268841,74,1101,1,1,3,95,3,...,15,3,2,6,60,0,0,1,10,30000
1099937,2016,1,470487,126,2200,1,1,2,143,2,...,0,3,2,0,0,0,0,3,30,0
2512899,2016,1,1103544,58,604,1,1,1,58,3,...,0,3,2,0,0,33,3301,1,10,53000
1442569,2016,1,626015,149,3208,1,1,1,149,3,...,23,5,2,0,0,0,0,3,30,0
993220,2016,1,423751,23,1701,1,1,2,22,2,...,0,3,2,0,0,0,0,3,30,0


Cleaning the IPUMS data, there are 3 steps:
    1. Dummify ordinal variables and remove the original ordinal columns
    2. Groupby Serial number, which is user level identifier data
        - Sum and average for specific metrics separately and then merge
    3. Groupby PUMA code 
        - Sum and average for specific metrics separately and then merge

In [6]:
#dummify everything with an ordinal variable:
#race, farm, Citizen, speak english, healthcare coverage, grade attended, employment
ipums_cleaned = pd.concat([ipums.drop(ipums[["YEAR", "DATANUM", "HHWT", "GQ", "PERNUM", "PERWT", "YRIMMIG",
                                            "RACE", "FARM", "CITIZEN", "SPEAKENG", "HCOVANY", 
                                            "GRADEATT", "DEGFIELD", "EMPSTAT", 
                                            "GRADEATTD", "RACED", 
                                            "DEGFIELDD", "EMPSTATD"]], axis = 1),
                          pd.get_dummies(ipums["RACE"], prefix="RACE"),
                          pd.get_dummies(ipums["FARM"], prefix="FARM"),
                          pd.get_dummies(ipums["CITIZEN"], prefix="CITIZEN"), 
                          pd.get_dummies(ipums["SPEAKENG"], prefix="SPEAKS_ENGLISH"), 
                          pd.get_dummies(ipums["HCOVANY"], prefix="HEALTH_CARE_COVERAGE"), 
                          pd.get_dummies(ipums["GRADEATT"], prefix = "EDUCATION"), 
                          #pd.get_dummies(ipums["DEGFIELD"], prefix="FIELD_OF_DEGREE"), Just TMI, so just remove it
                          pd.get_dummies(ipums["EMPSTAT"], prefix="EMPLOYMENT")], axis = 1)


ipums_cleaned.head(n=50)

#Famsize - average
#YRSUSA - median, or non-zero count
#IncWage - sum
#everything else just sum

Unnamed: 0,SERIAL,PUMA,FAMSIZE,YRSUSA1,INCWAGE,RACE_1,RACE_2,RACE_3,RACE_4,RACE_5,...,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,EDUCATION_7,EMPLOYMENT_0,EMPLOYMENT_1,EMPLOYMENT_2,EMPLOYMENT_3
0,1,700,2,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,1,700,2,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2,900,1,0,27300,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,3,302,4,0,65000,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,3,302,4,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
5,3,302,4,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
6,3,302,4,0,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
7,4,600,2,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
8,4,600,2,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9,5,2500,2,0,78000,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [7]:
print("{:.2f} Total columns for IPUMS Cleaned".format(len(ipums_cleaned.columns),), 
      "\n\n", ipums_cleaned.columns)

40.00 Total columns for IPUMS Cleaned 

 Index(['SERIAL', 'PUMA', 'FAMSIZE', 'YRSUSA1', 'INCWAGE', 'RACE_1', 'RACE_2',
       'RACE_3', 'RACE_4', 'RACE_5', 'RACE_6', 'RACE_7', 'RACE_8', 'RACE_9',
       'FARM_1', 'FARM_2', 'CITIZEN_0', 'CITIZEN_1', 'CITIZEN_2', 'CITIZEN_3',
       'SPEAKS_ENGLISH_0', 'SPEAKS_ENGLISH_1', 'SPEAKS_ENGLISH_3',
       'SPEAKS_ENGLISH_4', 'SPEAKS_ENGLISH_5', 'SPEAKS_ENGLISH_6',
       'HEALTH_CARE_COVERAGE_1', 'HEALTH_CARE_COVERAGE_2', 'EDUCATION_0',
       'EDUCATION_1', 'EDUCATION_2', 'EDUCATION_3', 'EDUCATION_4',
       'EDUCATION_5', 'EDUCATION_6', 'EDUCATION_7', 'EMPLOYMENT_0',
       'EMPLOYMENT_1', 'EMPLOYMENT_2', 'EMPLOYMENT_3'],
      dtype='object')


In [8]:
#First groupby individual user level data in SERIAL, average for family size and YRS in the USA
ipums_groupby_average = ipums_cleaned.ix[:,0:4].groupby(["SERIAL"], as_index = False).mean()
ipums_groupby_average.head(n=5)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0,SERIAL,PUMA,FAMSIZE,YRSUSA1
0,1,700.0,2.0,0.0
1,2,900.0,1.0,0.0
2,3,302.0,4.0,0.0
3,4,600.0,2.0,0.0
4,5,2500.0,2.0,0.0


In [9]:
#Groupby individual user level data in SERIAL, sum for every other metric
ix_to_sum = list([0]) + list(range(5,len(ipums_cleaned.columns)))
ipums_groupby_sum = ipums_cleaned.ix[:,ix_to_sum].groupby(["SERIAL"], as_index = False).sum()
ipums_groupby_sum.head()

Unnamed: 0,SERIAL,RACE_1,RACE_2,RACE_3,RACE_4,RACE_5,RACE_6,RACE_7,RACE_8,RACE_9,...,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,EDUCATION_7,EMPLOYMENT_0,EMPLOYMENT_1,EMPLOYMENT_2,EMPLOYMENT_3
0,1,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
1,2,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,4,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,3
3,4,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
4,5,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1


In [10]:
#Merge sum and average metrics
ipums_cleaned_2 = ipums_groupby_average.merge(ipums_groupby_sum, left_on = "SERIAL", right_on = "SERIAL", how = "left")
ipums_cleaned_2.head()

Unnamed: 0,SERIAL,PUMA,FAMSIZE,YRSUSA1,RACE_1,RACE_2,RACE_3,RACE_4,RACE_5,RACE_6,...,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,EDUCATION_7,EMPLOYMENT_0,EMPLOYMENT_1,EMPLOYMENT_2,EMPLOYMENT_3
0,1,700.0,2.0,0.0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
1,2,900.0,1.0,0.0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,302.0,4.0,0.0,4,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,3
3,4,600.0,2.0,0.0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
4,5,2500.0,2.0,0.0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1


In [11]:
#Now groupby PUMA, sum and average in the same way and rejoin
ipums_groupby_average_2 = ipums_cleaned_2.ix[:,1:4].groupby(["PUMA"], as_index = False).mean()
ix_to_sum_2 = list([1]) + list(range(5,39))
ipums_groupby_sum_2 = ipums_cleaned_2.ix[:,ix_to_sum_2].groupby(["PUMA"], as_index = False).sum()

#Final left join by PUMA, ready to merge with census data
ipums_master = ipums_groupby_average_2.merge(ipums_groupby_sum_2, left_on = "PUMA", right_on = "PUMA", how = "left")
ipums_master.head()

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0,PUMA,FAMSIZE,YRSUSA1,RACE_2,RACE_3,RACE_4,RACE_5,RACE_6,RACE_7,RACE_8,...,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,EDUCATION_7,EMPLOYMENT_0,EMPLOYMENT_1,EMPLOYMENT_2,EMPLOYMENT_3
0,100.0,2.161538,1.197337,2453.0,3838.0,132.0,160.0,1406.0,748.0,1872.0,...,790.0,3175.0,3234.0,3370.0,3178.0,461.0,12405.0,29746.0,1633.0,23550.0
1,101.0,1.969199,2.744565,2084.0,165.0,326.0,53.0,741.0,579.0,616.0,...,174.0,830.0,820.0,797.0,2108.0,525.0,3126.0,8825.0,601.0,6638.0
2,102.0,2.222179,3.388744,2520.0,197.0,382.0,52.0,703.0,732.0,628.0,...,222.0,916.0,932.0,994.0,1002.0,308.0,3652.0,10447.0,507.0,6311.0
3,103.0,2.096533,3.106207,1754.0,191.0,238.0,30.0,545.0,631.0,534.0,...,155.0,648.0,679.0,713.0,1207.0,304.0,2666.0,8043.0,412.0,4603.0
4,104.0,2.182327,3.108139,2603.0,63.0,101.0,10.0,236.0,817.0,286.0,...,154.0,545.0,517.0,571.0,593.0,150.0,2133.0,5393.0,394.0,3757.0


In [12]:
ipums_master.PUMA = ipums_master.PUMA.astype(int)
ipums_master.head()

Unnamed: 0,PUMA,FAMSIZE,YRSUSA1,RACE_2,RACE_3,RACE_4,RACE_5,RACE_6,RACE_7,RACE_8,...,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,EDUCATION_7,EMPLOYMENT_0,EMPLOYMENT_1,EMPLOYMENT_2,EMPLOYMENT_3
0,100,2.161538,1.197337,2453.0,3838.0,132.0,160.0,1406.0,748.0,1872.0,...,790.0,3175.0,3234.0,3370.0,3178.0,461.0,12405.0,29746.0,1633.0,23550.0
1,101,1.969199,2.744565,2084.0,165.0,326.0,53.0,741.0,579.0,616.0,...,174.0,830.0,820.0,797.0,2108.0,525.0,3126.0,8825.0,601.0,6638.0
2,102,2.222179,3.388744,2520.0,197.0,382.0,52.0,703.0,732.0,628.0,...,222.0,916.0,932.0,994.0,1002.0,308.0,3652.0,10447.0,507.0,6311.0
3,103,2.096533,3.106207,1754.0,191.0,238.0,30.0,545.0,631.0,534.0,...,155.0,648.0,679.0,713.0,1207.0,304.0,2666.0,8043.0,412.0,4603.0
4,104,2.182327,3.108139,2603.0,63.0,101.0,10.0,236.0,817.0,286.0,...,154.0,545.0,517.0,571.0,593.0,150.0,2133.0,5393.0,394.0,3757.0


In [13]:
ptc.head()

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,PUMA5CE
0,1,1,20100,2100
1,1,1,20200,2100
2,1,1,20300,2100
3,1,1,20400,2100
4,1,1,20500,2100


In [14]:
ipums_merged_ptc = ipums_master.join(ptc, lsuffix = "PUMA", rsuffix = "PUMA5CE", how = "inner", sort = True)

print("\nipums_master shape", ipums_master.shape, 
      "\nptc shape", ptc.shape, 
      "\nipums_merged shape", ipums_merged_ptc.shape)

ipums_merged_ptc.head()


ipums_master shape (982, 37) 
ptc shape (74091, 4) 
ipums_merged shape (982, 41)


Unnamed: 0,PUMA,FAMSIZE,YRSUSA1,RACE_2,RACE_3,RACE_4,RACE_5,RACE_6,RACE_7,RACE_8,...,EDUCATION_6,EDUCATION_7,EMPLOYMENT_0,EMPLOYMENT_1,EMPLOYMENT_2,EMPLOYMENT_3,STATEFP,COUNTYFP,TRACTCE,PUMA5CE
0,100,2.161538,1.197337,2453.0,3838.0,132.0,160.0,1406.0,748.0,1872.0,...,3178.0,461.0,12405.0,29746.0,1633.0,23550.0,1,1,20100,2100
1,101,1.969199,2.744565,2084.0,165.0,326.0,53.0,741.0,579.0,616.0,...,2108.0,525.0,3126.0,8825.0,601.0,6638.0,1,1,20200,2100
2,102,2.222179,3.388744,2520.0,197.0,382.0,52.0,703.0,732.0,628.0,...,1002.0,308.0,3652.0,10447.0,507.0,6311.0,1,1,20300,2100
3,103,2.096533,3.106207,1754.0,191.0,238.0,30.0,545.0,631.0,534.0,...,1207.0,304.0,2666.0,8043.0,412.0,4603.0,1,1,20400,2100
4,104,2.182327,3.108139,2603.0,63.0,101.0,10.0,236.0,817.0,286.0,...,593.0,150.0,2133.0,5393.0,394.0,3757.0,1,1,20500,2100


In [15]:
acs["Census_Tract"] = (acs["Geography"].str.replace("Census Tract ", "").str.split(",").str[0].astype(float)*100).astype(int)
acs["County"] = acs["Geography"].str.replace("Census Tract ", "").str.split(",").str[1]
acs["State"] = acs["Geography"].str.replace("Census Tract ", "").str.split(",").str[2]
acs_to_merge = acs[["Census_Tract", "County", "State", "Id", "Id2","Margin of Error; Total", "Estimate; Total"]]
acs_to_merge.head()

Unnamed: 0,Census_Tract,County,State,Id,Id2,Margin of Error; Total,Estimate; Total
0,20100,Autauga County,Alabama,1400000US01001020100,1001020100,231,2010
1,20200,Autauga County,Alabama,1400000US01001020200,1001020200,281,2196
2,20300,Autauga County,Alabama,1400000US01001020300,1001020300,355,3136
3,20400,Autauga County,Alabama,1400000US01001020400,1001020400,464,4563
4,20500,Autauga County,Alabama,1400000US01001020500,1001020500,655,10529


In [16]:
# acs_ipums = acs_to_merge.join(ipums_merged_ptc, lsuffix = "Census_Tract", rsuffix = "TRACTCE", how = "left", sort = True)
# acs_ipums = acs_ipums.drop(acs_ipums[["TRACTCE", "PUMA5CE"]], axis = 1)

# print("\nacs_to_merge shape", acs_to_merge.shape,
#       "\nipums_merged_ptc shape", ipums_merged_ptc.shape,
#       "\nacs_ipums shape", acs_ipums.shape)

# acs_ipums.head(n=10)


acs_to_merge shape (74001, 7) 
ipums_merged_ptc shape (982, 41) 
acs_ipums shape (74001, 46)


Unnamed: 0,Census_Tract,County,State,Id,Id2,Margin of Error; Total,Estimate; Total,PUMA,FAMSIZE,YRSUSA1,...,EDUCATION_4,EDUCATION_5,EDUCATION_6,EDUCATION_7,EMPLOYMENT_0,EMPLOYMENT_1,EMPLOYMENT_2,EMPLOYMENT_3,STATEFP,COUNTYFP
0,20100,Autauga County,Alabama,1400000US01001020100,1001020100,231,2010,100.0,2.161538,1.197337,...,3234.0,3370.0,3178.0,461.0,12405.0,29746.0,1633.0,23550.0,1.0,1.0
1,20200,Autauga County,Alabama,1400000US01001020200,1001020200,281,2196,101.0,1.969199,2.744565,...,820.0,797.0,2108.0,525.0,3126.0,8825.0,601.0,6638.0,1.0,1.0
2,20300,Autauga County,Alabama,1400000US01001020300,1001020300,355,3136,102.0,2.222179,3.388744,...,932.0,994.0,1002.0,308.0,3652.0,10447.0,507.0,6311.0,1.0,1.0
3,20400,Autauga County,Alabama,1400000US01001020400,1001020400,464,4563,103.0,2.096533,3.106207,...,679.0,713.0,1207.0,304.0,2666.0,8043.0,412.0,4603.0,1.0,1.0
4,20500,Autauga County,Alabama,1400000US01001020500,1001020500,655,10529,104.0,2.182327,3.108139,...,517.0,571.0,593.0,150.0,2133.0,5393.0,394.0,3757.0,1.0,1.0
5,20600,Autauga County,Alabama,1400000US01001020600,1001020600,397,3742,105.0,2.130796,3.428072,...,441.0,448.0,599.0,216.0,1640.0,4994.0,224.0,2868.0,1.0,1.0
6,20700,Autauga County,Alabama,1400000US01001020700,1001020700,394,3047,106.0,2.372272,5.540165,...,114.0,126.0,123.0,27.0,433.0,1232.0,62.0,721.0,1.0,1.0
7,20801,Autauga County,Alabama,1400000US01001020801,1001020801,338,3025,107.0,2.60089,6.378065,...,163.0,154.0,174.0,44.0,581.0,1419.0,62.0,931.0,1.0,1.0
8,20802,Autauga County,Alabama,1400000US01001020802,1001020802,664,10743,108.0,2.650866,7.329578,...,124.0,121.0,147.0,31.0,496.0,1326.0,70.0,727.0,1.0,1.0
9,20900,Autauga County,Alabama,1400000US01001020900,1001020900,458,5912,109.0,2.286133,6.400713,...,126.0,114.0,333.0,108.0,552.0,1549.0,61.0,858.0,1.0,1.0


In [17]:
# FCC Doesn't Merge Well
# fcc["tractcode"] = fcc["10"].astype(str).str[5:11]
# fcc.head()

#fcc_providers = pd.DataFrame(fcc.groupby('Census_Tract').Provider_Id.nunique()).reset_index()
#fcc_providers.rename(index=str, columns={"Census_Tract" : "Census_Tract", "Provider_Counts" : "Provider_Counts"})
#fcc_providers.head()

In [19]:

fcc.rename(index=str, columns={"Census_Tract" : "tractcode", "pct_all" : "Percentage_internet"})
fcc.head()

Unnamed: 0,tractcode,pcat_all,pcat_10x1
0,1001020100,4,4
1,1001020200,4,3
2,1001020300,5,4
3,1001020400,5,4
4,1001020500,5,4


In [25]:
master_merged = acs_ipums.merge(fcc, left_on = "Census_Tract", right_on = "tractcode", how = "left")

In [22]:
master_merged

Unnamed: 0,Census_Tract,County,State,Id,Id2,Margin of Error; Total,Estimate; Total,PUMA,FAMSIZE,YRSUSA1,...,EDUCATION_7,EMPLOYMENT_0,EMPLOYMENT_1,EMPLOYMENT_2,EMPLOYMENT_3,STATEFP,COUNTYFP,tractcode,pcat_all,pcat_10x1
0,20100,Autauga County,Alabama,1400000US01001020100,1001020100,231,2010,100.0,2.161538,1.197337,...,461.0,12405.0,29746.0,1633.0,23550.0,1.0,1.0,,,
1,20200,Autauga County,Alabama,1400000US01001020200,1001020200,281,2196,101.0,1.969199,2.744565,...,525.0,3126.0,8825.0,601.0,6638.0,1.0,1.0,,,
2,20300,Autauga County,Alabama,1400000US01001020300,1001020300,355,3136,102.0,2.222179,3.388744,...,308.0,3652.0,10447.0,507.0,6311.0,1.0,1.0,,,
3,20400,Autauga County,Alabama,1400000US01001020400,1001020400,464,4563,103.0,2.096533,3.106207,...,304.0,2666.0,8043.0,412.0,4603.0,1.0,1.0,,,
4,20500,Autauga County,Alabama,1400000US01001020500,1001020500,655,10529,104.0,2.182327,3.108139,...,150.0,2133.0,5393.0,394.0,3757.0,1.0,1.0,,,
5,20600,Autauga County,Alabama,1400000US01001020600,1001020600,397,3742,105.0,2.130796,3.428072,...,216.0,1640.0,4994.0,224.0,2868.0,1.0,1.0,,,
6,20700,Autauga County,Alabama,1400000US01001020700,1001020700,394,3047,106.0,2.372272,5.540165,...,27.0,433.0,1232.0,62.0,721.0,1.0,1.0,,,
7,20801,Autauga County,Alabama,1400000US01001020801,1001020801,338,3025,107.0,2.600890,6.378065,...,44.0,581.0,1419.0,62.0,931.0,1.0,1.0,,,
8,20802,Autauga County,Alabama,1400000US01001020802,1001020802,664,10743,108.0,2.650866,7.329578,...,31.0,496.0,1326.0,70.0,727.0,1.0,1.0,,,
9,20900,Autauga County,Alabama,1400000US01001020900,1001020900,458,5912,109.0,2.286133,6.400713,...,108.0,552.0,1549.0,61.0,858.0,1.0,1.0,,,


In [None]:
acs_ipums.to_csv("Master_Data.csv")