
# Organize and Cleanse City Demographic Data
  1. import metro area Demographic ("ACS_12_1YR_CP05" and "ACS_17_1YR_CP05") data. Source:
     * https://factfinder.census.gov/faces/nav/jsf/pages/download_center.xhtml
  2. ACS_12: drop unneeded cities; drop unneeded cols; cleanse data 
  3. ACS_17: drop unneeded cities; drop unneeded cols; cleanse data
  4. merge ACS_12 and ACS_17

In [1]:
import pandas as pd

### 1) import metro area Demographic ("ACS_12_1YR_CP05" and "ACS_17_1YR_CP05") data

In [2]:
# import the US Census's "American Community Survey" (ACS) Metro Area Demographic data: 2012
dfD12 = pd.read_csv('raw/ACS_12_1YR_CP05_with_ann.csv')
dfD12.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC03,HC02_VC03,HC03_VC03,HC04_VC03,HC05_VC03,HC06_VC03,HC07_VC03,...,HC09_VC96,HC01_VC98,HC02_VC98,HC03_VC98,HC04_VC98,HC05_VC98,HC06_VC98,HC07_VC98,HC08_VC98,HC09_VC98
0,Id,Id2,Geography,2012 Estimate; SEX AND AGE - Total population,2011 Estimate; SEX AND AGE - Total population,2012 - 2011 Statistical Significance; SEX AND ...,2010 Estimate; SEX AND AGE - Total population,2012 - 2010 Statistical Significance; SEX AND ...,2009 Estimate; SEX AND AGE - Total population,2012 - 2009 Statistical Significance; SEX AND ...,...,2012 - 2008 Statistical Significance; HISPANIC...,2012 Estimate; HISPANIC OR LATINO AND RACE - T...,2011 Estimate; HISPANIC OR LATINO AND RACE - T...,2012 - 2011 Statistical Significance; HISPANIC...,2010 Estimate; HISPANIC OR LATINO AND RACE - T...,2012 - 2010 Statistical Significance; HISPANIC...,2009 Estimate; HISPANIC OR LATINO AND RACE - T...,2012 - 2009 Statistical Significance; HISPANIC...,2008 Estimate; HISPANIC OR LATINO AND RACE - T...,2012 - 2008 Statistical Significance; HISPANIC...
1,310M100US10180,10180,"Abilene, TX Metro Area",167800,165858,,164941,*,(X),,...,,70553,69887,,69711,,(X),,(X),
2,310M100US10420,10420,"Akron, OH Metro Area",702262,701456,c,702951,c,(X),,...,,312864,313023,,312600,,(X),,(X),
3,310M100US10500,10500,"Albany, GA Metro Area",155019,161617,*,162659,*,(X),,...,,65861,66150,,67169,,(X),,(X),
4,310M100US10580,10580,"Albany-Schenectady-Troy, NY Metro Area",874646,871478,c,870832,c,(X),,...,,394602,395203,,393443,,(X),,(X),


In [3]:
# import the US Census's "American Community Survey" (ACS) Metro Area Demographic data:2017
dfD17 = pd.read_csv('raw/ACS_17_1YR_CP05_with_ann.csv')
dfD17.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC03,HC02_VC03,HC03_VC03,HC04_VC03,HC05_VC03,HC06_VC03,HC07_VC03,...,HC09_VC114,HC01_VC115,HC02_VC115,HC03_VC115,HC04_VC115,HC05_VC115,HC06_VC115,HC07_VC115,HC08_VC115,HC09_VC115
0,Id,Id2,Geography,2017 Estimate; SEX AND AGE - Total population,2016 Estimate; SEX AND AGE - Total population,2017 - 2016 Statistical Significance; SEX AND ...,2015 Estimate; SEX AND AGE - Total population,2017 - 2015 Statistical Significance; SEX AND ...,2014 Estimate; SEX AND AGE - Total population,2017 - 2014 Statistical Significance; SEX AND ...,...,"2017 - 2013 Statistical Significance; CITIZEN,...","2017 Estimate; CITIZEN, VOTING AGE POPULATION ...","2016 Estimate; CITIZEN, VOTING AGE POPULATION ...","2017 - 2016 Statistical Significance; CITIZEN,...","2015 Estimate; CITIZEN, VOTING AGE POPULATION ...","2017 - 2015 Statistical Significance; CITIZEN,...","2014 Estimate; CITIZEN, VOTING AGE POPULATION ...","2017 - 2014 Statistical Significance; CITIZEN,...","2013 Estimate; CITIZEN, VOTING AGE POPULATION ...","2017 - 2013 Statistical Significance; CITIZEN,..."
1,310M300US10180,10180,"Abilene, TX Metro Area",169747,170860,,168922,,166900,,...,,49.9,48.7,*,49.3,,49.1,*,49.2,
2,310M300US10420,10420,"Akron, OH Metro Area",703505,702221,c,704243,c,703825,c,...,*,52.0,52.0,,52.2,,52.4,*,52.4,*
3,310M300US10500,10500,"Albany, GA Metro Area",151754,152506,,156997,*,152596,,...,,53.9,54.2,,54.6,,54.0,,53.4,
4,310M300US10540,10540,"Albany, OR Metro Area",125047,122849,c,120547,c,119356,c,...,,51.0,51.4,,51.0,,51.1,,51.1,


In [4]:
# eliminate " (Metro Area)" suffix in each row: 2012
dfD12['GEO.display-label'] = dfD12['GEO.display-label'].str.replace(' Metro Area','')
dfD12['GEO.display-label'].unique()

array(['Geography', 'Abilene, TX', 'Akron, OH', 'Albany, GA',
       'Albany-Schenectady-Troy, NY', 'Albuquerque, NM', 'Alexandria, LA',
       'Allentown-Bethlehem-Easton, PA-NJ', 'Altoona, PA', 'Amarillo, TX',
       'Ames, IA', 'Anchorage, AK', 'Anderson, IN', 'Anderson, SC',
       'Ann Arbor, MI', 'Anniston-Oxford, AL', 'Appleton, WI',
       'Asheville, NC', 'Athens-Clarke County, GA',
       'Atlanta-Sandy Springs-Marietta, GA',
       'Atlantic City-Hammonton, NJ', 'Auburn-Opelika, AL',
       'Augusta-Richmond County, GA-SC',
       'Austin-Round Rock-San Marcos, TX', 'Bakersfield-Delano, CA',
       'Baltimore-Towson, MD', 'Bangor, ME', 'Barnstable Town, MA',
       'Baton Rouge, LA', 'Battle Creek, MI', 'Bay City, MI',
       'Beaumont-Port Arthur, TX', 'Bellingham, WA', 'Bend, OR',
       'Billings, MT', 'Binghamton, NY', 'Birmingham-Hoover, AL',
       'Bismarck, ND', 'Blacksburg-Christiansburg-Radford, VA',
       'Bloomington, IN', 'Bloomington-Normal, IL',
       'Boise

In [5]:
# eliminate " (Metro Area)" suffix in each row: 2017
dfD17['GEO.display-label'] = dfD17['GEO.display-label'].str.replace(' Metro Area','')
dfD17['GEO.display-label'].unique()

array(['Geography', 'Abilene, TX', 'Akron, OH', 'Albany, GA',
       'Albany, OR', 'Albany-Schenectady-Troy, NY', 'Albuquerque, NM',
       'Alexandria, LA', 'Allentown-Bethlehem-Easton, PA-NJ',
       'Altoona, PA', 'Amarillo, TX', 'Ames, IA', 'Anchorage, AK',
       'Ann Arbor, MI', 'Anniston-Oxford-Jacksonville, AL',
       'Appleton, WI', 'Asheville, NC', 'Athens-Clarke County, GA',
       'Atlanta-Sandy Springs-Roswell, GA', 'Atlantic City-Hammonton, NJ',
       'Auburn-Opelika, AL', 'Augusta-Richmond County, GA-SC',
       'Austin-Round Rock, TX', 'Bakersfield, CA',
       'Baltimore-Columbia-Towson, MD', 'Bangor, ME',
       'Barnstable Town, MA', 'Baton Rouge, LA', 'Battle Creek, MI',
       'Bay City, MI', 'Beaumont-Port Arthur, TX', 'Beckley, WV',
       'Bellingham, WA', 'Bend-Redmond, OR', 'Billings, MT',
       'Binghamton, NY', 'Birmingham-Hoover, AL', 'Bismarck, ND',
       'Blacksburg-Christiansburg-Radford, VA', 'Bloomington, IL',
       'Bloomington, IN', 'Bloomsburg-

In [6]:
# import the city file and group it
dfCities = pd.read_csv('cities.csv')
grpCities = dfCities.groupby('ACSMetro')
dfCities = grpCities.count()
dfCities

Unnamed: 0_level_0,BEAMetro,WikipediaMetro,City,State
ACSMetro,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Atlanta-Sandy Springs-Marietta, GA",1,1,1,1
"Baltimore-Towson, MD",1,1,1,1
"Boston-Cambridge-Quincy, MA-NH",1,1,1,1
"Buffalo-Niagara Falls, NY",1,1,1,1
"Charlotte-Gastonia-Rock Hill, NC-SC",1,1,1,1
"Chicago-Joliet-Naperville, IL-IN-WI",1,1,1,1
"Cincinnati-Middletown, OH-KY-IN",1,1,1,1
"Cleveland-Elyria-Mentor, OH",1,1,1,1
"Columbus, OH",1,1,1,1
"Dallas-Fort Worth-Arlington, TX",2,2,2,2


## 2) ACS_12: drop unneeded cities; drop unneeded cols; cleanse data 

In [7]:
# use the grouped city DataFrame to drop unneeded rows from dfGDP via merge
dfD12 = pd.merge(dfD12, dfCities, left_on='GEO.display-label', right_on='ACSMetro')
dfD12.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC03,HC02_VC03,HC03_VC03,HC04_VC03,HC05_VC03,HC06_VC03,HC07_VC03,...,HC04_VC98,HC05_VC98,HC06_VC98,HC07_VC98,HC08_VC98,HC09_VC98,BEAMetro,WikipediaMetro,City,State
0,310M100US12060,12060,"Atlanta-Sandy Springs-Marietta, GA",5442113,5365726,*,5288302,*,(X),,...,2168806,*,(X),,(X),,1,1,1,1
1,310M100US12580,12580,"Baltimore-Towson, MD",2753149,2729110,c,2714183,c,(X),,...,1133012,*,(X),,(X),,1,1,1,1
2,310M100US14460,14460,"Boston-Cambridge-Quincy, MA-NH",4640802,4591112,c,4560689,c,(X),,...,1883591,,(X),,(X),,1,1,1,1
3,310M100US15380,15380,"Buffalo-Niagara Falls, NY",1134210,1134039,c,1135198,c,(X),,...,519128,,(X),,(X),,1,1,1,1
4,310M100US16740,16740,"Charlotte-Gastonia-Rock Hill, NC-SC",1831084,1795472,c,1764313,c,(X),,...,741184,*,(X),,(X),,1,1,1,1


In [8]:
# import ACS metadata file
dfD12Meta = pd.read_csv('raw/ACS_12_1YR_CP05_metadata.csv', header=None, names=['col','description'])
dfD12Meta.head(10)

Unnamed: 0,col,description
0,GEO.id,Id
1,GEO.id2,Id2
2,GEO.display-label,Geography
3,HC01_VC03,2012 Estimate; SEX AND AGE - Total population
4,HC02_VC03,2011 Estimate; SEX AND AGE - Total population
5,HC03_VC03,2012 - 2011 Statistical Significance; SEX AND ...
6,HC04_VC03,2010 Estimate; SEX AND AGE - Total population
7,HC05_VC03,2012 - 2010 Statistical Significance; SEX AND ...
8,HC06_VC03,2009 Estimate; SEX AND AGE - Total population
9,HC07_VC03,2012 - 2009 Statistical Significance; SEX AND ...


In [9]:
# create col to identify multi-year rows in metadata file
dfD12Meta['droprow'] = dfD12Meta['description'].str.contains('\d\d\d\d - \d\d\d\d')
dfD12Meta.head(10)

Unnamed: 0,col,description,droprow
0,GEO.id,Id,False
1,GEO.id2,Id2,False
2,GEO.display-label,Geography,False
3,HC01_VC03,2012 Estimate; SEX AND AGE - Total population,False
4,HC02_VC03,2011 Estimate; SEX AND AGE - Total population,False
5,HC03_VC03,2012 - 2011 Statistical Significance; SEX AND ...,True
6,HC04_VC03,2010 Estimate; SEX AND AGE - Total population,False
7,HC05_VC03,2012 - 2010 Statistical Significance; SEX AND ...,True
8,HC06_VC03,2009 Estimate; SEX AND AGE - Total population,False
9,HC07_VC03,2012 - 2009 Statistical Significance; SEX AND ...,True


In [10]:
# drop multi-year rows from metadata file
dfD12Meta = dfD12Meta.loc[dfD12Meta['droprow'] == False]
dfD12Meta = dfD12Meta[['col','description']]
dfD12Meta.head(10)

Unnamed: 0,col,description
0,GEO.id,Id
1,GEO.id2,Id2
2,GEO.display-label,Geography
3,HC01_VC03,2012 Estimate; SEX AND AGE - Total population
4,HC02_VC03,2011 Estimate; SEX AND AGE - Total population
6,HC04_VC03,2010 Estimate; SEX AND AGE - Total population
8,HC06_VC03,2009 Estimate; SEX AND AGE - Total population
10,HC08_VC03,2008 Estimate; SEX AND AGE - Total population
12,HC01_VC04,2012 Estimate; SEX AND AGE - Male
13,HC02_VC04,2011 Estimate; SEX AND AGE - Male


In [11]:
# drop unneeded cols
dfD12 = dfD12[dfD12Meta['col']]
dfD12.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC03,HC02_VC03,HC04_VC03,HC06_VC03,HC08_VC03,HC01_VC04,HC02_VC04,...,HC01_VC96,HC02_VC96,HC04_VC96,HC06_VC96,HC08_VC96,HC01_VC98,HC02_VC98,HC04_VC98,HC06_VC98,HC08_VC98
0,310M100US12060,12060,"Atlanta-Sandy Springs-Marietta, GA",5442113,5365726,5288302,(X),(X),48.7,48.9,...,1.6,1.7,1.4,(X),(X),2175303,2169873,2168806,(X),(X)
1,310M100US12580,12580,"Baltimore-Towson, MD",2753149,2729110,2714183,(X),(X),48.1,48.1,...,2.2,2.2,2.0,(X),(X),1139559,1138113,1133012,(X),(X)
2,310M100US14460,14460,"Boston-Cambridge-Quincy, MA-NH",4640802,4591112,4560689,(X),(X),48.5,48.4,...,1.5,1.6,1.5,(X),(X),1885961,1891039,1883591,(X),(X)
3,310M100US15380,15380,"Buffalo-Niagara Falls, NY",1134210,1134039,1135198,(X),(X),48.4,48.4,...,1.6,1.7,1.8,(X),(X),518979,518757,519128,(X),(X)
4,310M100US16740,16740,"Charlotte-Gastonia-Rock Hill, NC-SC",1831084,1795472,1764313,(X),(X),48.5,48.4,...,2.1,1.6,1.8,(X),(X),747886,742559,741184,(X),(X)


In [12]:
# eliminate "(X)" values in DataFrame
for strCol in dfD12.columns:
    dfD12[strCol] = dfD12[strCol].str.replace('\(X\)','')
dfD12.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC03,HC02_VC03,HC04_VC03,HC06_VC03,HC08_VC03,HC01_VC04,HC02_VC04,...,HC01_VC96,HC02_VC96,HC04_VC96,HC06_VC96,HC08_VC96,HC01_VC98,HC02_VC98,HC04_VC98,HC06_VC98,HC08_VC98
0,310M100US12060,12060,"Atlanta-Sandy Springs-Marietta, GA",5442113,5365726,5288302,,,48.7,48.9,...,1.6,1.7,1.4,,,2175303,2169873,2168806,,
1,310M100US12580,12580,"Baltimore-Towson, MD",2753149,2729110,2714183,,,48.1,48.1,...,2.2,2.2,2.0,,,1139559,1138113,1133012,,
2,310M100US14460,14460,"Boston-Cambridge-Quincy, MA-NH",4640802,4591112,4560689,,,48.5,48.4,...,1.5,1.6,1.5,,,1885961,1891039,1883591,,
3,310M100US15380,15380,"Buffalo-Niagara Falls, NY",1134210,1134039,1135198,,,48.4,48.4,...,1.6,1.7,1.8,,,518979,518757,519128,,
4,310M100US16740,16740,"Charlotte-Gastonia-Rock Hill, NC-SC",1831084,1795472,1764313,,,48.5,48.4,...,2.1,1.6,1.8,,,747886,742559,741184,,


## 3) ACS_17: drop unneeded cities; drop unneeded cols; cleanse data 

In [13]:
# use the grouped city DataFrame to drop unneeded rows from dfGDP via merge
dfD17 = pd.merge(dfD17, dfCities, left_on='GEO.display-label', right_on='ACSMetro')
dfD17.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC03,HC02_VC03,HC03_VC03,HC04_VC03,HC05_VC03,HC06_VC03,HC07_VC03,...,HC04_VC115,HC05_VC115,HC06_VC115,HC07_VC115,HC08_VC115,HC09_VC115,BEAMetro,WikipediaMetro,City,State
0,310M300US18140,18140,"Columbus, OH",2078725,2041520,c,2021632,c,1994536,c,...,51.4,,51.4,,51.6,,1,1,1,1
1,310M300US19100,19100,"Dallas-Fort Worth-Arlington, TX",7400479,7232599,*,7102165,*,6954003,*,...,52.2,*,52.1,,52.1,,2,2,2,2
2,310M300US24580,24580,"Green Bay, WI",320050,318236,c,316519,c,314531,c,...,50.8,,50.7,,50.6,,1,1,1,1
3,310M300US27260,27260,"Jacksonville, FL",1504980,1478212,c,1449481,c,1419127,c,...,52.1,,51.8,,52.1,,1,1,1,1
4,310M300US28140,28140,"Kansas City, MO-KS",2126945,2104115,*,2088269,*,2070221,*,...,52.0,*,51.8,,51.8,,1,1,1,1


In [14]:
# import ACS metadata file
dfD17Meta = pd.read_csv('raw/ACS_17_1YR_CP05_metadata.csv', header=None, names=['col','description'])
dfD17Meta.head(10)

Unnamed: 0,col,description
0,GEO.id,Id
1,GEO.id2,Id2
2,GEO.display-label,Geography
3,HC01_VC03,2017 Estimate; SEX AND AGE - Total population
4,HC02_VC03,2016 Estimate; SEX AND AGE - Total population
5,HC03_VC03,2017 - 2016 Statistical Significance; SEX AND ...
6,HC04_VC03,2015 Estimate; SEX AND AGE - Total population
7,HC05_VC03,2017 - 2015 Statistical Significance; SEX AND ...
8,HC06_VC03,2014 Estimate; SEX AND AGE - Total population
9,HC07_VC03,2017 - 2014 Statistical Significance; SEX AND ...


In [15]:
# create col to identify multi-year rows in metadata file
dfD17Meta['droprow'] = dfD17Meta['description'].str.contains('\d\d\d\d - \d\d\d\d')
dfD17Meta.head(10)

Unnamed: 0,col,description,droprow
0,GEO.id,Id,False
1,GEO.id2,Id2,False
2,GEO.display-label,Geography,False
3,HC01_VC03,2017 Estimate; SEX AND AGE - Total population,False
4,HC02_VC03,2016 Estimate; SEX AND AGE - Total population,False
5,HC03_VC03,2017 - 2016 Statistical Significance; SEX AND ...,True
6,HC04_VC03,2015 Estimate; SEX AND AGE - Total population,False
7,HC05_VC03,2017 - 2015 Statistical Significance; SEX AND ...,True
8,HC06_VC03,2014 Estimate; SEX AND AGE - Total population,False
9,HC07_VC03,2017 - 2014 Statistical Significance; SEX AND ...,True


In [16]:
# drop multi-year rows from metadata file
dfD17Meta = dfD17Meta.loc[dfD17Meta['droprow'] == False]
dfD17Meta = dfD17Meta[['col','description']]
dfD17Meta.head(10)

Unnamed: 0,col,description
0,GEO.id,Id
1,GEO.id2,Id2
2,GEO.display-label,Geography
3,HC01_VC03,2017 Estimate; SEX AND AGE - Total population
4,HC02_VC03,2016 Estimate; SEX AND AGE - Total population
6,HC04_VC03,2015 Estimate; SEX AND AGE - Total population
8,HC06_VC03,2014 Estimate; SEX AND AGE - Total population
10,HC08_VC03,2013 Estimate; SEX AND AGE - Total population
12,HC01_VC04,2017 Estimate; SEX AND AGE - Total population ...
13,HC02_VC04,2016 Estimate; SEX AND AGE - Total population ...


In [17]:
# drop unneeded cols
dfD17 = dfD17[dfD17Meta['col']]
dfD17.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC03,HC02_VC03,HC04_VC03,HC06_VC03,HC08_VC03,HC01_VC04,HC02_VC04,...,HC01_VC114,HC02_VC114,HC04_VC114,HC06_VC114,HC08_VC114,HC01_VC115,HC02_VC115,HC04_VC115,HC06_VC115,HC08_VC115
0,310M300US18140,18140,"Columbus, OH",2078725,2041520,2021632,1994536,1967066,49.1,49.2,...,48.4,48.5,48.6,48.6,48.4,51.6,51.5,51.4,51.4,51.6
1,310M300US19100,19100,"Dallas-Fort Worth-Arlington, TX",7400479,7232599,7102165,6954003,6812373,49.2,49.2,...,48.0,47.9,47.8,47.9,47.9,52.0,52.1,52.2,52.1,52.1
2,310M300US24580,24580,"Green Bay, WI",320050,318236,316519,314531,312409,49.9,50.1,...,49.3,49.6,49.2,49.3,49.4,50.7,50.4,50.8,50.7,50.6
3,310M300US27260,27260,"Jacksonville, FL",1504980,1478212,1449481,1419127,1394624,48.6,48.7,...,48.0,47.9,47.9,48.2,47.9,52.0,52.1,52.1,51.8,52.1
4,310M300US28140,28140,"Kansas City, MO-KS",2126945,2104115,2088269,2070221,2052048,49.2,49.1,...,48.3,48.3,48.0,48.2,48.2,51.7,51.7,52.0,51.8,51.8


In [18]:
# eliminate "(X)" values in DataFrame
for strCol in dfD17.columns:
    dfD17[strCol] = dfD17[strCol].str.replace('\(X\)','')
dfD17

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC03,HC02_VC03,HC04_VC03,HC06_VC03,HC08_VC03,HC01_VC04,HC02_VC04,...,HC01_VC114,HC02_VC114,HC04_VC114,HC06_VC114,HC08_VC114,HC01_VC115,HC02_VC115,HC04_VC115,HC06_VC115,HC08_VC115
0,310M300US18140,18140,"Columbus, OH",2078725,2041520,2021632,1994536,1967066,49.1,49.2,...,48.4,48.5,48.6,48.6,48.4,51.6,51.5,51.4,51.4,51.6
1,310M300US19100,19100,"Dallas-Fort Worth-Arlington, TX",7400479,7232599,7102165,6954003,6812373,49.2,49.2,...,48.0,47.9,47.8,47.9,47.9,52.0,52.1,52.2,52.1,52.1
2,310M300US24580,24580,"Green Bay, WI",320050,318236,316519,314531,312409,49.9,50.1,...,49.3,49.6,49.2,49.3,49.4,50.7,50.4,50.8,50.7,50.6
3,310M300US27260,27260,"Jacksonville, FL",1504980,1478212,1449481,1419127,1394624,48.6,48.7,...,48.0,47.9,47.9,48.2,47.9,52.0,52.1,52.1,51.8,52.1
4,310M300US28140,28140,"Kansas City, MO-KS",2126945,2104115,2088269,2070221,2052048,49.2,49.1,...,48.3,48.3,48.0,48.2,48.2,51.7,51.7,52.0,51.8,51.8
5,310M300US32820,32820,"Memphis, TN-MS-AR",1346626,1346437,1343572,1344121,1343850,48.0,47.9,...,46.7,46.7,46.5,46.5,46.7,53.3,53.3,53.5,53.5,53.3
6,310M300US33340,33340,"Milwaukee-Waukesha-West Allis, WI",1576236,1572482,1575747,1572245,1569659,48.8,48.7,...,48.0,47.8,47.7,47.7,47.7,52.0,52.2,52.3,52.3,52.3
7,310M300US33460,33460,"Minneapolis-St. Paul-Bloomington, MN-WI",3600618,3551036,3524583,3495176,3459146,49.6,49.5,...,49.0,48.9,48.7,48.7,48.7,51.0,51.1,51.3,51.3,51.3
8,310M300US34980,34980,"Nashville-Davidson--Murfreesboro--Franklin, TN",1904226,1865535,1830298,1792468,1757424,48.7,48.6,...,47.9,47.7,47.7,47.6,47.9,52.1,52.3,52.3,52.4,52.1
9,310M300US36420,36420,"Oklahoma City, OK",1383737,1373211,1358452,1336767,1319677,49.4,49.3,...,48.3,48.5,48.1,48.1,48.3,51.7,51.5,51.9,51.9,51.7


## 4) merge ACS_12 and ACS_17

In [19]:
dfDemo = pd.merge(dfD12, dfD17, on='GEO.display-label')