In [1]:
import pandas as pd

In [2]:
pd.set_option('display.max_columns', None)

# MI Townships 09_23_22

## Background:
- We received a data request from a news editor for a newspaper in Michigan
- He was interested in 2010 census and 2020 census total population data for a number of townships across 7 counties in MI
- The RDH does not have 2010 or 2020 census data aggregated to the minor civil divison level (which would contain township statistics).

## Approach:
- Download 2010 and 2020 census data
- Make a subset of just MCD data in the needed counties
- Join that data to the total population data for the MCD
- Subset down to needed columns
    - Going to include water and land area to give some sense of possible annexations
- Export files

## Counties / MCDs of interest:
#### Note: These are quoted from the email
- Here are the townships in BAY COUNTY I need numbers for: 
Hampton, Merritt

- Here are the townships in GENESEE COUNTY I need numbers for:
Forest, Thetford

- Here are the townships in HURON COUNTY I need numbers for:
Bingham, Bloomfield, Brookfield, Caseville, Chandler, Colfax, Dwight, Fairhaven, Gore, Grant, Hume, Huron, Lake, Lincoln, McKinley, Meade, Oliver, Paris, Pointe Aux Barques, Port Austin, Rubicon, Sand Beach, Sebewaing, Sheridan, Sherman, Sigel, Verona, Winsor

- Here are the townships in LAPEER COUNTY I need numbers for: 
Burlington, Deerfield, Marathon, North Branch, Rich

- Here are the townships in SAGINAW COUNTY I need numbers for: Birch Run, Blumfield, Frankenmuth

- Here are the townships in SANILAC COUNTY I need numbers for:
Argyle, Austin, Bridgehampton, Buel, Custer, Delaware, Elk, Elmer, Evergreen, Flynn, Forester, Fremont, Greenleaf, Lamotte, Lexington, Maple Valley, Marion, Marlette, Minden, Moore, Sanilac, Speaker, Washington, Watertown, Wheatland, Worth

- Here are the townships in TUSCOLA COUNTY I need numbers for:
Akron, Almer, Arbela, Columbia, Dayton, Denmark, Elkland, Ellington, Elmwood, Fairgrove, Fremont, Gilford, Indianfields, Juniata, Kingston, Koylton, Millington, Novesta, Tuscola, Vassar, Watertown, Wells, Wisner

## Links to Download Raw Files
- 2010 PL data
    - Link: https://www2.census.gov/census_2010/01-Redistricting_File--PL_94-171/Michigan/
    - Note: Download "mi2010.pl.zip" and then unzip the file
- 2020 PL data:
    - Link: https://www2.census.gov/programs-surveys/decennial/2020/data/01-Redistricting_File--PL_94-171/Michigan/
    - Note: Download "mi2020.pl.zip" and then unzip the file

#### Note: A full "raw-from-source" file is also available upon request. Please email info@redistrictingdatahub.org

In [3]:
# FIPS list of the relevant counties
fips_list = [17, 49, 63, 87, 145, 151, 157]

fips_name_dict = {17:"Bay", 49:"Genesee", 63:"Huron", 87:"Lapeer", 145:"Saginaw", 151:"Sanilac", 157:"Tuscola"}

## 2020 Data

In [4]:
# Load the geography information from the 2020 release
mi_data = pd.read_csv("./raw-from-source/mi2020.pl/migeo2020.pl",delimiter="|", header = None)

# Load the population data from the first table from the 2020 release
mi_pop_data_1 = pd.read_csv("./raw-from-source/mi2020.pl/mi000012020.pl", delimiter = "|", header = None)

# Rename columns
mi_data.rename(columns = {7:"LOGRECNO", 14:"COUNTY", 84:"AREALAND", 85:'AREAWTR', 87:"NAME"}, inplace = True)
mi_pop_data_1.rename(columns={4:"LOGRECNO", 5:"TOT_POP"}, inplace = True)

# Filter down to the townships
townships = mi_data[mi_data[2]==60]

# Join the townships with the population data
township_pop_data_2020 = pd.merge(townships,mi_pop_data_1, how = "left", on = "LOGRECNO", indicator = True)

# Filter down to the needed columns
township_pop_data_2020 = township_pop_data_2020[["COUNTY","NAME","TOT_POP","AREALAND",'AREAWTR']]

# Change the COUNTY column from a FIP to a name
township_pop_data_2020_filtered = township_pop_data_2020[township_pop_data_2020["COUNTY"].isin(fips_list)]

# Change the COUNTY column from a FIP to a name
township_pop_data_2020_filtered["COUNTY"] = township_pop_data_2020_filtered["COUNTY"].map(fips_name_dict)

# Export to CSV
township_pop_data_2020_filtered.to_csv("./mi_2020_selected_townships_pop.csv", index = False)


  mi_data = pd.read_csv("./raw-from-source/mi2020.pl/migeo2020.pl",delimiter="|", header = None)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  township_pop_data_2020_filtered["COUNTY"] = township_pop_data_2020_filtered["COUNTY"].map(fips_name_dict)


## 2010 Data

Note: The 2010 geography data is a bit difficult to load as there are fixed widths for each column.

The fixed lengths and column names are loaded below

In [5]:
col_length = [6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18]
col_name = ['FILEID', 'STUSAB', 'SUMLEV', 'GEOCOMP', 'CHARITER', 'CIFSN', 'LOGRECNO', 'REGION', 'DIVISION', 'STATE', 'COUNTY', 'COUNTYCC', 'COUNTYSC', 'COUSUB', 'COUSUBCC', 'COUSUBSC', 'PLACE', 'PLACECC', 'PLACESC', 'TRACT', 'BLKGRP', 'BLOCK', 'IUC', 'CONCIT', 'CONCITCC', 'CONCITSC', 'AIANHH', 'AIANHHFP', 'AIANHHCC', 'AIHHTLI', 'AITSCE', 'AITS', 'AITSCC', 'TTRACT', 'TBLKGRP', 'ANRC', 'ANRCCC', 'CBSA', 'CBSASC', 'METDIV', 'CSA', 'NECTA', 'NECTASC', 'NECTADIV', 'CNECTA', 'CBSAPCI', 'NECTAPCI', 'UA', 'UASC', 'UATYPE', 'UR', 'CD', 'SLDU', 'SLDL', 'VTD', 'VTDI', 'RESERVE2', 'ZCTA5', 'SUBMCD', 'SUBMCDCC', 'SDELM', 'SDSEC', 'SDUNI', 'AREALAND', 'AREAWATR', 'NAME', 'FUNCSTAT', 'GCUNI', 'POP100', 'HU100', 'INTPTLAT', 'INTPTLON', 'LSADC', 'PARTFLAG', 'RESERVE3', 'UGA', 'STATENS', 'COUNTYNS', 'COUSUBNS', 'PLACENS', 'CONCITNS', 'AIANHHNS', 'AITSNS', 'ANRCNS', 'SUBMCDNS', 'CD113', 'CD114', 'CD115', 'SLDU2', 'SLDU3', 'SLDU4', 'SLDL2', 'SLDL3', 'SLDL4', 'AIANHHSC', 'CSASC', 'CNECTASC', 'MEMI', 'NMEMI', 'PUMA', 'RESERVED']

# Create a dictionary mapping from index to the column name
col_rename_dict = {i:col_name[i] for i in range(0,101)}

In [6]:
# Load in the geography portion of the 2010 PL data
mi_data_2010 = pd.read_fwf("./raw-from-source/mi2010.pl/migeo2010.pl", header = None, widths = col_length, index_col = False)

# Load in the first section of the 2010 PL data, with total population numbers
mi_data_2010_1 = pd.read_csv("./raw-from-source/mi2010.pl/mi000012010.pl", header = None)

# Rename the columns
mi_data_2010.rename(columns = col_rename_dict, inplace = True)

# Filter down to townships
townships_2010 = mi_data_2010[mi_data_2010["SUMLEV"]==60]

# Rename the columns
mi_data_2010_1.rename(columns = {4:"LOGRECNO", 5:"TOT_POP"}, inplace = True)

# Merge the geographies and the population data
township_pop_2010 = pd.merge(townships_2010, mi_data_2010_1, how = "left", on = "LOGRECNO", indicator = True)

# Filter down to appropriate columns
township_pop_2010 = township_pop_2010[["COUNTY","NAME","TOT_POP","AREALAND", "AREAWATR"]]

#Filter down to relevant counties
township_pop_2010_filtered = township_pop_2010[township_pop_2010["COUNTY"].isin(fips_list)]

# Change from FIPS to county name
township_pop_2010_filtered["COUNTY"] = township_pop_2010_filtered["COUNTY"].map(fips_name_dict)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  township_pop_2010_filtered["COUNTY"] = township_pop_2010_filtered["COUNTY"].map(fips_name_dict)


In [7]:
# Export just the data in the relevant counties
township_pop_2010_filtered.to_csv("./mi_2010_selected_townships_pop.csv", index = False)