# Get data for population change, by county, 1990-2020

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import urllib.request, json
import us
import time

In [3]:
# Let displayed dataframes show up to 100 columns and 100 rows before
# pandas truncates the output.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

---

## Get data

#### Read county population totals from the [U.S. Census API](https://www.census.gov/data/developers/data-sets/decennial-census.2020.html#list-tab-X1A4XQYAKRNL6H3EPY): 2000 and 2010 from the `sf1` dataset, 2020 from the redistricting (`pl`) dataset

In [4]:
# One endpoint per census year. Each asks for the total-population variable
# plus NAME for every county in every state. The 2000 and 2010 requests use
# the `sf1` dataset (variable P001001); 2020 uses `pl` (variable P1_001N).
_county_population_url = (
    "https://api.census.gov/data/{year}/dec/{dataset}"
    "?get={variable},NAME&for=county:*&in=state:*"
)

url_2000 = _county_population_url.format(
    year=2000, dataset="sf1", variable="P001001"
)
url_2010 = _county_population_url.format(
    year=2010, dataset="sf1", variable="P001001"
)
url_2020 = _county_population_url.format(
    year=2020, dataset="pl", variable="P1_001N"
)

#### Loop through the URLs, snag returned data and place in a list of dataframes

In [5]:
# flattened_list = [item for sublist in nested_list for item in sublist]

In [6]:
# flattened_list = []
# for sublist in nested_list:
#     for item in sublist:
#         flattened_list.append(item)

In [23]:
# Endpoints to download, oldest census first; the loop in the next cell
# fetches them in this order.
census_urls = [
    url_2000,
    url_2010,
    url_2020,
]

In [7]:
%%time

# Download each census year's county table and collect one dataframe per
# year in `dataframes`, tagging every row with the year it came from.
dataframes = []
for u in census_urls:
    # The census year is the path segment right after "/data/". Reading it
    # from the path (instead of stripping the exact query strings) keeps the
    # year correct if a URL's dataset or query parameters ever change.
    year = u.split("/data/", 1)[1].split("/", 1)[0]

    # A timeout turns a stalled connection into an error instead of a cell
    # that hangs forever.
    with urllib.request.urlopen(u, timeout=120) as response:
        rows = json.load(response)

    # The response is a list of rows whose first row is the API's own header
    # (its name field is "NAME"). It is kept here and removed in a later
    # cell that filters on county_name.
    dataframes.append(
        pd.DataFrame(
            rows,
            columns=["population", "county_name", "state_fips", "county_fips"],
        ).assign(year=year)
    )

CPU times: user 215 ms, sys: 24.9 ms, total: 240 ms
Wall time: 21.9 s


#### The bureau doesn't have an API for 1990, so read those counts from a local CSV instead

In [8]:
# Local file with the 1990 county counts. `fips` is forced to text because
# later cells slice it by character position to get state and county codes.
raw_1990_csv = "data/raw/us_census_redist_1990_2000_change.csv"
df_1990 = pd.read_csv(raw_1990_csv, dtype={"fips": str})

#### The 1990 data are formatted differently, so reshape them to match the API output

In [9]:
# Lookup built by the `us` package from its "abbr" field to its "name" field,
# used to turn the CSV's state_abbr values into state names.
postal_to_name = us.states.mapping("abbr", "name")
# NOTE(review): any state_abbr missing from the lookup maps to NaN, and the
# next cells then propagate that NaN into county_name. Depending on the `us`
# version, "DC" may not be in the default mapping -- confirm.
df_1990["state_name"] = df_1990["state_abbr"].map(postal_to_name)

In [10]:
# Split the combined county FIPS code into its state part (first two
# characters) and county part (the rest). The code is first left-padded to
# five characters so that a value which lost its leading zero upstream
# (e.g. "4013") still splits as state "04" / county "013"; codes that are
# already five characters long are unchanged.
fips_5 = df_1990["fips"].str.zfill(5)
df_1990["state_fips"] = fips_5.str[:2]
df_1990["county_fips"] = fips_5.str[2:]

In [11]:
# Append the state name so county_name reads "<county>, <state>", the form
# the later split on ", " expects. A row with a missing state_name ends up
# with a missing county_name.
df_1990["county_name"] = df_1990["county_name"].str.cat(
    df_1990["state_name"], sep=", "
)

In [12]:
# These three columns were only needed to build state_fips, county_fips and
# the combined county_name; remove them in place.
df_1990.drop(columns=["state_name", "state_abbr", "fips"], inplace=True)

In [13]:
# Preview the first five reshaped 1990 rows.
df_1990.head(n=5)

Unnamed: 0,county_name,population,year,state_fips,county_fips
0,"Maricopa County, Arizona",2122101,1990,4,13
1,"Los Angeles County, California",8863164,1990,6,37
2,"Clark County, Nevada",741459,1990,32,3
3,"Harris County, Texas",2818199,1990,48,201
4,"Orange County, California",2410556,1990,6,59


#### Concatenate the list of dataframes

In [14]:
# Stack the per-year API frames into one table, then add the 1990 rows
# underneath. Both results get a fresh 0..n-1 index.
src_df = pd.concat(dataframes, ignore_index=True)
src = pd.concat([src_df, df_1990], ignore_index=True)

In [15]:
# Rows per census year in the combined table. The API frames still include
# their header row at this point; it is filtered out in the next step.
src.value_counts("year")

year
2010    3222
2020    3222
2000    3220
1990    3141
dtype: int64

#### Get rid of headers we don't need

In [16]:
# Each API response's header row was loaded as data and has "NAME" in
# county_name; keep everything else.
df = src[src["county_name"] != "NAME"].copy()

# Make population and year numeric in every row regardless of source. The
# API frames hold year as text cut from the URL (and, presumably, population
# as JSON text -- confirm), while the 1990 values come from read_csv. Left
# mixed, sorting or comparing these columns in memory fails or misorders.
# The nullable "Int64" dtype keeps any missing value as <NA> instead of
# turning the whole column into floats.
df["population"] = pd.to_numeric(df["population"]).astype("Int64")
df["year"] = pd.to_numeric(df["year"]).astype("Int64")

#### Get state from the county column

In [17]:
# county_name arrives as "<county>, <state>". Split once, from the right, so
# the state is always the final piece and a county name that itself contains
# ", " cannot produce a third column (which would make the two-column
# assignment raise). Names with a single ", " split exactly as before.
df[["county_name", "state_name"]] = df["county_name"].str.rsplit(
    ", ", n=1, expand=True
)

#### Add AP-style state abbreviations

In [18]:
# Lookup built by the `us` package from its "name" field to its "ap_abbr"
# field; used in the next cell to fill the ap_name column.
name_to_ap = us.states.mapping("name", "ap_abbr")

In [19]:
# Translate each row's state_name through the name_to_ap lookup.
# NOTE(review): a state_name that is not a key in the lookup becomes NaN --
# check ap_name for missing values (e.g. DC or territories) before relying
# on it.
df["ap_name"] = df["state_name"].map(name_to_ap)

#### Re-order and slim the dataframe

In [20]:
# Columns to keep, in export order: identifiers first, then year, then the
# descriptive names, then the population count.
slim_columns = [
    "state_fips",
    "county_fips",
    "year",
    "county_name",
    "state_name",
    "ap_name",
    "population",
]
# Take an independent copy so later changes to df_slim never touch df.
df_slim = df.loc[:, slim_columns].copy()

#### Did we get enough counties for each year? 

In [21]:
# Rows per census year after the header rows were removed.
df_slim["year"].value_counts()

2010    3221
2020    3221
2000    3219
1990    3141
Name: year, dtype: int64

---

## Export

#### All four census years (1990, 2000, 2010, 2020). All the counties.

In [22]:
# Write the slimmed table to the processed-data folder, without the
# dataframe index.
export_path = "data/processed/decennial_census_county_pop_1990-00-10-20.csv"
df_slim.to_csv(export_path, index=False)