# IRS `inflow` migration at the county level: 2011-2020

#### Import Python tools

In [26]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [27]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np
import us

In [28]:
# Make the altair_stiles theme available to Altair under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme enabled here is "grid", not the "stiles" theme
# registered on the previous line — confirm that this is intentional.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [29]:
# Allow up to 1000 columns and 1000 rows when pandas renders a frame.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
# Turn off Altair's max-rows check; kept as the last expression so its
# return value remains the cell's output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Read [migration data](https://www.irs.gov/statistics/soi-tax-stats-data-by-geographic-area) from IRS

#### Get `INFLOW` files by county from 2011-2020

In [30]:
# One county-inflow CSV per pair of consecutive two-digit years: 1112, 1213, ..., 1920.
out_urls = [
    f"https://www.irs.gov/pub/irs-soi/countyinflow{first_year}{first_year + 1}.csv"
    for first_year in range(11, 20)
]

In [31]:
out_urls

['https://www.irs.gov/pub/irs-soi/countyinflow1112.csv',
 'https://www.irs.gov/pub/irs-soi/countyinflow1213.csv',
 'https://www.irs.gov/pub/irs-soi/countyinflow1314.csv',
 'https://www.irs.gov/pub/irs-soi/countyinflow1415.csv',
 'https://www.irs.gov/pub/irs-soi/countyinflow1516.csv',
 'https://www.irs.gov/pub/irs-soi/countyinflow1617.csv',
 'https://www.irs.gov/pub/irs-soi/countyinflow1718.csv',
 'https://www.irs.gov/pub/irs-soi/countyinflow1819.csv',
 'https://www.irs.gov/pub/irs-soi/countyinflow1920.csv']

In [35]:
# The four FIPS code columns are read as strings rather than numbers.
fips_dtypes = {
    column: "str"
    for column in (
        "y1_statefips",
        "y1_countyfips",
        "y2_statefips",
        "y2_countyfips",
    )
}

dataframes = []

for url in out_urls:
    # What is left of the URL after stripping the fixed prefix and the
    # extension is the year-pair label (e.g. "1112"); it is stored on every row.
    year_label = url.replace(
        "https://www.irs.gov/pub/irs-soi/countyinflow", ""
    ).replace(".csv", "")
    yearly = pd.read_csv(url, encoding="Latin-1", dtype=fips_dtypes)
    dataframes.append(yearly.assign(year=year_label))

In [36]:
# Stack the per-year frames into one long table. The index is not reset, so
# each year's rows keep their original row labels and labels repeat across years.
src = pd.concat(dataframes)

In [37]:
src.head(5)

Unnamed: 0,y2_statefips,y2_countyfips,y1_statefips,y1_countyfips,y1_state,y1_countyname,n1,n2,agi,year
0,1,0,96,0,AL,Total Migration-US and Foreign,114109,238230,4549431,1112
1,1,0,97,0,AL,Total Migration-US,113093,235901,4500247,1112
2,1,0,97,1,AL,Total Migration-Same State,63752,135124,2381712,1112
3,1,0,97,3,AL,Total Migration-Different State,49341,100777,2118535,1112
4,1,0,98,0,AL,Total Migration-Foreign,1016,2329,49184,1112


In [38]:
# `y1_statefips` values that mark summary rows rather than real origin states.
# In the frame preview, 96, 97 and 98 label the "Total Migration-..." rows.
# NOTE(review): 57, 58 and 59 are presumably further IRS summary codes
# (e.g. foreign / "other flows") — confirm against the IRS migration users guide.
aggregates = ["57", "58", "59", "96", "97", "98"]

In [40]:
# Rows whose origin "state" code is one of the summary codes listed in `aggregates`.
is_aggregate_row = src["y1_statefips"].isin(aggregates)
# Rows whose origin county name contains "Non-migrants".
is_non_migrant_row = src["y1_countyname"].str.contains("Non-migrants")

# Keep only rows that are neither; copy so later edits do not touch `src`.
src_df = src[~is_aggregate_row & ~is_non_migrant_row].copy()

In [43]:
src_df.head()

Unnamed: 0,y2_statefips,y2_countyfips,y1_statefips,y1_countyfips,y1_state,y1_countyname,n1,n2,agi,year
11,1,1,1,51,AL,Elmore County,466,1016,18398,1112
12,1,1,1,101,AL,Montgomery County,443,982,15955,1112
13,1,1,1,21,AL,Chilton County,72,192,2228,1112
14,1,1,1,47,AL,Dallas County,52,126,1638,1112
15,1,1,1,73,AL,Jefferson County,46,104,1585,1112


In [44]:
# Give the terse IRS column names descriptive labels.
# The income column arrives in lowercase as `agi` (see the frame preview), and
# `rename` silently ignores mapping keys that are not present, so the key must
# be spelled "agi" — an "AGI" key would leave the column unrenamed.
# Reassigning instead of using `inplace=True` keeps the cell chainable.
src_df = src_df.rename(
    columns={
        "n1": "returns",
        "n2": "exemptions",
        "agi": "adjusted_gross_income",
        "y1_statefips": "origin_state_fips",
        "y1_countyfips": "origin_county_fips",
        "y2_statefips": "destination_state_fips",
        "y2_countyfips": "destination_county_fips",
        "y1_state": "origin_state_abbr",
        "y1_countyname": "origin_county_name",
    }
)

In [45]:
# Build one combined FIPS string per side by concatenating the state code
# and the county code (both are string columns, so `+` joins the text).
for side in ("origin", "destination"):
    src_df[f"{side}_fips"] = (
        src_df[f"{side}_state_fips"] + src_df[f"{side}_county_fips"]
    )

In [56]:
# One row per origin county per year-pair: total returns summed over every
# destination row that lists that county as the origin, largest totals first.
origin_year_keys = [
    "origin_fips",
    "origin_county_name",
    "origin_state_abbr",
    "year",
]

in_grouped = (
    src_df.groupby(origin_year_keys)[["returns"]]
    .sum()
    .round()
    .sort_values("returns", ascending=False)
    .reset_index()
)

In [57]:
in_grouped.head(10)

Unnamed: 0,origin_fips,origin_county_name,origin_state_abbr,year,returns
0,6037,Los Angeles County,CA,1617,177367
1,6037,Los Angeles County,CA,1920,166493
2,6037,Los Angeles County,CA,1718,147119
3,6037,Los Angeles County,CA,1819,145204
4,6037,Los Angeles County,CA,1213,134512
5,6037,Los Angeles County,CA,1112,132218
6,6037,Los Angeles County,CA,1516,131648
7,6037,Los Angeles County,CA,1314,124942
8,17031,Cook County,IL,1617,123072
9,48201,Harris County,TX,1617,107754


In [58]:
# Average the annual totals for each origin county across the year-pairs in
# which it appears, rounded to whole returns and ranked from highest to lowest.
origin_keys = [
    "origin_fips",
    "origin_county_name",
    "origin_state_abbr",
]

mean_in = (
    in_grouped.groupby(origin_keys)[["returns"]]
    .mean()
    .round()
    .sort_values("returns", ascending=False)
    .reset_index()
)

In [59]:
mean_in.head(20)

Unnamed: 0,origin_fips,origin_county_name,origin_state_abbr,returns
0,6037,Los Angeles County,CA,140088.0
1,17031,Cook County,IL,90530.0
2,48201,Harris County,TX,79482.0
3,36061,New York County,NY,76950.0
4,36047,Kings County,NY,66414.0
5,48113,Dallas County,TX,65270.0
6,6073,San Diego County,CA,60134.0
7,4013,Maricopa County,AZ,58957.0
8,6059,Orange County,CA,57989.0
9,36081,Queens County,NY,55012.0


In [60]:
# mean_in.to_csv("data/processed/irs_migration_origins_mean.csv", index=False)

In [62]:
# Write both summaries to the processed-data folder, without the row index.
for frame, destination in (
    (mean_in, "data/processed/irs_migration_left_from_mean.csv"),
    (in_grouped, "data/processed/irs_migration_left_from_annual.csv"),
):
    frame.to_csv(destination, index=False)