# IRS `net` migration at the county level: 2011-2020

#### Import Python tools

In [2]:
%load_ext lab_black

In [3]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np
import us

In [4]:
# Register the theme object shipped by altair_stiles under a name of our
# choosing, then switch Altair over to it.
THEME_NAME = "stiles"
alt.themes.register(THEME_NAME, altstiles.theme)
alt.themes.enable(THEME_NAME)

ThemeRegistry.enable('grid')

In [5]:
# Allow up to 1000 columns and 1000 rows when pandas renders a frame.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
# Turn off Altair's max-rows check on chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Read IRS [migration data](https://www.irs.gov/statistics/soi-tax-stats-data-by-geographic-area), as processed in the `00` and `01` notebooks

In [13]:
# Load the processed "arrived to" file. The destination FIPS column is read as
# text rather than being parsed as a number.
inflow = pd.read_csv(
    filepath_or_buffer="data/processed/irs_migration_arrived_to_annual.csv",
    dtype=dict(destination_fips=str),
)

In [14]:
# Load the processed "left from" file. The origin FIPS column is read as text
# rather than being parsed as a number.
outflow = pd.read_csv(
    filepath_or_buffer="data/processed/irs_migration_left_from_annual.csv",
    dtype=dict(origin_fips=str),
)

In [15]:
# Preview the first five rows of the outflow table.
outflow.iloc[:5]

Unnamed: 0,origin_fips,origin_county_name,origin_state_abbr,year,returns
0,6037,Los Angeles County,CA,1617,177367
1,6037,Los Angeles County,CA,1920,166493
2,6037,Los Angeles County,CA,1718,147119
3,6037,Los Angeles County,CA,1819,145204
4,6037,Los Angeles County,CA,1213,134512


In [23]:
# Line up each county-year's outflow row with the inflow row for the same
# county and year. This is an inner join (pandas' default, spelled out here),
# so a county-year that appears in only one of the two tables is dropped.
# Both tables carry a `returns` column; pandas suffixes them `_x` (outflow)
# and `_y` (inflow), which are then given meaningful names.
src = outflow.merge(
    inflow,
    how="inner",
    left_on=["origin_fips", "year"],
    right_on=["destination_fips", "year"],
).rename(columns={"returns_x": "left", "returns_y": "arrived"})

In [24]:
# Spot check: show the merged rows for Los Angeles County.
src.loc[src["origin_county_name"].eq("Los Angeles County")]

Unnamed: 0,origin_fips,origin_county_name,origin_state_abbr,year,left,destination_fips,destination_county_name,destination_state_abbr,arrived
0,6037,Los Angeles County,CA,1617,177367,6037,Los Angeles County,CA,135672
1,6037,Los Angeles County,CA,1920,166493,6037,Los Angeles County,CA,111728
2,6037,Los Angeles County,CA,1718,147119,6037,Los Angeles County,CA,113335
3,6037,Los Angeles County,CA,1819,145204,6037,Los Angeles County,CA,111327
4,6037,Los Angeles County,CA,1213,134512,6037,Los Angeles County,CA,118308
5,6037,Los Angeles County,CA,1112,132218,6037,Los Angeles County,CA,118900
6,6037,Los Angeles County,CA,1516,131648,6037,Los Angeles County,CA,109896
7,6037,Los Angeles County,CA,1314,124942,6037,Los Angeles County,CA,109736
11,6037,Los Angeles County,CA,1415,101293,6037,Los Angeles County,CA,88909


In [25]:
# Net migration per county-year: arrivals minus departures
# (negative means more returns left than arrived).
src["net"] = src["arrived"].sub(src["left"])

In [28]:
# Build the tidy county-year table: keep a single set of county identifiers
# (the origin_* columns, which the join above matched to the destination
# county's FIPS) plus the left / arrived / net counts, largest net gain first.
#
# The FIPS codes arrive without their leading zero (Los Angeles County, CA
# shows as "6037" in the output above instead of "06037"), so they are
# left-padded to the standard 5 characters here. Codes that are already
# 5 characters long are unchanged.
df = (
    src[
        [
            "origin_fips",
            "origin_county_name",
            "origin_state_abbr",
            "left",
            "arrived",
            "net",
            "year",
        ]
    ]
    .rename(
        columns={
            "origin_fips": "fips",
            "origin_county_name": "county_name",
            "origin_state_abbr": "state_abbr",
        }
    )
    .assign(fips=lambda frame: frame["fips"].str.zfill(5))
    .sort_values("net", ascending=False)
)

In [38]:
# Collapse the county-year rows into one total per county across all years,
# ordered from the largest net gain to the largest net loss.
#
# The aggregations use the string alias "sum" rather than the builtin `sum`:
# passing the builtin to GroupBy.agg is deprecated in recent pandas and emits
# a FutureWarning, while the alias produces the same totals.
#
# NOTE(review): the grouping key includes county_name and state_abbr as well
# as fips, so a county whose name is spelled differently between years would
# come out as more than one row — confirm names are stable in the inputs.
df_grouped = (
    df.groupby(["fips", "county_name", "state_abbr"])
    .agg({"left": "sum", "arrived": "sum", "net": "sum"})
    .sort_values("net", ascending=False)
    .reset_index()
)

---

## Exports

In [39]:
# Write the per-county totals to disk without the positional index column.
df_grouped.to_csv(
    path_or_buf="data/processed/irs_migration_decade_net.csv",
    index=False,
)