# IRS `outflow` migration at the county level: 2011-2020

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np
import us

In [3]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Read [migration data](https://www.irs.gov/statistics/soi-tax-stats-data-by-geographic-area) from IRS

#### Get `OUTFLOW` file by county from 2011-2020

In [5]:
out_urls = []

for d, i in zip(range(11, 20), range(12, 21)):
    out_url = f"https://www.irs.gov/pub/irs-soi/countyoutflow{str(d) + str(i)}.csv"
    out_urls.append(out_url)

In [6]:
out_urls

['https://www.irs.gov/pub/irs-soi/countyoutflow1112.csv',
 'https://www.irs.gov/pub/irs-soi/countyoutflow1213.csv',
 'https://www.irs.gov/pub/irs-soi/countyoutflow1314.csv',
 'https://www.irs.gov/pub/irs-soi/countyoutflow1415.csv',
 'https://www.irs.gov/pub/irs-soi/countyoutflow1516.csv',
 'https://www.irs.gov/pub/irs-soi/countyoutflow1617.csv',
 'https://www.irs.gov/pub/irs-soi/countyoutflow1718.csv',
 'https://www.irs.gov/pub/irs-soi/countyoutflow1819.csv',
 'https://www.irs.gov/pub/irs-soi/countyoutflow1920.csv']

In [7]:
dataframes = []

for url in out_urls:
    dataframes.append(
        pd.read_csv(
            url,
            encoding="Latin-1",
            dtype={
                "y1_statefips": "str",
                "y1_countyfips": "str",
                "y2_statefips": "str",
                "y2_countyfips": "str",
            },
        ).assign(
            year=url.replace(
                "https://www.irs.gov/pub/irs-soi/countyoutflow", ""
            ).replace(".csv", "")
        )
    )

In [8]:
src = pd.concat(dataframes)

In [9]:
src.head(5)

Unnamed: 0,y1_statefips,y1_countyfips,y2_statefips,y2_countyfips,y2_state,y2_countyname,n1,n2,agi,year
0,1,0,96,0,AL,Total Migration-US and Foreign,115723,242428,4490820,1112
1,1,0,97,0,AL,Total Migration-US,114692,240130,4441354,1112
2,1,0,97,1,AL,Total Migration-Same State,63752,135124,2381712,1112
3,1,0,97,3,AL,Total Migration-Different State,50940,105006,2059642,1112
4,1,0,98,0,AL,Total Migration-Foreign,1031,2298,49465,1112


In [10]:
aggregates = ["57", "58", "59", "96", "97", "98"]

In [11]:
src_df = src[
    (~src["y2_statefips"].isin(aggregates))
    & (~src["y2_countyname"].str.contains("Non-migrants"))
].copy()

In [12]:
src_df.rename(
    columns={
        "n1": "returns",
        "n2": "exemptions",
        "AGI": "adjusted_gross_income",
        "y1_statefips": "origin_state_fips",
        "y1_countyfips": "origin_county_fips",
        "y2_statefips": "destination_state_fips",
        "y2_countyfips": "destination_county_fips",
        "y2_state": "destination_state_abbr",
        "y2_countyname": "destination_county_name",
    },
    inplace=True,
)

#### Get a five-digit FIPS code state and county

In [13]:
src_df["origin_fips"] = src_df["origin_state_fips"] + src_df["origin_county_fips"]
src_df["destination_fips"] = (
    src_df["destination_state_fips"] + src_df["destination_county_fips"]
)

In [14]:
out_grouped = (
    src_df.groupby(
        [
            "destination_fips",
            "destination_county_name",
            "destination_state_abbr",
            "year",
        ]
    )
    .agg({"returns": "sum"})
    .round()
    .sort_values("returns", ascending=False)
    .reset_index()
)

In [15]:
out_grouped.head(10)

Unnamed: 0,destination_fips,destination_county_name,destination_state_abbr,year,returns
0,6037,Los Angeles County,CA,1617,135672
1,6037,Los Angeles County,CA,1112,118900
2,6037,Los Angeles County,CA,1213,118308
3,6037,Los Angeles County,CA,1718,113335
4,6037,Los Angeles County,CA,1920,111728
5,6037,Los Angeles County,CA,1819,111327
6,6037,Los Angeles County,CA,1516,109896
7,6037,Los Angeles County,CA,1314,109736
8,4013,Maricopa County,AZ,1617,98532
9,48453,Travis County,TX,1314,96627


In [16]:
mean_out = (
    out_grouped.groupby(
        [
            "destination_fips",
            "destination_county_name",
            "destination_state_abbr",
        ]
    )
    .agg({"returns": "mean"})
    .round()
    .sort_values("returns", ascending=False)
    .reset_index()
)

In [17]:
mean_out.head(20)

Unnamed: 0,destination_fips,destination_county_name,destination_state_abbr,returns
0,6037,Los Angeles County,CA,113090.0
1,48201,Harris County,TX,76368.0
2,4013,Maricopa County,AZ,74646.0
3,17031,Cook County,IL,69922.0
4,36061,New York County,NY,62614.0
5,48113,Dallas County,TX,62109.0
6,6073,San Diego County,CA,58105.0
7,48453,Travis County,TX,55220.0
8,36047,Kings County,NY,54263.0
9,53033,King County,WA,53849.0


In [18]:
# mean_out.to_csv("data/processed//irs_migration_destinations_mean.csv", index=False)

In [20]:
mean_out.to_csv("data/processed/irs_migration_arrived_to_mean.csv", index=False)
out_grouped.to_csv("data/processed/irs_migration_arrived_to_annual.csv", index=False)