# Migration from Los Angeles County

### Import python tools

In [1]:
import pandas as pd
import jenkspy 
import geopandas as gpd
from pathlib import Path
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import json
import os
import seaborn as sns
import altair_latimes as lat
import altair as alt
from pandas.io.json import json_normalize 
# Select Altair's 'notebook' renderer for chart output.
alt.renderers.enable('notebook')
# Register the theme object from altair_latimes under the name 'latimes'
# first; the enable() call below looks the theme up by that name.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

### Read data downloaded from IRS

In [2]:
# source_files = sorted(Path('/Users/mhustiles/data/github/notebooks/migration/input/irs/').glob('*.csv'))

# dataframes = []
# for f in source_files:
#     df = pd.read_csv(f, encoding = "ISO-8859-1")
#     df['source'] = f.name
#     df['source'] = df['source'].str.replace('countyoutflow', '').str.replace('.csv', '', regex=False)
#     dataframes.append(df)

# src = pd.concat(dataframes, sort=False)

In [3]:
def read_outflow(period, data_dir='input/irs'):
    """Read one IRS county-outflow CSV into a DataFrame.

    Parameters
    ----------
    period : str
        Four-digit suffix used in the file name, e.g. '1011' for
        ``countyoutflow1011.csv``.
    data_dir : str or Path
        Directory that holds the ``countyoutflow<period>.csv`` files.

    Returns
    -------
    pandas.DataFrame
        The parsed file, read as comma-separated latin-1 text.
    """
    csv_path = Path(data_dir) / 'countyoutflow{}.csv'.format(period)
    return pd.read_csv(csv_path, sep=',', encoding='latin-1')


# One frame per filing period. The individual names are kept because the
# concat cell that follows refers to each of them.
df_1011, df_1112, df_1213, df_1314, df_1415, df_1516 = (
    read_outflow(period)
    for period in ('1011', '1112', '1213', '1314', '1415', '1516')
)

In [4]:
# Stack the six per-period frames in chronological order. sort=False leaves
# the columns unsorted instead of ordering them alphabetically.
yearly_frames = [df_1011, df_1112, df_1213, df_1314, df_1415, df_1516]
src = pd.concat(yearly_frames, sort=False)

### Process data types and fields

In [5]:
# Cast the four FIPS component columns to strings so the following cells can
# zero-pad and concatenate them. DataFrame.astype returns a new frame and has
# no `inplace` option (passing one raises TypeError on current pandas), so
# the result is simply assigned to a new name.
fips_component_columns = ['y1_statefips', 'y1_countyfips', 'y2_statefips', 'y2_countyfips']
all_years = src.astype({column: 'str' for column in fips_component_columns})

In [6]:
# Left-pad the FIPS components with zeros: state codes to two characters,
# county codes to three.
for fips_column, width in (('y1_statefips', 2), ('y1_countyfips', 3),
                           ('y2_statefips', 2), ('y2_countyfips', 3)):
    all_years[fips_column] = all_years[fips_column].str.zfill(width)

In [7]:
# Join state and county codes into one identifier per year, and expose the
# year-2 identifier under the name `geoid` as well.
all_years = all_years.assign(
    y1_fips=lambda frame: frame['y1_statefips'] + frame['y1_countyfips'],
    y2_fips=lambda frame: frame['y2_statefips'] + frame['y2_countyfips'],
    geoid=lambda frame: frame['y2_fips'],
)

In [8]:
# Preview the first ten rows to check the new FIPS columns.
all_years.head(10)

Unnamed: 0,y1_statefips,y1_countyfips,y2_statefips,y2_countyfips,y2_state,y2_countyname,n1,n2,agi,y1_fips,y2_fips,geoid
0,0,0,96,0,US,Total Mig - US & For,6991456,12967005,304464050,0,96000,96000
1,0,0,97,0,US,Total Mig - US,6890106,12775807,299138260,0,97000,97000
2,0,0,97,1,US,Total Mig - US Same St,3916380,7260905,157646960,0,97001,97001
3,0,0,97,3,US,Total Mig - US Diff St,2973726,5514902,141491300,0,97003,97003
4,0,0,98,0,US,Total Mig - Foreign,101350,191198,5325790,0,98000,98000
5,1,0,96,0,AL,Total Mig - US & For,99179,202679,3665124,1000,96000,96000
6,1,0,97,0,AL,Total Mig - US,98140,200358,3620373,1000,97000,97000
7,1,0,97,1,AL,Total Mig - US Same St,54577,112357,1832756,1000,97001,97001
8,1,0,97,3,AL,Total Mig - US Diff St,43563,88001,1787617,1000,97003,97003
9,1,0,98,0,AL,Total Mig - Foreign,1039,2321,44751,1000,98000,98000


### Map year 2 FIPS codes

In [9]:
# Keep only the year-2 identifier columns (FIPS code, county name, state).
y2_columns = ['y2_fips', 'y2_countyname', 'y2_state']
y2 = all_years[y2_columns]

In [10]:
# GroupBy object keyed on every year-2 identifier column.
# NOTE(review): nothing in the visible notebook reads `fipsgroup` afterwards.
fipsgroup = y2.groupby(by=['y2_fips', 'y2_countyname', 'y2_state'])

### Filter national aggregates and people who stayed in LAC

In [11]:
# y2_statefips codes treated as aggregate (non-county) rows. The all_years
# preview shows 96/97/98 on the "Total Mig - ..." rows; 57/58/59 are
# presumably further IRS summary codes — TODO confirm against the IRS layout.
aggregates = ['57', '58', '59', '96', '97', '98']

In [12]:
# Rows whose year-2 county is Los Angeles (06037) and whose year-1 county is
# not, leaving out any row that carries an aggregate state code.
ends_in_la = all_years['y2_fips'] == '06037'
starts_elsewhere = all_years['y1_fips'] != '06037'
is_aggregate = all_years['y2_statefips'].isin(aggregates)
la_in = all_years[ends_in_la & starts_elsewhere & ~is_aggregate]

In [13]:
# Preview the filtered rows; every y2_fips shown should be Los Angeles County.
la_in.head()

Unnamed: 0,y1_statefips,y1_countyfips,y2_statefips,y2_countyfips,y2_state,y2_countyname,n1,n2,agi,y1_fips,y2_fips,geoid
106,1,3,6,37,CA,Los Angeles County,11,19,518,1003,6037,6037
897,1,73,6,37,CA,Los Angeles County,76,114,2576,1073,6037,6037
1281,1,89,6,37,CA,Los Angeles County,34,52,1444,1089,6037,6037
1490,1,97,6,37,CA,Los Angeles County,35,59,1130,1097,6037,6037
1663,1,101,6,37,CA,Los Angeles County,19,36,508,1101,6037,6037


In [14]:
# The complementary slice: only rows flagged with an aggregate state code.
is_aggregate_row = all_years['y2_statefips'].isin(aggregates)
aggregated = all_years.loc[is_aggregate_row]

In [15]:
# total = pd.DataFrame(aggregated[(aggregated['y2_countyname'].str.contains('Total Migration-Different State')) &\
#                    (aggregated['y2_statefips'] != '96') &\
#                   (aggregated['y1_countyfips'] != '000')])

In [16]:
# total['merge_fips_norm'] = total['y1_statefips'] + total['y1_countyfips']

In [17]:
# normalization_totals = total.groupby(['merge_fips_norm']).agg({'n1':'sum'}).reset_index()

### Group by locations where tax filers moved

In [18]:
# Sum n1 across all filing periods for each (year-1 county, year-2 county)
# pair; as_index=False keeps the keys as ordinary columns.
migration = la_in.groupby(['y1_fips', 'geoid'], as_index=False)['n1'].sum()

In [19]:
# Give the summed n1 column a descriptive name.
migration = migration.rename(columns={'n1': 'tax_migrants_to_lac'})

In [20]:
# Preview the first five aggregated rows.
migration.head(5)

Unnamed: 0,y1_fips,geoid,tax_migrants_to_lac
0,1003,6037,58
1,1069,6037,12
2,1073,6037,423
3,1081,6037,57
4,1089,6037,252


### How many tax filers moved to LAC in total?

In [21]:
# Grand total of the tax_migrants_to_lac column over every row of `migration`.
migration['tax_migrants_to_lac'].sum()

647449

### How many tax filers moved to LAC from each origin county?

In [None]:
# Total filers per year-1 county FIPS code. The value column is selected
# explicitly so the string `geoid` column is never passed to sum(): a blanket
# .agg('sum') either drops it or string-concatenates it, depending on the
# pandas version.
# NOTE(review): despite the name `states`, the key is the full five-digit
# county code, so this yields one row per county — confirm whether a
# state-level roll-up was intended.
states = migration.groupby('y1_fips', as_index=False)['tax_migrants_to_lac'].sum()

In [27]:
# Ten rows of `states` with the largest tax_migrants_to_lac values.
states.sort_values(by='tax_migrants_to_lac',ascending=False).head(10)

Unnamed: 0,y1_fips,tax_migrants_to_lac
58,6059,96859
64,6071,68640
61,6065,39617
65,6073,30482
82,6111,26255
375,32003,17475
45,6029,14333
432,36061,14128
17,4013,13046
198,17031,11459


In [28]:
# Ten rows of `migration` with the largest tax_migrants_to_lac values.
migration.sort_values(by='tax_migrants_to_lac',ascending=False).head(10)

Unnamed: 0,y1_fips,geoid,tax_migrants_to_lac
58,6059,6037,96859
64,6071,6037,68640
61,6065,6037,39617
65,6073,6037,30482
82,6111,6037,26255
375,32003,6037,17475
45,6029,6037,14333
432,36061,6037,14128
17,4013,6037,13046
198,17031,6037,11459


In [None]:
# Attach per-county normalisation totals to the migration counts, keeping
# only keys present on both sides.
# NOTE(review): `normalization_totals` is only assigned in a commented-out
# cell in the visible notebook, so this raises NameError on a fresh kernel
# unless that cell is restored.
normal_migration = pd.merge(
    left=migration,
    right=normalization_totals,
    how='inner',
    left_on='geoid',
    right_on='merge_fips_norm',
)

In [None]:
# Preview the merged frame.
normal_migration.head()

In [None]:
# Rename the n1 column that came in through the merge.
normal_migration = normal_migration.rename(columns={'n1': 'all_tax_migrants_to_county'})

In [None]:
# Preview again to check the renamed column.
normal_migration.head()

In [None]:
# Migrants per 1,000 of the county total. The count column is named
# `tax_migrants_to_lac` (set when `migration` was renamed); the previous
# `tax_migrants_from_lac` does not exist in this frame and raised KeyError.
normal_migration['rate_per_1000'] = \
    (normal_migration['tax_migrants_to_lac'] / normal_migration['all_tax_migrants_to_county'])*1000

In [None]:
# Each row's share (in percent) of all migrants counted in `migration`.
# The denominator is computed from the data instead of a hard-coded figure,
# and the count column is read under its actual name, `tax_migrants_to_lac`.
# The output column keeps its existing name because later cells sort by it.
total_tax_migrants = migration['tax_migrants_to_lac'].sum()
normal_migration['pct_all_migrants_from_lac'] = \
    (normal_migration['tax_migrants_to_lac'] / total_tax_migrants)*100

In [None]:
# Ten rows with the highest rate_per_1000.
normal_migration.sort_values(by='rate_per_1000', ascending=False).head(10)

In [None]:
# Ten rows with the highest pct_all_migrants_from_lac.
normal_migration.sort_values(by='pct_all_migrants_from_lac', ascending=False).head(10)

### Prepare natural breaks for mapping

In [None]:
# Natural-breaks boundaries for seven classes of rate_per_1000.
breaks = jenkspy.jenks_breaks(normal_migration['rate_per_1000'].tolist(), nb_class=7)

In [None]:
# Display the computed class boundaries.
breaks

In [None]:
def get_group(value, bounds=None):
    """Return the zero-based class index that ``value`` falls into.

    Class ``i`` covers values up to and including ``bounds[i + 1]``. The first
    element of ``bounds`` is the lower edge of class 0 and is never compared.

    Parameters
    ----------
    value : number
        The value to classify.
    bounds : sequence of numbers, optional
        Ascending class boundaries. Defaults to the notebook-level ``breaks``.

    Returns
    -------
    int or None
        The class index. Values above the last boundary are placed in the
        top class. ``None`` is returned for NaN, or when ``bounds`` holds
        fewer than two boundaries.
    """
    if bounds is None:
        bounds = breaks
    upper_edges = list(bounds)[1:]
    # NaN never satisfies `<=`, which previously ran the loop off the end of
    # the list and raised IndexError. `value != value` is true only for NaN.
    if not upper_edges or value != value:
        return None
    for group, upper_edge in enumerate(upper_edges):
        if value <= upper_edge:
            return group
    # Above every boundary: clamp to the highest class instead of raising.
    return len(upper_edges) - 1

### Attach break groups to dataframe

In [None]:
# Classify every rate into its natural-breaks group.
normal_migration['mover_group'] = normal_migration['rate_per_1000'].map(get_group)

In [None]:
# Twenty rows with the highest rate_per_1000, now including mover_group.
normal_migration.sort_values(by='rate_per_1000',ascending=False).head(20)

---

### Import U.S. counties geography

In [None]:
# Location of the U.S. counties shapefile. It can be overridden with the
# COUNTIES_SHP environment variable so the notebook runs on other machines;
# the default is the original author's local path.
COUNTIES_SHP = os.environ.get(
    'COUNTIES_SHP',
    '/Users/mhustiles/data/data/GIS/usa/counties_lakes.shp',
)
counties = gpd.read_file(COUNTIES_SHP)

In [None]:
# Normalise column names: trim, lower-case, spaces and hyphens to
# underscores, parentheses removed. regex=False makes every pattern a
# literal; otherwise '(' and ')' depend on the pandas-version-specific regex
# default, under which they are regex metacharacters.
counties.columns = (
    counties.columns.str.strip().str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
)

### Remove Alaska and Hawaii

In [None]:
# Drop counties whose state FIPS code is '15' or '02' (Hawaii and Alaska,
# per the section heading).
excluded_statefp = ['15', '02']
lower48 = counties[~counties['statefp'].isin(excluded_statefp)]

In [None]:
# Quick visual check of the remaining county geometries.
lower48.plot()

In [None]:
# Preview the attribute table of the filtered counties.
lower48.head()

---

### Merge geography with migration data

In [None]:
# Join county geometries to the migration figures on `geoid`.
# NOTE(review): in `migration` the geoid column is copied from y2_fips, which
# the la_in filter pins to '06037' — confirm this is the intended join key.
migration_map = pd.merge(left=lower48, right=normal_migration, on='geoid')

In [None]:
# Inspect every field of the row labelled 0.
migration_map.loc[0]

In [None]:
# Keep only the columns the map needs.
map_columns = ['geoid','name','mover_group','rate_per_1000','geometry']
slim = migration_map[map_columns]

In [None]:
# Quick visual check of the geometries that survived the merge.
slim.plot()

In [None]:
# Serialise the slim frame to GeoJSON text, parse it back into a dict, and
# pass its feature list to Altair as inline data.
slim_geojson_text = slim.to_json()
geojson = json.loads(slim_geojson_text)
features = alt.Data(values=geojson['features'])

In [None]:
# Light-gray county layer with white outlines, 800x500, Albers USA projection.
# NOTE(review): `background` is not referenced by any later visible cell.
background = (
    alt.Chart(data=features)
    .mark_geoshape(fill='lightgray', stroke='white')
    .properties(width=800, height=500)
    .project(type='albersUsa')
)

In [None]:
# Base layer for the final map: light-gray counties with hairline white
# outlines, 800x500, Albers USA projection.
base = (
    alt.Chart(data=features)
    .mark_geoshape(fill='lightgray', strokeWidth=0.1, stroke='white')
    .properties(width=800, height=500)
    .project(type='albersUsa')
)

In [None]:
# Unsized, unprojected geoshape chart that the colour encoding is added to.
geoshape = alt.Chart(data=features).mark_geoshape(fill='lightgray', stroke='white')

In [None]:
# Colour each county by its break group: the seven group indices map onto
# the seven-step 'fire-7' scheme from the altair_latimes palette, with no legend.
mover_scale = alt.Scale(
    domain=[0, 1, 2, 3, 4, 5, 6],
    range=lat.palette['schemes']['fire-7'],
)
mover_color = alt.Color("properties.mover_group:N", scale=mover_scale, legend=None)
movers = geoshape.encode(color=mover_color)

In [None]:
# Overlay the coloured layer on the base layer, add the title and remove the
# border drawn around the view.
layered_map = base + movers
layered_map.properties(title="LA County out migration: 2010-2015").configure_view(strokeWidth=0)