In [None]:
import pandas as pd
from pathlib import Path
import numpy as np
import os


In [None]:
# Paths are resolved relative to the current working directory: the raw
# extract is expected in a "data" folder that sits next to that directory.
root_path = Path.cwd()
data_path = root_path.parent.joinpath("data", "recvd_net_vars_v7_20180829.csv")

In [None]:
# Peek at the first line of the raw file to list the available column names
# without loading any data rows. This is a plain comma split, so it assumes
# the header contains no quoted commas.
with open(data_path, "r") as header_file:
    header_line = header_file.readline()

cols = header_line.strip().split(",")

cols

## We need the following variables:

### Identifying characteristics
- adr_net_behid_u_2014
- adr_net_dunsnumber_x_2014
- adr_net_behloc_x_2014

### Time characteristics
- adr_net_firstyear_x_2014
- adr_net_lastyear_x_2014

### NETS categories
- Medical Neighborhoods - adr_net_acth_c_2014 - ACT (All clinical treatment)
- Walking Destinations Neighborhoods - adr_net_walh_c_2014 - WAL (walkability)
- Fast food Neighborhoods - adr_net_ffah_c_2014 - FFA (all fast food)

### Geographic characteristics
- c10_cen_uid_u_2010
- m10_cen_uid_u_2010

Only records within the Philadelphia CBSA (m10_cen_uid_u_2010 = 37980) are kept.

# Reading / Tidying

In [None]:
# Columns to load from the raw extract.
# NOTE: this rebinds `cols`, which the header-peek cell used for the full
# list of column names in the file.
cols = [
    "adr_net_dunsnumber_x_2014",
    "adr_net_behid_u_2014",
    "adr_net_firstyear_x_2014",
    "adr_net_lastyear_x_2014",
    "adr_net_acth_c_2014",
    "adr_net_walh_c_2014",
    "adr_net_ffah_c_2014",
    # Previously "t10_cen_uid_u_2010". Every later cell (and the variable list
    # in the markdown above) uses the c10 column, so loading t10 instead made
    # those cells fail on a missing column.
    "c10_cen_uid_u_2010",
    "m10_cen_uid_u_2010",
]

# CBSA code kept by the filter below, compared as text.
PHILLY_CBSA = "37980"

# Force the CBSA column to be read as text. Left to type inference it can come
# back numeric, and a numeric column never equals the string "37980", which
# would silently produce an empty Philadelphia subset.
col_dtypes = {"m10_cen_uid_u_2010": str}

philly_cache_path = root_path.parent / "data" / "nets_philly_ACT_FFA_WAL.csv"

# Chunked reader over the full raw file; it is only iterated when the cached
# Philadelphia subset has to be (re)built.
df = pd.read_csv(data_path, chunksize=10**6, usecols=cols, dtype=col_dtypes)


try:
    df_philly = pd.read_csv(philly_cache_path, usecols=cols, dtype=col_dtypes)

# If we don't already have it (or the cached file cannot be parsed with the
# column set requested above, which pandas reports as a ValueError), make it
# and write it to disk.
except (IOError, ValueError):
    # Collect the filtered chunks and concatenate once; growing a frame with
    # DataFrame.append inside the loop is quadratic and the method no longer
    # exists in pandas 2.x.
    philly_chunks = [
        chunk[chunk["m10_cen_uid_u_2010"] == PHILLY_CBSA]
        for chunk in df
    ]
    if philly_chunks:
        df_philly = pd.concat(philly_chunks, ignore_index=True)
    else:
        # Raw file had a header but no data rows.
        df_philly = pd.DataFrame(columns=cols)

    # index=False (with ignore_index above) makes a freshly built frame match
    # what a later run gets back when it loads this cache.
    df_philly.to_csv(philly_cache_path, index=False)
   

In [None]:
# Number of rows in the Philadelphia subset.
df_philly.shape[0]

In [None]:
# Distinct c10 geography ids present in the Philadelphia subset.
df_philly["c10_cen_uid_u_2010"].unique()

In [None]:
# One row per (DUNS number, c10 geography id) pair: the smallest first-year
# value and the largest last-year value found among that pair's records.
grouped_census = (
    df_philly
    .groupby(["adr_net_dunsnumber_x_2014", "c10_cen_uid_u_2010"])
    .agg({
        "adr_net_firstyear_x_2014": "min",
        "adr_net_lastyear_x_2014": "max",
    })
    # Rename by column name instead of set_axis(..., inplace=False): the
    # `inplace` keyword was removed in pandas 2.0, and relabelling by position
    # relied on the aggregated columns coming back in dict order.
    .rename(columns={
        "adr_net_firstyear_x_2014": "enter_year",
        "adr_net_lastyear_x_2014": "exit_year",
    })
)

grouped_census.head()

In [None]:
# Number of (DUNS number, c10 geography id) pairs.
grouped_census.shape[0]

In [None]:
def _count_by_year(records, year_column):
    """Count the rows of `records` for each (c10_cen_uid_u_2010, year_column) pair.

    Returns a Series named after `year_column` whose index levels are
    ("c10_cen_uid_u_2010", "year").
    """
    counts = records.groupby(["c10_cen_uid_u_2010", year_column]).size()
    counts = counts.rename_axis(["c10_cen_uid_u_2010", "year"], axis="index")
    return counts.rename(year_column)


# Move the (DUNS number, geography id) index back into ordinary columns so the
# geography id can be used as a grouping key alongside each year column.
_census_flat = grouped_census.reset_index(drop=False)

enter_year = _count_by_year(_census_flat, "enter_year")
exit_year = _count_by_year(_census_flat, "exit_year")

In [None]:
# Number of (c10 geography id, year) pairs that have at least one exit.
exit_year.shape[0]

In [None]:
# Put the entry and exit counts side by side. A (geography id, year) pair that
# appears in only one of the two series gets NaN in the other column.
year_counts = pd.concat([enter_year, exit_year], axis=1).reset_index(drop=False)

# Parse the year values with the "%Y" format so the index level becomes
# datetime-like before it is used for resampling.
year_counts["year"] = pd.to_datetime(year_counts["year"], format="%Y")

df_all = year_counts.set_index(["c10_cen_uid_u_2010", "year"])

In [None]:
# Within each c10 geography id (index level 0), resample the year level
# (index level 1) at the "10Y" frequency and sum the entry/exit counts.
ten_yr = (
    df_all
    .groupby(level=0)
    .resample("10Y", level=1)
    .sum()
)

# Show every geography id, restricted to year labels from 1990-01-01
# through 2014-12-31.
ten_yr.loc[pd.IndexSlice[:, "1/1/1990":"12/31/2014"], :]

##### groupby