## Converting mobility measures into centroids for Kepler interaction

#### Creating centroid subplaces for the boroughs; Manhattan is handled in the `Kepler_USA` notebook.

In [1]:
# make display wider
# `display` and `HTML` come from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# inject a CSS rule that sets the `.container` element to 88% width
display(HTML("<style>.container { width:88% !important; }</style>"))

In [2]:
# read in packages
%matplotlib inline
import pandas as pd
import json
import geopandas as gpd
import numpy as np
from shapely import wkt
import os
from keplergl import KeplerGl

In [None]:
# allow max rows and columns to be displayed (None removes the display limit)
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# switch the working directory to the local repository checkout so the
# relative data/ paths used in this file resolve
os.chdir(r'C:\Users\steve\GitHub\rp-covid-migration')

In [None]:
# read the sub-place GeoJSON straight into a GeoDataFrame
subplaces = gpd.read_file(r'data/shapefiles/nyc_subplace_centroid.geojson')
# preview the first rows
subplaces.head()

In [None]:
# one daily destination-by-sub-place table per borough
bk_df, bx_df, qn_df, si_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'data/dest_bkxsubpl_daily_01-09-2020.csv',
        r'data/dest_bxxsubpl_daily_01-09-2020.csv',
        r'data/dest_qnxsubpl_daily_01-09-2020.csv',
        r'data/dest_sixsubpl_daily_01-09-2020.csv',
    )
)
# number of decimal places kept when clean_counties rounds the values
decimals = 0
# borough labels; clean_counties uses them in the output file name
brooklyn, bronx, queens, staten = 'brooklyn', 'bronx', 'queens', 'staten'
def clean_counties(df, county):
    """Reshape one borough's wide daily table to long form and export it.

    The table is melted to one row per id and date column, inner-joined to
    the module-level ``subplaces`` frame (``GEOID`` == ``id``), rounded to
    ``decimals`` places (module-level setting), cast to int, filtered to
    values of at least 1, and written to ``data/{county}_tosubplace.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``dest_subpl`` column holding string ids. It is
        assumed to be the first column, with every later column named by a
        date.
    county : str
        Label inserted into the output file name.

    Returns
    -------
    The first five rows of the exported table (``DataFrame.head()``).
    """
    df = df.rename(columns={'dest_subpl': 'id'})
    # drop the first character of each id before joining on GEOID
    df['id'] = df['id'].str[1:]
    # every column after the first is treated as a date column
    date_columns = df.columns.to_list()[1:]

    df = pd.melt(df, id_vars=['id'], value_vars=date_columns)
    # parse the date headers once; the parsed column is reused after the merge
    df['variable'] = pd.to_datetime(df['variable'])

    df = pd.merge(subplaces, df, left_on='GEOID', right_on='id', how="inner")
    df['date'] = df['variable'].dt.strftime('%m/%d/%Y')
    df['datetime'] = df['date'].astype(str) + ' 0:00'

    # round to `decimals` places (0 = nearest whole number)
    df['value'] = df['value'].astype(float).round(decimals)
    # Keep rows with a value of at least 1. This runs BEFORE the int cast
    # because NaN compares False here, so missing values are dropped instead
    # of making astype(int) raise.
    df = df.loc[df['value'] >= 1].copy()
    df['value'] = df['value'].astype(int)  # remove decimal

    # renaming tooltip columns for Kepler.gl hover
    df = df.rename(columns={'NAME': 'Name', 'date': 'Date', 'value': 'Trips/Device'})
    df.to_csv(f'data/{county}_tosubplace.csv', index=False)
    return df.head()

In [None]:
# Run the cleaning/export step once per borough; the second argument is the
# label that clean_counties puts in the output CSV file name.
clean_counties(bk_df, brooklyn)
clean_counties(bx_df, bronx)
clean_counties(qn_df, queens)
# NOTE(review): presumably only this final call's returned preview is rendered
# as the cell output — confirm.
clean_counties(si_df, staten)


#### NTA Conversion to centroids to allow viz in Kepler.gl

In [None]:
# load the NTA GeoJSON and convert it to the global CRS (EPSG:4326) in one chain
nta = gpd.read_file(r'data/shapefiles/nynta.geojson').to_crs("EPSG:4326")
# show the resulting CRS
nta.crs

In [None]:
# Replace each NTA geometry with its centroid.
# The centroid is computed in a projected CRS (EPSG:2263, NAD83 / New York
# Long Island) and then converted back to the layer's own CRS. Centroids
# computed directly on geographic (lat/lon) coordinates are planar
# approximations, which GeoPandas warns are likely incorrect.
nta['geometry'] = nta.to_crs("EPSG:2263").centroid.to_crs(nta.crs)
nta = nta.set_geometry('geometry')
# add independent lat/longs for KeplerGl
nta['lon'] = nta['geometry'].x
nta['lat'] = nta['geometry'].y
# write the centroid layer out as GeoJSON
nta.to_file("data/shapefiles/nta_centroid.geojson", driver="GeoJSON")

In [None]:
# preview the first rows of nta
nta.head()

In [None]:
# load the time-away table
away = pd.read_csv(r'data/time_away_ntas_2020.csv')
# inner join: keep only rows whose NTACode (nta) matches an NTA value (away)
df = pd.merge(
    left=nta,
    right=away,
    how="inner",
    left_on="NTACode",
    right_on="NTA",
)
# preview the merged table
df.head()

In [None]:

# Reshape the merged NTA table to long form: one row per id and date column.
# Only the time-away table's non-key columns are melted. Listing every merged
# column would include the renamed 'NTA' key and the layer's non-date columns,
# which breaks both the melt and the date parsing below.
# NOTE(review): assumes every column of `away` other than 'NTA' is named by a
# date — confirm against the CSV.
list_date = [col for col in away.columns if col != 'NTA']

df = df.rename(columns={'NTA': 'id'})  # rename so the join ids are the same
# store the ids as strings (assigning the bare `.str` accessor does not do this)
df['id'] = df['id'].astype(str)
df = pd.melt(df, id_vars=['id'], value_vars=list_date)
df['variable'] = pd.to_datetime(df['variable'])
df['val_pct'] = df['value'] * 100  # convert it into percentage
df.head()