## Converting mobility measures into centorids for Kepler interaction

#### Creating Centroid Subplaces for Boroughs, Manhattan is in `Kepler_USA` notebook.

In [1]:
# make display wider
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:88% !important; }</style>"))

In [1]:
# read in packages
%matplotlib inline
import pandas as pd
import json
import geopandas as gpd
import numpy as np
from shapely import wkt
import os
from keplergl import KeplerGl

In [2]:
# allow max rows and colums to be displayed
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
os.chdir(r'C:\Users\steve\GitHub\rp-covid-migration')

In [3]:
subplaces = r'data/shapefiles/nyc_subplace_centroid.geojson'
subplaces = gpd.read_file(subplaces)
subplaces.head()

Unnamed: 0,OBJECTID,STATEFP,COUNTYFP,COUSUBFP,COUSUBNS,GEOID,NAME,NAMELSAD,LSAD,CLASSFP,MTFCC,CNECTAFP,NECTAFP,NCTADVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,PLACEFP,PLACENS,PCICBSA,PCINECTA,Shape_Leng,Shape_Area,ATOTAL,ATOTAL_mi,geoid2,TotIndust,geometry
0,1,9,9,46940,213459,900946940,Middlebury,Middlebury town,43,T1,G4040,720.0,78700.0,,A,45986818.0,1786816.0,41.5246912,-73.1230162,,,,,0.32104,0.005154,47773634.0,18.4454,900946940.0,-234.0,POINT (-73.12283 41.52707)
1,2,9,9,47535,213462,900947535,Milford,Milford town,43,T5,G4040,720.0,71950.0,,C,57442071.0,10218658.0,41.2250861,-73.0611101,,,,,0.48127,0.007266,67660729.0,26.12381,900947535.0,-1743.0,POINT (-73.06185 41.22575)
2,3,9,9,58300,213486,900958300,Oxford,Oxford town,43,T1,G4040,720.0,71950.0,,A,84803121.0,1531057.0,41.4440006,-73.1479992,,,,,0.439399,0.0093,86334178.0,33.33363,900958300.0,248.0,POINT (-73.13503 41.43129)
3,4,9,9,0,0,900900000,County subdivisions not defined,County subdivisions not defined,0,Z9,G4040,,,,F,0.0,599104136.0,41.1874659,-72.8153339,,,,,1.7,0.0643,599104136.0,231.31411,900900000.0,,POINT (-72.79470 41.25792)
4,5,9,9,44560,213454,900944560,Madison,Madison town,43,T1,G4040,720.0,75700.0,,A,93622105.0,1251401.0,41.344481,-72.6245213,,,,,0.739953,0.010206,94873506.0,36.63066,900944560.0,-218.0,POINT (-72.62809 41.34013)


In [6]:
bk_df = pd.read_csv(r'data/dest_bkxsubpl_daily_01-09-2020.csv')
bx_df = pd.read_csv(r'data/dest_bxxsubpl_daily_01-09-2020.csv')
qn_df = pd.read_csv(r'data/dest_qnxsubpl_daily_01-09-2020.csv')
si_df = pd.read_csv(r'data/dest_sixsubpl_daily_01-09-2020.csv')
mn_df = pd.read_csv(r'data/dest_mnxsubpl_daily_01-09-2020.csv')
decimals = 0
brooklyn = 'brooklyn'
bronx = 'bronx'
queens = 'queens'
staten = 'staten'
manhattan = 'manhattan'
def clean_counties(df, county):
    # alter file, convert geoid field to string
    df = df.rename(columns={'dest_subpl':'id'})
    df['id'] = df['id'].str[1:]
    list_date = df.columns.to_list()
    list_date = list_date[1:]  

    df = pd.melt(df, id_vars=['id'],value_vars = list_date)

    df['variable'] = pd.to_datetime(df['variable'])
    df = pd.merge(subplaces, df, left_on='GEOID', right_on='id', how = "inner")
    df['date'] = pd.to_datetime(df['variable'])
    df['date'] = df["date"].dt.strftime('%m/%d/%Y')
    df['datetime'] = df['date'].astype(str) + ' 0:00'
    #df_centroid.drop(columns = 'geometry')
    #df['geom'] = df['geom'].apply(wkt.loads)
    df['value'] = df['value'].astype(float)
    df['value'] = df['value'].apply(lambda x: round(x, decimals)) # round to nearest whole num
    df['value'] = df['value'].astype(int) # remove decimal
    # selecting rows based on value being greater than 0
    df = df.loc[df['value'] >= 1] 
    #renaming tooltip columns for Kepler.gl hover
    df = df.rename(columns = {'NAME':'Name','date':'Date','value':'Trips/Device'})
    df.to_csv(f'data/{county}_tosubplace.csv', index = False)
    return(df.head())

In [7]:
clean_counties(bk_df, brooklyn)
clean_counties(bx_df, bronx)
clean_counties(qn_df, queens)
clean_counties(si_df, staten)
clean_counties(mn_df, manhattan)

Unnamed: 0,OBJECTID,STATEFP,COUNTYFP,COUSUBFP,COUSUBNS,GEOID,Name,NAMELSAD,LSAD,CLASSFP,MTFCC,CNECTAFP,NECTAFP,NCTADVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,PLACEFP,PLACENS,PCICBSA,PCINECTA,Shape_Leng,Shape_Area,ATOTAL,ATOTAL_mi,geoid2,TotIndust,geometry,id,variable,Trips/Device,Date,datetime
0,1,9,9,46940,213459,900946940,Middlebury,Middlebury town,43,T1,G4040,720,78700,,A,45986818.0,1786816.0,41.5246912,-73.1230162,,,,,0.32104,0.005154,47773634.0,18.4454,900946940.0,-234.0,POINT (-73.12283 41.52707),900946940,2020-01-01,2,01/01/2020,01/01/2020 0:00
1,1,9,9,46940,213459,900946940,Middlebury,Middlebury town,43,T1,G4040,720,78700,,A,45986818.0,1786816.0,41.5246912,-73.1230162,,,,,0.32104,0.005154,47773634.0,18.4454,900946940.0,-234.0,POINT (-73.12283 41.52707),900946940,2020-01-02,2,01/02/2020,01/02/2020 0:00
2,1,9,9,46940,213459,900946940,Middlebury,Middlebury town,43,T1,G4040,720,78700,,A,45986818.0,1786816.0,41.5246912,-73.1230162,,,,,0.32104,0.005154,47773634.0,18.4454,900946940.0,-234.0,POINT (-73.12283 41.52707),900946940,2020-01-03,2,01/03/2020,01/03/2020 0:00
3,1,9,9,46940,213459,900946940,Middlebury,Middlebury town,43,T1,G4040,720,78700,,A,45986818.0,1786816.0,41.5246912,-73.1230162,,,,,0.32104,0.005154,47773634.0,18.4454,900946940.0,-234.0,POINT (-73.12283 41.52707),900946940,2020-01-04,2,01/04/2020,01/04/2020 0:00
4,1,9,9,46940,213459,900946940,Middlebury,Middlebury town,43,T1,G4040,720,78700,,A,45986818.0,1786816.0,41.5246912,-73.1230162,,,,,0.32104,0.005154,47773634.0,18.4454,900946940.0,-234.0,POINT (-73.12283 41.52707),900946940,2020-01-05,2,01/05/2020,01/05/2020 0:00


#### NTA Conversion to centroids to allow viz in Kepler.gl

In [5]:
nta = r'data/shapefiles/nynta.geojson'
nta = gpd.read_file(nta)
# convert to global CRS
nta = nta.to_crs("EPSG:4326")
nta.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [6]:
nta['geometry'] = nta.centroid # make centroid
nta = nta.set_geometry('geometry')
# add independent lat/longs for KeplerGl
nta['lon'] = nta['geometry'].x
nta['lat'] = nta['geometry'].y
#counties.head()
nta.to_file("data/shapefiles/nta_centroid.geojson", driver="GeoJSON")


  """Entry point for launching an IPython kernel.


In [7]:
nta.head()

Unnamed: 0,BoroCode,BoroName,CountyFIPS,NTACode,NTAName,Shape_Leng,Shape_Area,geometry,lon,lat
0,3,Brooklyn,47,BK88,Borough Park,39247.227831,54005020.0,POINT (-73.98866 40.63095),-73.988661,40.63095
1,4,Queens,81,QN51,Murray Hill,33266.904797,52488280.0,POINT (-73.80955 40.76835),-73.809546,40.768352
2,4,Queens,81,QN27,East Elmhurst,19816.711908,19726850.0,POINT (-73.86840 40.76335),-73.868396,40.763352
3,4,Queens,81,QN07,Hollis,20976.335574,22887770.0,POINT (-73.76114 40.71064),-73.761137,40.710639
4,3,Brooklyn,47,BK25,Homecrest,27514.022918,29991970.0,POINT (-73.96433 40.59995),-73.964334,40.599954


In [40]:
dfnta = pd.read_csv(r'data/time_away_ntas_2020.csv')
decimals1 = 1
def nta_kepler(df):
    list_date = df.columns.to_list() 
    list_date = list_date[1:]  
    df = df.rename(columns={'NTA':'id'}) #rename so the join ids are the same
    df = pd.melt(df, id_vars=['id'], value_vars = list_date)
    df.head()
    df['val_pct'] = df['value']*100 # convert it into percentage
    df = pd.merge(nta, df, left_on="NTACode", right_on="id", how="inner")
    df['date'] = pd.to_datetime(df['variable'])
    df['date'] = df["date"].dt.strftime('%m/%d/%Y')
    df['datetime'] = df['date'].astype(str) + ' 0:00'
    df['val_pct'] = df['val_pct'].astype(float)
    df['val_pct'] = df['val_pct'].apply(lambda x: round(x, decimals1)) # round to nearest whole num
    # selecting rows based on value being greater than 0
    df = df.loc[df['val_pct'] >= 0.1] 
    #renaming tooltip columns for Kepler.gl hover
    df = df.rename(columns = {'NTAName':'Neighborhood','date':'Date','val_pct':'percent_away'})
    df.to_csv(f'data/nta_2020.csv', index = False)
    return(df.head())

In [41]:
nta_kepler(dfnta)

Unnamed: 0,BoroCode,BoroName,CountyFIPS,NTACode,Neighborhood,Shape_Leng,Shape_Area,geometry,lon,lat,id,variable,value,percent_away,Date,datetime
0,3,Brooklyn,47,BK88,Borough Park,39247.227831,54005020.0,POINT (-73.98866 40.63095),-73.988661,40.63095,BK88,1/1/20,0.020629,2.1,01/01/2020,01/01/2020 0:00
1,3,Brooklyn,47,BK88,Borough Park,39247.227831,54005020.0,POINT (-73.98866 40.63095),-73.988661,40.63095,BK88,1/2/20,0.018991,1.9,01/02/2020,01/02/2020 0:00
2,3,Brooklyn,47,BK88,Borough Park,39247.227831,54005020.0,POINT (-73.98866 40.63095),-73.988661,40.63095,BK88,1/3/20,0.013498,1.3,01/03/2020,01/03/2020 0:00
3,3,Brooklyn,47,BK88,Borough Park,39247.227831,54005020.0,POINT (-73.98866 40.63095),-73.988661,40.63095,BK88,1/4/20,0.019186,1.9,01/04/2020,01/04/2020 0:00
4,3,Brooklyn,47,BK88,Borough Park,39247.227831,54005020.0,POINT (-73.98866 40.63095),-73.988661,40.63095,BK88,1/5/20,0.021511,2.2,01/05/2020,01/05/2020 0:00


In [38]:
len(dfnta2)

5

In [None]:
df = pd.melt(df, id_vars=['id'], value_vars = list_date)
df['variable'] = pd.to_datetime(df['variable'])
df['val_pct'] = df['value']*100 # convert it into percentage
df.head()