In [None]:
import pandas as pd
import numpy as np
import boto3

from shapely.geometry.polygon import Polygon
from shapely.geometry import box
from shapely.geometry import Polygon, LineString, Point
import shapely.wkt
import geopandas as gpd
import json

import folium
import plotly.express as px
import matplotlib.pyplot as plt


import skmob
from skmob.preprocessing import filtering
from skmob.preprocessing import detection

import datetime

# Find Home Location

## find weekday home location

In [None]:
# v1 - weekday home-location candidates.
# The SQL is kept as a bare string for reference; nothing in this cell executes it.
# Steps of the query:
#   df   : rows of m_hk_filter_v2_b where day_of_week(timestamp) is between 1 and 5 and the
#          text form of latitude / longitude is longer than 5 / 6 characters
#          (presumably a filter against low-precision coordinates - TODO confirm).
#   df1  : keeps hours 0-8 and 20-23 and adds `location`, the WKT text of the point built
#          from longitude / latitude rounded to 4 decimals, plus a `date` column.
#   df2  : per (ifa, location): min latitude, min longitude and the number of distinct dates (ndays).
#   df3  : ranks the locations of each ifa by ndays, descending (rn).
#   df4  : keeps the three best-ranked locations per ifa (rn <= 3).
#   final: cross join with m_hk_districts, keeping pairs where the district geometry contains the location.
# NOTE(review): `date(distinct timestamp)` and passing the WKT text `a.location` to ST_Contains
# look unusual - confirm this is the exact statement that was run.
"""
create table m_hk_table2_weekday as
with df as(
select * from m_hk_filter_v2_b
where (day_of_week(timestamp) between 1 and 5) and (length(cast(latitude as varchar)) > 5) and (length(cast(longitude as varchar)) > 6)
),

df1 as (
SELECT *, ST_AsText(st_point(round(longitude,4),round(latitude,4))) location, date(distinct timestamp) date
FROM (select * from df where (hour(timestamp) between 0 and 8) or (hour(timestamp) between 20 and 23) )
),

df2 as (
select ifa, location, min(latitude) latitude, min(longitude) longitude, count(distinct date) ndays from df1
group by ifa, location
),

df3 as(
select *, ROW_NUMBER() OVER(PARTITION BY ifa ORDER BY ndays desc) rn from df2
),

df4 as(
select * from df3
where rn <=3
)

select a.*, b.name as hk_district, b.geometry hk_district_geometry from df4 a cross join m_hk_districts b
where ST_Contains(ST_Polygon(b.geometry), a.location)
"""
# Share of ifa that received a weekday home location.
# Both counts were noted down from query results by the author; they are not recomputed here.
N_IFA_WITH_HOME = 6174205  # number of unique ifa recorded for the table created above
N_IFA_TOTAL = 23223382     # denominator - presumably the total number of unique ifa; TODO confirm
home_share_pct = N_IFA_WITH_HOME / N_IFA_TOTAL * 100
# Two decimals are enough here; the raw float printed a long, noisy fraction.
print(f"{home_share_pct:.2f}% of ifa have a home location")


In [None]:
# Reference query (not executed in this cell): distinct ifa per district, counting only the
# best-ranked (rn = 1) weekday home location of each ifa.
"""
select hk_district, count(distinct ifa) nifa from m_hk_table2_weekday
where rn = 1
group by hk_district
"""
# District table, '|'-separated; later cells use its columns from 'Name' onwards.
hk_district = pd.read_csv('./data/2_hk_district_council.csv',sep="|")
# Presumably the exported result of the query above (columns hk_district, nifa) - TODO confirm.
df1 = pd.read_csv('./data/2_nifa_weekday_home_in_hk_district_v2.csv')



In [None]:
# Attach the district attributes (columns from 'Name' onwards) to the per-district ifa counts.
df_merge = df1.merge(
    hk_district.loc[:, 'Name':],
    how='left',
    left_on='hk_district',
    right_on='Name',
)

# Grouped bars per district: ifa count next to the 'PopulationCensus2016-06-30' column.
fig = px.histogram(
    df_merge.sort_values('hk_district'),
    x="Name",
    y=["nifa", 'PopulationCensus2016-06-30'],
    barmode='group',
    height=400,
)
fig.show()

# Find the closest building for the home location

In [None]:
# Add a district to every building.
# Reference query (not executed in this cell): inner-joins m_hk_buildings_v2 with m_hk_districts,
# keeping a building when the district geometry contains the building centroid
# (centroid_lon, centroid_lat), and stores the district name as `district`.
"""
create table m_hk_district_building as
select a.*, b.name district from m_hk_buildings_v2 a inner join m_hk_districts b
on ST_Contains(ST_GeometryFromText(b.geometry), ST_Point(a.centroid_lon,a.centroid_lat))
"""
# Row counts noted by the author: 48600 buildings in m_hk_district_building versus
# 48603 buildings in m_hk_buildings_polygon, i.e. 3 fewer after the join
# (presumably centroids that fall in no district - TODO confirm).

## Find buildings for all home locations (ranks 1, 2, 3)

In [None]:
# Find the buildings around each home-location candidate.
# Reference query (not executed in this cell). It restricts both sides to one district
# ('Central and Western') and inner-joins home locations with buildings whose centroid lies
# within great_circle_distance <= 0.3 of the home location, keeping that distance as `dist`.
# NOTE(review): 0.3 would be kilometres if this is Presto/Athena's great_circle_distance - confirm.
# Home locations with no building inside that radius are dropped by the inner join.
"""
create table m_hk_table2_weekday_v2 as
with a as(
select * from m_hk_table2_weekday
where (hk_district = 'Central and Western') 
),

b as(
select * from m_hk_district_building
where district = 'Central and Western'
)

select a.*, b.building_id, b.centroid_lat building_lat,b.centroid_lon building_lon, 
b.name building_name, b.tag building_tag,
great_circle_distance(a.latitude, a.longitude, b.centroid_lat,b.centroid_lon) dist 
from a 
inner join 
b 
on great_circle_distance(a.latitude, a.longitude, b.centroid_lat,b.centroid_lon)<=0.3
"""

# ... (presumably one insert like the one below was run per remaining district; only the
# 'Southern' one is shown - TODO confirm).
# Reference query (not executed in this cell): same join as the create-table statement above,
# restricted to district 'Southern' and appended to m_hk_table2_weekday_v2.
"""
insert into m_hk_table2_weekday_v2 
with a as(
select * from m_hk_table2_weekday
where (hk_district = 'Southern') 
),

b as(
select * from m_hk_district_building
where district = 'Southern'
)

select a.*, b.building_id, b.centroid_lat building_lat,b.centroid_lon building_lon, 
b.name building_name, b.tag building_tag,
great_circle_distance(a.latitude, a.longitude, b.centroid_lat,b.centroid_lon) dist 
from a 
inner join 
b 
on great_circle_distance(a.latitude, a.longitude, b.centroid_lat,b.centroid_lon)<=0.3
"""

# Number of unique ifa noted by the author for m_hk_table2_weekday_v2: 6099439
# (not recomputed here). The ratio below uses 23223382 as denominator - presumably the
# total number of unique ifa; TODO confirm.
6099439/23223382
# Some ifa have no home building: the inner join above only keeps home locations that have
# at least one building within the distance threshold.

In [None]:
# Select the closest building for every home-location candidate.
# Reference query (not executed in this cell): within each (ifa, rn) group the candidate
# buildings are numbered by ascending `dist`, and only the first one (dist_rn = 1) is kept.
"""
create table m_hk_table2_weekday_v2_2 as
with df1 as(
select *, ROW_NUMBER() OVER (PARTITION BY ifa, rn ORDER BY dist ASC) dist_rn from m_hk_table2_weekday_v2
)
select * from df1 where dist_rn = 1
"""
# Number of unique ifa noted by the author: 6099439 (same figure as recorded for the previous table).


In [None]:
# Check which building types occur among the closest buildings.
# Reference query (not executed in this cell).
"""
select distinct building_tag from m_hk_table2_weekday_v2_2 
"""


# Pick the home as the best-ranked of the top-3 frequent locations whose closest building
# carries one of the listed tags.
# Reference query (not executed in this cell):
#   df1 : keeps rows whose building_tag is in the hand-written list below.
#   df2 : numbers the remaining rows of each ifa by ascending rn (home_rn).
#   final: keeps home_rn = 1, i.e. one row per ifa.
# NOTE(review): the list includes the generic tags 'building' / 'building]' and what looks like
# a building name ('觀瀾雅軒 Baycrest') used as a tag value - presumably artefacts of the source
# data; confirm they are meant to count as residential.
"""
create table m_hk_table2_weekday_v2_3 as
with df1 as(
select * from m_hk_table2_weekday_v2_2
where (building_tag = 'dormitory') or (building_tag = 'residential') or (building_tag = 'hotel') or 
(building_tag = 'apartments') or (building_tag = 'house') or (building_tag = 'houseboat') or  
(building_tag = 'nursing_home')  or 
(building_tag = '觀瀾雅軒 Baycrest') or (building_tag = 'building') or (building_tag = 'building]') 
or (building_tag = 'stilt_house') or (building_tag = 'shed') or (building_tag = 'pavilion') or 
(building_tag = 'podium') or (building_tag = 'terrace') or (building_tag = 'TS') or 
(building_tag = 'detached') or (building_tag = 'bungalow') or (building_tag = 'hut')
),
df2 as(
select *, ROW_NUMBER() OVER(PARTITION BY ifa ORDER BY rn ASC) home_rn from df1
)


select * from df2 where home_rn = 1
"""
# Counts noted by the author: 5439173 rows and 5439173 distinct ifa (one row per ifa).
# The ratio below uses 23223382 as denominator - presumably the total number of unique ifa; TODO confirm.
5439173/23223382

In [None]:
# df: per-district ifa counts WITH the generic tag 'building' accepted as a home building.
# Reference query (not executed in this cell): distinct ifa per district in m_hk_table2_weekday_v2_3.
"""
select hk_district, count(distinct ifa) from m_hk_table2_weekday_v2_3
group by hk_district
"""
# Presumably the exported result of the query above - TODO confirm.
df = pd.read_csv('./data/2_nifa_weekday_home_in_hk_homebuilding_district.csv') # with building_tag = building

# df2: per-district ifa counts WITHOUT the generic tag 'building'.
# Reference query (not executed in this cell): same tag filter as used for m_hk_table2_weekday_v2_3
# except that 'building' and 'building]' are left out; the home is again the best-ranked
# remaining location per ifa (home_rn = 1).
"""
with df1 as(
select * from m_hk_table2_weekday_v2_2
where (building_tag = 'dormitory') or (building_tag = 'residential') or (building_tag = 'hotel') or (building_tag = 'apartments') or (building_tag = 'house') or (building_tag = 'houseboat') or  (building_tag = 'nursing_home')  or 
(building_tag = '觀瀾雅軒 Baycrest') 
or (building_tag = 'stilt_house') or (building_tag = 'shed') or (building_tag = 'pavilion') or 
(building_tag = 'podium') or (building_tag = 'terrace') or (building_tag = 'TS') or 
(building_tag = 'detached') or (building_tag = 'bungalow') or (building_tag = 'hut')
),
df2 as(
select *, ROW_NUMBER() OVER(PARTITION BY ifa ORDER BY rn ASC) home_rn from df1
)

select hk_district, count(distinct ifa) from df2
where home_rn = 1
group by hk_district
"""
# Presumably the exported result of the query above - TODO confirm.
df2 = pd.read_csv('./data/2_nifa_weekday_home_in_hk_homebuilding_district_v2.csv') # without building_tag = building
# Give both count tables the same two column names so they can be merged on the district.
for counts in (df, df2):
    counts.columns = ['hk_district', 'num_ifa']

# v1 counts + district attributes (columns from 'Name' onwards) ...
df_merge = df.merge(
    hk_district.loc[:, 'Name':],
    how='left',
    left_on='hk_district',
    right_on='Name',
)
# ... + v2 counts; the clashing 'num_ifa' columns become 'num_ifa_v1' / 'num_ifa_v2'.
df_merge2 = df_merge.merge(
    df2,
    how='left',
    left_on='hk_district',
    right_on='hk_district',
    suffixes=('_v1', '_v2'),
)

# Grouped bars per district: both count variants next to the 'PopulationCensus2016-06-30' column.
fig = px.histogram(
    df_merge2,
    x="Name",
    y=['num_ifa_v2', "num_ifa_v1", 'PopulationCensus2016-06-30'],
    barmode='group',
    height=400,
)
fig.show()

In [None]:
# with building_tag = building
# Grouped bars per district: v1 ifa counts next to the 'PopulationCensus2016-06-30' column.
fig = px.histogram(
    df_merge2,
    x="Name",
    y=["num_ifa_v1", 'PopulationCensus2016-06-30'],
    barmode='group',
    height=400,
)
fig.show()

In [None]:
# without building_tag = building
# Grouped bars per district: v2 ifa counts next to the 'PopulationCensus2016-06-30' column.
# The merged frame carries the two count variants as 'num_ifa_v1' / 'num_ifa_v2' (suffixes
# applied when df_merge2 was built). No cell shown creates a 'num_buildings_v2' column, so
# the y value must be 'num_ifa_v2' for this plot to resolve.
fig = px.histogram(
    df_merge2,
    x="Name",
    y=['num_ifa_v2', 'PopulationCensus2016-06-30'],
    barmode='group',
    height=400,
)
fig.show()

# Merge Home data with daily unique ifa Mobility data

## merge data

In [None]:
# Reference query (not executed in this cell): tags every row of m_hk_filter_v2_b with the
# district whose geometry contains its (longitude, latitude) point, then keeps the distinct
# combinations of ifa, date(timestamp), latitude, longitude and district.
"""
create table m_hk_filter_w_home_v1 as 
with df1 as(
select a.ifa, a.timestamp,a.latitude,a.longitude, b.name as hk_district 
from m_hk_filter_v2_b a cross join m_hk_districts b
where ST_Contains(ST_Polygon(b.geometry), st_point(a.longitude,a.latitude))
)

select distinct ifa, date(timestamp) date, latitude, longitude, hk_district from df1
"""

In [None]:
# Reference query (not executed in this cell): joins the daily observations
# (m_hk_filter_w_home_v1) with the home table (m_hk_table2_weekday_v2_3) on ifa, adding the
# home coordinates, home district and home-building attributes to every observation.
# It is an inner join, so observations of ifa without a home row are not kept.
"""
create table m_hk_filter_w_home_v2 as
select b.*, a.latitude home_latitude, a.longitude home_longitude,a.hk_district home_district, 
a.building_id, a.building_lat, a.building_lon, a.building_tag 
from m_hk_table2_weekday_v2_3 a 
join m_hk_filter_w_home_v1 b 
on a.ifa = b.ifa
"""

## flowmap with unique ifa

### district level 

In [None]:
# Reference query (not executed in this cell): per date, counts the distinct ifa for every
# pair of home district and observed district.
"""
select date, home_district, hk_district, count(distinct ifa) from m_hk_filter_w_home_v2 
group by date, home_district, hk_district
"""

### specific building

In [None]:
# Building "The Sea Crest" (listed in the source data as 嘉悅半島 The Sea Crest) -
# presumably building_id 223136834 used below; TODO confirm.
# Reference query (not executed in this cell): for ifa whose home building is 223136834,
# counts distinct ifa per date and per observed position rounded to 3 decimals.
# The count has no alias in the query; the next cell reads it as '_col6'.
"""
select date, building_id, building_lat, building_lon, round(latitude,3) latitude, round(longitude,3) longitude, count(distinct ifa) from m_hk_filter_w_home_v2 
where building_id = 223136834
group by date, building_id, building_lat, building_lon, round(latitude,3), round(longitude,3)
"""
# Presumably the exported result of the query above - TODO confirm.
df = pd.read_csv('data/2_m_hk_filtered_home_specificbuilding223136834_movement_v2.csv')

In [None]:
# Distinct observed positions; reset_index() keeps the first-occurrence row label as an
# 'index' column, which serves as the position id in the flow file.
location = (
    df.loc[:, 'latitude':'longitude']
    .drop_duplicates()
    .reset_index()
)

# Attach that position id to every movement row and write the flows.
df_merge = df.merge(location, how='left', on=['latitude', 'longitude'])
# df_merge = df_merge.loc[df_merge['_col6']>10]
flow_columns = ['building_id', 'index', '_col6', 'date']
df_merge.loc[:, flow_columns].to_csv('./data/flowmap.csv')

# Add the home building(s) to the location list, using building_id as the id so that it
# lines up with the ('index', 'latitude', 'longitude') layout.
location2 = df.loc[:, 'building_id':'building_lon'].drop_duplicates()
location2.columns = location.columns
location = pd.concat([location, location2]).reset_index(drop=True)
location.to_csv('./data/location.csv')

##  movements of specific ifa

### ifa=3b45468c-8516-47db-afb6-8f5b5a201e36

In [None]:
# Reference query (not executed in this cell): every row of this single ifa.
"""
select ifa, timestamp, latitude,longitude from m_hk_filter_v2_b 
where ifa = '3b45468c-8516-47db-afb6-8f5b5a201e36'
"""
ifa = pd.read_csv('./data/2_ifa=3b45468c-8516-47db-afb6-8f5b5a201e36.csv')

# Wrap the four trajectory columns in a scikit-mobility TrajDataFrame.
tdf = skmob.TrajDataFrame(
    ifa[['ifa', 'timestamp', 'latitude', 'longitude']],
    latitude='latitude',
    longitude='longitude',
    datetime='timestamp',
    user_id='ifa',
)

# Display the rows whose timestamp is later than 2022-01-01 00:00.
ping_time = pd.to_datetime(ifa['timestamp'])
ifa.loc[ping_time > datetime.datetime(2022, 1, 1)]

In [None]:
# Draw the trajectory of `tdf` (start/end markers disabled), then call plot_stops for at
# most one user, passing the same map object via map_f; the return value of plot_stops is
# what this cell displays.
# NOTE(review): no stop-detection step on `tdf` is visible in this section - confirm that
# plot_stops is meant to receive the raw trajectory.
m = tdf.plot_trajectory( start_end_markers=False)
tdf.plot_stops(max_users=1, map_f=m)


### ifa=adbf8587-4415-4d41-8bd3-5b8d69552917

In [None]:
# Reference query (not executed in this cell): every row of this single ifa.
"""
select ifa, timestamp, latitude,longitude from m_hk_filter_v2_b 
where ifa = 'adbf8587-4415-4d41-8bd3-5b8d69552917'
"""
ifa = pd.read_csv('./data/2_ifa=adbf8587-4415-4d41-8bd3-5b8d69552917.csv')

# Wrap the four trajectory columns in a scikit-mobility TrajDataFrame.
tdf = skmob.TrajDataFrame(
    ifa[['ifa', 'timestamp', 'latitude', 'longitude']],
    latitude='latitude',
    longitude='longitude',
    datetime='timestamp',
    user_id='ifa',
)

# Display the rows whose timestamp is later than 2022-01-01 00:00.
ping_time = pd.to_datetime(ifa['timestamp'])
ifa.loc[ping_time > datetime.datetime(2022, 1, 1)]

In [None]:
# Draw the trajectory of `tdf` (start/end markers disabled), then call plot_stops for at
# most one user, passing the same map object via map_f; the return value of plot_stops is
# what this cell displays.
# NOTE(review): no stop-detection step on `tdf` is visible in this section - confirm that
# plot_stops is meant to receive the raw trajectory.
m = tdf.plot_trajectory( start_end_markers=False)
tdf.plot_stops(max_users=1, map_f=m)


# Merge Home data with daily observations