In [None]:
%matplotlib inline

import ast
from functools import partial
import geopandas as gpd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from keplergl import KeplerGl
import matplotlib.pyplot as plt
import pandas as pd

## Datasets
1. Kinki area origin and destination dataset
2. Myanmar 2014 census dataset

## Origin and destination visualization

In [None]:
# Load the Kinki person-trip origin/destination shapefile into a GeoDataFrame.
odf = gpd.read_file("data/kinki/S05-b-12_KINKI_GML/S05-b-12_KINKI-g_PersonTripODAmount.shp")
# Preview the first rows to check which columns were read.
odf.head()

In [None]:
# Columns of interest (meanings as recorded by the notebook author):
#   S05b_002 - year
#   S05b_003 - origin code
#   S05b_004 - destination code
#   S05b_010 - total train trips count
#   S05b_035 - total trips count
#   geometry - geometry
odf[['S05b_002', 'S05b_003', 'S05b_004', 'S05b_010', 'S05b_035', 'geometry']]

In [None]:
# Kinki area zone code list (presumably the lookup table for the origin/destination codes):
# http://nlftp.mlit.go.jp/ksj/gml/codelist/KinkiAreaZoneCd.html

In [None]:
# Kobe data: keep the rows whose origin code (S05b_003) equals "71110".
kobe = odf.loc[odf['S05b_003'] == "71110"]
# Number of rows in the Kobe subset.
kobe.shape[0]

In [None]:
# Draw the Kobe subset's geometries; the trailing semicolon keeps the
# returned object's repr out of the cell output.
kobe.plot(figsize=(15,9));

In [None]:
# Keep only the zone codes, the two trip counts and the geometry,
# and give the coded columns readable names in the same step.
odf_necessary = odf[['S05b_003', 'S05b_004', 'S05b_010', 'S05b_035', 'geometry']].rename(
    columns={
        'S05b_003': 'zone01',
        'S05b_004': 'zone02',
        'S05b_010': 'trip_train',
        'S05b_035': 'trip_all',
    }
)
odf_necessary.head()

### extract latitude and longitude of the Origin & Destination

In [None]:
def _endpoint_coord(coord, vertex):
    """Build an ``assign`` callable that reads one coordinate from each row's geometry.

    ``coord`` indexes the geometry's ``.xy`` pair (0 is used for the longitude
    columns, 1 for the latitude columns) and ``vertex`` selects which point
    along the geometry to read.
    """
    def extract(frame):
        return frame.apply(lambda record: record['geometry'].xy[coord][vertex], axis=1)
    return extract


# NOTE(review): vertex 0 is labelled "to" and vertex 1 "from"; confirm this
# matches the direction in which the dataset draws its lines.
odf_lat_long = odf_necessary.assign(
    lng_to=_endpoint_coord(0, 0),
    lng_from=_endpoint_coord(0, 1),
    lat_to=_endpoint_coord(1, 0),
    lat_from=_endpoint_coord(1, 1),
)

In [None]:
# Preview the frame now that the four endpoint coordinate columns are attached.
odf_lat_long.head()

In [None]:
# Read the saved kepler.gl configuration text.
# The context manager closes the file handle as soon as the text has been
# read, rather than leaving it open for the lifetime of the kernel.
with open("config/kepler_config.txt") as kepler_config_file:
    keplerconfig = kepler_config_file.read()

In [None]:
# Show the raw configuration text as it was read from disk.
keplerconfig

In [None]:
# Turn the stored configuration text back into a Python literal, build the
# map with it, then attach the origin/destination frame as dataset 'ODTrip'.
map_personTripOD_curve = KeplerGl(
    config=ast.literal_eval(keplerconfig),
    height=600,
)
map_personTripOD_curve.add_data(name='ODTrip', data=odf_lat_long)
# Last expression of the cell: renders the interactive map.
map_personTripOD_curve

In [None]:
# Export the origin/destination map to the file "TripOD_map.html".
map_personTripOD_curve.save_to_html(file_name="TripOD_map.html")

In [None]:
# Show the map's current configuration object.
map_personTripOD_curve.config

In [None]:
# saving the current kepler settings
# Note: if you didn't change the rendered output, you don't need to run this cell
# with open("config/kepler_config.txt",'w') as kepler_config_file:
#     kepler_config_file.write(str(map_personTripOD_curve.config))

### Myanmar census dataset

In [None]:
# Load the census households/population CSV into a DataFrame.
householdf = pd.read_csv("data/census/householdspopulationbaseddatasetsrunion.csv")

In [None]:
# Display the table as loaded, before any rows are removed.
householdf

In [None]:
# Drop the row labelled 0 (presumably a non-state summary row -- TODO confirm).
# errors="ignore" keeps this cell safe to re-run: once label 0 is gone, a
# second run is a no-op instead of raising KeyError.
householdf = householdf.drop(index=[0], errors="ignore")
householdf

In [None]:
# keep only the columns we need:
# state, population in conventional households, mean household size, population on both genders
# .copy() makes this an independent frame, so the column added to it in the
# geocoding step does not trigger pandas' SettingWithCopyWarning.
householdf_needed_cols = householdf[['name_st', 'pop_hh', 'mean_hhsize', 'pop_t']].copy()
householdf_needed_cols

### geocode the dataset

In [None]:
# Geocode every state name through Nominatim.
# NOTE(review): the user_agent looks like geopy's own default-style string;
# Nominatim's usage policy asks for an application-specific identifier -- confirm.
geolocator = Nominatim(user_agent="geopy/1.20.0")
# Wrap the geocoder so that consecutive lookups are spaced at least one second apart.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
# One lookup per row, with language='my' passed through to each geocode call.
# NOTE(review): geopy documents geocode() as returning None when nothing is
# found -- confirm every state name resolves before relying on 'location'.
householdf_needed_cols['location'] = householdf_needed_cols['name_st'].apply(partial(geocode, language='my'))

In [None]:
# Pull latitude/longitude out of each geocoding result.
# getattr(..., NaN) means a state whose lookup returned nothing (location is
# None) gets NaN coordinates instead of aborting the cell with
# "AttributeError: 'NoneType' object has no attribute 'latitude'".
householdf_needed_cols_lat_long = householdf_needed_cols.assign(
    latitude=lambda frame: frame["location"].apply(
        lambda location: getattr(location, "latitude", float("nan"))
    ),
    longitude=lambda frame: frame["location"].apply(
        lambda location: getattr(location, "longitude", float("nan"))
    ),
)
householdf_needed_cols_lat_long

### hexbin distribution

In [None]:
# Read the saved hexbin map configuration text.
# The context manager closes the file handle as soon as the text has been
# read, rather than leaving it open for the lifetime of the kernel.
with open("config/hexbin_config.txt") as hexbin_config_file:
    spatialconfig = hexbin_config_file.read()
# Show the raw configuration text.
spatialconfig

In [None]:
# Build the population map from the saved hexbin configuration, handing
# kepler only the four columns listed below as a single named dataset.
myanmar_population_dist = KeplerGl(
    height=600,
    data={
        "householdf_needed_cols_lat_long": householdf_needed_cols_lat_long[
            ["pop_t", "name_st", "latitude", "longitude"]
        ]
    },
    config=ast.literal_eval(spatialconfig),
)
# Last expression of the cell: renders the interactive map.
myanmar_population_dist

In [None]:
# Export the population map to the file "myanmar_population_dist.html".
myanmar_population_dist.save_to_html(file_name="myanmar_population_dist.html")

### As you can see, the geocoding process doesn't always return the correct coordinates

### Homework :-)
1. If you are interested, you can correct the coordinates for Chin, Mon and Shan, then update the map to show the corrected coordinates (one way would be to check whether a returned coordinate falls within a Myanmar polygon (*remember the within...?*) before adding it to the dataframe).
2. You can also try using a polygon instead of a hexbin to get a more accurate graph.