## Preparing Real-World Mobility Data

#### Processing and binning mobility data

In [9]:
import math
import numpy as np
import dask.dataframe as dd
import numpy.random
import pandas as pd
import time
import configparser
import xarray

import geopandas as gpd

In [13]:
# Read the merged counts CSV into a pandas DataFrame.
df_path = "counts_merged.csv"
df = pd.read_csv(filepath_or_buffer=df_path)

DataFrame to tensor/matrix
- Tensor: bin `dist_to_fire_m`, then use (poi_cbg, dt, dist_to_fire_m_bin) as the three modes
- Matrix: no binning; rows follow the poi_cbg mode and the columns are pop2019 and poi_cnt, giving an I × 2 matrix

In [16]:
# Bin dist_to_fire_m for tensor
#
# pd.qcut splits dist_to_fire_m into 10 quantile-based bins labelled 1..10
# (label 1 holds the smallest values). The result is a categorical Series
# aligned with df's index.
dist_to_fire_m_bin = pd.qcut(df.loc[:, 'dist_to_fire_m'], 10, labels=np.arange(1, 11))

# Build df_bin as a new frame instead of aliasing df. With `df_bin = df` both
# names refer to the same object, so adding the bin column would also modify
# the raw table that was loaded from disk.
df_bin = df.assign(dist_to_fire_m_bin=dist_to_fire_m_bin)

In [17]:
# Create tensor
#
# Builds a 3-way array of raw_visit_counts whose modes are
# (poi_cbg, dt, dist_to_fire_m_bin), in that order.
# Columns are selected with a list; a tuple is the key form used for
# MultiIndex lookups and is easy to misread here.
df_tensor = df_bin.loc[:, ['poi_cbg', 'dt', 'dist_to_fire_m_bin', 'raw_visit_counts']]
pd_tensor = df_tensor.set_index(['poi_cbg', 'dt', 'dist_to_fire_m_bin'])

# remove duplicates: only the first row for each (poi_cbg, dt, bin) key is
# kept, later rows with the same key are dropped.
# NOTE(review): the dropped rows' counts are discarded, not summed - confirm
# this is the intended handling of repeated keys.
dupe_index_tensor = pd_tensor.index.duplicated(keep="first")
pd_tensor = pd_tensor[~dupe_index_tensor]

pd_tensor = pd_tensor.sort_index()
xa_tensor = pd_tensor.to_xarray()

# to_array() stacks the data variables along a new leading axis. There is a
# single variable (raw_visit_counts), so that axis has length 1. Squeeze only
# that axis: a bare np.squeeze would also remove any real mode that happens to
# have length 1 (e.g. a single date), silently changing the tensor's rank.
np_tensor = xa_tensor.to_array().to_numpy()
np_tensor = np.squeeze(np_tensor, axis=0)
np_tensor = np.nan_to_num(np_tensor)  # convert nan's to zeroes

In [19]:
# Create matrix
#
# Builds a matrix with one row per poi_cbg and two columns
# (pop2019, poi_cnt). No binning is applied.
# Columns are selected with a list; a tuple is the key form used for
# MultiIndex lookups and is easy to misread here.
df_matrix = df_bin.loc[:, ['poi_cbg', 'pop2019', 'poi_cnt']]
pd_matrix = df_matrix.set_index(['poi_cbg'])

# remove duplicates: keep only the first row seen for each poi_cbg
dupe_index_matrix = pd_matrix.index.duplicated(keep="first")
pd_matrix = pd_matrix[~dupe_index_matrix]

pd_matrix = pd_matrix.sort_index()
xa_matrix = pd_matrix.to_xarray()

# to_array() stacks the two variables along a leading axis, giving shape
# (2, I); the transpose turns that into I x 2. No squeeze is applied: it would
# collapse the array to 1-D when there is only one poi_cbg.
np_matrix = xa_matrix.to_array().to_numpy()
np_matrix = np.nan_to_num(np_matrix)  # convert nan's to zeroes
np_matrix = np_matrix.transpose()

In [22]:
import scipy.io

# Write both arrays into a single MATLAB .mat file, stored under the
# variable names "tensor" and "matrix".
scipy.io.savemat(
    'mobility_data_tensor_matrix.mat',
    {'tensor': np_tensor, 'matrix': np_matrix},
)