# Application: Creating a Telco Mobility Index

1. Total Travel Distance
2. Radius of Gyration
3. Activity Entropy

In [None]:
import os
# NOTE: this is a relative chdir, so every re-run of the cell moves one more
# directory level up; run it exactly once per kernel session.
os.chdir("../") # cd back to the root folder

In [None]:
import warnings
warnings.filterwarnings("ignore")

import random
import shapely
import pendulum
import numpy as np
import pandas as pd
pd.options.display.max_rows=200
import geopandas as gpd
import matplotlib.pyplot as plt
from sds4gdsp.processor import convert_cel_to_point, calc_haversine_distance
from sds4gdsp.plotter import get_route_fig, load_images, plot_images
from IPython.display import HTML, display
from functools import reduce
import pyproj
from functools import partial

Please load, take a peek, and examine the given datasets

### Fake Subscribers

The subscriber base, **by default**, should have:
- 100 unique subs
- At least 18 years old
- At most 72 years old

In [None]:
filepath_subscribers = "data/fake_subscribers.csv"
dtype = dict(
    gender="category",
    age=int,
    name=str,
    chi_indicator=bool,
    ewallet_user_indicator="category"
)
fake_subscribers = pd.read_csv(filepath_subscribers, dtype=dtype)
fake_subscribers.sample(5)

In [None]:
fake_subscribers.shape

In [None]:
cat_cols = ["gender", "chi_indicator", "ewallet_user_indicator"]
fs_breakdown = fake_subscribers.groupby(cat_cols).size().reset_index(name="cnt")
fs_breakdown.assign(pcnt=fs_breakdown.cnt.div(len(fake_subscribers)).mul(100).round(2))

### Fake Cellsites

The cellsites, **by default**, should:
- be situated in Taguig City, Metro Manila, PH
- have cellsites that are at least 300 meters apart

In [None]:
filepath_cellsites = "data/fake_cellsites.csv"
fake_cellsites = pd.read_csv(filepath_cellsites)
fake_cellsites.sample(5)

In [None]:
fake_cellsites.shape

In [None]:
HTML('<img src="../docs/fake_cellsites.png" width="600" height="600"/>')

### Fake Transactions

The transactions, **by default**, should:
- start the earliest at around 7:00 AM
- cover the whole month of July 2023
- have intra-Taguig transactions only

In [None]:
filepath_transactions = "data/fake_transactions.csv"
fake_transactions = pd.read_csv(filepath_transactions)

In [None]:
fake_transactions.shape

In [None]:
min_date = pendulum.parse(fake_transactions.transaction_dt.min(), exact=True)
max_date = pendulum.parse(fake_transactions.transaction_dt.max(), exact=True)
period = pendulum.period(min_date, max_date)

In [None]:
sample_dt = str(random.sample(list(period), 1)[0])
filter_dt = fake_transactions.transaction_dt == sample_dt
sample_sub = fake_subscribers.sub_uid.sample(1).item()
filter_sub = fake_transactions.sub_uid == sample_sub
fake_transactions.loc[filter_sub&filter_dt]

Create a helper function to help fetch subscriber trajectory

In [None]:
def get_sub_traj(
    sub: str,
    date: str,
    window: str,
    transactions: pd.DataFrame,
    cellsites: pd.DataFrame
):
    """Fetch one subscriber's transactions for a time window, joined with cellsite attributes.

    Parameters
    ----------
    sub : value matched against ``transactions.sub_uid``.
    date : date string compared against ``transaction_dt``. For ``window="day"``
        it is compared as-is; for ``window="month"`` it must be the first day of
        the month (e.g. ``"2023-06-01"``), because every transaction date is
        normalised to its month start before the comparison.
    window : ``"month"`` or ``"day"``.
    transactions : frame with at least ``sub_uid``, ``cel_uid``,
        ``transaction_dt`` and ``transaction_hr`` columns.
    cellsites : frame with a ``cel_uid`` column; merged onto the transactions.

    Returns
    -------
    pd.DataFrame sorted by ``transaction_dt`` then ``transaction_hr`` with a
    fresh 0..n-1 index. The merge is an inner join, so transactions whose
    ``cel_uid`` is absent from ``cellsites`` are dropped.

    Raises
    ------
    ValueError if ``window`` is neither ``"month"`` nor ``"day"``.
    """
    if window not in ("month", "day"):
        # previously an unknown window fell through and failed later on an unbound local
        raise ValueError(f'window must be "month" or "day", got {window!r}')

    # narrow down to the subscriber first so the per-row date parsing below
    # only touches that subscriber's rows rather than the whole table
    sub_txns = transactions.loc[transactions.sub_uid == sub]

    if window == "month":
        month_starts = sub_txns.transaction_dt.apply(
            lambda d: pendulum.parse(d, exact=True).start_of("month").to_date_string()
        )
        date_filter = month_starts == date
    else:
        date_filter = sub_txns.transaction_dt == date

    transactions_red = sub_txns.loc[date_filter].merge(cellsites, on="cel_uid")
    return (
        transactions_red
        .sort_values(by=["transaction_dt", "transaction_hr"], ascending=[True, True])
        .reset_index(drop=True)
    )

def fetch_sample_trajs(scoring_base, metric, date, window, fake_transactions, fake_cellsites):
    """Return the trajectories of the subscribers with the lowest, median and highest `metric`.

    The scoring base is ranked on a sorted copy, so the caller's frame keeps its
    row order and index (the previous version sorted and re-indexed it in place).

    Parameters
    ----------
    scoring_base : frame with a ``sub_uid`` column and a `metric` column.
    metric : name of the column to rank subscribers by (ascending).
    date, window, fake_transactions, fake_cellsites : passed through to
        ``get_sub_traj``.

    Returns
    -------
    tuple of three DataFrames: (low, mid, high) trajectories. "mid" is the row at
    position ``len(scoring_base) // 2`` of the ranking. Rows with a missing
    metric sort last under pandas' default, so they can end up as the "high" pick.
    """
    ranked = scoring_base.sort_values(by=metric, ascending=True).reset_index(drop=True)
    positions = (0, len(ranked) // 2, len(ranked) - 1)  # low, mid, high
    return tuple(
        get_sub_traj(ranked.loc[pos, "sub_uid"], date, window, fake_transactions, fake_cellsites)
        for pos in positions
    )

In [None]:
get_sub_traj("glo-sub-001", "2023-06-01", "month", fake_transactions, fake_cellsites)

We'll use this scoring base moving forward

In [None]:
scoring_base = fake_subscribers.copy()

In [None]:
# for uniformity
date = "2023-06-01"
window = "month"

## 1. `total_travel_distance`

Pertains to the total Haversine (or great-circle) distance covered within a given time `window`. A single movement is a cell site hop (caused by the subscriber's physical movement or by a handoff).

$ d = 2 \cdot R \cdot \text{asin}\left(\sqrt{\sin^2\left(\frac{{\text{lat}_2 - \text{lat}_1}}{2}\right) + \cos(\text{lat}_1) \cdot \cos(\text{lat}_2) \cdot \sin^2\left(\frac{{\text{lng}_2 - \text{lng}_1}}{2}\right)}\right) $ <br>

$ d $ is the Haversine distance <br>
$ R $ is the radius of Earth (6371 in Kilometers) <br>
$ lat_1, lng_1 $ is the orig coordinate <br>
$ lat_2, lng_2 $ is the dest coordinate

### a. **CHECK** the `total_travel_distance` of a subscriber.

In [None]:
sub = fake_subscribers.sample(1).sub_uid.item()
traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)
traj.head()

In [None]:
coords = traj.coords.tolist()
od_pairs = list(zip(coords, coords[1:])) # think lag-1 in SQL

In [None]:
od_pairs[:4] # check with the traj table above, this should match

In [None]:
# calculate the distance covered per cellsite hop, then sum those distances
total_travel_distance = reduce(
    lambda a, b: a + b,
    list(map(lambda p: calc_haversine_distance(*p), od_pairs))
)

In [None]:
# convert to kilometer
total_travel_distance / 1_000

### b. **APPLY** the `total_travel_distance` to the scoring base.

In [None]:
def calc_total_travel_distance(traj):
    """Sum the Haversine distance of every consecutive cellsite hop in a trajectory.

    Parameters
    ----------
    traj : frame with a ``coords`` column, ordered in time; each value is passed
        to ``calc_haversine_distance`` unchanged.

    Returns
    -------
    The summed hop distance, in whatever unit ``calc_haversine_distance`` returns
    (presumably metres, since the notebook divides by 1_000 to get KM - confirm).
    A trajectory with fewer than two points has no hops and yields 0.
    """
    coords = traj.coords.tolist()
    # pair every point with its successor (think lag-1 in SQL)
    od_pairs = zip(coords, coords[1:])
    # sum() starts from 0, so an empty hop list gives 0 instead of the
    # TypeError that reduce() raises without an initial value
    return sum(calc_haversine_distance(orig, dest) for orig, dest in od_pairs)

In [None]:
# score every subscriber currently in the scoring base, in the base's row order
# (the loop rebinds the notebook-level names `sub`, `traj` and `total_travel_distance`)
total_travel_distances = []
for sub in scoring_base.sub_uid.tolist():
    traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)
    total_travel_distance = calc_total_travel_distance(traj)
    total_travel_distances.append(total_travel_distance)

# the list is positionally aligned with scoring_base's rows, so it can be assigned as a column
scoring_base["total_travel_distance"] = total_travel_distances

In [None]:
scoring_base.sample(5)

### c. **VISUALIZE** the `total_travel_distance` results.

Let's see the results for the whole scoring base.

In [None]:
def plot1_total_travel_distance(fake_transactions, scoring_base):
    """Plot travel distance against transaction count, next to its histogram.

    Left panel: one dot per subscriber, number of distinct ``txn_uid`` values
    versus ``total_travel_distance`` divided by 1_000.
    Right panel: histogram of ``total_travel_distance`` divided by 1_000.

    Returns the matplotlib figure; it is closed before returning so the notebook
    shows it only through the cell's return value.
    """
    fig, (scatter_ax, hist_ax) = plt.subplots(1, 2, figsize=(13, 5))

    # left panel: transactions per subscriber joined to the distance metric
    txn_counts = (
        fake_transactions.groupby("sub_uid")
        .txn_uid.nunique()
        .reset_index(name="num_transactions")
    )
    per_sub = txn_counts.merge(scoring_base[["sub_uid", "total_travel_distance"]], on="sub_uid")
    per_sub["total_travel_distance"] = per_sub["total_travel_distance"].div(1_000)
    per_sub.plot(ax=scatter_ax, kind="scatter", x="num_transactions", y="total_travel_distance")

    # right panel: distribution of the metric over the whole scoring base
    scoring_base.copy().total_travel_distance.div(1_000).hist(ax=hist_ax)

    scatter_ax.set_ylabel("travel distance (in KM)")
    scatter_ax.set_xlabel("num transactions")
    hist_ax.set_xlabel("travel distance (in KM)")
    hist_ax.set_ylabel("frequency")
    hist_ax.grid(False)
    plt.close()
    return fig

In [None]:
plot1_total_travel_distance(fake_transactions, scoring_base)

`NOTES`

1. As expected, the number of transactions is **positively correlated** with the total travel distance
2. The distribution of the total travel distance should mirror a **normal distribution** (refer to stay_proba).

Let's check the extreme scenarios. Sample subs with the lowest, highest, and the median `total_travel_distance`

In [None]:
metric = "total_travel_distance"
sample_traj_low, sample_traj_mid, sample_traj_high = fetch_sample_trajs(scoring_base, metric, date, window, fake_transactions, fake_cellsites)

In [None]:
def fetch_total_travel_distance(traj):
    """Break a trajectory into its individual hops and keep those that cover distance.

    Every row of `traj` is paired with the next one. The result has one row per
    pair with the origin/destination date, hour and cellsite plus the
    ``travel_distance`` between the two ``coords`` values. Pairs with a distance
    of 0 (or less) are dropped and the index is reset.
    """
    def lag_pairs(values):
        # pair each element with its successor
        return list(zip(values, values[1:]))

    od_pairs = lag_pairs(traj.coords.tolist())
    dt_pairs = lag_pairs(traj.transaction_dt.tolist())
    hr_pairs = lag_pairs(traj.transaction_hr.tolist())
    cel_pairs = lag_pairs(traj.cel_uid.tolist())

    hops = pd.concat(
        [
            pd.DataFrame(dt_pairs, columns=["orig_dt", "dest_dt"]),
            pd.DataFrame(hr_pairs, columns=["orig_hr", "dest_hr"]),
            pd.DataFrame(cel_pairs, columns=["orig_cel", "dest_cel"]),
        ],
        axis=1,
    )
    hops["travel_distance"] = [calc_haversine_distance(*od_pair) for od_pair in od_pairs]

    has_moved = hops.travel_distance > 0
    return hops.loc[has_moved].reset_index(drop=True)

def plot_total_travel_distances(traj_low, traj_mid, traj_high):
    """Compare the daily travel distance of three trajectories.

    Left panel: distance per origin date (divided by 1_000) as a line, one line
    per trajectory. Right panel: density estimate of those same daily totals.
    Colours are red/blue/green for low/mid/high.

    Returns the matplotlib figure, closed before returning.
    """
    fig, (line_ax, density_ax) = plt.subplots(1, 2, figsize=(13, 5))
    colours = ("red", "blue", "green")  # low, mid, high

    hop_tables = [fetch_total_travel_distance(traj) for traj in (traj_low, traj_mid, traj_high)]
    for hops in hop_tables:
        hops["orig_dt"] = pd.to_datetime(hops["orig_dt"])

    # total distance per origin date, shared by both panels
    daily_totals = [
        hops.groupby("orig_dt")["travel_distance"].sum().div(1_000)
        for hops in hop_tables
    ]
    for totals, colour in zip(daily_totals, colours):
        totals.plot(ax=line_ax, color=colour, kind="line")
    for totals, colour in zip(daily_totals, colours):
        totals.plot(ax=density_ax, color=colour, kind="density")

    line_ax.set_ylabel("travel distance (in KM)")
    density_ax.set_ylabel("density")
    line_ax.set_xlabel("")
    density_ax.set_xlabel("travel distance (in KM)")
    for ax in (line_ax, density_ax):
        ax.legend(["low", "mid", "high"], frameon=False)
    plt.tight_layout()
    plt.close()
    return fig

In [None]:
# this is a sample 
plot_total_travel_distances(sample_traj_low, sample_traj_mid, sample_traj_high)

`NOTE`

This covers only a few samples from the scoring base, so you should not draw conclusions about the whole population from it. We only wanted to inspect the extreme scenarios: how varied the day-to-day movement of these subs is, and how their travel distance is distributed within a single month.

## 2. `radius_of_gyration`

In mobility analysis, this indicates the characteristic distance travelled by the agent (in our case, the mobile subscriber). This is computed using the given formula.

$ RoG = \sqrt{\frac{1}{n} \sum_{i=1}^{n} d(CoM, coord_i)^2} $  <br>

$ RoG $ is the Radius of Gyration <br>
$ d $ is the Haversine distance <br>

$ CoM = \left( \frac{1}{n} \sum_{i=1}^{n} lat_i, \frac{1}{n} \sum_{i=1}^{n} lng_i \right) $  <br>

$ CoM $ is the Center of Mass


### a. **CHECK** the `radius_of_gyration` of a subscriber.

In [None]:
sub = fake_subscribers.sample(1).sub_uid.item()
traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)
traj.head()

In [None]:
coords = traj.coords.tolist()

In [None]:
mean_lat = np.mean([shapely.wkt.loads(coord).y for coord in coords])
mean_lng = np.mean([shapely.wkt.loads(coord).x for coord in coords])
com = shapely.geometry.Point(mean_lng, mean_lat).wkt

In [None]:
print(mean_lat, mean_lng, com)

In [None]:
# compute for the distances from center of mass to each of the individual points
pt_pairs = list(zip(coords, [com]*len(coords))) # points to center of mass
radius_of_gyration = np.sqrt(
    reduce(
        lambda a, b: a + b,
        list(map(lambda p: calc_haversine_distance(*p)**2, pt_pairs))
    ) / len(coords)
)

In [None]:
radius_of_gyration / 1_000

### b. **APPLY** the `radius_of_gyration` to the scoring base.

In [None]:
def calc_radius_of_gyration(traj):
    """Compute the centre of mass and radius of gyration of a trajectory.

    The centre of mass is the arithmetic mean of the points' latitudes (``y``)
    and longitudes (``x``). The radius of gyration is the root-mean-square of
    ``calc_haversine_distance`` between each point and that centre.

    Parameters
    ----------
    traj : frame with a ``coords`` column of WKT point strings.

    Returns
    -------
    (com, radius_of_gyration) where ``com`` is the centre of mass as a WKT
    string. For a trajectory without any points there is nothing to average, so
    ``(None, nan)`` is returned instead of raising.
    """
    coords = traj.coords.tolist()
    if not coords:
        return None, np.nan

    # parse each WKT point once and reuse it for both coordinate means
    points = [shapely.wkt.loads(coord) for coord in coords]
    mean_lat = np.mean([point.y for point in points])
    mean_lng = np.mean([point.x for point in points])
    com = shapely.geometry.Point(mean_lng, mean_lat).wkt

    # squared distance from every individual point to the centre of mass
    squared_distances = [calc_haversine_distance(coord, com) ** 2 for coord in coords]
    radius_of_gyration = np.sqrt(sum(squared_distances) / len(coords))
    return com, radius_of_gyration

In [None]:
# score every subscriber currently in the scoring base, in the base's row order
radius_of_gyrations = []
for sub in scoring_base.sub_uid.tolist():
    traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)
    # calc_radius_of_gyration returns (com, radius); only the radius is kept here
    radius_of_gyration = calc_radius_of_gyration(traj)[-1]
    radius_of_gyrations.append(radius_of_gyration)

# the list is positionally aligned with scoring_base's rows, so it can be assigned as a column
scoring_base["radius_of_gyration"] = radius_of_gyrations

In [None]:
scoring_base.sample(5)

### c. **VISUALIZE** the `radius_of_gyration` results.

In [None]:
metric = "radius_of_gyration"
sample_traj_low, sample_traj_mid, sample_traj_high = fetch_sample_trajs(scoring_base, metric, date, window, fake_transactions, fake_cellsites)

In [None]:
sample_traj_low.head()

In [None]:
def buffer_from_point(lat, lng, radius):
    """Build a circle of `radius` around (lat, lng) and return it in lon/lat coordinates.

    The point is projected into an azimuthal-equidistant projection centred on
    itself, buffered there, and the resulting polygon is projected back.

    Parameters
    ----------
    lat, lng : centre of the circle in degrees.
    radius : buffer distance in the projection's linear unit (metres is the
        PROJ default for ``aeqd`` - confirm it matches the caller's unit).

    Returns
    -------
    shapely polygon with (lng, lat) vertices.
    """
    wgs84_globe = pyproj.Proj(proj="latlong", ellps="WGS84")
    aeqd = pyproj.Proj(proj="aeqd", ellps="WGS84", datum="WGS84", lat_0=lat, lon_0=lng)

    # Transformer replaces the deprecated module-level pyproj.transform and is
    # built once per direction instead of on every call; always_xy keeps the
    # (lng, lat) argument order used throughout this function.
    to_aeqd = pyproj.Transformer.from_proj(wgs84_globe, aeqd, always_xy=True)
    to_wgs84 = pyproj.Transformer.from_proj(aeqd, wgs84_globe, always_xy=True)

    project_coords = to_aeqd.transform(lng, lat)
    aeqd_buffer = shapely.geometry.Point(project_coords).buffer(radius)
    projected_pol = shapely.ops.transform(to_wgs84.transform, aeqd_buffer)
    return projected_pol

def plot_radius_of_gyrations(traj_low, traj_mid, traj_high):
    """Draw three trajectories side by side with their centre of mass and gyration circle.

    Each panel shows the trajectory's points (red/blue/green for low/mid/high),
    a black star at the centre of mass, and a translucent ring whose radius is
    the radius of gyration. The panels share both axes; ticks are removed.

    Returns the matplotlib figure, closed before returning.
    """
    trajs = (traj_low, traj_mid, traj_high)
    gyrations = [calc_radius_of_gyration(traj) for traj in trajs]
    fig, axes = plt.subplots(1, 3, figsize=(13, 5), sharex=True, sharey=True)

    # centre of mass marker plus the gyration ring, one panel per trajectory
    for ax, (com, rog) in zip(axes, gyrations):
        centre = shapely.wkt.loads(com)
        gpd.GeoSeries(centre).plot(ax=ax, marker="*", color="black", markersize=300, zorder=2)
        ring = buffer_from_point(lat=centre.y, lng=centre.x, radius=rog)
        gpd.GeoSeries(ring).plot(ax=ax, color="white", edgecolor="black", zorder=3, alpha=0.2, linewidth=10)

    # the visited cellsites themselves, drawn underneath (zorder=1)
    for ax, traj, colour in zip(axes, trajs, ("red", "blue", "green")):
        gpd.GeoSeries(traj["coords"].apply(shapely.wkt.loads)).plot(ax=ax, color=colour, markersize=60, zorder=1)

    for ax in axes:
        ax.ticklabel_format(useOffset=False)
    for ax in axes:
        ax.set_xticks([])
        ax.set_yticks([])
    plt.tight_layout()
    plt.close()
    return fig

def plot_radius_of_gyration(traj):
    """Draw one trajectory with its centre of mass and gyration circle.

    Shows the trajectory's points in red, a black star at the centre of mass and
    a translucent ring whose radius is the radius of gyration, on an axis-free
    5x5 figure.

    Returns the matplotlib figure, closed before returning.
    """
    com, rog = calc_radius_of_gyration(traj)
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    centre = shapely.wkt.loads(com)
    gpd.GeoSeries(centre).plot(ax=ax, marker="*", color="black", markersize=300, zorder=2)
    ring = buffer_from_point(lat=centre.y, lng=centre.x, radius=rog)
    gpd.GeoSeries(ring).plot(ax=ax, color="white", edgecolor="black", zorder=3, alpha=0.2, linewidth=10)

    visited = traj["coords"].apply(shapely.wkt.loads)
    gpd.GeoSeries(visited).plot(ax=ax, color="red", markersize=60, zorder=1)

    ax.ticklabel_format(useOffset=False)
    plt.axis("off")
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.close()
    return fig

In [None]:
plot_radius_of_gyrations(sample_traj_low, sample_traj_mid, sample_traj_high)

`NOTE`

This covers only a handful of samples from the scoring base, specifically the subs with the lowest, median, and highest `radius_of_gyration`. It simply illustrates (visually) how far the radius can range across these scenarios, to help build intuition for the concept of RoG.

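## 3. `activity_entropy`

Measures how evenly a subscriber's time is spread across the cell sites they visit. The hours between one transaction and the next are credited to the cell site of the earlier transaction. A subscriber who spends all of their time at one site scores 0; the score grows as their time is split more evenly over more sites.

$ E = \sum_{i=1}^{n} p_i \cdot \log_2\left(\frac{1}{p_i}\right) $ <br>

$ E $ is the Activity Entropy <br>
$ p_i $ is the share of the subscriber's total observed hours spent at cell site $ i $ <br>
$ n $ is the number of distinct cell sites visited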

### a. **CHECK** the `activity_entropy` of a subscriber.

In [None]:
sub = fake_subscribers.sample(1).sub_uid.item()
traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)
traj.head()

In [None]:
loc_hrs = traj[["cel_uid", "transaction_hr"]].values.tolist()
od_loc_hrs = list(zip(loc_hrs, loc_hrs[1:]))

In [None]:
cels = list(set(traj.cel_uid.tolist()))

In [None]:
# hours credited to each cellsite: the time until the next transaction goes to the origin site
loc_hr_counter = dict(zip(cels, [0]*len(cels))) # THIS IS THE HASHMAP
for orig, dest in od_loc_hrs:
    cel = orig[0]
    # modulo 24 covers a hop whose next transaction falls on another day (dest hour < orig hour);
    # a hop within the same hour counts as zero hours instead of reusing the previous hop's value
    time_elapsed = (dest[1] - orig[1]) % 24
    loc_hr_counter[cel] = loc_hr_counter[cel] + time_elapsed # I UPDATED THE HASHMAP

In [None]:
loc_hr_counter

In [None]:
traj # sanity check

In [None]:
proba_per_site = [hr_spent/sum(loc_hr_counter.values()) for hr_spent in loc_hr_counter.values()]

In [None]:
np.sum(proba_per_site)

In [None]:
# a site with zero hours contributes nothing (p*log2(1/p) -> 0 as p -> 0);
# skipping it avoids dividing by zero for a site that only appears as the last stop
activity_entropy = sum(p*np.log2(1/p) for p in proba_per_site if p > 0)
print(activity_entropy)

# TODO: add logbase2
# try float dtype

### b. **APPLY** the `activity_entropy` to the scoring base.

In [None]:
def calc_activity_entropy(traj):
    """Compute the entropy of how a subscriber's time is split across cellsites.

    The hours between one transaction and the next are credited to the cellsite
    of the earlier transaction. Each site's share ``p`` of the total hours then
    feeds ``sum(p * log2(1 / p))``.

    Only ``transaction_hr`` is used to measure elapsed time, so a gap is taken
    modulo 24: a later hour on another day counts as the hour difference, and two
    transactions in the same hour count as zero hours.

    Parameters
    ----------
    traj : frame with ``cel_uid`` and ``transaction_hr`` columns, ordered in time.

    Returns
    -------
    The entropy in bits, or None when no time can be attributed to any site
    (fewer than two transactions, or every hop spans zero hours).
    """
    loc_hrs = traj[["cel_uid", "transaction_hr"]].values.tolist()

    # hours per cellsite, keyed in order of first appearance
    loc_hr_counter = {cel: 0 for cel, _ in loc_hrs}
    for (orig_cel, orig_hr), (_, dest_hr) in zip(loc_hrs, loc_hrs[1:]):
        # previously a same-hour hop reused the elapsed time of the hop before it
        loc_hr_counter[orig_cel] += (dest_hr - orig_hr) % 24

    total_hrs = sum(loc_hr_counter.values())
    if total_hrs == 0:
        return None

    # time spent on a single site vs time spent on all sites
    proba_per_site = [hr_spent / total_hrs for hr_spent in loc_hr_counter.values()]

    # a zero-share site (e.g. the final stop, which is never an origin) adds
    # nothing to the sum; skipping it avoids the division by zero that the old
    # bare except silently turned into None
    return sum(p * np.log2(1 / p) for p in proba_per_site if p > 0)

In [None]:
# score every subscriber currently in the scoring base, in the base's row order
activity_entropys = []
for sub in scoring_base.sub_uid.tolist():
    traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)
    activity_entropy = calc_activity_entropy(traj)
    activity_entropys.append(activity_entropy)

# the list is positionally aligned with scoring_base's rows, so it can be assigned as a column
scoring_base["activity_entropy"] = activity_entropys

In [None]:
scoring_base.sample(5)

## 4. Miscellany

In [None]:
def plot_routes(traj_low, traj_mid, traj_high):
    """Draw the routes of three trajectories side by side.

    Each panel shows the trajectory's points (red/blue/green for low/mid/high)
    on top of grey straight-line segments joining consecutive points. The panels
    share both axes, have their axes hidden and carry a one-entry legend.

    Returns the matplotlib figure, closed before returning.
    """
    def build_segments(traj):
        # one two-point LineString per consecutive pair of WKT points
        coords = traj.coords.tolist()
        return [
            shapely.geometry.LineString(map(shapely.wkt.loads, od_pair))
            for od_pair in zip(coords, coords[1:])
        ]

    trajs = (traj_low, traj_mid, traj_high)
    segments = [build_segments(traj) for traj in trajs]

    fig, axes = plt.subplots(1, 3, figsize=(13, 7), sharex=True, sharey=True)
    for ax, traj, colour in zip(axes, trajs, ("red", "blue", "green")):
        gpd.GeoSeries(traj["coords"].apply(shapely.wkt.loads)).plot(ax=ax, color=colour, markersize=60, zorder=2)
    for ax, routes in zip(axes, segments):
        gpd.GeoSeries(routes).plot(ax=ax, color="grey", linewidth=3, zorder=1, alpha=0.6)

    for ax in axes:
        ax.ticklabel_format(useOffset=False)
    for ax in axes:
        ax.axis("off")
    for ax, label in zip(axes, ("low", "mid", "high")):
        ax.legend([label], frameon=False, loc="lower center")
    plt.tight_layout()
    plt.close()
    return fig

In [None]:
plot_routes(sample_traj_low, sample_traj_mid, sample_traj_high)

In [None]:
scoring_base.sort_values(by="total_travel_distance", ascending=True, inplace=True)
scoring_base.reset_index(drop=True, inplace=True)

In [None]:
sample_sub_low = scoring_base.loc[0, "sub_uid"]
sample_sub_mid = scoring_base.loc[len(scoring_base)//2, "sub_uid"]
sample_sub_high = scoring_base.loc[len(scoring_base)-1, "sub_uid"]

In [None]:
sample_traj_low = get_sub_traj(sample_sub_low, date, window, fake_transactions, fake_cellsites)
sample_traj_mid = get_sub_traj(sample_sub_mid, date, window, fake_transactions, fake_cellsites)
sample_traj_high = get_sub_traj(sample_sub_high, date, window, fake_transactions, fake_cellsites)

In [None]:
def visualize_route(traj):
    """Draw one trajectory as points joined by straight segments.

    All points are blue; the first row of `traj` is overdrawn in green ("orig")
    and the last row in red ("dest"). Consecutive points are joined by grey
    segments. The axes are hidden.

    Returns the matplotlib figure, closed before returning.
    """
    coords = traj.coords.tolist()
    # one two-point LineString per consecutive pair of WKT points
    routes = [
        shapely.geometry.LineString(map(shapely.wkt.loads, od_pair))
        for od_pair in zip(coords, coords[1:])
    ]

    fig, ax = plt.subplots(1, figsize=(7, 5))
    layers = (
        (traj, "blue", 60, 2),
        (traj.head(1), "green", 120, 3),
        (traj.tail(1), "red", 120, 3),
    )
    for rows, colour, size, order in layers:
        gpd.GeoSeries(rows["coords"].apply(shapely.wkt.loads)).plot(ax=ax, color=colour, markersize=size, zorder=order)
    gpd.GeoSeries(routes).plot(ax=ax, color="grey", linewidth=3, zorder=1, alpha=0.6)

    ax.ticklabel_format(useOffset=False)
    ax.legend(["_", "orig", "dest"])
    plt.tight_layout()
    plt.axis("off")
    plt.close()
    return fig

In [None]:
travel_distance_in_km = round(scoring_base.loc[scoring_base.sub_uid==sample_sub_low, "total_travel_distance"].item() / 1_000, 2)
print(f"this sub travelled {travel_distance_in_km} KM in a {window}, LIKELY TO HAVE LOW MOBILITY")
display(visualize_route(sample_traj_low))

In [None]:
travel_distance_in_km = round(scoring_base.loc[scoring_base.sub_uid==sample_sub_mid, "total_travel_distance"].item() / 1_000, 2)
print(f"this sub travelled {travel_distance_in_km} KM in a {window}, LIKELY TO HAVE MID MOBILITY")
display(visualize_route(sample_traj_mid))

In [None]:
travel_distance_in_km = round(scoring_base.loc[scoring_base.sub_uid==sample_sub_high, "total_travel_distance"].item() / 1_000, 2)
print(f"this sub travelled {travel_distance_in_km} KM in a {window}, LIKELY TO HAVE HIGH MOBILITY")
display(visualize_route(sample_traj_high))

TODO:
1. Make your own Mobility Index: HIGH-MID-LOW

## RESULTS QA

In [None]:
scoring_base.total_travel_distance.describe().iloc[1:]

In [None]:
scoring_base.radius_of_gyration.describe().iloc[1:]

In [None]:
scoring_base.activity_entropy.describe().iloc[1:]

In [None]:
sub = fake_subscribers.sample(1).sub_uid.item()
traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)

In [None]:
traj.head()

In [None]:
calc_activity_entropy(traj)

In [None]:
calc_radius_of_gyration(traj)

In [None]:
calc_total_travel_distance(traj)

## PENDING ITEMS

[X] Redo the fake data simulation, make stay proba variable instead of fixed <br>
[] Add visuals on total travel distance, radius of gyration, activity entropy <br>
[] Finish lecture part 1, grammar of spatial data science

## ARCHIVE

In [None]:
def get_route_fig(r):
    """Draw a route line with its first vertex in red and its last vertex in green.

    Note: a function of the same name is imported from ``sds4gdsp.plotter`` at
    the top of the notebook; running this cell rebinds the name to this version.

    Parameters
    ----------
    r : line geometry exposing ``.xy`` (x and y coordinate sequences).

    Returns the matplotlib figure, closed before returning.
    """
    fig, ax = plt.subplots(1, 1)
    gpd.GeoSeries(r).plot(ax=ax, linewidth=5, zorder=1)

    xs, ys = r.xy
    endpoints = (
        (shapely.geometry.Point([xs[0], ys[0]]), "red"),      # orig
        (shapely.geometry.Point([xs[-1], ys[-1]]), "green"),  # dest
    )
    for endpoint, colour in endpoints:
        gpd.GeoSeries(endpoint).plot(ax=ax, color=colour, markersize=250, zorder=2, alpha=0.8)

    plt.axis("off")
    ax.ticklabel_format(useOffset=False)
    plt.close()
    return fig

import os
import shapely
import geopandas as gpd
import matplotlib.pyplot as plt
from PIL import Image
import shutil

In [None]:
# day in a life of a sub
# Archived experiment: render one route figure per day for a random subscriber.
# NOTE(review): this cell reads `sub_id` / `cel_id`, while the rest of the notebook
# uses `sub_uid` / `cel_uid` - the column names look stale; confirm before reviving.
d = fake_transactions.copy()
sample_sub = d.sample(1).sub_id.item()
d = d.loc[d.sub_id==sample_sub]
days = d.transaction_dt.unique().tolist()
route_figs = []
# build one LineString per day from that day's visited cellsites
for sample_day in days:
    sites = fake_transactions\
        .loc[fake_transactions.sub_id==sample_sub]\
        .loc[fake_transactions.transaction_dt==sample_day]\
        .cel_id.tolist()
    points = list(map(lambda z: convert_cel_to_point(z, fake_cellsites), sites))
    r = shapely.geometry.LineString(points)
    route_figs.append(get_route_fig(r))
# os.mkdir raises FileExistsError if the folder is left over from an interrupted run.
# NOTE(review): "../sample" is resolved against the current working directory, which
# the first cell already moved up one level - confirm this is the intended location.
os.mkdir("../sample/")
for idx, fig in enumerate(route_figs):
    fname = "../sample/{}_tmp.jpg".format(idx+1)
    fig.savefig(fname)
imgs = load_images("../sample")
plot_images(imgs)
# the temporary folder is removed only if everything above succeeded
shutil.rmtree("../sample")