# Application: Creating a Telco Mobility Index

1. Add total travel distance
2. Add radius of gyration
3. Add activity entropy

In [1]:
import os
os.chdir("../")

In [2]:
import random
import shapely
import pendulum
import pandas as pd
from sds4gdsp.processor import convert_cel_to_point, calc_haversine_distance
from sds4gdsp.plotter import get_route_fig, load_images, plot_images
from IPython.display import HTML, display
from functools import reduce

Load, take a peek, and get a gist of the given datasets

a. Fake Subscribers <br>
b. Fake Cellsites <br>
c. Fake Transactions

In [3]:
filepath_subscribers = "data/fake_subscribers.csv"
dtype = dict(
    gender="category",
    age=int,
    name=str,
    chi_indicator=bool,
    ewallet_user_indicator="category"
)
fake_subscribers = pd.read_csv(filepath_subscribers, dtype=dtype)
fake_subscribers.sample(5)

Unnamed: 0,sub_uid,gender,age,name,chi_indicator,ewallet_user_indicator
35,glo-sub-036,male,68,Brian Mayer,False,Y
6,glo-sub-007,female,52,Kelly Torres,True,Y
32,glo-sub-033,male,35,Antonio Pham,True,Y
79,glo-sub-080,female,48,Molly Vasquez,False,N
0,glo-sub-001,female,62,Beverly Bailey,True,N


In [4]:
fake_subscribers.shape

(100, 6)

In [5]:
cat_cols = ["gender", "chi_indicator", "ewallet_user_indicator"]
fs_breakdown = fake_subscribers.groupby(cat_cols).size().reset_index(name="cnt")
fs_breakdown.assign(pcnt=fs_breakdown.cnt.div(len(fake_subscribers)).mul(100).round(2))

Unnamed: 0,gender,chi_indicator,ewallet_user_indicator,cnt,pcnt
0,female,False,N,14,14.0
1,female,False,Y,12,12.0
2,female,True,N,10,10.0
3,female,True,Y,14,14.0
4,male,False,N,12,12.0
5,male,False,Y,17,17.0
6,male,True,N,14,14.0
7,male,True,Y,7,7.0


In [6]:
filepath_cellsites = "data/fake_cellsites.csv"
fake_cellsites = pd.read_csv(filepath_cellsites)
fake_cellsites.sample(5)

Unnamed: 0,cel_uid,coords
25,glo-cel-026,POINT (121.0480144 14.4988026)
35,glo-cel-036,POINT (121.052231 14.519476)
45,glo-cel-046,POINT (121.0560984 14.5100246)
37,glo-cel-038,POINT (121.0527667 14.4780747)
4,glo-cel-005,POINT (121.0281256 14.5208775)


In [7]:
fake_cellsites.shape

(111, 2)

In [8]:
HTML('<img src="../docs/fake_cellsites.png" width="600" height="600"/>')

In [9]:
filepath_transactions = "data/fake_transactions.csv"
fake_transactions = pd.read_csv(filepath_transactions)

In [10]:
fake_transactions.shape

(15090, 5)

In [11]:
min_date = pendulum.parse(fake_transactions.transaction_dt.min(), exact=True)
max_date = pendulum.parse(fake_transactions.transaction_dt.max(), exact=True)
period = pendulum.period(min_date, max_date)

In [12]:
sample_dt = str(random.sample(list(period), 1)[0])
filter_dt = fake_transactions.transaction_dt == sample_dt
sample_sub = fake_subscribers.sub_uid.sample(1).item()
filter_sub = fake_transactions.sub_uid == sample_sub
fake_transactions.loc[filter_sub&filter_dt]

Unnamed: 0,txn_uid,sub_uid,cel_uid,transaction_dt,transaction_hr
64,glo-txn-00065,glo-sub-001,glo-cel-036,2023-06-15,3
65,glo-txn-00066,glo-sub-001,glo-cel-027,2023-06-15,9
66,glo-txn-00067,glo-sub-001,glo-cel-036,2023-06-15,13
67,glo-txn-00068,glo-sub-001,glo-cel-043,2023-06-15,22


Create a helper function to help fetch subscriber trajectory

In [13]:
def get_sub_traj(
    sub: str,
    date: str,
    window: str,
    transactions: pd.DataFrame,
    cellsites: pd.DataFrame
):
    if window=="month":
        date_filter = transactions.transaction_dt.apply(lambda d: pendulum.parse(d, exact=True).start_of("month").to_date_string())==date
    elif window=="day":
        date_filter = transactions.transaction_dt==date
    sub_filter = transactions.sub_uid==sub
    transactions_red = transactions.loc[sub_filter&date_filter]
    transactions_red = transactions_red.merge(cellsites, on="cel_uid")
    return transactions_red.sort_values(by=["transaction_dt", "transaction_hr"], ascending=[1, 1])

In [14]:
get_sub_traj("glo-sub-001", "2023-06-01", "month", fake_transactions, fake_cellsites)

Unnamed: 0,txn_uid,sub_uid,cel_uid,transaction_dt,transaction_hr,coords
0,glo-txn-00001,glo-sub-001,glo-cel-053,2023-06-01,3,POINT (121.0581692 14.5056112)
1,glo-txn-00002,glo-sub-001,glo-cel-044,2023-06-01,15,POINT (121.0550896 14.5056935)
5,glo-txn-00003,glo-sub-001,glo-cel-035,2023-06-01,20,POINT (121.0520115 14.5087523)
2,glo-txn-00004,glo-sub-001,glo-cel-044,2023-06-01,21,POINT (121.0550896 14.5056935)
6,glo-txn-00005,glo-sub-001,glo-cel-035,2023-06-01,23,POINT (121.0520115 14.5087523)
...,...,...,...,...,...,...
115,glo-txn-00141,glo-sub-001,glo-cel-056,2023-06-30,6,POINT (121.0591257 14.5116111)
43,glo-txn-00142,glo-sub-001,glo-cel-051,2023-06-30,7,POINT (121.0572633 14.5141269)
29,glo-txn-00143,glo-sub-001,glo-cel-049,2023-06-30,10,POINT (121.0568161 14.5177119)
103,glo-txn-00144,glo-sub-001,glo-cel-057,2023-06-30,19,POINT (121.059168 14.5199865)


We'll use this scoring base moving forward

In [15]:
scoring_base = fake_subscribers.copy()

## 1: Total Travel Distance

In [16]:
def calc_total_travel_distance(coords):
    od_pairs = list(zip(coords, coords[1:]))
    total_travel_distance = reduce(
        lambda a, b: a + b,
        list(map(lambda p: calc_haversine_distance(*p), od_pairs))
    )
    return total_travel_distance

Check on a single sample

In [17]:
sub = fake_subscribers.sample(1).sub_uid.item()
date = "2023-06-01"
window = "month"

In [18]:
traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)
traj.head(4)

Unnamed: 0,txn_uid,sub_uid,cel_uid,transaction_dt,transaction_hr,coords
0,glo-txn-14938,glo-sub-100,glo-cel-007,2023-06-01,0,POINT (121.0342338 14.5181996)
3,glo-txn-14939,glo-sub-100,glo-cel-005,2023-06-01,1,POINT (121.0281256 14.5208775)
13,glo-txn-14940,glo-sub-100,glo-cel-006,2023-06-01,2,POINT (121.03083 14.5277572)
23,glo-txn-14941,glo-sub-100,glo-cel-002,2023-06-01,15,POINT (121.0244545 14.5324531)


In [19]:
coords = traj.coords.tolist()
od_pairs = list(zip(coords, coords[1:]))
print(str(od_pairs[:3]).replace("[", "").replace("]", ""))

('POINT (121.0342338 14.5181996)', 'POINT (121.0281256 14.5208775)'), ('POINT (121.0281256 14.5208775)', 'POINT (121.03083 14.5277572)'), ('POINT (121.03083 14.5277572)', 'POINT (121.0244545 14.5324531)')


In [20]:
# convert to kilometer
calc_total_travel_distance(coords) / 1_000

64.04120381629349

scale to the scoring base

In [23]:
date = "2023-06-01"
window = "month"

total_travel_distances = []
for sub in scoring_base.sub_uid.tolist():
    traj = get_sub_traj(sub, date, window, fake_transactions, fake_cellsites)
    coords = traj.coords.tolist()
    total_travel_distance = calc_total_travel_distance(coords)
    total_travel_distances.append(total_travel_distance)

scoring_base["total_travel_distance"] = total_travel_distances

In [22]:
scoring_base.sample(5)

Unnamed: 0,sub_uid,gender,age,name,chi_indicator,ewallet_user_indicator,total_travel_distance
64,glo-sub-065,male,63,Matthew Townsend,True,N,47574.763355
74,glo-sub-075,female,36,Amanda Edwards,True,N,60644.249269
94,glo-sub-095,female,42,Cheryl Smith,True,N,64663.618576
75,glo-sub-076,male,46,Carlos Cameron,False,N,49144.348688
92,glo-sub-093,female,60,Jennifer Jenkins,True,Y,60403.889369


## 2: Radius of Gyration

In mobility analysis, the radius of gyration indicates the characteristic distance travelled by the agent (in our case, the telco mobile subscriber). This is computed using the given formula below:

$RoG$ = $\sqrt{\frac{1}{n} \sum \limits_{i=1}^{n} {dist(CoM,coord_i)^2}}$

$CoM$ = $\frac{1}{n} \sum \limits_{i=1}^{n} (lat_i, lng_i)$

WHERE: <br>
RoG is Radius of Gyration <br>
CoM is Center of Mass



In [27]:
print(1)

1


## 3: Activity Entropy

## ARCHIVE

In [14]:
def get_route_fig(r):
    fig, ax = plt.subplots(1, 1)
    gpd.GeoSeries(r).plot(ax=ax, linewidth=5, zorder=1)
    orig = shapely.geometry.Point([r.xy[0][0], r.xy[1][0]])
    dest = shapely.geometry.Point([r.xy[0][-1], r.xy[1][-1]])
    gpd.GeoSeries(orig).plot(ax=ax, color="red", markersize=250, zorder=2, alpha=0.8)
    gpd.GeoSeries(dest).plot(ax=ax, color="green", markersize=250, zorder=2, alpha=0.8)
    plt.axis("off")
    ax.ticklabel_format(useOffset=False)
    plt.close()
    return fig

import os
import shapely
import geopandas as gpd
import matplotlib.pyplot as plt
from PIL import Image
import shutil

In [15]:
# day in a life of a sub
d = fake_transactions.copy()
sample_sub = d.sample(1).sub_id.item()
d = d.loc[d.sub_id==sample_sub]
days = d.transaction_dt.unique().tolist()
route_figs = []
for sample_day in days:
    sites = fake_transactions\
        .loc[fake_transactions.sub_id==sample_sub]\
        .loc[fake_transactions.transaction_dt==sample_day]\
        .cel_id.tolist()
    points = list(map(lambda z: convert_cel_to_point(z, fake_cellsites), sites))
    r = shapely.geometry.LineString(points)
    route_figs.append(get_route_fig(r))
os.mkdir("../sample/")
for idx, fig in enumerate(route_figs):
    fname = "../sample/{}_tmp.jpg".format(idx+1)
    fig.savefig(fname)
imgs = load_images("../sample")
plot_images(imgs)
shutil.rmtree("../sample")

AttributeError: 'DataFrame' object has no attribute 'uid'

In [None]:
imgs[0]