# Identifying instances of co-location in synthetic mobility trajectories

## Imports

In [1]:
import skmob
import geopandas as gpd
import pandas as pd
import numpy as np
from itertools import combinations

## Functions

In [2]:
def is_spatially_proximal(
    lat1: float, lat2: float, lng1: float, lng2: float, *, tolerance: float = 0.0
) -> bool:
    """Return whether two points lie within ``tolerance`` of each other on both axes.

    Latitude and longitude are compared independently, so ``tolerance``
    describes a box in coordinate units, not a great-circle distance. With
    the default of ``0.0`` only identical (finite) coordinates match, which
    is the exact-equality rule this function has always applied.

    The arithmetic is element-wise: scalars and aligned pandas Series or
    NumPy arrays are both accepted, and array inputs yield a boolean mask.

    Args:
        lat1: Latitude(s) of the first point.
        lat2: Latitude(s) of the second point.
        lng1: Longitude(s) of the first point.
        lng2: Longitude(s) of the second point.
        tolerance: Largest absolute difference allowed on each axis, in the
            same units as the coordinates. Must be non-negative.

    Returns:
        ``True`` (or a boolean mask) where both coordinate differences are
        within ``tolerance``.

    Raises:
        ValueError: If ``tolerance`` is negative.
    """
    if tolerance < 0:
        raise ValueError("tolerance must be non-negative")
    return (abs(lat1 - lat2) <= tolerance) & (abs(lng1 - lng2) <= tolerance)

In [12]:
def is_temporally_proximal(
    t1: np.datetime64,
    t2: np.datetime64,
    window: np.timedelta64 = np.timedelta64(4, "h"),
) -> bool:
    """Return whether ``t2`` falls within ``window`` either side of ``t1``.

    Both bounds are inclusive, so a gap of exactly ``window`` counts as
    proximal. The comparison is element-wise: scalar timestamps give a
    single truth value, aligned datetime Series/arrays give a boolean mask.

    Args:
        t1: Reference timestamp(s).
        t2: Timestamp(s) tested against the window around ``t1``.
        window: Half-width of the accepted interval. Defaults to 4 hours.

    Returns:
        ``True`` (or a boolean mask) where ``t1 - window <= t2 <= t1 + window``.
    """
    return (t1 - window <= t2) & (t2 <= t1 + window)

## Read data

In [4]:
# Load the trajectory records from the CSV file into a scikit-mobility TrajDataFrame.
tdf = skmob.TrajDataFrame.from_file("data/traj.csv")

In [5]:
tdf.plot_trajectory()

  return plot.plot_trajectory(self, map_f=map_f, max_users=max_users, max_points=max_points, style_function=style_function,
  return plot.plot_trajectory(self, map_f=map_f, max_users=max_users, max_points=max_points, style_function=style_function,


## Create subset

Create a subset of the data containing only the first 5 individuals (those with `uid` ≤ 5) to make things a bit easier to visualise and work with.

In [6]:
# Keep only the rows whose uid is at most 5.
in_subset = tdf["uid"] <= 5
stdf = tdf.loc[in_subset, :]

In [7]:
stdf.plot_trajectory()

  return plot.plot_trajectory(self, map_f=map_f, max_users=max_users, max_points=max_points, style_function=style_function,
  return plot.plot_trajectory(self, map_f=map_f, max_users=max_users, max_points=max_points, style_function=style_function,


## Iterating over individuals

In [13]:
individuals = stdf["uid"].unique()

In [14]:
# Materialise the pairs as a list: a bare combinations() object is a one-shot
# iterator, so re-running the loop over it would silently yield no pairs.
combos = list(combinations(individuals, 2))

In [15]:
all_observation_combinations = []

In [16]:
# For each pair of individuals, pair every observation of the first with every
# observation of the second and flag the rows that match in both space and time.
for combo in combos:
    first_uid, second_uid = combo
    person1 = stdf.loc[stdf["uid"] == first_uid]
    person2 = stdf.loc[stdf["uid"] == second_uid]

    # Cross join: columns from person1 get the _x suffix, person2 the _y suffix.
    cross = person1.merge(person2, how="cross")

    same_place = is_spatially_proximal(
        cross["lat_x"], cross["lat_y"], cross["lng_x"], cross["lng_y"]
    )
    close_in_time = is_temporally_proximal(cross["datetime_x"], cross["datetime_y"])

    # Store the flag as 1/0 rather than True/False.
    cross["is_coloc"] = np.where(same_place & close_in_time, 1, 0)
    all_observation_combinations.append(cross)

In [17]:
# Stack the per-pair frames into one table. ignore_index=True assigns a fresh
# 0..n-1 index; without it every per-pair frame contributes its own 0..k-1
# labels, so index labels would repeat across pairs.
all_observation_combinations = pd.concat(all_observation_combinations, ignore_index=True)

In [18]:
all_observation_combinations.head()

Unnamed: 0,uid_x,datetime_x,lat_x,lng_x,uid_y,datetime_y,lat_y,lng_y,is_coloc
0,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 08:00:00,53.812933,-1.500682,0
1,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 08:20:15,53.812787,-1.488527,0
2,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 10:44:20,53.825503,-1.516227,0
3,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 18:51:07,53.814158,-1.511499,0
4,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 19:18:01,53.815489,-1.50831,0


In [19]:
# Total number of flagged observation pairs (vectorised Series.sum instead of
# iterating the column with the builtin sum).
all_observation_combinations["is_coloc"].sum()

22

In [20]:
# Fraction of observation pairs flagged as co-located: the mean of the
# is_coloc column equals flagged rows divided by all rows when the column
# holds only 0/1 values.
hit_rate = all_observation_combinations["is_coloc"].mean()

In [21]:
hit_rate * 100

0.0052257773343784885

In [38]:
stdf.loc[stdf["uid"]==1]

Unnamed: 0,uid,datetime,lat,lng
0,1,2024-01-01 08:00:00,53.729001,-1.612396
1,1,2024-01-01 08:33:04,53.734795,-1.620111
2,1,2024-01-01 09:42:28,53.729001,-1.612396
3,1,2024-01-01 10:15:42,53.741035,-1.624495
4,1,2024-01-01 10:44:34,53.739380,-1.606354
...,...,...,...,...
223,1,2024-01-14 05:02:04,53.802035,-1.587004
224,1,2024-01-14 05:23:40,53.734795,-1.620111
225,1,2024-01-14 05:44:23,53.802035,-1.587004
226,1,2024-01-14 06:20:38,53.809499,-1.622786


In [39]:
from skmob.measures.individual import waiting_times

In [40]:
wt_df = waiting_times(stdf)

100%|██████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 110.85it/s]


In [41]:
wt_df

Unnamed: 0,uid,waiting_times
0,1,"[1984.0, 4164.0, 1994.0, 1732.0, 2266.0, 1812...."
1,2,"[1215.0, 8645.0, 29207.0, 1614.0, 2351.0, 1872..."
2,3,"[1470.0, 1945.0, 2300.0, 1590.0, 1336.0, 4660...."
3,4,"[1922.0, 3808.0, 2700.0, 1244.0, 2656.0, 5521...."
4,5,"[2702.0, 2986.0, 7094.0, 14587.0, 4585.0, 1637..."


In [42]:
# Select the waiting_times entry for uid 1. The list-style column selector
# (["waiting_times"]) keeps the result as a one-column DataFrame rather than
# a Series.
wt = wt_df.loc[wt_df["uid"]==1, ["waiting_times"]]

In [48]:
np.ravel(wt)

array([array([ 1984.,  4164.,  1994.,  1732.,  2266.,  1812., 17751.,  1315.,
               1669.,  1674.,  1680.,  2998.,  1375.,  5495.,  1829.,  2359.,
               4993., 12734.,  6083.,  1833.,  1447.,  2911.,  6102.,  3812.,
               5287.,  4086.,  1294.,  1401.,  1315., 11836.,  3606.,  7394.,
              13885.,  3128.,  4324.,  1581.,  9235.,  1343.,  2440.,  7384.,
               1760.,  3319.,  9169.,  2082.,  2172.,  5653.,  2544.,  1482.,
              12464.,  1731.,  5875.,  6338.,  1604.,  1412.,  1239.,  1465.,
               1538.,  9967.,  1276.,  1791.,  1495.,  2199., 13678.,  1354.,
               1386., 11145.,  1625.,  2615.,  5688.,  3611.,  1753.,  1806.,
              26244.,  5403.,  1524.,  1892.,  5482.,  2274.,  1376.,  2577.,
              12885.,  2373.,  4666.,  1548.,  1558.,  2945.,  2516., 10169.,
               1514.,  2452.,  1429.,  4363.,  7501.,  1704.,  7631.,  3650.,
              44470.,  1973.,  3545.,  1507., 12477.,  1986.,  1

In [15]:
# Print the (rows, columns) shape of each individual's slice of the subset.
for individual in individuals:
    belongs_to_individual = stdf["uid"] == individual
    df = stdf.loc[belongs_to_individual, :]
    print(df.shape)

(228, 4)
(200, 4)
(206, 4)
(173, 4)
(220, 4)


[(np.int64(1), np.int64(2)),
 (np.int64(1), np.int64(3)),
 (np.int64(1), np.int64(4)),
 (np.int64(1), np.int64(5)),
 (np.int64(2), np.int64(3)),
 (np.int64(2), np.int64(4)),
 (np.int64(2), np.int64(5)),
 (np.int64(3), np.int64(4)),
 (np.int64(3), np.int64(5)),
 (np.int64(4), np.int64(5))]

In [None]:
# NOTE(review): get_user_list_of_dicts, trace_x_delta, t_delta, users,
# x_tolerance and t_tolerance are not defined anywhere in the visible
# notebook; they must be supplied before this cell can run.
user_x = get_user_list_of_dicts(users, "x")
user_y = get_user_list_of_dicts(users, "y")

scol_points = []

# Compare every record of user x against every record of user y.
for x_record in user_x:
    for y_record in user_y:
        x_coords = (x_record["x"], x_record["y"])
        y_coords = (y_record["x"], y_record["y"])

        condition1 = trace_x_delta(x_coords, y_coords, x_tolerance)
        condition2 = t_delta(x_record["time"], y_record["time"], t_tolerance)

        if condition1 and condition2:
            # Copy only the records that are actually kept, so the collected
            # rows are separate objects from the source records.
            scol_points.extend([x_record.copy(), y_record.copy()])

scol_points = pd.DataFrame(scol_points)
# A top-level `return` is a SyntaxError outside a function; end the cell with
# the bare name so the notebook displays the result instead.
scol_points

In [16]:
person1 = stdf.loc[stdf["uid"]==1, :]

In [17]:
person2 = stdf.loc[stdf["uid"]==2, :]

In [18]:
combo = person1.merge(person2, how="cross")

In [19]:
combo

Unnamed: 0,uid_x,datetime_x,lat_x,lng_x,uid_y,datetime_y,lat_y,lng_y
0,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 08:00:00,53.812933,-1.500682
1,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 08:20:15,53.812787,-1.488527
2,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 10:44:20,53.789568,-1.511633
3,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 18:51:07,53.748723,-1.603006
4,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 19:18:01,53.750792,-1.605922
...,...,...,...,...,...,...,...,...
45595,1,2024-01-14 06:46:16,53.808094,-1.559009,2,2024-01-14 01:37:48,53.789568,-1.511633
45596,1,2024-01-14 06:46:16,53.808094,-1.559009,2,2024-01-14 02:15:38,53.804982,-1.508025
45597,1,2024-01-14 06:46:16,53.808094,-1.559009,2,2024-01-14 02:37:26,53.789054,-1.548701
45598,1,2024-01-14 06:46:16,53.808094,-1.559009,2,2024-01-14 03:16:54,53.787941,-1.553949


In [35]:
# Flag rows where both individuals are at the same coordinates. Uses
# is_spatially_proximal, the spatial helper defined in this notebook; the
# previously referenced name is_colocated is not defined in it.
combo["is_coloc"] = np.where(
    is_spatially_proximal(combo["lat_x"], combo["lat_y"], combo["lng_x"], combo["lng_y"]),
    1,
    0,
)

In [36]:
combo

Unnamed: 0,uid_x,datetime_x,lat_x,lng_x,uid_y,datetime_y,lat_y,lng_y,is_coloc
0,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 08:00:00,53.812933,-1.500682,0
1,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 08:20:15,53.812787,-1.488527,0
2,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 10:44:20,53.789568,-1.511633,0
3,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 18:51:07,53.748723,-1.603006,0
4,1,2024-01-01 08:00:00,53.729001,-1.612396,2,2024-01-01 19:18:01,53.750792,-1.605922,0
...,...,...,...,...,...,...,...,...,...
45595,1,2024-01-14 06:46:16,53.808094,-1.559009,2,2024-01-14 01:37:48,53.789568,-1.511633,0
45596,1,2024-01-14 06:46:16,53.808094,-1.559009,2,2024-01-14 02:15:38,53.804982,-1.508025,0
45597,1,2024-01-14 06:46:16,53.808094,-1.559009,2,2024-01-14 02:37:26,53.789054,-1.548701,0
45598,1,2024-01-14 06:46:16,53.808094,-1.559009,2,2024-01-14 03:16:54,53.787941,-1.553949,0


In [37]:
# Number of flagged rows for this pair (vectorised Series.sum instead of the
# builtin sum, which iterates the column element by element).
combo["is_coloc"].sum()

73