# Testing co-location rate and spatial co-location rate

## Imports

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from colocation.wang_colocation import WangColocation

## Functions

In [2]:
def delta(a, b):
    """Kronecker delta: return 1 when ``a`` equals ``b``, otherwise 0."""
    if a == b:
        return 1
    return 0

In [3]:
def n(x, df):
    """Return the number of rows in ``df`` whose ``userID`` equals ``x``."""
    is_user = df["userID"] == x
    return len(df[is_user])

In [4]:
def P(x, l, df):
    """Return the fraction of user ``x``'s records in ``df`` that are at location ``l``.

    Each of the user's records contributes ``delta(l, record_location) / n_x``,
    where ``n_x`` is the user's record count. A user with no records yields 0.
    """
    user_rows = df[df["userID"] == x]
    record_count = n(x, df)
    visited = list(user_rows["locationID"])

    share = 0
    # Accumulate term by term so the floating-point result is unchanged.
    for visited_loc in visited:
        share += delta(l, visited_loc) / record_count

    return share

In [5]:
def ML(x, df):
    """Return the ID of the location user ``x`` visits most often.

    The previous implementation returned the *position* of the best location
    inside ``df["locationID"].unique()`` rather than the location ID itself,
    which is only correct when IDs happen to be 0, 1, 2, ... in order of first
    appearance. This version returns the actual ID, so it also works for
    arbitrary integer or string IDs.

    Args:
        x: The user ID to look up in the ``userID`` column.
        df: Visit records with at least ``userID`` and ``locationID`` columns.

    Returns:
        The ``locationID`` with the highest visit probability for ``x``. Ties
        go to the location that appears first in ``df``; a user with no
        records gets the first location in ``df`` (all probabilities are 0).

    Raises:
        ValueError: If ``df`` contains no locations at all.
    """
    locs = df["locationID"].unique()
    if len(locs) == 0:
        raise ValueError("df contains no locations")

    # P(x, l) is (visits of x to l) / (records of x), so ranking by visit
    # count gives the same winner as ranking by probability.
    visits = df.loc[df["userID"] == x, "locationID"]
    counts = visits.value_counts().to_dict()

    # max() keeps the first maximal element, matching argmax's tie-breaking.
    return max(locs, key=lambda loc: counts.get(loc, 0))

In [6]:
def dist(location1, location2):
    """Return the Euclidean distance between two 2-D points.

    Only elements 0 and 1 of each argument are read.
    """
    deltas = [location1[axis] - location2[axis] for axis in (0, 1)]
    squared_length = deltas[0]**2 + deltas[1]**2
    return squared_length**0.5

In [7]:
def get_coords(location, locations):
    """Return the ``(x, y)`` tuple of the first row of ``locations`` whose
    ``locationID`` equals ``location``.

    Raises ``IndexError`` (from ``.iloc[0]``) when no row matches.
    """
    is_match = locations["locationID"] == location
    first_row = locations.loc[is_match, ["x", "y"]].iloc[0]
    return (first_row["x"], first_row["y"])

In [8]:
def SCoL(a, b, df):
    """Return the spatial co-location rate of users ``a`` and ``b``.

    This is the sum, over every distinct ``locationID`` in ``df``, of
    ``P(a, l, df) * P(b, l, df)``. Visit times are not used.
    """
    overlap = 0
    distinct_locs = df["locationID"].unique()

    for loc in distinct_locs:
        prob_a = P(a, loc, df)
        prob_b = P(b, loc, df)
        overlap += prob_a * prob_b

    return overlap

In [9]:
def get_loc(userID, k, df):
    """Return the ``locationID`` of the ``k``-th record (0-based, in ``df``
    order) belonging to ``userID``."""
    belongs_to_user = df["userID"] == userID
    kth_record = df[belongs_to_user].iloc[k]
    return kth_record["locationID"]

In [10]:
def get_T(userID, k, df):
    """Return the ``time`` of the ``k``-th record (0-based, in ``df`` order)
    belonging to ``userID``."""
    belongs_to_user = df["userID"] == userID
    kth_record = df[belongs_to_user].iloc[k]
    return kth_record["time"]

In [11]:
def CoL(a, b, df, dt=1):
    """Return the co-location rate of users ``a`` and ``b``.

    Every record of ``a`` is paired with every record of ``b``. A pair counts
    as simultaneous when the two times differ by at most ``dt``. The rate is
    the fraction of simultaneous pairs whose ``locationID`` values are equal.

    The previous version looked locations up in the global ``data`` instead of
    the ``df`` argument and re-filtered the frame for every pair; this version
    uses only ``df`` and compares all pairs at once.

    Args:
        a: First user ID.
        b: Second user ID.
        df: Visit records with ``userID``, ``locationID`` and ``time`` columns.
        dt: Maximum time difference (inclusive) for two records to be
            considered simultaneous.

    Returns:
        The co-location rate as a float, or NaN when no pair of records is
        simultaneous (including when either user has no records), because the
        rate is undefined in that case.
    """
    visits_a = df.loc[df["userID"] == a, ["time", "locationID"]]
    visits_b = df.loc[df["userID"] == b, ["time", "locationID"]]

    # Column vectors for a and row vectors for b: broadcasting then compares
    # every record of a against every record of b.
    times_a = visits_a["time"].to_numpy()[:, None]
    times_b = visits_b["time"].to_numpy()[None, :]
    locs_a = visits_a["locationID"].to_numpy()[:, None]
    locs_b = visits_b["locationID"].to_numpy()[None, :]

    # Equivalent to heaviside(dt - |t_a - t_b|, 1): the boundary is included.
    close_in_time = np.abs(times_a - times_b) <= dt
    same_place = locs_a == locs_b

    n_close = np.count_nonzero(close_in_time)
    if n_close == 0:
        return float("nan")

    return np.count_nonzero(close_in_time & same_place) / n_close

## Read data

In [12]:
# Load the location table (locationID plus x/y coordinates) and preview it.
locations = pd.read_csv("locations.csv")
locations.head()

Unnamed: 0,locationID,x,y
0,0,0,0
1,1,4,2
2,2,6,8
3,3,14,4
4,4,8,16


In [13]:
# Load the visit records (userID, locationID, time) and preview them.
data = pd.read_csv("data.csv")
data.head()

Unnamed: 0,userID,locationID,time
0,x,0,0
1,y,0,0
2,x,1,5
3,y,1,6
4,x,2,8


## Testing functions on data

Getting the probability of users $x$ and $y$ visiting various locations:

In [20]:
# Fraction of user "x"'s records that are at location 0.
P("x", 0, data)

0.375

In [21]:
# Fraction of user "y"'s records that are at location 1.
P("y", 1, data)

0.2857142857142857

Finding the most likely location of users $x$ and $y$:

In [22]:
# Most likely location of user "x", as computed by ML.
ml_x = ML("x", data)
ml_x

0

In [23]:
# Most likely location of user "y", as computed by ML.
ml_y = ML("y", data)
ml_y

1

Get coordinates for most likely locations:

In [24]:
# (x, y) coordinates of user "x"'s most likely location.
coords_x = get_coords(ml_x, locations)
coords_x

(0, 0)

In [25]:
# (x, y) coordinates of user "y"'s most likely location.
coords_y = get_coords(ml_y, locations)
coords_y

(4, 2)

Get distance between most likely locations:

In [26]:
# Euclidean distance between the two users' most likely locations.
dist(coords_x, coords_y)

4.47213595499958

Calculate spatial co-location rate:

In [27]:
# Spatial co-location rate of "x" and "y"; visit times are not used.
SCoL("x", "y", data)

0.12499999999999999

Calculate co-location rate:

In [28]:
# Co-location rate of "x" and "y" with the default time window dt=1.
CoL("x", "y", data)

0.3333333333333333

In [29]:
# Co-location rate of "x" and "y" for each time window dt = 1..9.
cols = [CoL("x", "y", data, dt) for dt in range(1, 10)]

## Testing WangColocation class

In [30]:
# Build the packaged implementation from the same visit records and locations.
# NOTE(review): presumably wraps the functions defined above; confirm against
# colocation.wang_colocation.
wc = WangColocation(data, locations)

In [31]:
# Distinct user IDs in the data held by the WangColocation instance.
wc.data["userID"].unique()

array(['x', 'y'], dtype=object)

In [32]:
# Compare with the value returned by CoL("x", "y", data) earlier.
wc.get_CoL("x", "y")

0.3333333333333333

In [33]:
# Compare with the value returned by SCoL("x", "y", data) earlier.
wc.get_SCoL("x", "y")

0.12499999999999999