# Pandas version of mobilitymatrix.ipynb

In [None]:
import pandas as pd
import numpy as np

### Import data

In [None]:
%%time
# Load the pipe-delimited call records into a DataFrame.
# Column names are supplied through `names`, so pandas reads every line as data
# (assumes the file has no header row -- confirm against the data file).
# Only time, user, lat and lon are kept via `usecols`; zip1 and zip2 are not loaded.
# The time column becomes the index; no date parsing is requested here.
df = pd.read_csv(
    "../data/calldata/synthetic.txt",
    sep="|",
    names=["time", "user", "zip1", "zip2", "lat", "lon"],
    usecols=["time", "user", "lat", "lon"],
    index_col="time",
)

In [None]:
# show the first rows of the loaded frame
df.head()

In [None]:
# number of non-null entries in each column
df.count()

In [None]:
# dtype of each column as loaded (no dtypes were passed to read_csv)
df.dtypes

In [None]:
# Total footprint of the frame; deep=True also counts the contents of
# object-dtype columns. Dividing the byte count by 2**30 expresses it in GiB.
ram_gib = df.memory_usage(deep=True).sum() / 2**30
print(f"ram: {ram_gib} Gig")

### Use smaller data types

In [None]:
%%time
# Swap the raw lat, lon and user values for integer codes. factorize also
# returns the distinct original values, kept here as lats, lons and user_ids.
# NOTE(review): factorize codes missing values as -1, which would let the
# combined keys below collide; this assumes lat/lon contain no NaNs -- confirm.
df["lat"], lats = pd.factorize(df["lat"])
df["lon"], lons = pd.factorize(df["lon"])
df["user"], user_ids = pd.factorize(df["user"])
# Fold each (lat code, lon code) pair into one integer: lat codes are all
# below len(lats), so lat + len(lats) * lon is distinct for every pair.
location_key = df["lat"] + len(lats) * df["lon"]
# The coordinate columns are not needed once the combined key exists.
df.drop(columns=["lat", "lon"], inplace=True)
# Factorize the key again so antenna ids are consecutive integers from 0.
# antenna_locations holds the distinct combined keys, not coordinates.
df["antenna_id"], antenna_locations = pd.factorize(location_key)
del location_key

In [None]:
%%time
# Shrink both id columns to the narrowest unsigned integer dtype that still
# holds every value in the column, which reduces the frame's memory use.
for column in ("user", "antenna_id"):
    df[column] = pd.to_numeric(df[column], downcast="unsigned")

In [None]:
%%time
# Ensure rows are ordered by the time index. The monotonicity check skips the
# sort entirely when the rows are already in order.
# sort_index defaults to quicksort, which is not stable: rows sharing the same
# time value could come out in a different relative order than in the file.
# mergesort is stable, so records with equal timestamps keep their file order.
if not df.index.is_monotonic_increasing:
    df.sort_index(inplace=True, kind="mergesort")

In [None]:
# show the first rows after factorizing, downcasting and sorting
df.head()

In [None]:
# column dtypes after the downcast to unsigned integers
df.dtypes

In [None]:
# Total footprint of the frame; deep=True also counts the contents of
# object-dtype columns. Dividing the byte count by 2**30 expresses it in GiB.
ram_gib = df.memory_usage(deep=True).sum() / 2**30
print(f"ram: {ram_gib} Gig")

### Construct mobility matrix

In [None]:
# antenna_id series grouped by user (lazy evaluation: no work is done until the
# groupby object is iterated).
# sort=False leaves the groups in order of first appearance instead of sorting
# the user keys; rows inside each group keep the frame's row order, which is
# what makes consecutive rows of a group a user's consecutive records.
grouped_by_user = df.groupby("user", sort=False)["antenna_id"]

In [None]:
%%time
# Construct the mobility matrix: antenna_map[i, j] counts how often a user's
# consecutive records go from antenna i to antenna j, summed over all users.
n = len(antenna_locations)
antenna_map = np.zeros((n, n), dtype="int64")
# iterate over the antenna_id series of each user (the group key is not needed)
for _, antenna_ids in grouped_by_user:
    # to_numpy() yields a real ndarray; Series.array would return a pandas
    # ExtensionArray wrapper that numpy has to convert on every indexing call
    visited = antenna_ids.to_numpy()
    # a user with fewer than two records has no transition to count
    if visited.size < 2:
        continue
    # pair each record with the next one: (visited[k], visited[k + 1]).
    # np.add.at is unbuffered, so a transition that occurs several times is
    # counted every time, which antenna_map[i, j] += 1 would not do
    np.add.at(antenna_map, (visited[:-1], visited[1:]), 1)