# Vehicle Speed (target) Aggregation: 'Moving Average' Strategy

* Save directory: `../datasets/per-vehicle-moving-average/<aggregation type>-window-<lookback window in seconds>-<datestring>`

In [1]:
import shap
import pandas as pd
import numpy as np

import time

import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder

from glob import glob
import os
import joblib
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

import multiprocessing as mp
from datetime import datetime
import itertools

In [2]:
import psutil
    
# psutil reports total physical memory in bytes; dividing by 2**30 converts
# it to GiB (printed below with a "GB" label).
total_bytes = psutil.virtual_memory().total
ram_gb = total_bytes / 2**30
print(f"RAM: {ram_gb:.2f} GB")

RAM: 251.54 GB


## Read from source

In [3]:
# Collect every CSV stored under <root>/<source_date>/.
# Previously used source dates: '2022-05-24', '2022-05-30'.
source_date = '2023-06-15'
root = '../datasets/processed'
glob_path = os.path.join(root, source_date, '*.csv')
filepaths = glob(glob_path)

# Sanity check: show the first match (raises IndexError if nothing matched).
filepaths[0]

'../datasets/processed/2023-06-15/123200872603.csv'

In [4]:
def dedup_based_on_trace_data(df, verbose=False):
    """Drop rows that are duplicated across every non-land-use column.

    Columns whose name starts with ``pix_`` are ignored when rows are
    compared; all remaining columns together form the duplicate key. The
    first row of each duplicate group is kept (pandas default).

    Args:
        df: DataFrame to de-duplicate. Column names must be strings, since
            ``startswith`` is called on each of them.
        verbose: When true, print the row count before and after.

    Returns:
        The de-duplicated DataFrame.
    """
    key_columns = [name for name in df.columns if not name.startswith("pix_")]
    rows_before = len(df)
    deduped = df.drop_duplicates(subset=key_columns)

    if verbose:
        print(f"Before: {rows_before}, After: {len(deduped)}")

    return deduped

In [5]:
def get_periods(row, ref_index, thresh):
    """Count the timestamps of ``ref_index`` inside the look-back window of ``row``.

    The window covers every timestamp that is at most ``thresh`` seconds
    older than ``row.name``, and includes ``row.name`` itself.

    Args:
        row: Row whose ``name`` is the reference timestamp.
        ref_index: Datetime index holding all candidate timestamps.
        thresh: Window length in seconds.

    Returns:
        Number of entries of ``ref_index`` that fall inside the window.
    """
    seconds_since = (row.name - ref_index).total_seconds()
    not_in_future = seconds_since >= 0  # a lag of zero keeps the row itself
    recent_enough = seconds_since <= thresh
    return np.sum(recent_enough & not_in_future)

def get_agg_speed(row, group, thresh, agg='mean'):
    """Aggregate ``instant_speed`` over the look-back window ending at ``row``.

    The window covers every record of ``group`` whose timestamp is at most
    ``thresh`` seconds older than ``row.name``, including the row itself.

    Args:
        row: Row whose ``name`` is the reference timestamp.
        group: DataFrame with a datetime index and an ``instant_speed`` column.
        thresh: Window length in seconds.
        agg: Aggregation to apply, either ``'mean'`` or ``'median'``.

    Returns:
        The mean or median of ``instant_speed`` inside the window.

    Raises:
        ValueError: If ``agg`` is neither ``'mean'`` nor ``'median'``.
    """
    # Validate first so a bad argument fails fast with a real exception
    # (raising a plain string is itself a TypeError in Python 3).
    if agg not in ('mean', 'median'):
        raise ValueError(
            f"Enter valid agg. Got {agg!r}; expected 'mean' or 'median'.")

    lags = (row.name - group.index).total_seconds()
    # >= 0 includes self
    in_window = (lags <= thresh) & (lags >= 0)
    window_speeds = group.loc[in_window, 'instant_speed']

    if agg == 'mean':
        return window_speeds.mean()
    return window_speeds.median()

## Parallelize

In [6]:
def mp_get_periods(chunk, ref_index, thresh):
    """Add a ``num_periods`` column to ``chunk`` (worker-side helper).

    For each row, ``get_periods`` counts how many timestamps of
    ``ref_index`` fall inside that row's ``thresh``-second look-back window.
    The column is written onto ``chunk`` itself, which is also returned.
    """
    def _count_for_row(row):
        return get_periods(row, ref_index, thresh=thresh)

    chunk['num_periods'] = chunk.apply(_count_for_row, axis=1)
    return chunk

def mp_agg_speed(chunk, full_df, thresh, agg):
    """Add an ``agg_speed`` column to ``chunk`` (worker-side helper).

    For each row, ``get_agg_speed`` aggregates the speeds of ``full_df``
    inside that row's ``thresh``-second look-back window using ``agg``.
    The column is written onto ``chunk`` itself, which is also returned.
    """
    def _speed_for_row(row):
        return get_agg_speed(row, full_df, thresh=thresh, agg=agg)

    chunk['agg_speed'] = chunk.apply(_speed_for_row, axis=1)
    return chunk

# parallelize agg function
def parallelize_get_periods(df, func, thresh):
    """Run ``func`` over row-wise chunks of ``df`` in a process pool.

    Args:
        df: DataFrame to process. Its full index is handed to every worker
            as the reference index.
        func: Picklable callable invoked as ``func(chunk, ref_index, thresh)``
            that returns a DataFrame (e.g. ``mp_get_periods``).
        thresh: Forwarded unchanged to ``func``.

    Returns:
        The per-chunk results concatenated in the original row order.
    """
    ref_index = df.index
    cpus = mp.cpu_count()
    # Never make more chunks than rows, so no worker receives an empty frame
    # (an empty ``df`` still yields a single empty chunk).
    n_chunks = max(1, min(cpus, len(df)))
    # Split row positions rather than the frame itself: np.array_split on a
    # DataFrame relies on DataFrame.swapaxes, deprecated in recent pandas.
    positions = np.array_split(np.arange(len(df)), n_chunks)
    df_chunks = [df.iloc[pos] for pos in positions]

    # The context manager tears the pool down once all results are in;
    # without it every call would leave its worker processes behind.
    with mp.Pool(processes=n_chunks) as pool:
        pending = [
            pool.apply_async(func, args=(chunk, ref_index, thresh))
            for chunk in df_chunks
        ]
        df_results = [task.get() for task in pending]

    return pd.concat(df_results)

def parallelize_agg_speed(df, func, thresh, agg):
    """Run ``func`` over row-wise chunks of ``df`` in a process pool.

    Args:
        df: DataFrame to process. The whole frame is also handed to every
            worker so each row can look at records outside its own chunk.
        func: Picklable callable invoked as ``func(chunk, df, thresh, agg)``
            that returns a DataFrame (e.g. ``mp_agg_speed``).
        thresh: Forwarded unchanged to ``func``.
        agg: Forwarded unchanged to ``func``.

    Returns:
        The per-chunk results concatenated in the original row order.
    """
    cpus = mp.cpu_count()
    # Never make more chunks than rows, so no worker receives an empty frame
    # (an empty ``df`` still yields a single empty chunk).
    n_chunks = max(1, min(cpus, len(df)))
    # Split row positions rather than the frame itself: np.array_split on a
    # DataFrame relies on DataFrame.swapaxes, deprecated in recent pandas.
    positions = np.array_split(np.arange(len(df)), n_chunks)
    df_chunks = [df.iloc[pos] for pos in positions]

    # The context manager tears the pool down once all results are in;
    # without it every call would leave its worker processes behind.
    with mp.Pool(processes=n_chunks) as pool:
        pending = [
            pool.apply_async(func, args=(chunk, df, thresh, agg))
            for chunk in df_chunks
        ]
        df_results = [task.get() for task in pending]

    return pd.concat(df_results)


In [8]:
# Recompute speed with the "moving average" method: for every source CSV,
# write a new CSV that adds two columns:
#   num_periods - number of records inside the look-back window
#   agg_speed   - mean/median of instant_speed inside the look-back window
# (Call tqdm.pandas() first if switching to the progress_apply method.)

# The run date tags the output directory. Hard-code an earlier date
# (e.g. '2022-05-27') to continue a previous run instead of starting a new one.
date = str(datetime.date(datetime.now()))  # now date
print(date)

# Full sweep: aggs = ['median', 'mean'], lookback_windows = [30, 60, 120]
aggs = ['mean']
lookback_windows = [60]  # seconds, can be changed

iters = list(itertools.product(aggs, lookback_windows))

for agg, lookback_window in iters:
    # The output directory depends only on (agg, window), so create it once.
    save_dir = f'../datasets/per-vehicle-moving-average/{agg}-window-{lookback_window}-{date}'
    os.makedirs(save_dir, exist_ok=True)

    # Iterating the list directly lets tqdm show the total and an ETA.
    for path in tqdm(filepaths):
        name = os.path.basename(path).split('.')[0]
        save_path = os.path.join(save_dir, f"{name}.csv")

        # Skip finished files before reading anything, so resuming an
        # interrupted run does not re-load and re-dedup every source CSV.
        if os.path.exists(save_path):
            continue

        group = pd.read_csv(path)
        group = dedup_based_on_trace_data(group, verbose=True)

        # get datetime index
        group['datetime'] = pd.to_datetime(group['datetime'])
        group = group.sort_values(by='datetime')
        group = group.set_index('datetime')

        group = parallelize_get_periods(
            df=group,
            func=mp_get_periods,
            thresh=lookback_window)

        group = parallelize_agg_speed(
            df=group,
            func=mp_agg_speed,
            thresh=lookback_window,
            agg=agg)

        group = group.reset_index()

        # Save via a temporary file and rename, so an interrupted write never
        # leaves a partial CSV that a later run would treat as complete.
        tmp_path = save_path + '.tmp'
        group.to_csv(tmp_path, index=False)
        os.replace(tmp_path, save_path)

2023-06-17


0it [00:00, ?it/s]

Before: 53175, After: 53175


1it [00:34, 34.16s/it]

Before: 153493, After: 153492


2it [03:36, 121.42s/it]

Before: 272253, After: 272253


3it [12:34, 311.47s/it]

Before: 13690, After: 13690


4it [12:42, 191.58s/it]

Before: 96358, After: 96357


5it [14:05, 152.43s/it]

Before: 12663, After: 12663


6it [14:13, 103.41s/it]

Before: 317578, After: 317577


8it [27:11, 227.74s/it]

Before: 170187, After: 170187


9it [31:00, 228.38s/it]

Before: 128042, After: 128042


10it [33:14, 199.00s/it]

Before: 391114, After: 391114


11it [50:42, 458.86s/it]

Before: 49367, After: 49367


12it [51:12, 328.53s/it]

Before: 227249, After: 227249


13it [56:47, 330.43s/it]

Before: 212056, After: 212056


14it [1:01:00, 307.13s/it]

Before: 192439, After: 192439


15it [1:04:29, 277.57s/it]

Before: 145981, After: 145981


16it [1:06:28, 229.71s/it]

Before: 187770, After: 187770


17it [1:09:46, 220.35s/it]

Before: 316222, After: 316222


18it [1:19:10, 323.40s/it]

Before: 143793, After: 143793


19it [1:21:06, 261.10s/it]

Before: 14779, After: 14779


20it [1:21:11, 184.25s/it]

Before: 27783, After: 27783


21it [1:21:19, 131.47s/it]

Before: 287460, After: 287460


22it [1:29:23, 237.35s/it]

Before: 374084, After: 374084


23it [1:43:35, 421.59s/it]

Before: 165267, After: 165267


24it [1:46:16, 343.51s/it]

Before: 100613, After: 100613


25it [1:47:14, 257.83s/it]

Before: 29260, After: 29260


26it [1:47:23, 183.18s/it]

Before: 125042, After: 125042


27it [1:48:56, 156.02s/it]

Before: 170867, After: 170867


28it [1:52:01, 164.90s/it]

Before: 369936, After: 369936


29it [2:06:12, 370.55s/it]

Before: 43492, After: 43492


30it [2:06:27, 263.88s/it]

Before: 153200, After: 153200


31it [2:08:49, 227.45s/it]

Before: 32065, After: 32065


32it [2:09:00, 162.35s/it]

Before: 212795, After: 212795


33it [2:13:29, 194.41s/it]

Before: 34691, After: 34691


34it [2:13:39, 139.26s/it]

Before: 162623, After: 162623


35it [2:16:14, 143.71s/it]

Before: 224528, After: 224528


36it [2:21:27, 194.78s/it]

Before: 490483, After: 490482


37it [2:46:31, 587.28s/it]

Before: 132592, After: 132592


38it [2:48:17, 443.04s/it]

Before: 146755, After: 146755


39it [2:50:26, 348.86s/it]

Before: 45531, After: 45531


40it [2:50:41, 248.63s/it]

Before: 321432, After: 321432


41it [3:01:19, 365.60s/it]

Before: 56680, After: 56680


42it [3:01:41, 262.48s/it]

Before: 214493, After: 214493


43it [3:06:20, 267.23s/it]

Before: 67336, After: 67336


44it [3:06:48, 195.43s/it]

Before: 109044, After: 109044


45it [3:07:59, 158.10s/it]

Before: 47270, After: 47270


46it [3:08:16, 115.98s/it]

Before: 81590, After: 81590


47it [3:08:57, 93.26s/it] 

Before: 57386, After: 57386


48it [3:09:18, 71.68s/it]

Before: 404677, After: 404677


49it [3:26:05, 352.47s/it]

Before: 324803, After: 324803


50it [3:36:59, 442.71s/it]

Before: 179501, After: 179500


51it [3:40:21, 370.57s/it]

Before: 290739, After: 290739


52it [3:48:45, 410.45s/it]

Before: 312746, After: 312746


53it [3:58:39, 465.64s/it]

Before: 137940, After: 137940


54it [4:00:35, 360.91s/it]

Before: 36358, After: 36358


55it [4:00:47, 256.12s/it]

Before: 127711, After: 127711


56it [4:02:30, 210.10s/it]

Before: 252590, After: 252590


57it [4:09:08, 266.47s/it]

Before: 251954, After: 251954


58it [4:15:52, 307.67s/it]

Before: 123140, After: 123140


59it [4:17:26, 243.62s/it]

Before: 156673, After: 156673


60it [4:19:57, 215.83s/it]

Before: 92012, After: 92012


61it [4:20:47, 166.22s/it]

Before: 12477, After: 12477


62it [4:20:52, 117.81s/it]

Before: 194245, After: 194245


63it [4:24:49, 153.60s/it]

Before: 117706, After: 117706


64it [4:26:14, 133.05s/it]

Before: 231670, After: 231670


65it [4:31:59, 196.48s/it]

Before: 14421, After: 14421


66it [4:32:04, 139.16s/it]

Before: 103555, After: 103555


67it [4:33:12, 117.85s/it]

Before: 92431, After: 92431


68it [4:34:05, 98.26s/it] 

Before: 57038, After: 57038


69it [4:34:27, 75.46s/it]

Before: 149004, After: 149003


70it [4:36:44, 93.83s/it]

Before: 66860, After: 66860


71it [4:37:11, 73.97s/it]

Before: 347768, After: 347768


72it [4:49:40, 276.24s/it]

Before: 281820, After: 281820


73it [4:57:37, 336.70s/it]

Before: 394317, After: 394316


74it [5:13:27, 520.67s/it]

Before: 203981, After: 203981


75it [5:17:21, 434.72s/it]

Before: 185545, After: 185544


76it [5:20:34, 362.08s/it]

Before: 139166, After: 139166


77it [5:22:22, 285.84s/it]

Before: 118430, After: 118430


78it [5:23:40, 223.60s/it]

Before: 175497, After: 175497


79it [5:26:35, 209.03s/it]

Before: 95374, After: 95374


80it [5:27:27, 161.88s/it]

Before: 351820, After: 351820


81it [5:39:18, 326.58s/it]

Before: 150916, After: 150916


82it [5:41:26, 267.05s/it]

Before: 114836, After: 114836


83it [5:42:40, 208.95s/it]

Before: 240015, After: 240015


84it [5:48:16, 247.01s/it]

Before: 438, After: 438


85it [5:48:18, 173.50s/it]

Before: 98483, After: 98483


86it [5:49:14, 138.54s/it]

Before: 64648, After: 64648


87it [5:49:40, 104.66s/it]

Before: 89053, After: 89052


88it [5:50:26, 86.89s/it] 

Before: 13463, After: 13463


89it [5:50:30, 62.24s/it]

Before: 190795, After: 190795


90it [5:54:01, 106.67s/it]

Before: 137907, After: 137907


91it [5:55:52, 108.05s/it]

Before: 262, After: 262


92it [5:55:54, 76.21s/it] 

Before: 159901, After: 159901


93it [5:58:24, 98.38s/it]

Before: 112622, After: 112622


94it [5:59:37, 90.77s/it]

Before: 245180, After: 245180


95it [6:05:24, 167.75s/it]

Before: 56334, After: 56334


96it [6:05:45, 123.55s/it]

Before: 312633, After: 312633


97it [6:15:30, 261.99s/it]

Before: 367639, After: 367639


98it [6:29:00, 426.53s/it]

Before: 419988, After: 419988


99it [6:46:51, 619.90s/it]

Before: 226619, After: 226619


100it [6:52:06, 528.25s/it]

Before: 23454, After: 23454


101it [6:52:13, 372.12s/it]

Before: 143652, After: 143652


102it [6:54:17, 297.41s/it]

Before: 205443, After: 205443


103it [6:58:32, 284.68s/it]

Before: 169357, After: 169357


104it [7:01:26, 251.57s/it]

Before: 49151, After: 49151


105it [7:01:43, 181.10s/it]

Before: 280946, After: 280946


106it [7:09:28, 266.53s/it]

Before: 30715, After: 30715


107it [7:09:38, 189.40s/it]

Before: 211576, After: 211576


108it [7:14:18, 216.56s/it]

Before: 13153, After: 13153


109it [7:14:23, 153.22s/it]

Before: 45496, After: 45496


110it [7:14:39, 112.14s/it]

Before: 204566, After: 204566


111it [7:18:59, 156.35s/it]

Before: 103498, After: 103498


112it [7:20:03, 128.59s/it]

Before: 91393, After: 91393


113it [7:20:54, 105.52s/it]

Before: 130442, After: 130442


114it [7:22:36, 104.28s/it]

Before: 13602, After: 13602


115it [7:22:41, 74.52s/it] 

Before: 179022, After: 179022


116it [7:25:49, 108.66s/it]

Before: 442004, After: 442003


117it [7:44:59, 421.01s/it]

Before: 224922, After: 224922


118it [7:50:01, 385.20s/it]

Before: 62541, After: 62541


119it [7:50:26, 277.30s/it]

Before: 80682, After: 80682


120it [7:51:06, 206.02s/it]

Before: 247475, After: 247475


121it [7:57:22, 257.05s/it]

Before: 270195, After: 270195


122it [8:04:41, 311.73s/it]

Before: 8142, After: 8142


123it [8:04:45, 219.41s/it]

Before: 379762, After: 379762


124it [8:19:43, 422.86s/it]

Before: 397525, After: 397525


125it [8:34:24, 560.17s/it]

Before: 126514, After: 126514


126it [8:35:49, 417.83s/it]

Before: 209957, After: 209957


127it [8:39:50, 364.83s/it]

Before: 53510, After: 53510


128it [8:40:10, 261.22s/it]

Before: 207655, After: 207655


129it [8:44:01, 252.12s/it]

Before: 83528, After: 83528


130it [8:44:41, 188.57s/it]

Before: 359, After: 359


131it [8:44:43, 132.60s/it]

Before: 106092, After: 106092


132it [8:45:47, 112.02s/it]

Before: 236928, After: 236928


133it [8:50:56, 171.20s/it]

Before: 287500, After: 287500


134it [8:58:16, 251.65s/it]

Before: 386222, After: 386221


135it [9:11:57, 422.62s/it]

Before: 336137, After: 336137


136it [9:21:54, 474.74s/it]

Before: 40518, After: 40518


137it [9:22:07, 336.43s/it]

Before: 42605, After: 42605


138it [9:22:22, 239.97s/it]

Before: 53712, After: 53712


139it [9:22:42, 173.91s/it]

Before: 266988, After: 266988


140it [9:29:22, 241.59s/it]

Before: 229132, After: 229132


141it [9:34:18, 258.00s/it]

Before: 347223, After: 347223


142it [9:45:25, 380.77s/it]

Before: 346246, After: 346246


143it [9:57:03, 476.07s/it]

Before: 19639, After: 19639


144it [9:57:10, 335.12s/it]

Before: 283557, After: 283557


145it [10:04:41, 369.88s/it]

Before: 278434, After: 278434


146it [10:11:12, 376.30s/it]

Before: 351174, After: 351174


147it [10:22:04, 459.14s/it]

Before: 81233, After: 81233


148it [10:22:43, 332.83s/it]

Before: 394583, After: 394583


149it [10:36:15, 476.79s/it]

Before: 25972, After: 25972


150it [10:36:24, 336.37s/it]

Before: 6524, After: 6524


151it [10:36:27, 236.49s/it]

Before: 126137, After: 126137


152it [10:37:56, 192.14s/it]

Before: 366705, After: 366705


153it [10:50:11, 354.90s/it]

Before: 302441, After: 302440


154it [10:58:36, 400.04s/it]

Before: 100503, After: 100503


155it [10:59:35, 297.74s/it]

Before: 339318, After: 339318


156it [11:10:09, 398.46s/it]

Before: 4890, After: 4890


157it [11:10:12, 280.06s/it]

Before: 13580, After: 13580


158it [11:10:18, 197.58s/it]

Before: 223, After: 223


159it [11:10:20, 138.93s/it]

Before: 178869, After: 178869


160it [11:13:14, 149.50s/it]

Before: 98318, After: 98318


161it [11:14:11, 121.83s/it]

Before: 148222, After: 148222


162it [11:16:17, 122.95s/it]

Before: 35860, After: 35860


163it [11:16:28, 89.52s/it] 

Before: 247907, After: 247907


164it [11:22:32, 171.74s/it]

Before: 186148, After: 186148


165it [11:26:08, 185.12s/it]

Before: 67950, After: 67950


166it [11:26:38, 138.71s/it]

Before: 312524, After: 312524


167it [11:36:08, 268.02s/it]

Before: 150214, After: 150214


168it [11:38:19, 226.94s/it]

Before: 180217, After: 180217


169it [11:41:32, 216.64s/it]

Before: 197357, After: 197357


170it [11:45:24, 221.29s/it]

Before: 140370, After: 140370


171it [11:47:19, 189.54s/it]

Before: 220651, After: 220651


172it [11:52:13, 220.73s/it]

Before: 26172, After: 26172


173it [11:52:22, 157.17s/it]

Before: 110710, After: 110710


174it [11:53:35, 131.84s/it]

Before: 62760, After: 62760


175it [11:54:00, 99.96s/it] 

Before: 265795, After: 265795


176it [12:01:15, 200.43s/it]

Before: 463, After: 463


177it [12:01:17, 140.90s/it]

Before: 63414, After: 63414


178it [12:01:43, 106.48s/it]

Before: 272981, After: 272981


179it [12:09:12, 209.06s/it]

Before: 351865, After: 351865


180it [12:21:04, 360.10s/it]

Before: 196755, After: 196755


181it [12:25:04, 323.89s/it]

Before: 285948, After: 285948


182it [12:33:00, 369.81s/it]

Before: 113507, After: 113507


183it [12:34:25, 284.10s/it]

Before: 81937, After: 81937


184it [12:35:10, 212.53s/it]

Before: 243533, After: 243533


185it [12:40:52, 251.24s/it]

Before: 83650, After: 83650


186it [12:41:32, 188.09s/it]

Before: 13543, After: 13543


187it [12:41:37, 133.16s/it]

Before: 202040, After: 202040


188it [12:45:51, 169.36s/it]

Before: 129920, After: 129920


189it [12:47:34, 149.51s/it]

Before: 368661, After: 368661


190it [13:00:45, 341.88s/it]

Before: 299927, After: 299927


191it [13:11:39, 435.44s/it]

Before: 175370, After: 175370


192it [13:14:50, 362.06s/it]

Before: 299559, After: 299559


193it [13:23:53, 416.50s/it]

Before: 119553, After: 119553


194it [13:25:14, 315.82s/it]

Before: 333925, After: 333925


195it [13:37:38, 444.08s/it]

Before: 272861, After: 272861


196it [13:44:22, 432.21s/it]

Before: 34752, After: 34752


197it [13:44:33, 305.89s/it]

Before: 276715, After: 276715


198it [13:51:04, 331.34s/it]

Before: 275829, After: 275829


199it [13:58:01, 357.16s/it]

Before: 132807, After: 132807


200it [13:59:40, 251.90s/it]
