# Input data

In [1]:
speed_limit = 40
violation_threshold = 1

# Classic approach

In [2]:
# Dictionary {I: [V, C, T, X, Y]}
# I - object ID
# V - determines whether the objects is a violator
# C - violation time counter
# T - previos timestamp
# X - previous X coordinate
# Y - previos Y coordinate
ids = {}

with open("input/data.csv", 'r', newline='') as csvfile:
    next(csvfile)
    for line in csvfile:
        # Read line
        vals = line.split(',')
        i = vals[1]
        t, x, y = float(vals[0]), float(vals[3]), float(vals[4])

        # Add object if not present
        if i not in ids:
            ids[i] = [False, 0, t, x, y]
            continue
        
        # Skip object if it's a violator
        if ids[i][0]:
            continue

        # Calculate statistics
        dt = t - ids[i][2]
        dx = x - ids[i][3]
        dy = y - ids[i][4]
        s = (dx**2 + dy**2)**0.5
        v = s / dt * 3.6

        # Determine speed limit violation
        if v > speed_limit:
            ids[i][1] += dt
        else:
            ids[i][1] = 0

        # Determine counter exceeding
        if ids[i][1] >= violation_threshold:
            ids[i][0] = True
        
        # Update data
        ids[i][2] = t
        ids[i][3] = x
        ids[i][4] = y

In [3]:
vs = [v[0] for v in ids.values()]
vs_ids = [k for k, v in zip(ids.keys(), vs) if v]
for v in vs_ids:
    print(v)

00000000-0000-0000-0000-000000006508
00000000-0000-0000-0000-000000006629
00000000-0000-0000-0000-000000006467
00000000-0000-0000-0000-000000006647
00000000-0000-0000-0000-000000006605
00000000-0000-0000-0000-000000006660


# Using pandas

In [4]:
import pandas as pd

df = pd.read_csv("input/data.csv", index_col=False)

In [5]:
df

Unnamed: 0,TIMESTAMP,TRACK_ID,OBJECT_TYPE,X,Y,CITY_NAME
0,3.159672e+08,00000000-0000-0000-0000-000000000000,AV,2183.412276,724.388351,PIT
1,3.159672e+08,00000000-0000-0000-0000-000000006508,OTHERS,2159.402694,711.473119,PIT
2,3.159672e+08,00000000-0000-0000-0000-000000006293,OTHERS,2229.611668,762.482721,PIT
3,3.159672e+08,00000000-0000-0000-0000-000000006566,OTHERS,2240.571538,768.533941,PIT
4,3.159672e+08,00000000-0000-0000-0000-000000006585,OTHERS,2190.728600,730.459119,PIT
...,...,...,...,...,...,...
786,3.159672e+08,00000000-0000-0000-0000-000000006684,OTHERS,2161.384583,701.669894,PIT
787,3.159672e+08,00000000-0000-0000-0000-000000006508,OTHERS,2112.931861,673.248816,PIT
788,3.159672e+08,00000000-0000-0000-0000-000000006661,OTHERS,2251.211259,789.101345,PIT
789,3.159672e+08,00000000-0000-0000-0000-000000006681,OTHERS,2173.064191,712.178570,PIT


In [6]:
dfs = []
df_ids = df['TRACK_ID']
for i in set(df_ids):
    dfs.append((i, df[df_ids == i]))

In [7]:
dfs[0]

('00000000-0000-0000-0000-000000006467',
         TIMESTAMP                              TRACK_ID OBJECT_TYPE  \
 10   3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 25   3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 40   3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 56   3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 72   3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 90   3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 106  3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 119  3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 134  3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 149  3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 164  3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 179  3.159672e+08  00000000-0000-0000-0000-000000006467      OTHERS   
 195  3.159672e+08  000

In [8]:
viols = []
for i, d in dfs:
    diff = d[['TIMESTAMP', 'X', 'Y']].diff()
    diff['S'] = (diff['X']**2 + diff['Y']**2)**0.5
    diff['V'] = diff['S'] / diff['TIMESTAMP'] * 3.6
    
    mask = diff['V'] > 40
    group = mask.ne(mask.shift()).cumsum()
    groups = diff[mask].groupby(group)

    overflow = groups['TIMESTAMP'].agg('sum') >= 1
    if overflow.any():
        viols.append(i)

In [9]:
viols

['00000000-0000-0000-0000-000000006467',
 '00000000-0000-0000-0000-000000006605',
 '00000000-0000-0000-0000-000000006647',
 '00000000-0000-0000-0000-000000006660',
 '00000000-0000-0000-0000-000000006629',
 '00000000-0000-0000-0000-000000006508']

# Using generators

In [10]:
def trace():
    violation_time = 0
    t_prev, x_prev, y_prev = 0, 0, 0
    while True:
        t, x, y = yield

        if t_prev != 0:
            dt = t - t_prev
            dx = x - x_prev
            dy = y - y_prev

            distance = (dx**2 + dy**2)**0.5
            speed = distance / dt * 3.6
            
            if speed > speed_limit:
                violation_time += dt
            else:
                violation_time = 0

        yield violation_time >= violation_threshold
        t_prev, x_prev, y_prev = t, x, y

In [11]:
def violators(file):
    traces = {}
    with open(file, 'r', newline='') as csvfile:
        next(csvfile)
        for line in csvfile:
            # Read and parse line
            vals = line.split(',')
            id = vals[1]
            t, x, y = float(vals[0]), float(vals[3]), float(vals[4])

            # Add new trace
            if id not in traces:
                tr = trace(); next(tr)
                traces[id] = tr

            tr = traces[id]

            # Skip violators
            if not tr:
                continue
            
            violator = tr.send((t, x, y))
            next(tr)
            
            if violator:
                # Invalidate trace
                traces[id] = False
                yield id

In [12]:
for v in violators("input/data.csv"):
    print(v)

00000000-0000-0000-0000-000000006467
00000000-0000-0000-0000-000000006647
00000000-0000-0000-0000-000000006605
00000000-0000-0000-0000-000000006629
00000000-0000-0000-0000-000000006508
00000000-0000-0000-0000-000000006660


# Assert

In [13]:
set(vs_ids) == set(violators("input/data.csv"))

True