In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as matplot
import itertools
from os import listdir
from os.path import join

In [66]:
# Root directory of the trajectory data; experiment folders are resolved
# relative to it.
data_path = 'juelich/KO/'

# Experiment folder names below `data_path`.
# NOTE(review): the three numbers in each name look like geometry parameters
# of the run - confirm against the data set description.
experiments = [
    # 240 series
    'ko-240-050-240',
    'ko-240-060-240',
    'ko-240-080-240',
    'ko-240-100-240',
    'ko-240-120-240',
    'ko-240-150-240',
    'ko-240-240-240',
    # 300 series
    'ko-300-050-300',
    'ko-300-080-300',
    'ko-300-120-300',
    'ko-300-150-300',
]

In [56]:
def load(folders, key, base_path=None):
    """Read every file whose name contains `key` from each of the given folders.

    Parameters
    ----------
    folders : iterable of str
        Sub-directory names, resolved relative to `base_path`.
    key : str
        Substring a file name must contain for the file to be loaded.
    base_path : str, optional
        Directory that contains `folders`. When omitted, the notebook-level
        `data_path` is used, so existing two-argument calls behave as before.

    Returns
    -------
    list of (str, pandas.DataFrame)
        One `(file name, data)` pair per matching file. Each frame has the
        columns 'p-id', 'timestep', 'x', 'y' and 'e', parsed from
        space-separated text without a header row.
    """
    if base_path is None:
        base_path = data_path

    frames = []
    for folder in folders:
        folder_path = join(base_path, folder)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # order of the loaded frames reproducible across machines and runs.
        matching = sorted(name for name in listdir(folder_path) if key in name)
        for file in matching:
            data = pd.read_csv(
                join(folder_path, file),
                sep=' ',
                names=['p-id', 'timestep', 'x', 'y', 'e'],
                index_col=False,
                header=None,
                skiprows=0,
            )
            frames.append((file, data))

    return frames

In [75]:
def extract_observation_area(df, area):
    """Return the rows of `df` whose position lies inside a rectangle.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'x' and 'y'.
    area : sequence
        `[x, y, width, height]`; the rectangle spans `x .. x + width` and
        `y .. y + height`, with all four borders included.

    Returns
    -------
    pandas.DataFrame
        The matching rows of `df`; empty when no row falls inside the area.
    """
    left, bottom = area[0], area[1]
    right = left + area[2]
    top = bottom + area[3]

    xs = df['x']
    ys = df['y']
    inside = (xs >= left) & (xs <= right) & (ys >= bottom) & (ys <= top)

    return df[inside]

In [136]:
def getTargetPercentiles(peds, targets):
    """Compute the share of pedestrians in `peds` assigned to each target.

    Despite the name, the values are fractions in the range 0..1, not
    percentiles.

    Parameters
    ----------
    peds : pandas.DataFrame
        Rows of the pedestrians to count; must provide the column 'p-id'.
    targets : dict
        Maps a pedestrian id to its target label.

    Returns
    -------
    dict or None
        `{target: fraction}` with one entry for every distinct label in
        `targets`; labels not present among `peds` get 0.0. Returns None
        when `peds` is empty. The denominator is the number of rows in
        `peds`, so ids missing from `targets` make the fractions sum to
        less than 1.
    """
    if len(peds) == 0:
        return None

    all_targets = np.unique(list(targets.values()))

    ids = list(peds['p-id'])
    total = len(ids)
    # Set membership keeps the filter linear; testing against the list made
    # it O(len(targets) * len(ids)) for every timestep.
    present_ids = set(ids)

    used_targets = sorted(v for k, v in targets.items() if k in present_ids)
    percentiles = {
        k: len(list(v)) / total
        for k, v in itertools.groupby(used_targets)
    }

    # Report every known target, including those nobody in `peds` heads for.
    for k in all_targets:
        if k not in percentiles:
            percentiles[k] = 0.0

    return percentiles

In [117]:
# Observation rectangle as [x, y, width, height], the layout expected by
# extract_observation_area.
# NOTE(review): units are presumably those of the trajectory files - confirm.
obs_area = [-240, 300, 240, 100]

# Load every file whose name contains 'combined' from all experiment folders;
# yields a list of (file name, DataFrame) pairs.
frames = load(experiments, 'combined')

In [161]:
# Collect one record per timestep in plain lists and build the DataFrames once
# at the end. DataFrame.append was removed in pandas 2.0, and growing a frame
# row by row copies it on every call.
percentile_rows = []
ped_count_rows = []

for experiment, frame in frames:
    pId2Target = dict.fromkeys(list(frame['p-id'].unique()))

    # Map each pedestrian id to a target label: 'B' when the first recorded x
    # of that pedestrian is negative, 'A' otherwise.
    # NOTE(review): assumes the rows of each file are in chronological order,
    # so iloc[0] / iloc[-1] are the first / last position - confirm.
    ex = []  # last x per pedestrian; not used below, kept for later inspection
    for pId, group in frame.groupby('p-id'):
        ex.append(group['x'].iloc[-1])
        pId2Target[pId] = ('B' if group['x'].iloc[0] < 0 else 'A')

    # For every timestep, record how many pedestrians are inside the
    # observation area and how they split across the targets.
    for timestep, group in frame.groupby('timestep'):
        peds = extract_observation_area(group, obs_area)

        percentiles = getTargetPercentiles(peds, pId2Target)
        # getTargetPercentiles returns None for an empty area; such timesteps
        # contribute no row to the target shares (same as before, where
        # appending None added nothing), but they are still counted below.
        if percentiles is not None:
            percentile_rows.append(percentiles)

        ped_count_rows.append({'#peds': len(peds)})

all_percentiles = pd.DataFrame(percentile_rows, columns=['A', 'B'])
pedsPerStep = pd.DataFrame(ped_count_rows, columns=['#peds'])
    

In [163]:
# Histogram of timesteps by number of pedestrians in the observation area, and
# by (A, B) target-share combination.
# groupby(..., as_index=False).size() returns a DataFrame on pandas >= 1.1, so
# chaining .to_frame() raises AttributeError there. Grouping with the default
# index and naming the count column in reset_index yields the same columns on
# both old and new pandas versions.
a = pedsPerStep.groupby(['#peds']).size().reset_index(name='#frames')
b = all_percentiles.groupby(['A', 'B']).size().reset_index(name='frames')

# Persist both summaries without the positional index.
a.to_csv('juelich/number-of-peds-per-frame.csv', index=False)
b.to_csv('juelich/percentiles.csv', index=False)