In [1]:
import pandas as pd


def read_label_map(label_map_path):
    """Parse a label-map text file into a ``{name: zero_based_id}`` dict.

    The file is read line by line. Each line is split at its first ``:``;
    the text before it is the key and the text after it is the value.

    * key ``label_id`` -> integer id, stored shifted down by one;
    * key ``name``     -> label text with single/double quotes removed.

    As soon as both an id and a name have been collected the pair is stored,
    so the two lines may appear in either order inside a block. A line that
    consists only of ``}`` closes the current block and discards a
    half-collected entry, so an id from one block can never be paired with a
    name from another. Lines without a ``:`` and lines with any other key are
    ignored.

    Parameters
    ----------
    label_map_path : str or path-like
        Path of the label-map file to read.

    Returns
    -------
    dict
        Maps each label name to ``label_id - 1``. When a name occurs more
        than once, the id seen last wins.

    Raises
    ------
    ValueError
        If the value of a ``label_id`` line is not an integer literal.
    """
    items = {}
    item_id = None
    item_name = None

    with open(label_map_path, "r") as file:
        for raw_line in file:
            # Lines keep their trailing newline and indentation; strip both
            # before comparing or splitting.
            line = raw_line.strip()

            if line == "}":
                # End of a block: forget any incomplete entry.
                item_id = None
                item_name = None
                continue

            key, separator, value = line.partition(":")
            if not separator:
                # Block openers, blank lines, comments without a colon, ...
                continue

            # Match on the exact key instead of a substring of the whole
            # line, so values that merely contain "name" are not mistaken
            # for a name entry.
            key = key.strip()
            if key == "label_id":
                item_id = int(value.strip()) - 1
            elif key == "name":
                item_name = value.replace("'", "").replace('"', "").strip()

            if item_id is not None and item_name is not None:
                items[item_name] = item_id
                item_id = None
                item_name = None

    return items

In [2]:
# Load the action label map; the result maps each action name to its
# label id shifted down by one.
activities = read_label_map(
    r"D:\Projects\ava\ava_v2.1\ava_action_list_v2.1.pbtxt"
)

In [9]:
# Compact view of the label map: id (as a string) -> first word of the name.
# `activities` is keyed by name, so each item unpacks as (name, id).
print({
    str(action_id): action_name.split(" ")[0]
    for action_name, action_id in activities.items()
})

{'0': 'bend/bow', '1': 'crawl', '2': 'crouch/kneel', '3': 'dance', '4': 'fall', '5': 'get', '6': 'jump/leap', '7': 'lie/sleep', '8': 'martial', '9': 'run/jog', '10': 'sit', '11': 'stand', '12': 'swim', '13': 'walk', '14': 'answer', '15': 'brush', '16': 'carry/hold', '17': 'catch', '18': 'chop', '19': 'climb', '20': 'clink', '21': 'close', '22': 'cook', '23': 'cut', '24': 'dig', '25': 'dress/put', '26': 'drink', '27': 'drive', '28': 'eat', '29': 'enter', '30': 'exit', '31': 'extract', '32': 'fishing', '33': 'hit', '34': 'kick', '35': 'lift/pick', '36': 'listen', '37': 'open', '38': 'paint', '39': 'play', '40': 'play', '41': 'play', '42': 'point', '43': 'press', '44': 'pull', '45': 'push', '46': 'put', '47': 'read', '48': 'ride', '49': 'row', '50': 'sail', '51': 'shoot', '52': 'shovel', '53': 'smoke', '54': 'stir', '55': 'take', '56': 'text', '57': 'throw', '58': 'touch', '59': 'turn', '60': 'watch', '61': 'work', '62': 'write', '63': 'fight/hit', '64': 'give/serve', '65': 'grab', '66': 

In [11]:
# Both CSV files are read with explicit column names (pandas then treats the
# first row as data, not as a header). The two frames differ only in their
# last column: `person_id` for the annotations, `score` for the detections.
target_df = pd.read_csv(
    r"D:\Projects\ava\ava_v2.1\ava_val_v2.1.csv",
    names=['id', 'timestamp', 'x1', 'y1', 'x2', 'y2', 'action_id', 'person_id'],
)
stochastic_df = pd.read_csv(
    r"D:\Projects\ava\ava_baseline_detections_val_v2.1\ava_baseline_detections_val_v2.1.csv",
    names=['id', 'timestamp', 'x1', 'y1', 'x2', 'y2', 'action_id', 'score'],
)

In [28]:
import numpy as np

def reconstruct_prob_vector(k, p_k, num_classes, alpha=0.3, rng=None):
    """Build a full probability vector around one known class probability.

    Entry ``k`` of the result is set to ``p_k``; the remaining mass
    ``1 - p_k`` is split over the other ``num_classes - 1`` entries according
    to a sample from a symmetric Dirichlet distribution.

    Parameters
    ----------
    k : int
        Index of the predicted class (0-based).
    p_k : float
        Known probability of the predicted class. Values outside the open
        interval (0, 1) are clamped into ``[5e-5, 1 - 5e-5]`` so that the
        result is always a valid distribution; values inside the interval
        are used unchanged.
    num_classes : int
        Total number of classes (length of the returned vector).
    alpha : float, default 0.3
        Dirichlet concentration parameter, used for every remaining class.
    rng : numpy.random.Generator, optional
        Random generator used for the Dirichlet draw. When omitted, NumPy's
        global random state (``np.random``) is used, as before; pass a seeded
        generator to get reproducible vectors.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``num_classes`` whose entries sum to 1.
    """
    margin = 5e-5
    if not (0 < p_k < 1):
        # Clamp instead of `abs(p_k - margin)`: identical for exactly 0 and
        # exactly 1, but also keeps values above 1 or below 0 in range
        # (the old form produced negative probabilities for p_k > 1).
        p_k = min(max(p_k, margin), 1 - margin)

    rest = num_classes - 1
    sampler = np.random if rng is None else rng
    q = sampler.dirichlet([alpha] * rest)

    p = np.zeros(num_classes)
    p[k] = p_k
    # Fill every position except k with the rescaled Dirichlet sample.
    p[np.arange(num_classes) != k] = (1 - p_k) * q

    return p

In [30]:
from tqdm.notebook import tqdm

# For every trace id found in the target frame, collect
#   * the target action ids, shifted down by one, and
#   * one reconstructed probability vector per detection row with that id.
# NOTE: reconstruct_prob_vector samples from NumPy's random state, so the
# stochastic traces differ between runs unless that state is seeded first.
tokenized_traces = []
sk_traces = []
n_classes = len(activities)
for trace_id in tqdm(target_df["id"].unique()):
    target_rows = target_df[target_df["id"] == trace_id]
    tokenized_traces.append([x - 1 for x in target_rows["action_id"].to_numpy()])

    # Filter the detections once and reuse the slice for both columns
    # (the mask used to be evaluated twice per trace).
    sk_rows = stochastic_df[stochastic_df["id"] == trace_id]
    sk_activities = [x - 1 for x in sk_rows["action_id"].to_numpy()]
    sk_score = sk_rows["score"].to_numpy()
    sk_traces.append([reconstruct_prob_vector(act, score, n_classes) for act, score in zip(sk_activities, sk_score)])

  0%|          | 0/64 [00:00<?, ?it/s]

In [32]:
from itertools import chain
from sklearn.preprocessing import OneHotEncoder

# Pin the categories to the full class range 0 .. n_classes-1 instead of
# letting the encoder infer them from the data. With inferred categories the
# one-hot width equals the number of classes that happen to occur in the
# target traces, and column i is the i-th *observed* class, which no longer
# lines up with the n_classes-wide stochastic vectors when a class is absent.
encoder = OneHotEncoder(categories=[list(range(n_classes))], sparse_output=False)
# Fitting still runs over all target tokens; with explicit categories it also
# raises if a token falls outside the expected class range.
encoder.fit(np.array(list(chain.from_iterable(tokenized_traces))).reshape(-1, 1))
# One (len(trace), n_classes) matrix per trace.
one_hot_ava = [encoder.transform(np.array(x).reshape(-1, 1)) for x in tokenized_traces]

In [36]:
import pickle as pkl

# Persist both representations in one pickle: the one-hot target traces as
# they are, and every stochastic trace converted to a single NumPy array.
with open("../data/pickles/ava_unified.pkl", "wb") as f:
    pkl.dump(
        {
            'target': one_hot_ava,
            'stochastic': list(map(np.array, sk_traces)),
        },
        f,
    )