## Libraries & Constants

In [1]:
import numpy as np
import pandas as pd
import os

# Root folder that holds every input and output file used by this notebook.
data_dir = '../Data/'
# Folder containing one sub-folder of location CSVs per dataset variant.
locations_dir = f'{data_dir}Locations/'
# Dataset variants to process; each name is also used as a sub-folder name
# and as the prefix of the output CSV.
datasets = ['Original', '10FPS']

# Metadata table, loaded once here; later cells read its 'DanceID' and
# 'AvgBeeLength_Px' columns.
metadata_file = f'{data_dir}Dances_Metadata.csv'
metadata = pd.read_csv(metadata_file)

In [2]:
def compute_vertex_angle(prev_loc, vertex, next_loc, eps=0.00001):
    """Return the turning angle, in radians, of the path prev_loc -> vertex -> next_loc.

    The three points span a triangle with side lengths
    ``a = |next_loc - prev_loc|`` (the side opposite ``vertex``),
    ``b = |vertex - prev_loc|`` and ``c = |next_loc - vertex|``.
    The cosine evaluated here is ``(a^2 - b^2 - c^2) / (2bc)``, which is the
    negative of the law-of-cosines cosine of the interior angle at ``vertex``.
    The returned value is therefore pi minus that interior angle: 0 when the
    path continues straight on, pi/2 for a right-angle turn and pi for a full
    reversal.

    Parameters
    ----------
    prev_loc, vertex, next_loc : numpy arrays of equal shape
        Coordinates of the previous, current and next point.
    eps : float, optional
        Stand-in length for any side that is (numerically) zero, so that the
        division by ``2bc`` is always defined.

    Returns
    -------
    numpy float
        Angle in ``[0, pi]``. NaN is returned only if the inputs contain NaN.
    """
    a = np.linalg.norm(next_loc - prev_loc)
    b = np.linalg.norm(vertex - prev_loc)
    c = np.linalg.norm(next_loc - vertex)
    # Coincident points give zero-length sides; substitute eps to avoid 0/0.
    a, b, c = (eps if np.isclose(side, 0) else side for side in (a, b, c))
    cos = (a**2 - b**2 - c**2) / (2 * b * c)
    # Snap values that are within floating-point tolerance of the endpoints.
    cos = 1 if np.isclose(cos, 1) else cos
    cos = -1 if np.isclose(cos, -1) else cos
    # After an eps substitution the three lengths may no longer satisfy the
    # triangle inequality, which pushes the cosine outside [-1, 1] and would
    # make arccos return NaN. Clamp instead (np.clip still propagates NaN).
    return np.arccos(np.clip(cos, -1.0, 1.0))

## Code

Two Datasets:
1. OriginalDataset
2. 10FPSDataset

In [13]:
# Build one feature table per dataset variant and write it to a CSV file.
# For every location with a full neighbourhood the table holds: an ID, the
# mean step length (divided by the dance's AvgBeeLength_Px), the mean vertex
# angle (divided by pi), their product, and the movement class.
for dataset_name in datasets:
    curr_dataset = []
    dataset_locs_dir = locations_dir + dataset_name + '/'
    # Keep only the per-dance location CSVs. Any other directory entry
    # (e.g. '.DS_Store') is skipped rather than crashing the read below.
    locs_files = sorted(
        name for name in os.listdir(dataset_locs_dir)
        if name.startswith('WaggleDance_') and name.endswith('_Locations.csv')
    )

    for locs_file in locs_files:
        curr_dance = locs_file.replace('WaggleDance_', '').replace('_Locations.csv', '')

        # The first five characters of the dance name are matched against the
        # metadata 'DanceID' column to look up the bee length for this dance.
        bee_lengths = metadata['AvgBeeLength_Px'].loc[metadata['DanceID'] == curr_dance[:5]].values
        if len(bee_lengths) == 0:
            raise IndexError(
                "No metadata row with DanceID '{}' (needed for file '{}')".format(curr_dance[:5], locs_file)
            )
        curr_bee_length = bee_lengths[0]

        locs_df = pd.read_csv(dataset_locs_dir + locs_file)
        # Column 0 is dropped, columns 1-2 are the location, column 3 is the
        # movement label, and any further columns are dropped.
        # NOTE(review): presumably columns 1-2 are x/y pixel coordinates - confirm.
        _, locs, dance_mvmt, _ = np.split(locs_df.to_numpy(), [1, 3, 4], axis=1)
        n_locs = len(locs)

        # Only indices with three neighbours on each side get a feature row.
        # NOTE(review): the original author tagged this 3-sample trim with
        # 'FIX' (here and on the label slice below); the intent is unclear.
        centers = range(3, n_locs - 3)
        loc_ids = np.array([curr_dance + '_' + '{:04d}'.format(j) for j in centers])

        # distances[k]     = step length from point k-1 to point k
        # vertex_angles[k] = angle returned by compute_vertex_angle at point k
        # The -100 entries are placeholders for indices where the quantity is
        # undefined (first distance, first and last angle); the windows below
        # never reach them.
        distances = [-100]
        vertex_angles = [-100]
        for j in range(1, n_locs):
            d_k = np.linalg.norm(locs[j] - locs[j - 1])  # Distance betw. curr_pt and prev_pt
            d_k = 0.00001 if np.isclose(d_k, 0) else d_k
            distances.append(d_k)
            if j < n_locs - 1:
                vertex_angles.append(compute_vertex_angle(locs[j - 1], locs[j], locs[j + 1]))
        vertex_angles.append(-100)

        # Six steps (indices j-2..j+3) span points j-3..j+3; the five angles
        # (indices j-2..j+2) are centred on point j.
        mean_consec_distances = [np.mean(distances[j - 2:j + 4]) for j in centers]
        mean_consec_vertex_angles = [np.mean(vertex_angles[j - 2:j + 3]) for j in centers]

        mean_consec_distances = np.array(mean_consec_distances) / curr_bee_length
        mean_consec_vertex_angles = np.array(mean_consec_vertex_angles) / np.pi
        interactions = mean_consec_distances * mean_consec_vertex_angles

        # Trim the labels to the same index range as the features, then
        # relabel class 2 as class 0.
        dance_mvmt = dance_mvmt.reshape(-1)[3:-3].astype(int)
        dance_mvmt = np.where(dance_mvmt == 2, 0, dance_mvmt)

        # Stacking the string IDs with the numeric columns yields one string
        # array with a row per location and a column per feature.
        curr_dataset.append(np.vstack([loc_ids, mean_consec_distances, mean_consec_vertex_angles, interactions, dance_mvmt]).T)

    curr_dataset = pd.DataFrame(np.vstack(curr_dataset), columns=['LocID', 'MeanConsecDistance', 'MeanConsecVertAngle', 'Interaction', 'MovementClass'])
    # os.path.join avoids the doubled '/' that plain concatenation produced,
    # because data_dir already ends with a separator.
    curr_dataset.to_csv(os.path.join(data_dir, 'ClassificationDatasets', dataset_name + 'Dataset_v8.csv'), index=False)