# Sign selection - DO NOT UPLOAD TO FINAL REPO

This notebook finds which videos contain a specific sign. This is useful when we want to check sign spotting on a specific video and need a target sign to use as an example.

In [1]:
# Imports
import importlib
import os
import pickle

import pandas as pd
import pympi

# Keep python tools up to date
from tools import tools, constants
importlib.reload(tools)
importlib.reload(constants)

# Import all functions from the tools
from tools.tools import*
from tools.constants import PATHS # Path constants

In [2]:
# Root where all the annotated .eaf sign files are present
dataset_root = PATHS['cngt_vids_and_eaf']

# Location of the cached annotations for the whole dataset
dataset_anns_path = PATHS['dataset_anns']

# List the .eaf files in the root directory to investigate.
# os.listdir returns entries in arbitrary order, so sort them: this keeps the
# processing order (and the key order of the dict built below) reproducible.
anns_in_dir = sorted(file for file in os.listdir(dataset_root) if file.endswith('.eaf'))

if os.path.exists(dataset_anns_path):
    # A cache already exists: reuse it instead of re-parsing every .eaf file
    print('Loading annotations...')
    anns_with_tiers = load_dict(dataset_anns_path)
else:
    print('Making annotations without manual simultaneity...')
    # Maps .eaf file name -> annotations extracted from that file
    anns_with_tiers = {}
    for i, ann_file in enumerate(anns_in_dir):
        # Overwrite the same console line with the index of the current file
        print(i, end = '\r')
        # Read in the Eaf file
        eaf_file = pympi.Elan.Eaf(os.path.join(dataset_root, ann_file))

        # Get the glosses and mouthings of the file (second return value unused).
        # NOTE(review): the meaning of the positional `True` flag is defined in
        # tools.get_gloss_vals and is not visible here - confirm.
        anns_dict, _ = get_gloss_vals(eaf_file, True)
        # NOTE(review): presumably post-processes the annotations with manual
        # simultaneity disabled, matching the message printed above - confirm.
        anns_dict = man_sim_and_hand_dist(anns_dict, manual_sim = False)

        # Store the glosses, mouthings and tiers
        anns_with_tiers[ann_file] = anns_dict
    print('Storing...')
    # Pickle the result so the next run takes the loading branch above
    with open(dataset_anns_path, 'wb') as f:
        pickle.dump(anns_with_tiers, f)

# Signbank dictionary info
df = pd.read_csv(PATHS['signbank_dictionary_info'])

# Split ids, keyed by split name (consumed when building the train/test lists)
id_split = load_dict(PATHS['CNGT_split_ids'])

Loading annotations...


In [3]:
# Collect the annotation file names that belong to the train and test sets.
# The ids in id_split are stored as CNGTyyyy_Sxxx; reversing the '_'-separated
# parts and appending '.eaf' gives Sxxx_CNGTyyyy.eaf, which matches the names
# of the annotation (.eaf) files.
train_ids, test_ids = [], []
for split_name, split_members in id_split.items():
    # De-duplicate first, then sort so the resulting list is ordered
    eaf_names = {'_'.join(reversed(member.split('_'))) + '.eaf' for member in split_members}
    ordered_names = sorted(eaf_names)
    # Only 'Train' is recognised explicitly; any other key is stored as the test set
    if split_name != 'Train':
        test_ids = ordered_names
    else:
        train_ids = ordered_names

In [4]:
# For a target gloss, we find all videos in the test set that contain it
# So we can use one of those videos for the sliding window during sign spotting
target_gloss = 'NU-A'

# test_ids is a list; convert it once so each membership check is a hash lookup
test_id_set = set(test_ids)

for video_id, gloss_dict in anns_with_tiers.items():
    # Look the gloss up directly instead of scanning every gloss of the video
    if video_id in test_id_set and target_gloss in gloss_dict:
        # In the recorded output each entry is a list of (start, end, tier) tuples
        print(video_id, target_gloss, gloss_dict[target_gloss])

S011_CNGT0215.eaf NU-A [(75800, 76840, 'GlossR S1'), (83480, 84480, 'GlossR S1')]
S011_CNGT0217.eaf NU-A [(5480, 6040, 'GlossR S1'), (21920, 22400, 'GlossR S1')]
S011_CNGT0223.eaf NU-A [(138260, 138590, 'GlossR S1'), (145930, 146110, 'GlossR S1'), (251600, 251680, 'GlossR S1'), (252360, 252640, 'GlossR S1')]
S012_CNGT0208.eaf NU-A [(88200, 88360, 'GlossR S2')]
S012_CNGT0211.eaf NU-A [(36400, 36520, 'GlossR S2')]
S012_CNGT0215.eaf NU-A [(65760, 65920, 'GlossR S2'), (69560, 69840, 'GlossR S2'), (114120, 114200, 'GlossR S2'), (114560, 114640, 'GlossR S2'), (352720, 352880, 'GlossR S2')]
S012_CNGT0217.eaf NU-A [(21080, 21200, 'GlossR S2'), (70040, 70440, 'GlossR S2'), (71160, 71480, 'GlossR S2'), (74640, 74840, 'GlossR S2'), (76400, 76480, 'GlossR S2'), (76800, 76880, 'GlossR S2')]
S019_CNGT0386.eaf NU-A [(124160, 124280, 'GlossL S2'), (105160, 105280, 'GlossR S2')]
S019_CNGT0390.eaf NU-A [(106360, 106480, 'GlossR S2')]
S020_CNGT0370.eaf NU-A [(46520, 46840, 'GlossR S1')]
S020_CNGT0387.eaf