This file filters the test split of the MoMask T2M dataset based on positive keywords (= keep) and negative keywords (= drop). Make sure that the T2M dataset is stored exactly as described in the MoMask README.

The file writes a new test split file named "test_<test_file_name>_filtered.txt" into the dataset directory; it can be used in place of the original T2M "test.txt" for evaluation.

In [None]:
# import statements
import os
from tqdm import tqdm

In [None]:
# Configuration: set the project root below, then uncomment the keyword group
# (test_file_name plus its positive/negative keyword lists) you want to use.
# If several groups are left active, the last assignment in this cell wins.
PROJECT_ROOT_DIR = "/Users/annel/Documents/Github Repositories/DigitalHumans" # the project root directory

# left-right specification:
# test_file_name = "left_right"
# positive_key_word_list = ["left", "right"]
# negative_key_word_list = []

# emotions:
# test_file_name = "emotions"
# positive_key_word_list = ["happy", "sad", "angry", "fearful", "surprised", "disgusted", "anxious", "confident", "jealous", "proud", "excited", "bored", "love", "hopeful", "guilty", "relieved", "ashamed", "embarrassed", "content", "frustrated", "happily", "sadly", "angrily", "fearfully", "surprisingly", "disgustedly", "anxiously", "confidently", "jealously", "proudly", "excitedly", "boredly", "lovingly", "hopefully", "guiltily", "relievedly", "ashamedly", "embarrassingly", "contentedly", "frustratedly"]
# negative_key_word_list = []

# adjectives:
# test_file_name = "adjectives"
# positive_key_word_list = ["fast", "slow", "graceful", "clumsy", "steady", "erratic", "quick", "lethargic", "smooth", "rough", "agile", "awkward", "nimble", "brisk", "sluggish", "hurried", "relaxed", "sprightly", "jerky", "fluid", "deliberate", "hasty", "cautious", "vigorous", "lively", "mechanical", "frenzied", "graceful", "haphazard", "energetic", "purposeful", "tentative",  "slowly", "gracefully", "clumsily", "steadily", "erratically", "quickly", "lethargically", "smoothly", "roughly", "agilely", "awkwardly", "nimbly", "briskly", "sluggishly", "hurriedly", "relaxedly", "sprightly", "jerkily", "fluidly", "deliberately", "hastily", "cautiously", "vigorously", "lively", "mechanically", "frenziedly", "gracefully", "haphazardly", "energetically", "purposefully", "tentatively"]
# negative_key_word_list = []

# limbs:
# test_file_name = "limbs"
# positive_key_word_list = ["head", "torso", "arm", "leg", "hand", "foot", "finger", "toe", "thumb", "wrist", "ankle", "elbow", "knee", "forearm", "upper arm", "shin", "calf", "thigh", "palm", "heel", "shoulder", "hip", "neck", "chest", "back", "abdomen", "waist", "groin", "buttocks", "nape", "scalp", "forehead", "cheek", "chin", "jaw", "nose", "mouth", "ear", "eye", "eyebrow", "eyelid", "lip", "tongue"]
# negative_key_word_list = []

# High level Yoga poses:
# test_file_name = "yoga_low_level"
# positive_key_word_list = ["Mountain", "Downward-Facing Dog", "Warrior", "Tree", "Child", "Seated Forward Bend", "Bridge", "Triangle", "Cobra", "Cat-Cow", "Plank", "Boat", "Camel", "Chair", "Corpse", "Crow", "Eagle", "Fish", "Half Moon", "Headstand", "Pigeon", "Shoulder Stand", "Side Plank", "Sphinx", "Supine Twist", "Upward-Facing Dog", "Wild Thing", "Wheel"]
# negative_key_word_list = []#["walking", "walks", "running", "runs", "sitting", "sits", "standing", "stands", "bending", "bends", "reaching", "reaches", "lifting", "lifts", "pushing", "pushes", "pulling", "pulls", "turning", "turns", "twisting", "twists", "stretching", "stretches"]

# High-level dance terms (the currently active selection):
test_file_name = "dance_low_level"
positive_key_word_list = [
    "Moonwalk", "Pirouette", "Plié", "Jete", "Sashay", "Shimmy",
    "Tango", "Waltz", "Salsa", "Breakdance", "Cha-Cha", "Foxtrot",
    "Rumba", "Lindy Hop", "Quickstep", "Pas de Deux", "Arabesque", "Grand Jeté",
    "Chassé", "Fouetté", "Pas de Bourrée", "Grand Battement", "Tendu", "Glissade",
    "Cabriole", "Balloné", "Mazurka", "Fandango", "Bolero", "Twist",
]
# Nothing is excluded for this selection. Candidate negative keywords (generic motion verbs), kept for reference:
# ["walking", "walks", "running", "runs", "sitting", "sits", "standing", "stands", "bending", "bends", "reaching", "reaches", "lifting", "lifts", "pushing", "pushes", "pulling", "pulls", "turning", "turns", "twisting", "twists", "stretching", "stretches"]
negative_key_word_list = []



# Dataset locations derived from the project root. Make sure these paths exist!
dataset_path = os.path.join(
    PROJECT_ROOT_DIR,
    "external_repos",
    "momask-codes",
    "dataset",
    "HumanML3D",
)
test_txt_file_path, texts_file_path = (
    os.path.join(dataset_path, leaf) for leaf in ("test.txt", "texts")
)

In [None]:
# The dataset is expected to contain lower-case words only, so normalise
# every keyword to lower case before matching.
positive_key_word_list = list(map(str.lower, positive_key_word_list))
negative_key_word_list = list(map(str.lower, negative_key_word_list))

In [None]:
# Filter all test files
#
# A sample id is kept when at least one of its captions contains a positive
# keyword and none of its captions contains a negative keyword.
# NOTE: matching is plain substring containment on the lower-cased caption,
# so a keyword such as "twist" also matches "twists" and "twisting".
with open(test_txt_file_path, 'r', encoding="utf-8") as f:
    stripped_ids = (line.strip() for line in f)
    # Skip blank lines (e.g. a trailing empty line) so they are not looked up
    # as ".txt" and miscounted as missing files.
    old_id_list = [sample_id for sample_id in stripped_ids if sample_id]

new_id_list = []
file_not_found_counter = 0
for name in tqdm(old_id_list):
    positive = False
    negative = False
    try:
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(os.path.join(texts_file_path, name + '.txt'), encoding="utf-8") as f:
            for line in f:
                # Only the text before the first '#' is treated as the caption.
                # Lower-case it so matching does not depend on the file's casing.
                caption = line.strip().split('#')[0].lower()
                if not positive and any(word in caption for word in positive_key_word_list):
                    positive = True
                if any(word in caption for word in negative_key_word_list):
                    # A single negative hit rejects the sample; no need to read on.
                    negative = True
                    break
    except FileNotFoundError:
        # Ids listed in test.txt without a caption file are counted and skipped.
        file_not_found_counter += 1
        continue
    if positive and not negative:
        new_id_list.append(name)

print("Finished")
print("files not found: ", file_not_found_counter)
print("total old test files: ", len(old_id_list)-file_not_found_counter)
print("total new test files: ", len(new_id_list))

In [None]:
# Save new test file
#
# Build "test_<name>_filtered.txt" only once: when this cell is re-run,
# test_file_name already holds the full file name and must not be wrapped a
# second time, otherwise the overwrite protection below would be bypassed by
# an ever-growing file name.
_name_prefix = "test_"
_name_suffix = "_filtered" + ".txt"
if not (test_file_name.startswith(_name_prefix) and test_file_name.endswith(_name_suffix)):
    test_file_name = _name_prefix + test_file_name + _name_suffix
test_file_path = os.path.join(dataset_path, test_file_name)

# Mode 'x' creates the file exclusively and fails if it already exists, so an
# existing filtered split is never overwritten (no check-then-write race).
try:
    with open(test_file_path, 'x', encoding="utf-8") as f:
        f.writelines(name + '\n' for name in new_id_list)
except FileExistsError as err:
    raise FileExistsError("File already exists! Rename test_file_name to proceed with saving.") from err
