In [43]:
import spacy
import textacy
import re
import numpy as np
import os.path as osp
from spacy.lemmatizer import Lemmatizer
from spacy.lang.en import LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES

file_list_path = '/mnt/Alfheim/Data/AMASS_30_SMPL/file_list.lst'
output_path = '/mnt/Alfheim/Data/AMASS_30_SMPL/amass_verb_list.csv'
nlp = spacy.load('en_core_web_lg')
lemmatizer = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES)

In [2]:
def extract_pos(filename):
    doc = nlp(filename)
    return [token.pos_ for token in doc]

def extract_lemma(filename):
    doc = nlp(filename)
    return [lemmatizer(token.text, token.pos_)[0] for token in doc]

def remove_one_char_tokens(sentence):
    return ' '.join([token for token in sentence.split() if len(token) > 1])

In [21]:
file_list

['ACCAD/Female1General_c3d/A1 - Stand_poses.npz',
 'ACCAD/Female1General_c3d/A10 - lie to crouch_poses.npz',
 'ACCAD/Female1General_c3d/A11 - crawl forward_poses.npz',
 'ACCAD/Female1General_c3d/A12 - crawl backwards_poses.npz',
 'ACCAD/Female1General_c3d/A14 - stand to skip_poses.npz',
 'ACCAD/Female1General_c3d/A15 - skip to stand_poses.npz',
 'ACCAD/Female1General_c3d/A2 - Sway t2_poses.npz',
 'ACCAD/Female1General_c3d/A2 - Sway_poses.npz',
 'ACCAD/Female1General_c3d/A3 - Swing t2_poses.npz',
 'ACCAD/Female1General_c3d/A3 - Swing_poses.npz',
 'ACCAD/Female1General_c3d/A4 - look_poses.npz',
 'ACCAD/Female1General_c3d/A5 - pick up box_poses.npz',
 'ACCAD/Female1General_c3d/A6 - lift box_poses.npz',
 'ACCAD/Female1General_c3d/A6- lift box t2_poses.npz',
 'ACCAD/Female1General_c3d/A7 - crouch_poses.npz',
 'ACCAD/Female1General_c3d/A8 - crouch to lie_poses.npz',
 'ACCAD/Female1General_c3d/A9 - lie t2_poses.npz',
 'ACCAD/Female1Gestures_c3d/D1 - Urban 1_poses.npz',
 'ACCAD/Female1Gestures

In [3]:
file_list = [l.replace('/mnt/Alfheim/Data/AMASS_30_SMPL/', '').strip() for l in open(file_list_path).readlines()]
filename_list = [osp.splitext(osp.basename(l.strip()))[0] for l in open(file_list_path).readlines()]
filename_list = [fn.lower() for fn in filename_list]
filename_list = [fn.replace('_', ' ') for fn in filename_list]
filename_list = [fn.replace('poses', '') for fn in filename_list]
filename_list = [re.sub(r'[^a-zA-Z ]', '', fn) for fn in filename_list]
filename_list = [re.sub(' +', ' ', fn) for fn in filename_list]
filename_list = [remove_one_char_tokens(fn) for fn in filename_list]
filename_list

['stand',
 'lie to crouch',
 'crawl forward',
 'crawl backwards',
 'stand to skip',
 'skip to stand',
 'sway',
 'sway',
 'swing',
 'swing',
 'look',
 'pick up box',
 'lift box',
 'lift box',
 'crouch',
 'crouch to lie',
 'lie',
 'urban',
 'wait',
 'conversation gestures',
 'random stuff',
 'cartwheel',
 'female subj calibration',
 'run backwards stop run forward',
 'run turn left',
 'run turn left',
 'run turn right',
 'run turn right',
 'run turn right',
 'run to hop to walk',
 'run to stand',
 'run to jump to walk',
 'run to pick up box',
 'run to pick up box',
 'run with box',
 'side step left',
 'side step right',
 'run to crouch',
 'crouch to run',
 'run',
 'run to walk',
 'walk to run',
 'stand to run backwards',
 'run backwards',
 'run backwards to stand',
 'run backwards turn run forward',
 'walk with box',
 'stand to walk',
 'walk turn left',
 'walk turn left',
 'walk turn right',
 'walk turn right',
 'walk turn right',
 'walk turn around same direction',
 'walk turn change di

In [4]:
to_count = 0
for fn in filename_list:
    tokens = fn.split()
    if 'to' in tokens:
        to_count += 1
        continue
print(f'TO found: {to_count}/{len(filename_list)}')

TO found: 216/13232


In [5]:
print('Extracting POS')
filename_pos = [extract_pos(fn) for fn in filename_list]

Extracting POS


In [6]:
print('Extracting lemmas')
filename_lemma = [extract_lemma(fn) for fn in filename_list]

Extracting lemmas


In [46]:
verb_list = list()
fragmented_verbs = ['walk', 'run', 'jump', 'turn', 'greet', 'fight', 'sit', 
                    'stand', 'pose', 'gesture', 'push', 'pull', 'kick', 
                    'jog', 'dance', 'stretch']

for fn, pos_list, lemma_list in zip(filename_list, filename_pos, filename_lemma):
    print(lemma_list, pos_list)
    fn_verbs = list()
    if pos_list.count('VERB') > 0:
        verb_idx = [i for i, pos in enumerate(pos_list) if pos == 'VERB']
        fn_verbs += [lemma_list[vi] for vi in verb_idx]
        
    if pos_list.count('NOUN') > 0:
        noun_idx = [i for i, pos in enumerate(pos_list) if pos == 'NOUN']
        for v in fragmented_verbs:
            for noun in [fn.split()[i] for i in noun_idx]:
                if v in noun:
                    fn_verbs.append(v)

    verb_list.append(list(set(fn_verbs)))

['stand'] ['VERB']
['lie', 'to', 'crouch'] ['VERB', 'PART', 'VERB']
['crawl', 'forward'] ['VERB', 'ADV']
['crawl', 'backwards'] ['VERB', 'ADV']
['stand', 'to', 'skip'] ['VERB', 'PART', 'VERB']
['skip', 'to', 'stand'] ['VERB', 'PART', 'VERB']
['sway'] ['VERB']
['sway'] ['VERB']
['swing'] ['NOUN']
['swing'] ['NOUN']
['look'] ['VERB']
['pick', 'up', 'box'] ['VERB', 'PART', 'NOUN']
['lift', 'box'] ['NOUN', 'NOUN']
['lift', 'box'] ['NOUN', 'NOUN']
['crouch'] ['VERB']
['crouch', 'to', 'lie'] ['VERB', 'PART', 'VERB']
['lie'] ['VERB']
['urban'] ['ADJ']
['wait'] ['VERB']
['conversation', 'gesture'] ['NOUN', 'NOUN']
['random', 'stuff'] ['ADJ', 'NOUN']
['cartwheel'] ['NOUN']
['female', 'subj', 'calibration'] ['ADJ', 'NOUN', 'NOUN']
['run', 'backwards', 'stop', 'run', 'forward'] ['VERB', 'ADV', 'VERB', 'VERB', 'ADV']
['run', 'turn', 'leave'] ['VERB', 'NOUN', 'VERB']
['run', 'turn', 'leave'] ['VERB', 'NOUN', 'VERB']
['run', 'turn', 'right'] ['VERB', 'NOUN', 'ADV']
['run', 'turn', 'right'] ['VERB', 

['lift', 'light'] ['VERB', 'NOUN']
['lift', 'light'] ['VERB', 'NOUN']
['lift', 'heavy'] ['VERB', 'ADJ']
['lift', 'heavy'] ['VERB', 'ADJ']
['catch', 'and', 'throw'] ['VERB', 'CCONJ', 'VERB']
['throw', 'hard'] ['VERB', 'ADV']
['throw', 'hard'] ['VERB', 'ADV']
['throw', 'hard'] ['VERB', 'ADV']
['kick'] ['VERB']
['kick'] ['VERB']
['jump'] ['VERB']
['jump'] ['VERB']
['scamper'] ['VERB']
['rom'] ['NOUN']
['treadmill', 'norm'] ['NOUN', 'NOUN']
['treadmill', 'fast'] ['NOUN', 'ADV']
['treadmill', 'slow'] ['NOUN', 'ADJ']
['treadmill', 'jog'] ['NOUN', 'NOUN']
['motorcycle'] ['NOUN']
['normal', 'walk'] ['ADJ', 'NOUN']
['normal', 'walk'] ['ADJ', 'NOUN']
['normal', 'walk'] ['ADJ', 'NOUN']
['normal', 'walk'] ['ADJ', 'NOUN']
['normal', 'jog'] ['ADJ', 'NOUN']
['normal', 'jog'] ['ADJ', 'NOUN']
['knock'] ['VERB']
['knock'] ['VERB']
['lift', 'light'] ['VERB', 'NOUN']
['lift', 'light'] ['VERB', 'NOUN']
['lift', 'heavy'] ['VERB', 'ADJ']
['lift', 'heavy'] ['VERB', 'ADJ']
['circle', 'walk'] ['NOUN', 'NOUN']
[

['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']
['subject'] ['ADJ']


[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] []
[] [

['experiment', 'subject', 'wash', 'leg', 'position', 'bigcircle'] ['NOUN', 'ADJ', 'NOUN', 'NOUN', 'NOUN', 'NOUN']
['experiment', 'subject', 'wash', 'leg', 'position', 'bigcircle'] ['NOUN', 'ADJ', 'NOUN', 'NOUN', 'NOUN', 'NOUN']
['experiment', 'subject', 'wash', 'leg', 'position', 'horizontal'] ['NOUN', 'ADJ', 'NOUN', 'NOUN', 'NOUN', 'ADJ']
['experiment', 'subject', 'wash', 'leg', 'position', 'horizontal'] ['NOUN', 'ADJ', 'NOUN', 'NOUN', 'NOUN', 'ADJ']
['experiment', 'subject', 'wash', 'leg', 'position', 'smallcircle'] ['NOUN', 'ADJ', 'NOUN', 'NOUN', 'NOUN', 'NOUN']
['experiment', 'subject', 'wash', 'leg', 'position', 'smallcircle'] ['NOUN', 'ADJ', 'NOUN', 'NOUN', 'NOUN', 'NOUN']
['experiment', 'subject', 'wash', 'leg', 'position', 'vertical'] ['NOUN', 'ADJ', 'NOUN', 'NOUN', 'NOUN', 'ADJ']
['experiment', 'subject', 'wash', 'leg', 'position', 'vertical'] ['NOUN', 'ADJ', 'NOUN', 'NOUN', 'NOUN', 'ADJ']
['experimenta', 'subject', 'back'] ['ADJ', 'NOUN', 'ADV']
['experimenta', 'subject', 'ba

['push', 'recovery', 'stand', 'back'] ['VERB', 'NOUN', 'VERB', 'PART']
['push', 'recovery', 'stand', 'back'] ['VERB', 'NOUN', 'VERB', 'PART']
['push', 'recovery', 'stand', 'back'] ['VERB', 'NOUN', 'VERB', 'PART']
['push', 'recovery', 'stand', 'back'] ['VERB', 'NOUN', 'VERB', 'PART']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['downstairs'] ['ADV']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['go', 'over', 'beam'] ['VERB', 'ADP', 'NOUN']
['upstairs'] ['A

['side', 'to', 'side', 'hop'] ['NOUN', 'ADP', 'NOUN', 'VERB']
['simple', 'crouch'] ['ADJ', 'NOUN']
['jump', 'jack'] ['VERB', 'NOUN']
['light', 'hop', 'loose'] ['ADJ', 'VERB', 'ADJ']
['light', 'hop', 'stiff'] ['ADJ', 'VERB', 'ADJ']
['misc'] ['NOUN']
['stretch'] ['NOUN']
['jump', 'jack'] ['VERB', 'NOUN']
['light', 'hop', 'loose'] ['ADJ', 'VERB', 'ADJ']
['light', 'hop', 'stiff'] ['ADJ', 'VERB', 'ADJ']
['misc', 'dancing', 'hiphop'] ['NOUN', 'NOUN', 'NOUN']
['stretch'] ['NOUN']
['feetjump'] ['NOUN']
['backwardswalk'] ['NOUN']
['jog'] ['VERB']
['jumprope'] ['NOUN']
['sideskip'] ['PROPN']
['slowtrot'] ['NOUN']
['stomp'] ['VERB']
['walk'] ['VERB']
['balance'] ['NOUN']
['cartwheel'] ['NOUN']
['crawl'] ['VERB']
['walk'] ['VERB']
['chacha'] ['NOUN']
['skip'] ['VERB']
['walk'] ['VERB']
['walk'] ['VERB']
['yoga'] ['NOUN']
['jumpandroll'] ['NOUN']
['speedvault'] ['NOUN']
['speedvault'] ['NOUN']
['basickendo'] ['VERB']
['hopoverobstacle'] ['X']
['jumpoverobstacle'] ['NOUN']
['kendokata'] ['NOUN']
['k

In [47]:
output_lines = []
for path, fn, verbs in zip(file_list, filename_list, verb_list):
    output_lines.append(f'{path},{fn},{";".join(verbs)}')

with open(output_path, 'w') as output_file:
    output_file.write('\n'.join(output_lines))