In [1]:
import os
import pandas as pd
import numpy as np
import networkx as nx
from collections import defaultdict
from pprint import pformat

In [2]:
# Absolute locations of the two CSV exports read below.
move_path = '/media/ch3njus/Seagate4TB/research/parkourtheory/data/database/latest/moves.csv'
videos_path = '/media/ch3njus/Seagate4TB/research/parkourtheory/data/database/latest/videos.csv'

# Load both tables the same way, forcing the `id` column to int in each.
moves, clips = (
    pd.read_csv(csv_path, dtype={'id': int})
    for csv_path in (move_path, videos_path)
)

In [None]:
def label_dist(df, single=True):
    """Tally how often each label in the ``type`` column occurs.

    Args:
        df: DataFrame with a ``type`` column.
        single: When True, every ``type`` string is split on '/' and each
            piece is counted on its own. When False, the whole string is
            counted as one label.

    Returns:
        A ``defaultdict(int)`` mapping label -> number of occurrences.
        Rows whose ``type`` is not a ``str`` are ignored.
    """
    counts = defaultdict(int)

    for label in df['type']:
        # Skip anything that is not a str (e.g. a missing value).
        if not isinstance(label, str):
            continue

        pieces = label.split('/') if single else [label]
        for piece in pieces:
            counts[piece] += 1

    return counts

In [None]:
def no_prereq(df):
    """Return the rows of ``df`` whose ``prereq`` value is null."""
    missing_prereq = df['prereq'].isnull()
    return df[missing_prereq]

In [None]:
def no_subseq(df):
    """Return the rows of ``df`` whose ``subseq`` value is null."""
    missing_subseq = df['subseq'].isnull()
    return df[missing_subseq]

In [None]:
def dataframe_to_edges(df, key, cols, delim=''):
    """Yield ``(source, target)`` pairs read from delimited string cells.

    For every row, the value in column ``key`` is the source. Each column in
    ``cols`` whose cell is a ``str`` is split on ``delim`` and every piece is
    yielded as a target. Cells that are not strings (e.g. missing values) are
    skipped.

    Args:
        df: DataFrame to read.
        key: Name of the column holding the source of each edge.
        cols: Iterable of column names whose cells list the targets.
        delim: Separator between targets inside a cell. An empty string
            means "do not split": the whole cell is one target.

    Yields:
        Tuples ``(source, target)``, row by row, in the order of ``cols``.
    """
    for _, row in df.iterrows():
        src = row[key]
        for col in cols:
            cell = row[col]
            if not isinstance(cell, str):
                continue
            # str.split('') raises ValueError ("empty separator"), so the
            # default delimiter must be handled without calling split.
            targets = [cell] if delim == '' else cell.split(delim)
            for dst in targets:
                yield (src, dst)

In [None]:
def dataframe_to_graph(df):
    """Build an undirected networkx graph from ``df``.

    Each row's ``name`` is linked to every entry listed in its ``prereq``
    and ``subseq`` cells (entries separated by ', '). Rows that have neither
    a prereq nor a subseq produce no edges, so they are added explicitly as
    isolated nodes.

    Returns:
        The resulting ``nx.Graph``.
    """
    G = nx.Graph(dataframe_to_edges(df, 'name', ['prereq', 'subseq'], ', '))

    # Rows with no prereq AND no subseq never appear in an edge.
    isolated = no_subseq(no_prereq(df))
    G.add_nodes_from(record['name'] for _, record in isolated.iterrows())

    return G

In [None]:
# sl: each '/'-separated piece of `type` counted on its own.
# ml: each full `type` string counted as one combined label.
sl = label_dist(moves, single=True)
ml = label_dist(moves, single=False)
# To rank labels by frequency:
#   sorted(ml.items(), key=lambda item: item[1], reverse=True)
print(f'multi-label: {len(ml)}\tsingle-label: {len(sl)}')

In [None]:
# Build the graph from `moves`. `df` is only created in a later cell (so this
# cell fails on a fresh kernel), and by the end of that cell `df` no longer
# has the `name` column that dataframe_to_graph reads.
# NOTE(review): assumes `prereq` and `subseq` are columns of moves.csv — confirm.
G = dataframe_to_graph(moves)

In [None]:
# Number of nodes in the graph.
G.number_of_nodes()

In [None]:
# Number of edges in the graph.
G.number_of_edges()

In [4]:
# Rename each clip file on disk to a name derived from its move name, and
# point the `embed` column at the new file name.
# WARNING: this cell renames files under `video_dir` as a side effect.
df = pd.merge(moves, clips, on='id')
move_headers = moves.head()
video_dir = '/media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/'

for i, row in df.iterrows():
    curr_fn = os.path.join(video_dir, row['embed'])
    # New file name: the move name, spaces -> underscores, lower-cased.
    new_embed = row['name'].replace(' ', '_').lower()+'.mp4'
    new_fn = os.path.join(video_dir, new_embed)

    # Rows marked 'unavailable.mp4' are never renamed on disk (presumably a
    # placeholder for moves without a clip — confirm), nor are files that
    # already carry the target name.
    if row['embed'] != 'unavailable.mp4' and curr_fn != new_fn:
        try:
            os.rename(curr_fn, new_fn)
        except FileNotFoundError:
            # Best effort: report the clip whose source file is missing and
            # carry on. (The original nested single quotes inside a
            # single-quoted f-string, a SyntaxError before Python 3.12.)
            print(row['embed'])

    # NOTE(review): `embed` is rewritten even for 'unavailable.mp4' rows and
    # for rows whose rename failed — confirm this is intended.
    df.at[i, 'embed'] = new_embed

# Drop every column that came from `moves`. The original passed the whole
# `move_headers` DataFrame as labels, which only worked because iterating a
# DataFrame yields its column names.
df = df.drop(columns=move_headers.columns)
df

curr: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/morph_bomb.mp4	 new: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/360_dive_roll_bomb.mp4
curr: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/540_morph_bomb.mp4	 new: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/540_dive_roll_bomb.mp4
curr: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/sloppy_joe.mp4	 new: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/270_dive_side_sumi.mp4
curr: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/dive_front.mp4	 new: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/dive_front_flip.mp4
curr: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/trinity.mp4	 new: /media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/trinity_flip.mp4
curr: /media/ch3njus/Seagate4TB/resear

Unnamed: 0,vid,channel,link,time,embed
0,Parkour Documentary: People in Motion,Cedric Dahl,https://www.youtube.com/watch?v=QH09YCtpKaw,25:45:00,shoulder_roll.mp4
1,"We'll Be There Someday, Somewhere (Parkour & F...",GravityNinjaPsych,https://www.youtube.com/watch?v=Hb9M0s1BYPU,5:19,dive_roll.mp4
2,Trust in Dustin,Hub Freerunning,https://www.youtube.com/watch?v=S1Ky8uPDPvc,0:14,360_dive_roll.mp4
3,InCircus - Standing 540 Diveroll,Aaron Hakala,https://www.youtube.com/watch?v=OhwUptczRwQ,0:00,540_dive_roll.mp4
4,Aaron Hakala - 2012 [OFFICIAL],Aaron Hakala,https://www.youtube.com/watch?v=AlMZEUDvfPU,3:22,720_dive_roll.mp4
...,...,...,...,...,...
1084,Instagram,nicoflow12,https://www.instagram.com/p/CAa8DB5IFOP/,0:00,wall_feilong.mp4
1085,,,,0:00,wall_pistol_side.mp4
1086,MORE MUSCLE MEMORY *FLIP COMPILATION*,Dominick Hughes,https://www.youtube.com/watch?v=z3h2LJai26U,3:33,skin_the_catch_in_back-out.mp4
1087,,,,0:00,splat.mp4
