In [1]:
import os
import pandas as pd
import numpy as np
import networkx as nx
from collections import defaultdict
from pprint import pformat

In [2]:
# Locations of the tab-separated move and video tables.
move_path = '/media/ch3njus/Seagate4TB/research/parkourtheory/data/database/latest/moves.tsv'
videos_path = '/media/ch3njus/Seagate4TB/research/parkourtheory/data/database/latest/videos.tsv'

# Both tables are read with `id` forced to int; later cells merge them on that column.
moves = pd.read_csv(move_path, sep='\t', dtype={'id': int})
clips = pd.read_csv(videos_path, sep='\t', dtype={'id': int})

In [None]:
def label_dist(df, single=True):
    """Count how often each label occurs in the ``type`` column of ``df``.

    Rows whose ``type`` value is not a string (for example a missing value)
    are ignored.

    Args:
        df: DataFrame with a ``type`` column.
        single: When True, a value such as ``'A/B'`` is split on ``'/'`` and
            every part is counted on its own. When False, the whole value is
            counted as one label.

    Returns:
        A ``defaultdict(int)`` mapping label to number of occurrences.
    """
    counts = defaultdict(int)

    for _, record in df.iterrows():
        label = record['type']
        if not isinstance(label, str):
            continue

        # Either the individual components or the untouched combined label.
        parts = label.split('/') if single else [label]
        for part in parts:
            counts[part] += 1

    return counts

In [None]:
def no_prereq(df):
    """Return the rows of ``df`` whose ``prereq`` value is missing."""
    missing_prereq = df['prereq'].isna()
    return df[missing_prereq]

In [None]:
def no_subseq(df):
    """Return the rows of ``df`` whose ``subseq`` value is missing."""
    missing_subseq = df['subseq'].isna()
    return df[missing_subseq]

In [None]:
def dataframe_to_edges(df, key, cols, delim=''):
    """Yield ``(source, target)`` pairs built from delimited string columns.

    For every row, the value in column ``key`` is the source. Each column in
    ``cols`` that holds a string is split on ``delim`` and every resulting
    piece is yielded as a target. Non-string cells (e.g. missing values) are
    skipped.

    Args:
        df: DataFrame to read rows from.
        key: Name of the column holding the source value.
        cols: Iterable of column names holding delimited target strings.
        delim: Separator passed to ``str.split``. The empty string (the
            default) means "do not split": the whole cell is one target.

    Yields:
        Tuples ``(source, target)``.
    """
    for _, row in df.iterrows():
        src = row[key]
        for col in cols:
            cell = row[col]
            if not isinstance(cell, str):
                continue

            # str.split('') raises ValueError("empty separator"), so an empty
            # delimiter is treated as "keep the cell whole" instead.
            targets = [cell] if delim == '' else cell.split(delim)
            for dst in targets:
                yield (src, dst)

In [None]:
def dataframe_to_graph(df):
    """Build an undirected graph of moves from ``df``.

    Edges link each row's ``name`` to every entry of its ``', '``-separated
    ``prereq`` and ``subseq`` columns. Rows where both columns are missing
    contribute no edges of their own, so their names are added as nodes
    explicitly.

    Args:
        df: DataFrame with ``name``, ``prereq`` and ``subseq`` columns.

    Returns:
        The resulting ``networkx.Graph``.
    """
    graph = nx.Graph(dataframe_to_edges(df, 'name', ['prereq', 'subseq'], ', '))

    # Rows with neither a prerequisite nor a subsequent move.
    isolated = no_subseq(no_prereq(df))
    for _, move in isolated.iterrows():
        graph.add_node(move['name'])

    return graph

In [None]:
# sl: counts with combined types split on '/'; ml: counts of the combined type strings as written.
sl = label_dist(moves, single=True)
ml = label_dist(moves, single=False)
# Disabled sort of the counts, kept for reference:
# multiclass = [(k, v) for k, v in sorted(dist.items(), key=lambda item: item[1], reverse=True)]
print(f'multi-label: {len(ml)}\tsingle-label: {len(sl)}')

In [None]:
# Build the graph from `moves`. The previous argument `df` is only assigned in
# a later cell, so a fresh top-to-bottom run failed here with NameError.
# NOTE(review): assumes `name`, `prereq` and `subseq` are columns of moves.tsv — confirm.
G = dataframe_to_graph(moves)

In [None]:
# Number of nodes in the graph.
G.number_of_nodes()

In [None]:
# Number of edges in the graph.
G.number_of_edges()

In [4]:
# Join moves with their clips on the shared `id`, then show rows whose `embed` is '0:00'.
df = moves.merge(clips, on='id')
df[df['embed'] == '0:00']

Unnamed: 0,id,name,prereq,subseq,type,alias,description,title,channel,link,time,embed
228,229,Inward Flyaway Catch,Inward Flyaway,,Bar/Flip,Madrid,An Inward Flyaway to bar catch.,Inward Flyaway lache aka tkachev lache aka the...,),Erica Madrid,https://www.youtube.com/watch?v=eOXU5Ee-_Aw,0:00


In [None]:
# Rename each clip file after its move name (spaces -> underscores, lower-cased,
# '.mp4' appended) and store the new file name in `embed`.
df = pd.merge(moves, clips, on='id')
move_headers = moves.head()
video_dir = '/media/ch3njus/Seagate4TB/research/parkourtheory/data/videos/production/'

for i, row in df.iterrows():
    curr_fn = os.path.join(video_dir, row['embed'])
    new_embed = row['name'].replace(' ', '_').lower()+'.mp4'
    new_fn = os.path.join(video_dir, new_embed)

    if row['embed'] != 'unavailable.mp4' and curr_fn != new_fn:
        try:
            # Never overwrite a file that already carries the target name.
            if not os.path.exists(new_fn):
                os.rename(curr_fn, new_fn)
        except FileNotFoundError:
            # The source clip is missing: print it as a checklist entry and continue.
            print(f'- [ ] {row["embed"]}')

    # `embed` is updated for every row, whether or not a file was renamed.
    # NOTE(review): this also replaces 'unavailable.mp4' placeholders — confirm that is intended.
    df.at[i, 'embed'] = new_embed

# Drop the columns that came from `moves`, explicitly by label. Passing the
# DataFrame itself as labels only worked because pandas iterates a DataFrame
# as its column names.
df = df.drop(columns=move_headers.columns)

In [None]:
# Show the rows whose `embed` is this specific file name.
df[df['embed'] == 'devil_drop_frisbee_full-down.mp4']