In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

%matplotlib inline

In [2]:
# read in csv.
dfold = pd.read_csv("blake.csv")
df = dfold
dfold.head()

Unnamed: 0,clip_index,trick_index,trick,obstacle,obstacle_detailer,slowmo,bank,switch,line,video,year
0,1,1,ollie,gap,,1,0,0,0,Programming Injection,2019
1,2,2,sw bs kickflip,gap,bump to gap,1,1,1,0,Programming Injection,2019
2,3,2,sw bs kickflip,gap,bump to gap,1,1,1,0,Programming Injection,2019
3,4,3,fs heelflip,flat,,0,0,0,1,Programming Injection,2019
4,4,4,fakie fs noseslide to bigspin,ledge,,0,0,0,1,Programming Injection,2019


In [3]:
df.columns = ['clip_index', 'trick_index', 'trick', 'obstacle',
             'obstacle_detailer', 'slowmo','bank','switch', 'line', 'video', 'year']

In [4]:
df = df[['clip_index', 'trick_index', 'trick', 'switch', 'obstacle',
             'obstacle_detailer', 'slowmo', 'line', 'bank', 'video', 'year']]
df.head()

Unnamed: 0,clip_index,trick_index,trick,switch,obstacle,obstacle_detailer,slowmo,line,bank,video,year
0,1,1,ollie,0,gap,,1,0,0,Programming Injection,2019
1,2,2,sw bs kickflip,1,gap,bump to gap,1,0,1,Programming Injection,2019
2,3,2,sw bs kickflip,1,gap,bump to gap,1,0,1,Programming Injection,2019
3,4,3,fs heelflip,0,flat,,0,1,0,Programming Injection,2019
4,4,4,fakie fs noseslide to bigspin,0,ledge,,0,1,0,Programming Injection,2019


In [5]:
# coerce columns to numeric
for col in ['switch', 'line', 'slowmo', 'bank']:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

# Create percentage data for waffle plot

In [6]:
# total num_tricks 
num_tricks = pd.DataFrame(df.groupby('video')['trick_index'].max()).reset_index()
num_tricks.columns = ['video', 'num_tricks']

# total num clips
num_clips = pd.DataFrame(df.groupby('video')['clip_index'].max()).reset_index()
num_clips.columns = ['video', 'num_clips']

# total slowmo clips
num_slowmo = pd.DataFrame(df.groupby('video').sum()['slowmo']).reset_index()
num_slowmo.columns = ['video', 'num_slowmo']


# total switch/nollie tricks
num_switch = pd.DataFrame(df.groupby('video').sum()['switch']).reset_index()
num_switch.columns = ['video', 'num_switch']

# total distinct tricks
num_distinct = pd.DataFrame(df.groupby('video').nunique()['trick']).reset_index()
num_distinct.columns = ['video', 'num_distinct']

# total fs tricks
num_fs = pd.DataFrame(df[['trick', 'video', 'trick_index', 'obstacle']].drop_duplicates().groupby(['video'])['trick'].apply(lambda x: x[x.str.contains('fs ')].count())).reset_index()
num_fs.columns = ['video', 'num_fs_tricks']
    
# total bs tricks
num_bs = pd.DataFrame(df[['trick', 'video', 'trick_index', 'obstacle']].drop_duplicates().groupby(['video'])['trick'].apply(lambda x: x[x.str.contains('bs ')].count())).reset_index()
num_bs.columns = ['video', 'num_bs_tricks']

df_stats = df.merge(num_tricks).merge(num_clips).merge(num_slowmo).merge(num_switch).merge(num_distinct).merge(num_fs).merge(num_bs)


vids = df.groupby('video')

# percent stair
perc_stair = pd.DataFrame(vids['obstacle'].agg(lambda x: (len(x[x=='stair']) + len(x[x=='gap']) ) / len(x)))
perc_stair = perc_stair.reset_index()
perc_stair.columns = ['video', 'perc_stair']

# percent ledge
perc_ledge = pd.DataFrame(vids['obstacle'].agg(lambda x: (len(x[x=='ledge'])) / len(x)))
perc_ledge = perc_ledge.reset_index()
perc_ledge.columns = ['video', 'perc_ledge']

# percent manual
perc_manual = pd.DataFrame(vids['obstacle'].agg(lambda x: (len(x[x=='manual'])) / len(x)))
perc_manual = perc_manual.reset_index()
perc_manual.columns = ['video', 'perc_manual']

# percent transition
perc_transition = pd.DataFrame(vids['obstacle'].agg(lambda x: (len(x[x=='transition'])) / len(x)))
perc_transition = perc_transition.reset_index()
perc_transition.columns = ['video', 'perc_transition']

# percent flat
perc_flat = pd.DataFrame(vids['obstacle'].agg(lambda x: (len(x[x=='flat'])) / len(x)))
perc_flat = perc_flat.reset_index()
perc_flat.columns = ['video', 'perc_flat']

# percent handrail
perc_handrail = pd.DataFrame(vids['obstacle'].agg(lambda x: (len(x[x=='handrail'])) / len(x)))
perc_handrail = perc_handrail.reset_index()
perc_handrail.columns = ['video', 'perc_handrail']



In [7]:
new_df = df
for s in [num_tricks, num_clips, num_slowmo, num_switch, num_distinct, num_fs, num_bs,
         perc_stair, perc_ledge, perc_manual, perc_transition, perc_flat, perc_handrail]:
    new_df = new_df.merge(s)

In [8]:
cols_to_keep = ['video', 'num_tricks', 'num_clips', 'num_slowmo', 'num_switch',
       'num_distinct', 'num_fs_tricks', 'num_bs_tricks',
       'perc_stair', 'perc_ledge', 'perc_manual', 'perc_transition',
     'perc_handrail', 'perc_flat']

In [9]:
stats_df = new_df[cols_to_keep].drop_duplicates().reset_index().drop(columns='index')

In [10]:
stats_df['perc_distinct'] = stats_df.apply(lambda x: x['num_distinct'] / x['num_tricks'], axis=1)

stats_df['perc_slowmo'] = stats_df.apply(lambda x: x['num_slowmo'] / x['num_tricks'], axis=1)

stats_df['perc_switch'] = stats_df.apply(lambda x: x['num_switch'] / x['num_tricks'], axis=1)

stats_df['perc_fs'] = stats_df.apply(lambda x: x['num_fs_tricks'] / x['num_tricks'], axis=1)

stats_df['perc_bs'] = stats_df.apply(lambda x: x['num_bs_tricks'] / x['num_tricks'], axis=1)


In [11]:
stats_df = stats_df.drop(columns=['num_tricks', 'num_clips', 'num_slowmo', 'num_switch', 'num_distinct', 
                       'num_fs_tricks', 'num_bs_tricks'])


stats_df['video'] = stats_df.video.str.replace(' - ', '_', regex=False).str.replace(' ', '_', regex=False).str.replace('é', 'e', regex=False).str.replace('-', '_', regex=False).str.replace('.', '', regex=False)

for col in ['perc_stair', 'perc_ledge', 'perc_manual', 'perc_transition',
     'perc_handrail', 'perc_distinct',
       'perc_slowmo', 'perc_switch', 'perc_fs', 'perc_bs', 'perc_flat']:
    stats_df[col] = round(stats_df[col] * 100)

In [12]:
stats_df

Unnamed: 0,video,perc_stair,perc_ledge,perc_manual,perc_transition,perc_handrail,perc_flat,perc_distinct,perc_slowmo,perc_switch,perc_fs,perc_bs
0,Programming_Injection,26.0,5.0,0.0,0.0,36.0,12.0,80.0,20.0,66.0,34.0,46.0
1,Push_Part,10.0,7.0,0.0,0.0,39.0,2.0,89.0,29.0,32.0,45.0,45.0
2,Daylando,19.0,10.0,0.0,0.0,27.0,14.0,85.0,40.0,56.0,42.0,50.0
3,Cosmic_Vomit_2,31.0,5.0,0.0,0.0,28.0,8.0,91.0,17.0,57.0,40.0,43.0
4,Trust_Fall,35.0,16.0,0.0,0.0,26.0,3.0,97.0,10.0,60.0,37.0,37.0
5,True_Blue,25.0,11.0,0.0,0.0,48.0,2.0,86.0,14.0,73.0,37.0,65.0
6,"Nike_SB_Chronicles,_Vol_3",39.0,0.0,0.0,0.0,17.0,6.0,89.0,39.0,44.0,33.0,56.0


In [16]:
stats_df.to_csv('../data/square_pie.csv', index=False)

# Number of Clips By Video

In [15]:
# total num tricks
aggregated = df.groupby('video').max()['clip_index']
aggregated.name = 'total_num_tricks'
aggregated.sort_values(ascending=False)

AssertionError: 

# Top Tricks

In [None]:
top_tricks = pd.DataFrame(df.trick.value_counts()).reset_index()
top_tricks.columns = ['trick', 'trickcount']
top_tricks = top_tricks[top_tricks.trickcount > 3]
top_tricks.to_csv('toptricks.csv', index=False)

In [None]:
top_tricks

# Number of Clips Filmed By Year

In [None]:
clips_per_year = df.groupby('year').count()['clip_index']

In [None]:
sns.lineplot(data=clips_per_year)

# Crust over Time

In [None]:
crust_by_year = df.groupby('year').sum()['crust']

In [None]:
crust_df = pd.DataFrame(crust_by_year).reset_index()
crust_df.columns = ['year', 'num_crust_clips']
crust_df.to_csv('crust_df.csv', index=False)

# Obstacles Over Time

To plot each obstacle over time, we first need to get the counts for obstacle by year.

Then, we'll have to reformat our data to a 'long' structure suitable for plotting.

In [None]:
# group by to get counts
obs_by_time = df.groupby(['obstacle', 'year']).count()

In [None]:
# format data better
obs_by_time = obs_by_time.reset_index()[['obstacle', 'year', 'clip_index']].rename(columns={'clip_index': 'cnt'})
obs_by_time.to_csv('obs_by_time.csv', index=False)

In [None]:
# pivot data into desired structure
obs_by_time = obs_by_time.pivot(index="year", columns="obstacle", values="cnt").fillna(0)

In [None]:
# unfortunately, default plotting kind sucks but at least we get a general idea of trends
obs_by_time.plot(figsize=(20,10), lw=4)

In [None]:
# Rerun previous code but filter out some of the obstacles
desired_obstacles = ['rail', 'wall', 'stairs', 'transition', 'ledge']

obs_by_time = df.loc[df.obstacle.isin(desired_obstacles)].groupby(['obstacle', 'year']).count()
# format data better
obs_by_time = obs_by_time.reset_index()[['obstacle', 'year', 'clip_index']].rename(columns={'clip_index': 'cnt'})
# pivot data into desired structure
obs_by_time = obs_by_time.pivot(index="year", columns="obstacle", values="cnt").fillna(0)
# unfortunately, default plotting kind sucks but at least we get a general idea of trends
obs_by_time.plot(figsize=(20,10), lw=5)

In [None]:
sns.catplot("obstacle", col="year", 
                col_wrap=3, data=df, 
                kind="count", height=2.5, aspect=.8)

In [None]:
df

In [None]:
df.loc[df.line == 1].groupby('video').nunique()

In [None]:
df.loc[df.video == "Eastern Exposure 3"]