In [1]:
import plotly.express as px
import plotly.graph_objects as go

In [5]:
import jax
import jax.numpy as jnp
import flax
from flax import linen as nn
import optax

import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype

from numpy.lib.stride_tricks import sliding_window_view

import pickle


In [3]:
pd.set_option('display.max_rows', 125)
pd.set_option('display.max_columns', 200)

In [22]:
def to_num_grade(s_grade):
    num_grade = pd.Series(np.zeros_like(s_grade), index=s_grade.index, name='V grade')
    filt = s_grade.str[-1]=='+'
    num_grade.loc[filt] = s_grade.loc[filt].str[1:-1].astype(int) + 0.25
    filt = s_grade.str[-1]=='-'
    num_grade.loc[filt] = s_grade.loc[filt].str[1:-1].astype(int) - 0.25
    filt = num_grade==0
    num_grade.loc[filt] = s_grade.loc[filt].str[1:].astype(int)
    return num_grade

## Data

## all regions

In [730]:
filename = '~/Downloads/2023-03-26_sendage.h5'
boulders = pd.read_hdf(filename, 'boulders')
ascents_df = pd.read_hdf(filename, 'ascents')
climbs_df = pd.read_hdf(filename, 'climbs')
climbers_df = pd.read_hdf(filename, 'climbers')

boulders = boulders.drop(136733)
boulders['# sends'] = boulders['# onsights'] + boulders['# redpoint'] + boulders['# flashes']
climbs_df['# sends'] = climbs_df['# onsights'] + climbs_df['# redpoint'] + climbs_df['# flashes']

In [729]:
boulders.loc[136733]

slug             disciple-prophet-wall-skaha-bc-canada
name                                      The Disciple
area_id                                          10385
type                                           boulder
grade_id                                            97
bolts                                                0
length                                             NaN
grade                                                +
description                                           
# onsights                                           0
# redpoint                                           0
# flashes                                            0
# sends                                              0
areas_0_name                                     Skaha
areas_1_name                              Prophet Wall
areas_2_name                                          
areas_0_slug                           skaha-bc-canada
areas_1_slug              prophet-wall-skaha-bc-canada
areas_2_sl

In [741]:
boulders['V grade'] = to_num_grade(boulders['grade'])
boulders.head()

Unnamed: 0,slug,name,area_id,type,grade_id,bolts,length,grade,description,# onsights,# redpoint,# flashes,# sends,areas_0_name,areas_1_name,areas_2_name,areas_0_slug,areas_1_slug,areas_2_slug,areas_3_name,areas_3_slug,areas_4_name,areas_4_slug,areas_5_name,areas_5_slug,areas_6_name,areas_6_slug,areas_7_name,areas_7_slug,areas_8_name,areas_8_slug,areas_9_name,areas_9_slug,areas_10_name,areas_11_name,areas_10_slug,areas_11_slug,ratings_min,ratings_max,ratings_mean,ratings_count,stars_isna,connected,V grade
36512,superfly-superfly-grand-wall-boulders-squamish...,Superfly,8353,boulder,29,0,,V4,A Squamish all-time classic with physical move...,22,575,74,671,Squamish,Grand Wall Boulders,Superfly,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,superfly-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,5.0,4.143836,438.0,233.0,True,4.0
36410,easy-in-easy-chair-easy-chair-grand-wall-bould...,Easy In An Easy Chair,7521,boulder,29,0,,V4,Traverse left to right on good slopers. A clas...,27,552,88,667,Squamish,Grand Wall Boulders,Easy Chair,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,easy-chair-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,1.0,5.0,4.071584,461.0,206.0,True,4.0
36476,titanic-titanic-north-grand-wall-boulders-squa...,Titanic,8360,boulder,24,0,,V3+,Start standing with opposing edges for your ha...,59,465,133,657,Squamish,Grand Wall Boulders,Titanic North,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,titanic-north-grand-wall-boulders-squamish-bc-...,,,,,,,,,,,,,,,,,,,1.0,5.0,3.929577,426.0,231.0,True,3.25
35610,trad-killer-heartbreak-hotel-grand-wall-boulde...,Trad Killer,8351,boulder,28,0,,V4,,37,438,78,553,Squamish,Grand Wall Boulders,Heartbreak Hotel,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,heartbreak-hotel-grand-wall-boulders-squamish-...,,,,,,,,,,,,,,,,,,,1.0,5.0,4.149457,368.0,185.0,True,4.0
3049,timeless-titanic-south-grand-wall-boulders-squ...,Timeless,8361,boulder,32,0,,V5-,Start matched on the big shelf and climb up an...,22,414,84,520,Squamish,Grand Wall Boulders,Titanic South,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,titanic-south-grand-wall-boulders-squamish-bc-...,,,,,,,,,,,,,,,,,,,2.0,5.0,4.168022,369.0,151.0,True,4.75


In [38]:
boulders[boulders['name']=='A Quest for Pain']

Unnamed: 0,slug,name,area_id,type,grade_id,bolts,length,grade,description,# onsights,# redpoint,# flashes,# sends,areas_0_name,areas_1_name,areas_2_name,areas_0_slug,areas_1_slug,areas_2_slug,areas_3_name,areas_3_slug,areas_4_name,areas_4_slug,areas_5_name,areas_5_slug,areas_6_name,areas_6_slug,areas_7_name,areas_7_slug,areas_8_name,areas_8_slug,areas_9_name,areas_9_slug,areas_10_name,areas_11_name,areas_10_slug,areas_11_slug,ratings_min,ratings_max,ratings_mean,ratings_count,stars_isna,connected,V grade
58635,quest-for-pain-ride-lightning-grand-wall-bould...,A Quest for Pain,7249,boulder,18,0,,V2,,1,6,1,8,Squamish,Grand Wall Boulders,Ride the Lightning,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,ride-lightning-grand-wall-boulders-squamish-bc...,,,,,,,,,,,,,,,,,,,3.0,5.0,3.833333,6.0,2.0,True,2.0


In [742]:
bascents = ascents_df[np.isin(ascents_df['climb_id'], boulders.index)]
bascents_dated = bascents[~bascents['date'].isna()]
climbers_, counts = np.unique(bascents_dated['user'], return_counts=True)
boulderers_dated = pd.DataFrame({'name': climbers_, '# sends': counts})

In [743]:
bascents_dated['V grade'] = to_num_grade(bascents_dated['grade'])
bascents_dated.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bascents_dated['V grade'] = to_num_grade(bascents_dated['grade'])


Unnamed: 0,climb_id,user,date,grade,style,first-ascent,stars,comments,beta,V grade
0,36512,hucklord,2023-02-12,V4,redpoint,False,5.0,,,4
1,36512,rajsoni,2023-01-24,V4,flash,False,,,,4
2,36512,maude-johnson,2022-11-01,V4,redpoint,False,5.0,,,4
3,36512,bethany-van-hierden,2022-10-20,V4,redpoint,False,,2nd go.,,4
4,36512,alessandroroome-sandrin,2022-10-12,V4,redpoint,False,4.0,,,4


## squamish only

In [677]:
squamish_boulders = boulders[boulders['areas_0_name']=='Squamish']

squamish_boulders.loc[squamish_boulders['areas_1_name']=='The Powerline Boulders', 'areas_1_name'] = 'Powerline Boulders'
squamish_boulders.loc[squamish_boulders['areas_2_name']=='Parking Lot Wall', 'areas_2_name'] = 'Parking Lot'
climb_features = pd.concat([squamish_boulders[['V grade', 'ratings_mean']], 
                            # pd.get_dummies(squamish_boulders['areas_1_name']), 
                            # pd.get_dummies(squamish_boulders['areas_2_name'])
                           ], axis=1)
climb_features.head()

Unnamed: 0,V grade,ratings_mean
36512,4.0,4.143836
36410,4.0,4.071584
36476,3.25,3.929577
35610,4.0,4.149457
3049,4.75,4.168022


In [678]:
squamish_sends = bascents_dated[np.isin(bascents_dated['climb_id'], squamish_boulders.index)]

cat_type = CategoricalDtype(categories=["onsight", "flash", "redpoint"], ordered=True)
squamish_sends['style'] = squamish_sends['style'].astype(cat_type)
squamish_sends['style_num'] = squamish_sends['style'].cat.codes

squamish_sends.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  squamish_sends['style'] = squamish_sends['style'].astype(cat_type)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  squamish_sends['style_num'] = squamish_sends['style'].cat.codes


Unnamed: 0,climb_id,user,date,grade,style,first-ascent,stars,comments,beta,V grade,climb_token,style_num
0,36512,hucklord,2023-02-12,V4,redpoint,False,5.0,,,4.0,0,2
1,36512,rajsoni,2023-01-24,V4,flash,False,,,,4.0,0,1
2,36512,maude-johnson,2022-11-01,V4,redpoint,False,5.0,,,4.0,0,2
3,36512,bethany-van-hierden,2022-10-20,V4,redpoint,False,,2nd go.,,4.0,0,2
4,36512,alessandroroome-sandrin,2022-10-12,V4,redpoint,False,4.0,,,4.0,0,2


In [679]:
n=5
tmp = squamish_sends.groupby('user')['climb_id'].count()
boulderers_min_n = tmp[tmp>n]
squamish_sends = squamish_sends[np.isin(squamish_sends['user'], boulderers_min_n.index)]
len(squamish_sends)

54281

In [680]:
squamish_sends = squamish_sends.sort_values(['user', 'date'])#.set_index(['user', 'date'])
squamish_sends

Unnamed: 0,climb_id,user,date,grade,style,first-ascent,stars,comments,beta,V grade,climb_token,style_num
772,36410,aaron-pardy,2021-02-27,V4,redpoint,False,5.0,,,4.00,1,2
112614,136621,aaron-pardy,2021-02-27,V0-,onsight,False,2.0,,,-0.25,1431,0
31707,3108,aaron-pardy,2021-04-04,V3,redpoint,False,3.0,,,3.00,108,2
63339,85045,aaron-pardy,2021-04-04,V2,redpoint,False,3.0,,,2.00,334,2
145908,136638,aaron-pardy,2021-04-04,V0,redpoint,False,3.0,,,0.00,6320,2
...,...,...,...,...,...,...,...,...,...,...,...,...
110176,129022,zoebeauchemin,2022-10-02,V9,redpoint,False,4.0,Gnarly approach for the birks! Fun little prow...,,9.00,1336,2
12929,36485,zoebeauchemin,2023-03-21,V7,redpoint,False,4.0,,,7.00,35,2
117387,64154,zoebeauchemin,2023-03-21,V5,redpoint,False,3.0,,,5.00,1690,2
50969,92069,zoebeauchemin,2023-03-22,V9,redpoint,False,4.0,,,9.00,220,2


In [681]:
len(squamish_boulders)

3328

In [682]:
squamish_boulders = squamish_boulders.loc[np.isin(squamish_boulders.index, squamish_sends['climb_id'])]
len(squamish_boulders)

3000

In [683]:
counts = squamish_sends.groupby('climb_id')['user'].count().sort_values()
p_c = counts / counts.sum()
# np.histogram(np.log(p_c))
threshold = np.exp(-5.46578617)
p_dropped = 1 - np.sqrt(threshold/p_c)
p_dropped

climb_id
140683   -14.151078
124354   -14.151078
59452    -14.151078
124353   -14.151078
110652   -14.151078
            ...    
35610      0.234755
3049       0.238630
36476      0.271043
36410      0.281770
36512      0.283379
Name: user, Length: 3000, dtype: float64

In [23]:
sequences = squamish_sends.groupby('user')['climb_id'].apply(list)
sequences

user
aaron-pardy                  [36410, 136621, 3108, 85045, 136638, 57736, 75...
aaronthebcsherpaiacovitti    [41980, 3117, 3068, 57237, 36512, 36410, 3095,...
adam-curtis                  [3050, 3125, 36476, 3084, 3092, 42899, 36512, ...
adam-fairweather             [10944, 3084, 3125, 42240, 3138, 41986, 58811,...
adamandrewbanks              [41449, 3091, 3111, 3115, 36512, 36476, 35610,...
                                                   ...                        
zachary-whalen               [36476, 41980, 3102, 3045, 56417, 41937, 10944...
zacharyjohnson               [107080, 10944, 3084, 42240, 55382, 41986, 418...
zarinaguzman                 [85664, 85663, 102171, 102172, 93477, 102170, ...
zibean                       [3049, 36444, 36476, 35982, 3058, 42872, 54461...
zoebeauchemin                [36410, 36444, 42887, 36472, 3058, 36476, 3049...
Name: climb_id, Length: 713, dtype: object

In [684]:
vocabulary = pd.Series(np.arange(len(squamish_boulders)), index=squamish_boulders.index)

In [685]:
inverse_vocabulary = pd.Series(squamish_boulders.index, index=np.arange(len(squamish_boulders)))

In [686]:
squamish_sends['climb_token'] = vocabulary.loc[squamish_sends['climb_id'].values].values

In [687]:
tokens = squamish_sends.groupby('user')['climb_token'].apply(list)
tokens

user
aaron-pardy                  [1, 937, 108, 296, 2626, 150, 237, 322, 338, 3...
aaroniacovitti                                  [82, 244, 20, 43, 59, 73, 254]
aaronthebcsherpaiacovitti    [50, 118, 11, 51, 0, 1, 81, 254, 41, 59, 2958,...
abby-taylor                                         [34, 49, 68, 77, 269, 270]
adam-currie                               [19, 4, 8, 12, 36, 96, 112, 242, 51]
                                                   ...                        
zacharyjohnson               [1043, 14, 27, 65, 83, 87, 90, 183, 230, 240, ...
zarinaguzman                 [781, 814, 178, 233, 243, 443, 700, 27, 49, 65...
zehraozturk                           [118, 186, 6, 30, 3, 11, 4, 15, 26, 133]
zibean                       [4, 6, 2, 20, 30, 33, 55, 19, 0, 1, 3, 7, 35, ...
zoebeauchemin                [1, 6, 41, 5, 30, 2, 4, 33, 52, 39, 147, 412, ...
Name: climb_token, Length: 862, dtype: object

In [688]:
i_ = np.arange(862)
np.random.shuffle(i_)
u_train, u_dev, u_test = np.split(i_, (650, 700))

## BC only

In [17]:
from process_area import combine_areas

In [18]:
bc_climbs, bc_ascents, bc_climbers, bc_boulders = combine_areas(['bc-canada'], path='/Users/lara.thompson/datasets/sendage/')

Processing bc-canada
Dropped bad dates: [7910, 62114]
Combining ['bc-canada']
fraction connected climbers 0.9946078431372549; fraction connected climbs 0.8717074682367524
0.9946808510638298 0.906085237258348
fraction connected boulderers 0.9946808510638298; fraction connected boulders 0.906085237258348


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  boulders['connected'] = np.isin(boulders.index, conn_bs)


In [19]:
bc_boulders = bc_boulders[~(bc_boulders['grade']=='+')]

In [20]:
bc_boulders['# sends'] = bc_boulders['# onsights'] + bc_boulders['# redpoint'] + bc_boulders['# flashes']
bc_climbs['# sends'] = bc_climbs['# onsights'] + bc_climbs['# redpoint'] + bc_climbs['# flashes']

In [23]:
bc_boulders['V grade'] = to_num_grade(bc_boulders['grade'])

In [24]:
cat_type = CategoricalDtype(categories=["onsight", "flash", "redpoint"], ordered=True)
bc_ascents['style'] = bc_ascents['style'].astype(cat_type)
bc_ascents['style_num'] = bc_ascents['style'].cat.codes

In [25]:
bc_boulders

Unnamed: 0,slug,name,area_id,type,grade_id,bolts,length,grade,description,# onsights,# redpoint,# flashes,# sends,areas_0_name,areas_1_name,areas_2_name,areas_0_slug,areas_1_slug,areas_2_slug,areas_3_name,areas_3_slug,areas_4_name,areas_4_slug,areas_5_name,areas_5_slug,areas_6_name,areas_6_slug,areas_7_name,areas_7_slug,areas_8_name,areas_8_slug,areas_9_name,areas_9_slug,areas_10_name,areas_11_name,areas_10_slug,areas_11_slug,ratings_min,ratings_max,ratings_mean,ratings_count,stars_isna,connected,V grade
36512,superfly-superfly-grand-wall-boulders-squamish...,Superfly,8353,boulder,29,0,,V4,A Squamish all-time classic with physical move...,22,575,74,671,Squamish,Grand Wall Boulders,Superfly,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,superfly-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,5.0,4.143836,438.0,233.0,True,4
36410,easy-in-easy-chair-easy-chair-grand-wall-bould...,Easy In An Easy Chair,7521,boulder,29,0,,V4,Traverse left to right on good slopers. A clas...,27,552,88,667,Squamish,Grand Wall Boulders,Easy Chair,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,easy-chair-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,1.0,5.0,4.071584,461.0,206.0,True,4
36476,titanic-titanic-north-grand-wall-boulders-squa...,Titanic,8360,boulder,24,0,,V3+,Start standing with opposing edges for your ha...,59,465,133,657,Squamish,Grand Wall Boulders,Titanic North,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,titanic-north-grand-wall-boulders-squamish-bc-...,,,,,,,,,,,,,,,,,,,1.0,5.0,3.929577,426.0,231.0,True,3.25
35610,trad-killer-heartbreak-hotel-grand-wall-boulde...,Trad Killer,8351,boulder,28,0,,V4,,37,438,78,553,Squamish,Grand Wall Boulders,Heartbreak Hotel,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,heartbreak-hotel-grand-wall-boulders-squamish-...,,,,,,,,,,,,,,,,,,,1.0,5.0,4.149457,368.0,185.0,True,4
3049,timeless-titanic-south-grand-wall-boulders-squ...,Timeless,8361,boulder,32,0,,V5-,Start matched on the big shelf and climb up an...,22,414,84,520,Squamish,Grand Wall Boulders,Titanic South,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,titanic-south-grand-wall-boulders-squamish-bc-...,,,,,,,,,,,,,,,,,,,2.0,5.0,4.168022,369.0,151.0,True,4.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135742,kootenay-cave-dweller-perry-creek-boulders-per...,Kootenay Cave Dweller,9800,boulder,34,0,,V5,Sit start with the big undercling and make dif...,0,0,0,0,Canada,British Columbia,Perry Creek,canada,bc-canada,perry-creek-bc-canada,Perry Creek Boulders,perry-creek-boulders-perry-creek-bc-canada,,,,,,,,,,,,,,,,,,,,,,False,5
133603,tinted-glass-perry-creek-boulders-perry-creek-...,Tinted Glass,9800,boulder,40,0,,V6,Sit start on the right hand side of the face o...,0,0,0,0,Canada,British Columbia,Perry Creek,canada,bc-canada,perry-creek-bc-canada,Perry Creek Boulders,perry-creek-boulders-perry-creek-bc-canada,,,,,,,,,,,,,,,,,,,,,,False,6
135958,grandfather-clock-mt-work-greater-victoria-van...,The Grandfather Clock,9837,boulder,18,0,,V2,Start standing with a right hand in a crack sl...,0,0,0,0,Vancouver Island,Greater Victoria,Mt Work,vancouver-island-bc-canada,greater-victoria-vancouver-island-bc-canada,mt-work-greater-victoria-vancouver-island-bc-c...,,,,,,,,,,,,,,,,,,,,,,,,False,2
131915,silly-face-buttress-boulders-eldred-valley-bou...,Silly face,9894,boulder,18,0,,V2,Start in the middle of the face just right of ...,0,0,0,0,Canada,British Columbia,Sunshine Coast,canada,bc-canada,sunshine-coast-bc-canada,Powell River,powell-river-sunshine-coast-bc-canada,Eldred Valley,eldred-valley-powell-river-sunshine-coast-bc-c...,Eldred Valley Boulders,eldred-valley-boulders-eldred-valley-powell-ri...,Buttress Boulders,buttress-boulders-eldred-valley-boulders-eldre...,,,,,,,,,,,,,,,,False,2


In [225]:
def self_consistent_min(ascents, climbers, climbs, n_min=5):
    n_climbs, n_climbers = len(climbs)+1, len(climbers)+1
    while (len(climbs) < n_climbs) or (len(climbers) < n_climbers):
        n_climbs, n_climbers = len(climbs), len(climbers)
        print(n_climbs, n_climbers)
        
        # update ascents
        ascents = ascents[np.isin(ascents['user'], climbers.index)]
        ascents = ascents[np.isin(ascents['climb_id'], climbs.index)]
        
        # update climbers
        climbers_, counts = np.unique(ascents['user'], return_counts=True)
        climbers = climbers.loc[climbers_[counts >= n_min]]
        climbers['# sends'] = counts[counts >= n_min]
        
        # update climbs
        climb_styles = ascents.groupby(['climb_id', 'style'])['user'].count().reset_index().pivot(
            index='climb_id', columns='style', values='user').fillna(0)
        climb_styles['# sends'] = climb_styles.sum(axis=1)
        climb_styles = climb_styles[climb_styles['# sends']>=n_min]
        climbs = climbs.loc[climb_styles.index]
        climbs[climb_styles.columns] = climb_styles
        print(len(climbs), len(climbers), len(ascents))
    return ascents, climbers, climbs

In [226]:
bc_bascents = bc_ascents[np.isin(bc_ascents['climb_id'], bc_boulders.index) & ~bc_ascents['date'].isna()]

In [227]:
bc_bascents, bc_boulderers, bc_boulders_5 = self_consistent_min(bc_bascents, bc_climbers.copy().set_index('name'), bc_boulders.copy())

9103 2040
2858 1091 78305
2858 1091
2849 1069 68433
2849 1069
2844 1069 68356
2844 1069
2844 1069 68336


In [231]:
bc_boulders_5[~np.isin(bc_boulders_5.index, bc_bascents['climb_id'])]

Unnamed: 0_level_0,slug,name,area_id,type,grade_id,bolts,length,grade,description,# onsights,# redpoint,# flashes,# sends,areas_0_name,areas_1_name,areas_2_name,areas_0_slug,areas_1_slug,areas_2_slug,areas_3_name,areas_3_slug,areas_4_name,areas_4_slug,areas_5_name,areas_5_slug,areas_6_name,areas_6_slug,areas_7_name,areas_7_slug,areas_8_name,areas_8_slug,areas_9_name,areas_9_slug,areas_10_name,areas_11_name,areas_10_slug,areas_11_slug,ratings_min,ratings_max,ratings_mean,ratings_count,stars_isna,connected,V grade,onsight,flash,redpoint,area_name
climb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1


In [228]:
bc_bascents['V grade'] = to_num_grade(bc_bascents['grade'])
bc_bascents = bc_bascents.sort_values(['user', 'date', 'V grade'])

In [30]:
pickle.dump([bc_bascents, bc_boulderers, bc_boulders_5], open('bc_selfcons-min5.p', 'wb'))

In [229]:
bc_boulders_5['area_name'] = np.nan
filt = (bc_boulders_5['areas_1_slug'].isna() | (bc_boulders_5['areas_1_slug']=='')) & (bc_boulders_5['area_name'].isna())
bc_boulders_5.loc[filt, 'area_name'] = bc_boulders_5.loc[filt, 'areas_0_slug'].str.lower()

filt = (bc_boulders_5['areas_2_slug'].isna() | (bc_boulders_5['areas_2_slug']=='')) & (bc_boulders_5['area_name'].isna())
bc_boulders_5.loc[filt, 'area_name'] = bc_boulders_5.loc[filt, 'areas_1_slug'].str.lower()

filt = np.isin(bc_boulders_5['areas_1_slug'], bc_boulders_5['area_name']) & bc_boulders_5['area_name'].isna()
bc_boulders_5.loc[filt, 'area_name'] = bc_boulders_5.loc[filt, 'areas_1_slug'].str.lower()

filt = np.isin(bc_boulders_5['areas_1_name'], 
               ['Furry Creek', 'Shannon Falls', 'Sutton Pass', 'Crumpit Woods', 'Greater Victoria']) & bc_boulders_5['area_name'].isna()
bc_boulders_5.loc[filt, 'area_name'] = bc_boulders_5.loc[filt, 'areas_1_slug'].str.lower()

filt = (bc_boulders_5['areas_3_slug'].isna() | (bc_boulders_5['areas_3_slug']=='')) & (bc_boulders_5['area_name'].isna())
bc_boulders_5.loc[filt, 'area_name'] = bc_boulders_5.loc[filt, 'areas_2_slug']

filt = np.isin(bc_boulders_5['areas_2_slug'], bc_boulders_5['area_name']) & bc_boulders_5['area_name'].isna()
bc_boulders_5.loc[filt, 'area_name'] = bc_boulders_5.loc[filt, 'areas_2_slug']

filt = (bc_boulders_5['area_name'].isna()) & np.isin(bc_boulders_5['areas_2_name'], 
                ['Nanaimo River', 'Duke Point Boulders', 'Duncan Boulders', "The Grid", 'Arrow Lakes'])
bc_boulders_5.loc[filt, 'area_name'] = bc_boulders_5.loc[filt, 'areas_2_slug']

filt = (bc_boulders_5['area_name'].isna())
bc_boulders_5.loc[filt, 'area_name'] = bc_boulders_5[filt]['areas_3_slug'].str.lower()

filt = (bc_boulders_5['area_name'] =='grand-wall-chief-squamish-bc-canada')
bc_boulders_5.loc[filt, 'area_name'] = 'grand-wall-boulders-squamish-bc-canada'

In [327]:
bc_areas = bc_boulders_5['area_name'].unique()
bc_areas

array(['grand-wall-boulders-squamish-bc-canada',
       'apron-boulders-squamish-bc-canada',
       'north-walls-squamish-bc-canada',
       'paradise-valley-squamish-bc-canada',
       'murrin-park-squamish-bc-canada',
       'smoke-bluffs-squamish-bc-canada',
       'powerline-boulders-squamish-bc-canada', 'pemberton-bc-canada',
       'creekside-boulder-whistler-bc-canada', 'squamish-bc-canada',
       'furry-creek-squamish-bc-canada',
       'cypress-falls-park-vancouver-bc-canada',
       'lighthouse-park-vancouver-bc-canada',
       'godman-creek-vancouver-bc-canada', 'hunter-creek-bc-canada',
       'cal-cheak-whistler-bc-canada',
       'boulderfields-klo-creek-kelowna-bc-canada',
       'sutton-pass-vancouver-island-bc-canada',
       'cougar-canyon-bc-canada',
       'powerline-boulders-squamish-bc-canada-4991',
       'duncan-boulders-duncan-vancouver-island-bc-canada',
       'greater-victoria-vancouver-island-bc-canada',
       'duke-point-boulders-nanaimo-area-vancouver-i

In [235]:
len(bc_bascents['area_name'].unique())

60

In [32]:
counts = bc_bascents.groupby('climb_id')['user'].count().sort_values()
p_c = counts / counts.sum()
# np.histogram(np.log(p_c))
threshold = np.exp(-5.5)
p_dropped = 1 - np.sqrt(threshold/p_c)
p_dropped

climb_id
116016   -6.473602
127240   -6.473602
127262   -6.473602
110844   -6.473602
106801   -6.473602
            ...   
35610     0.171643
3049      0.175683
36476     0.216554
36512     0.221669
36410     0.230792
Name: user, Length: 2844, dtype: float64

In [233]:
area_vocab = pd.Series(np.arange(len(bc_areas)), index=bc_areas)
area_inv = pd.Series(bc_areas, index=np.arange(len(bc_areas)))

bc_bascents['area_name'] = bc_boulders_5.loc[bc_bascents['climb_id'].values, 'area_name'].values
bc_bascents['area_token'] = area_vocab.loc[bc_bascents['area_name'].values].values

In [204]:
vocabulary = pd.Series(np.arange(len(bc_boulders_5)), index=bc_boulders_5.index)
inverse_vocabulary = pd.Series(bc_boulders_5.index, index=np.arange(len(bc_boulders_5)))

bc_bascents['climb_token'] = vocabulary.loc[bc_bascents['climb_id'].values].values
bc_bascents['stars'] = bc_bascents['stars'].fillna(-5)

features = {}
for feature in ['climb_token', 'area_token', 'V grade', 'style_num', 'stars', 'date']:
    features[feature] = bc_bascents.groupby('user')[feature].apply(list)
    holdout = features[feature].loc[['larathompson', 'finnfrasergrathwol']]
    features[feature] = pd.concat([features[feature].drop(['larathompson', 'finnfrasergrathwol']), holdout])
features

{'climb_token': user
 aaron                        [1904, 1454, 18, 98, 2284, 2341, 921, 1450, 14...
 aaron-pardy                  [2762, 98, 1112, 32, 591, 432, 431, 592, 833, ...
 aaroniacovitti                               [128, 772, 1311, 132, 86, 95, 83]
 aaronthebcsherpaiacovitti    [323, 37, 11, 579, 25, 106, 98, 1311, 466, 132...
 abby-taylor                                        [328, 23, 42, 45, 298, 369]
                                                    ...                        
 zehraozturk                  [37, 135, 100, 9, 72, 1000, 1239, 2176, 2143, ...
 zibean                       [5, 100, 103, 86, 9, 454, 560, 70, 106, 98, 72...
 zoebeauchemin                [98, 100, 466, 102, 1516, 934, 9, 103, 5, 454,...
 larathompson                 [1183, 1672, 1854, 1940, 1407, 581, 2294, 2312...
 finnfrasergrathwol           [344, 1778, 1719, 72, 5, 1776, 1779, 442, 222,...
 Name: climb_token, Length: 1069, dtype: object,
 'area_token': user
 aaron                        

In [206]:
for i in range(1069): 
    features['date'].iloc[i] = [0]+list(np.diff(features['date'].iloc[i])/pd.Timedelta("1 days"))

In [207]:
features['date']

user
aaron                        [0, 0.0, 231.0, 0.0, 11.0, 1.0, 0.0, 0.0, 0.0,...
aaron-pardy                  [0, 0.0, 36.0, 0.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0...
aaroniacovitti                            [0, 28.0, 233.0, 0.0, 0.0, 0.0, 0.0]
aaronthebcsherpaiacovitti    [0, 69.0, 9.0, 27.0, 21.0, 0.0, 0.0, 281.0, 55...
abby-taylor                                       [0, 0.0, 0.0, 0.0, 0.0, 0.0]
                                                   ...                        
zehraozturk                  [0, 35.0, 19.0, 1.0, 5.0, 335.0, 10.0, 0.0, 18...
zibean                       [0, 3.0, 332.0, 1.0, 6.0, 0.0, 1.0, 3.0, 12.0,...
zoebeauchemin                [0, 0.0, 1.0, 29.0, 0.0, 0.0, 29.0, 331.0, 0.0...
larathompson                 [0, 63.0, 21.0, 0.0, 33.0, 10.0, 19.0, 0.0, 0....
finnfrasergrathwol           [0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...
Name: date, Length: 1069, dtype: object

## all dated boulder sends

In [360]:
len(bascents_dated)

144589

In [361]:
bascents_dated_dedupd = bascents_dated.sort_values(by='date').drop_duplicates(['user', 'climb_id'])

In [None]:
n=5
tmp = bascents_dated_dedupd.groupby('user')['climb_id'].count()
boulderers_min_n = tmp[tmp>n]
bascents_dated_min5 = bascents_dated_dedupd[np.isin(bascents_dated_dedupd['user'], boulderers_min_n.index)]
len(bascents_dated_min5)

In [1101]:
bascents_dated_min10 = bascents_dated_min10.sort_values(['user', 'date', 'V grade'])

In [1102]:
print(len(boulders))
boulders_dated = boulders.loc[np.isin(boulders.index, bascents_dated_min10['climb_id'])]
print(len(boulders_dated))

26961
22063


In [1103]:
vocabulary = pd.Series(np.arange(len(boulders_dated)), index=boulders_dated.index)
inverse_vocabulary = pd.Series(boulders_dated.index, index=np.arange(len(boulders_dated)))

In [1104]:
bascents_dated_min10['climb_token'] = vocabulary.loc[bascents_dated_min10['climb_id'].values].values
tokens = bascents_dated_min10.groupby('user')['climb_token'].apply(list)
tokens

user
aaron                        [701, 336, 14796, 14794, 14790, 14791, 14792, ...
aaron-martinez               [10390, 10591, 10211, 10237, 10305, 10261, 106...
aaron-pardy                  [1431, 1, 6310, 334, 108, 554, 390, 152, 570, ...
aaronthebcsherpaiacovitti    [50, 120, 11, 51, 81, 0, 1, 282, 41, 59, 7706,...
abby-huyler-2263             [10209, 10266, 10222, 15658, 15769, 15690, 156...
                                                   ...                        
zane-dordai                  [15663, 15658, 15723, 16266, 15674, 15777, 156...
zarinaguzman                 [1187, 1115, 550, 4583, 265, 249, 184, 978, 40...
zehraozturk                  [120, 193, 6, 30, 3, 520, 630, 263, 176, 11, 4...
zibean                       [4, 6, 2, 20, 30, 33, 55, 19, 3, 1, 7, 0, 35, ...
zoebeauchemin                [1, 6, 41, 160, 5, 114, 30, 2, 4, 33, 52, 445,...
Name: climb_token, Length: 1229, dtype: object

In [543]:
i_tokens = np.arange(len(tokens))
np.random.shuffle(i_tokens)

u_train, u_dev, u_test = np.split(i_tokens, [1000, 1100])

In [215]:
len(u_train), len(u_dev), len(u_test)

(1000, 100, 129)

# make training data -- time slices

In [36]:
i_ = np.arange(len(features['climb_token']))
np.random.shuffle(i_[:-2])
u_train, u_dev, u_test = np.split(i_, (int(0.85*len(i_)), int(0.9*len(i_))))
print(len(u_train), len(u_dev), len(u_test))

908 54 107


In [351]:
n = 40
i_t = n-1

In [352]:
to_one_hot = ['area_token']
def all_user_seqs(features, udx, i_target=2, n=5):
    maxlen = features['climb_token'].str.len().max()
    idx = jnp.arange(maxlen - n + 1)[:, None] + jnp.arange(n)[None, :]
    mask = len(vocabulary)
    feat_mat = []
    for feature in features:
        # print(feature)
        mats = []
        for user, seq in features[feature].iloc[udx].items():
            l_seq = len(seq)
            if l_seq < n:
                mats.append(jnp.pad(jnp.array(seq), (n-l_seq, 0), 
                                    constant_values=mask if feature=='climb_token' else 0)[jnp.newaxis, :])
            else:
                mats.append(jnp.pad(jnp.array(seq), (0, maxlen-l_seq), constant_values=-1)[idx])
        mats = jnp.concatenate(mats, axis=0)
        mats = mats[~np.any(mats==-1, axis=1)]
        if feature in to_one_hot:
            shape = mats.shape
            b = np.zeros((mats.size, len(bc_areas)))
            b[np.arange(mats.size), mats.reshape(-1)] = 1
            feat_mat.append(b.reshape((shape[0], shape[1], -1)))
        else:
            feat_mat.append(mats[:,:, jnp.newaxis])
    feat_mat = jnp.concatenate(feat_mat, axis=2)
    feat_mat = feat_mat.at[:, 0, -1].set(0)
    print(feat_mat.shape)
    x, y = jnp.concatenate([feat_mat[:, :i_target, :], feat_mat[:, i_target+1:, :]], axis=1), feat_mat[:, i_target, :].astype(jnp.int16)
    x = jnp.concatenate([x, jnp.concatenate([mask*jnp.ones(feat_mat.shape[0])[:, jnp.newaxis], y[:, 1:]], axis=1)[:, jnp.newaxis, :]], axis=1)
    y = y[:, 0]
    return x.astype(jnp.int32), y.astype(jnp.int32)

In [353]:
train_x, train_y = all_user_seqs(features, u_train, i_target=i_t, n=n)
dev_x, dev_y = all_user_seqs(features, u_dev, i_target=i_t, n=n)
test_x, test_y = all_user_seqs(features, u_test, i_target=i_t, n=n)

(36130, 40, 65)
(1625, 40, 65)
(4581, 40, 65)


In [354]:
me_x, me_y = all_user_seqs(features, [1067], i_target=i_t, n=n)
finn_x, finn_y = all_user_seqs(features, [1068], i_target=i_t, n=n)

(47, 40, 65)
(76, 40, 65)


In [355]:
train_x.shape, train_y.shape, dev_x.shape, dev_y.shape, test_x.shape, test_y.shape

((36130, 40, 65), (36130,), (1625, 40, 65), (1625,), (4581, 40, 65), (4581,))

# make training data -- time ordered samples

In [668]:
n = 10

In [670]:
def random_user_seqs(tokens, i_target=2, n=5):
    rng = np.random.default_rng()
    mats = []
    for user, seq in tokens.items():
        for _ in range(4*(len(seq)-n)):
            idx = np.sort(rng.choice(len(seq), n, replace=False))
            mats.append(jnp.array(seq)[idx])
    mats = jnp.row_stack(mats)
    return jnp.concatenate([mats[:, :i_target], mats[:, i_target+1:]], axis=1), mats[:, i_target]

In [671]:
train_x, train_y = random_user_seqs(tokens.iloc[u_train], i_target=n-1, n=n)
dev_x, dev_y = random_user_seqs(tokens.iloc[u_dev], i_target=n-1, n=n)
test_x, test_y = random_user_seqs(tokens.iloc[u_test], i_target=n-1, n=n)

In [672]:
train_x.shape

(416648, 9)

# Sequence Modelling

## Model training

In [75]:
class Climb2Vec(nn.Module):
    vocab : int
    d : int
    
    @nn.compact
    def __call__(self, X_batch, training):
        x = nn.Embed(self.vocab, self.d)(X_batch)
        # x = nn.Dropout(0.2, deterministic=not training)(x)
        x = x.reshape(X_batch.shape[0], -1)
        x = nn.relu(nn.Dense(128)(x))
        x = nn.Dropout(0.2, deterministic=not training)(x)
        x = nn.Dense(self.vocab)(x)
        return x

In [76]:
class ClimbGRU(nn.Module):
    vocab : int
    d : int
    features: int
    
    @nn.compact
    def __call__(self, x, training):
        batch_size = x.shape[0]
        x = jnp.concatenate([nn.Embed(self.vocab, self.d)(x[:, :, 0].astype(jnp.int16)),
                             x[:, :, 1:]], axis=2)
        
        ScanGRUCell = nn.scan(
            nn.GRUCell, variable_broadcast="params",
            split_rngs={"params": False}, in_axes=1, out_axes=1)
        carry = nn.GRUCell.initialize_carry(
            jax.random.PRNGKey(0), (batch_size,), self.features)
        carry, x = ScanGRUCell()(carry, x)
        
        ScanGRUCell = nn.scan(
            nn.GRUCell, variable_broadcast="params",
            split_rngs={"params": False}, in_axes=1, out_axes=1)
        carry = nn.GRUCell.initialize_carry(
            jax.random.PRNGKey(0), (batch_size,), self.features)
        carry, x = ScanGRUCell()(carry, x)
        
        x = nn.Dense(self.vocab)(carry)

        return x

In [252]:
class ClimbLSTM(nn.Module):
    vocab : int
    d : int
    features: int
    
    @nn.compact
    def __call__(self, x, training):
        batch_size = x.shape[0]
        x = nn.Embed(self.vocab, self.d)(x)
        x = nn.Dropout(0.1, deterministic=not training)(x)
        
        ScanLSTMCell = nn.scan(
            nn.LSTMCell, variable_broadcast="params",
            split_rngs={"params": False}, in_axes=1, out_axes=1)
        carry = nn.LSTMCell.initialize_carry(
            jax.random.PRNGKey(0), (batch_size,), self.features)
        carry, x = ScanLSTMCell()(carry, x)
        # ScanLSTMCell = nn.scan(
        #     nn.LSTMCell, variable_broadcast="params",
        #     split_rngs={"params": False}, in_axes=1, out_axes=1)
        # carry = nn.LSTMCell.initialize_carry(
        #     jax.random.PRNGKey(0), (batch_size,), self.features)
        # carry, x = ScanLSTMCell()(carry, x)
        
        x = x.reshape(batch_size, -1)
        # x = nn.relu(nn.Dense(128)(x))
        x = nn.Dense(self.vocab)(x)
        return x

In [250]:
class Transformer(nn.Module):
    """Transformer Model for sequence tagging."""

    config: TransformerConfig

    @nn.compact
    def __call__(self, *, inputs, train):
        """Applies Transformer model on the inputs.

        Args:
          inputs: input data
          train: if it is training.

        Returns:
          output of a transformer encoder.

        """
        # assert inputs.ndim == 2  # (batch, len)

        config = self.config

        x = inputs.astype('int32')
        # x = nn.Embed(
        #     num_embeddings=config.vocab_size, features=config.emb_dim,
        #     name='embed')(
        #         x)
        x = jnp.concatenate([nn.Embed(
            num_embeddings=config.vocab_size, features=config.emb_dim,
            name='embed')(x[:, :, 0]), x[:, :, 1:]], axis=2)
        
        x = nn.Dropout(rate=config.dropout_rate)(x, deterministic=not train)
        x = AddPositionEmbs(config)(x)

        for _ in range(config.num_layers):
                x = Encoder1DBlock(config)(x, deterministic=not train)

        x = nn.LayerNorm(dtype=config.dtype)(x[:, -1, :])
        logits = nn.Dense(
            config.output_vocab_size,
            kernel_init=config.kernel_init,
            bias_init=config.bias_init)(
                x)
        return logits

In [356]:
from transformer import AddPositionEmbs, Encoder1DBlock, TransformerConfig #, Transformer
    # vocab_size: int
    # output_vocab_size: int
    # dtype: Any = jnp.float32
    # emb_dim: int = 512
    # num_heads: int = 8
    # num_layers: int = 6
    # qkv_dim: int = 512
    # mlp_dim: int = 2048
    # max_len: int = 2048
    # dropout_rate: float = 0.3
    # attention_dropout_rate: float = 0.3

vocab = len(vocabulary)+1
config = TransformerConfig(
    vocab_size=vocab,
    output_vocab_size=vocab,
    emb_dim=128,
    num_heads=4,
    num_layers=4,
    qkv_dim=64,
    mlp_dim=64,
    max_len=n,
    dropout_rate=0.,
    attention_dropout_rate=0.
)

In [19]:
# train_x = np.random.randint(0, 5, (50000, 10, 5))
# train_y = np.random.randint(0, 2000, 50000)

# dev_x = np.random.randint(0, 5, (500, 10, 5))
# dev_y = np.random.randint(0, 2000, 500)
# n=10


In [357]:
batch_size = 256
batch_idx = np.arange(batch_size*(len(train_x) // batch_size))
np.random.shuffle(batch_idx)
batch_idx = np.reshape(batch_idx, (-1, batch_size))

rng = jax.random.PRNGKey(42)

rng, inp_rng, init_rng, dropout_rng = jax.random.split(rng, 4)
inp = jax.random.randint(inp_rng, (batch_size, n, 65), 0, 1).astype(jnp.int32)

# vocab = len(vocabulary)+1
d_emb = 100
d_rnn = 128

# model = Climb2Vec(vocab, d_emb)
# initial_params = model.init(init_rng, inp, training=False)

# model = ClimbGRU(vocab, d_emb, d_rnn)
# initial_params = model.init(init_rng, inp, training=False)

model = Transformer(config)
initial_params = model.init(init_rng, inputs=inp, train=False)

param_count = sum(x.size for x in jax.tree_util.tree_leaves(initial_params))
print(param_count)

1212637


In [358]:
def loss(params: optax.Params, batch: jnp.ndarray, labels: jnp.ndarray) -> jnp.ndarray:
    y_hat = model.apply(params, inputs=batch, train=True, rngs={'dropout': dropout_rng})
    loss_value = optax.softmax_cross_entropy_with_integer_labels(y_hat, labels)
    
    return loss_value.mean()

In [361]:
params = initial_params

e_loss = []
e_acc = []
e_sm = []

i_epoch = 0

@jax.jit
def step(params, opt_state, batch, labels):
    loss_value, grads = jax.value_and_grad(loss)(params, batch, labels)
    updates, opt_state = optimizer.update(grads, opt_state, params)
    params = optax.apply_updates(params, updates)
    return params, opt_state, loss_value

schedule = optax.warmup_cosine_decay_schedule(
    init_value=1e-5,
    peak_value=1e-3,
    warmup_steps=20,
    decay_steps=1000,
    end_value=1e-4
)

optimizer = optax.adam(learning_rate=schedule)
# optimizer = optax.inject_hyperparams(optax.adam)(learning_rate=1e-4)

opt_state = optimizer.init(params)

best_params = params
best_sm = 10
best_acc = 0
wait = 5

In [None]:
num_epochs = 5

for epoch in range(num_epochs):
    losses = []
    for i, idx in enumerate(batch_idx):
        batch, labels = train_x[idx], train_y[idx]
        params, opt_state, loss_value = step(params, opt_state, batch, labels)
        losses.append(loss_value)
    e_loss.append(np.mean(losses))
    dev_p = model.apply(params, inputs=dev_x, train=False)
    e_acc.append(np.mean(dev_p.argmax(axis=-1) == dev_y))
    e_sm.append(np.mean(optax.softmax_cross_entropy_with_integer_labels(dev_p, dev_y)))
    print(f'epoch {i_epoch + epoch}\t loss: {e_loss[-1]:.4f}; val_sm: {e_sm[-1]:.4f}; val_acc: {e_acc[-1]:.4f}')
    
    if e_acc[-1] > best_acc:  #e_sm[-1] < best_sm:
        best_acc = e_acc[-1]
        best_params = params
    if (epoch > wait) and np.all(e_sm[-1] > np.array(e_sm[-wait-1:-1])):
        break
        
i_epoch += num_epochs

epoch 0	 loss: 7.1457; val_sm: 6.1399; val_acc: 0.0295
epoch 1	 loss: 5.6547; val_sm: 5.0678; val_acc: 0.0738


In [None]:
# opt_state.hyperparams['learning_rate'] = 1e-5
# optimizer.update(params, opt_state)

In [315]:
test_p = model.apply(best_params, inputs=test_x, train=False)
np.mean(test_p.argmax(axis=-1) == test_y), np.mean(optax.softmax_cross_entropy_with_integer_labels(test_p, test_y))

(Array(0.21363476, dtype=float32), Array(3.4241629, dtype=float32))

In [316]:
me_p = model.apply(best_params, inputs=me_x, train=False).argmax(axis=-1)
finn_p = model.apply(best_params, inputs=finn_x, train=False).argmax(axis=-1)

In [317]:
np.mean(me_p==me_y), np.mean(finn_p==finn_y)

(Array(0.13432835, dtype=float32), Array(0.1875, dtype=float32))

## Evaluation

In [276]:
def print_results(true, predicted, seqs, mistakes=True):
    if mistakes:
        show = true != predicted
    else:
        show = true == predicted
    actual = bc_boulders_5.loc[inverse_vocabulary.loc[true[show]].values, ['name', 'grade']].values
    otherwise = bc_boulders_5.loc[inverse_vocabulary.loc[predicted[show]].values, ['name', 'grade']].values
    context = [bc_boulders_5.loc[inverse_vocabulary.loc[row].values, 'name'].tolist() for row in np.array(seqs[show])]
    rows = []
    for i in range(len(actual)):
        rows.append(context[i] + list(actual[i]) + list(otherwise[i]))
    return pd.DataFrame(rows)

In [318]:
print_results(me_y, me_p, me_x[:, :-1, 0], mistakes=True)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22
0,The Peeve,Freeman's Warmup,Fisheye Biceps,Snakes and Ladders,Bass Fishing,Green Eggs and Sam,Stairway to Nowhere,Buddha's Belly,Middle Monk,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,V3+,Low Rider,V3
1,Freeman's Warmup,Fisheye Biceps,Snakes and Ladders,Bass Fishing,Green Eggs and Sam,Stairway to Nowhere,Buddha's Belly,Middle Monk,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,V3,Aquarium,V2
2,Snakes and Ladders,Bass Fishing,Green Eggs and Sam,Stairway to Nowhere,Buddha's Belly,Middle Monk,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,Tiny Tim,Jenga,V4,Tiny Tim,V3-
3,Bass Fishing,Green Eggs and Sam,Stairway to Nowhere,Buddha's Belly,Middle Monk,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,Tiny Tim,Jenga,Eager Puppy Short,V2,Eight Ball,V1+
4,Green Eggs and Sam,Stairway to Nowhere,Buddha's Belly,Middle Monk,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,Tiny Tim,Jenga,Eager Puppy Short,Masaki's Problem,V2,Fish Tank,V3-
5,Stairway to Nowhere,Buddha's Belly,Middle Monk,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,Tiny Tim,Jenga,Eager Puppy Short,Masaki's Problem,All Aboard the Swaize Express,V5-,Fish Tank,V3-
6,Buddha's Belly,Middle Monk,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,Tiny Tim,Jenga,Eager Puppy Short,Masaki's Problem,All Aboard the Swaize Express,Backtrack,V2,Nefarious,V1
7,Middle Monk,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,Tiny Tim,Jenga,Eager Puppy Short,Masaki's Problem,All Aboard the Swaize Express,Backtrack,The Pawn,V2,Time Aréte,V1
8,Punk Ass Kid,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,Tiny Tim,Jenga,Eager Puppy Short,Masaki's Problem,All Aboard the Swaize Express,Backtrack,The Pawn,Tumbleweed,V2,Slingshot,V2-
9,Ladybug Strikes Back,New Kid on the Block,Old Man Mountain,Dark Prince Left,Calabogie Dreaming,Mantel la Mesa,Big Bottoms,Tennis Star,The Wiz,Shade Mantel,Shineater,Tiny Tim,Jenga,Eager Puppy Short,Masaki's Problem,All Aboard the Swaize Express,Backtrack,The Pawn,Tumbleweed,Largonian Bulge,V2-,Big Bottoms,V2


In [319]:
print_results(finn_y, finn_p, finn_x[:, :-1, 0], mistakes=True)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22
0,University Crawl,Moss Highway,Macho man,Trad Killer,Timeless,Vertigo,The force,Breakfast at Tiffany's,Quickstick,Oscar,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,V4,Shots Fired,V4
1,Moss Highway,Macho man,Trad Killer,Timeless,Vertigo,The force,Breakfast at Tiffany's,Quickstick,Oscar,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,V3,Hummingbird,V6
2,Macho man,Trad Killer,Timeless,Vertigo,The force,Breakfast at Tiffany's,Quickstick,Oscar,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,Swank Stretch,V5,Cream Puff Dream,V5-
3,Trad Killer,Timeless,Vertigo,The force,Breakfast at Tiffany's,Quickstick,Oscar,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,Swank Stretch,Lounge Act,V6,Minor Threat,V6
4,Timeless,Vertigo,The force,Breakfast at Tiffany's,Quickstick,Oscar,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,Swank Stretch,Lounge Act,Latch On,V3,When Pigs Fly,V3
5,The force,Breakfast at Tiffany's,Quickstick,Oscar,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,Swank Stretch,Lounge Act,Latch On,Can't Do It In Shoes,Gastonomical,V5,Doubt,V5+
6,Breakfast at Tiffany's,Quickstick,Oscar,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,Swank Stretch,Lounge Act,Latch On,Can't Do It In Shoes,Gastonomical,Sloppy Poppy,V4,The Mantis,V4-
7,Quickstick,Oscar,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,Swank Stretch,Lounge Act,Latch On,Can't Do It In Shoes,Gastonomical,Sloppy Poppy,Galaxy,V5,Viper,V5
8,Peninsula Pinch,Equilibrium,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,Swank Stretch,Lounge Act,Latch On,Can't Do It In Shoes,Gastonomical,Sloppy Poppy,Galaxy,Detached Flake,Flow Stone,V4-,Space Monkey,V5
9,Dumb Slab,Made In The Shade,Shade Mantel,Bo Jo Jones,Harry's Rail,The Killing Floor Left,The Sickle,Big Boy,Real World,Swank Stretch,Lounge Act,Latch On,Can't Do It In Shoes,Gastonomical,Sloppy Poppy,Galaxy,Detached Flake,Flow Stone,Space Monkey,Ride the Waves,V7,Chicken Lips and Assholes,V6+


In [337]:
def top_next(seqs, true, grade, bc_area, stars=5, style_id=2, days_later=7):
    last_n = np.array(seqs[-1, 1:, :])
    last_n[-1, 0] = true[-1]
    
    b = np.zeros_like(bc_areas)
    b[area_vocab.loc[bc_area]] = 1
    next_ = np.concatenate([[len(vocabulary)], b, [grade, style_id, stars, days_later]])
    
    x = np.concatenate([last_n, next_[np.newaxis, :]], axis=0)
    p = model.apply(best_params, inputs=x[np.newaxis, :, :], train=False)
    return jax.nn.softmax(p)

In [349]:
p = top_next(me_x, me_y, 5, 'grand-wall-boulders-squamish-bc-canada')
bc_boulders_5.loc[inverse_vocabulary.loc[np.squeeze(p.argsort())[-20:][::-1]].values]

Unnamed: 0_level_0,slug,name,area_id,type,grade_id,bolts,length,grade,description,# onsights,# redpoint,# flashes,# sends,areas_0_name,areas_1_name,areas_2_name,areas_0_slug,areas_1_slug,areas_2_slug,areas_3_name,areas_3_slug,areas_4_name,areas_4_slug,areas_5_name,areas_5_slug,areas_6_name,areas_6_slug,areas_7_name,areas_7_slug,areas_8_name,areas_8_slug,areas_9_name,areas_9_slug,areas_10_name,areas_11_name,areas_10_slug,areas_11_slug,ratings_min,ratings_max,ratings_mean,ratings_count,stars_isna,connected,V grade,onsight,flash,redpoint,area_name
climb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1
3049,timeless-titanic-south-grand-wall-boulders-squ...,Timeless,8361,boulder,32,0,,V5-,Start matched on the big shelf and climb up an...,22,414,84,411,Squamish,Grand Wall Boulders,Titanic South,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,titanic-south-grand-wall-boulders-squamish-bc-...,,,,,,,,,,,,,,,,,,,2.0,5.0,4.168022,369.0,151.0,True,4.75,15,72,324,grand-wall-boulders-squamish-bc-canada
36444,swank-stretch-easy-chair-grand-wall-boulders-s...,Swank Stretch,7521,boulder,34,0,,V5,Start matched on the big flat hold at the bott...,25,338,72,345,Squamish,Grand Wall Boulders,Easy Chair,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,easy-chair-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,5.0,3.783439,314.0,121.0,True,5.0,20,61,264,grand-wall-boulders-squamish-bc-canada
36463,wafer-thin-black-dyke-grand-wall-boulders-squa...,Wafer Thin,8357,boulder,33,0,,V5-,"Sit start on a positive rail, then climb strai...",9,199,54,226,Squamish,Grand Wall Boulders,Black Dyke,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,black-dyke-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,5.0,3.768421,190.0,72.0,True,4.75,7,49,170,grand-wall-boulders-squamish-bc-canada
35761,tims-sloper-problem-thighmaster-grand-wall-bou...,Tim's Sloper Problem,7972,boulder,34,0,,V5,,10,295,19,252,Squamish,Grand Wall Boulders,Thighmaster,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,thighmaster-grand-wall-boulders-squamish-bc-ca...,,,,,,,,,,,,,,,,,,,1.0,5.0,4.022624,221.0,102.0,True,5.0,7,15,230,grand-wall-boulders-squamish-bc-canada
36422,viper-viper-grand-wall-boulders-squamish-bc-ca...,Viper,8358,boulder,35,0,,V5,Climb the snake-like fin of rock up the overha...,3,333,25,269,Squamish,Grand Wall Boulders,Viper,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,viper-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,5.0,4.072,250.0,111.0,True,5.0,1,20,248,grand-wall-boulders-squamish-bc-canada
36499,its-about-time-titanic-south-grand-wall-boulde...,It's About Time,8361,boulder,37,0,,V5+,"Start matched on a sloper, heel hooking or toe...",5,244,16,219,Squamish,Grand Wall Boulders,Titanic South,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,titanic-south-grand-wall-boulders-squamish-bc-...,,,,,,,,,,,,,,,,,,,2.0,5.0,4.046875,192.0,73.0,True,5.25,4,13,202,grand-wall-boulders-squamish-bc-canada
35614,jacks-baby-animal-magnetism-grand-wall-boulder...,Jack's Baby,8350,boulder,35,0,,V5,,10,98,19,105,Squamish,Grand Wall Boulders,Animal Magnetism,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,animal-magnetism-grand-wall-boulders-squamish-...,,,,,,,,,,,,,,,,,,,1.0,5.0,3.489583,96.0,31.0,True,5.0,8,17,80,grand-wall-boulders-squamish-bc-canada
41488,hoop-wrangler-octagon-grand-wall-boulders-squa...,The Hoop Wrangler,8356,boulder,34,0,,V5,,4,98,13,97,Squamish,Grand Wall Boulders,Octagon,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,octagon-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,5.0,4.073171,82.0,33.0,True,5.0,3,10,84,grand-wall-boulders-squamish-bc-canada
42877,doubt-viper-grand-wall-boulders-squamish-bc-ca...,Doubt,8358,boulder,36,0,,V5+,,2,84,6,85,Squamish,Grand Wall Boulders,Viper,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,viper-grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,5.0,4.179104,67.0,25.0,True,5.25,1,6,78,grand-wall-boulders-squamish-bc-canada
41763,everyday-helmet-titanic-south-grand-wall-bould...,Everyday Helmet,8361,boulder,34,0,,V5,Start on the fat rail of Timeless. Mantle stra...,10,56,16,76,Squamish,Grand Wall Boulders,Titanic South,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,titanic-south-grand-wall-boulders-squamish-bc-...,,,,,,,,,,,,,,,,,,,3.0,5.0,3.741379,58.0,24.0,True,5.0,9,15,52,grand-wall-boulders-squamish-bc-canada


In [334]:
area_vocab.loc['grand-wall-boulders-squamish-bc-canada']

0

In [1358]:
wrong_ids = np.unique(finn_p[finn_p!=finn_y])
wrong_ids

array([   3,   24,   31,   38,   48,   58,   72,   85,   86,   93,   97,
         98,   99,  100,  102,  103,  125,  131,  134,  138,  142,  146,
        147,  158,  163,  169,  174,  225,  268,  275,  323,  327,  347,
        355,  409,  410,  418,  424,  425,  454,  458,  463,  549,  550,
        579,  671,  697,  703,  753,  817,  885, 1014, 1051, 1149, 1167,
       1188, 1262, 1317, 1320, 1345, 1401, 1477, 1524, 1579, 1794, 1836,
       2045, 2313, 2743, 2756], dtype=int32)

In [1359]:
wrong_ids = np.unique(finn_p[finn_p!=finn_y])
print(len(wrong_ids[~np.isin(wrong_ids, features['climb_token'].loc['finnfrasergrathwol'])])/len(wrong_ids))
bc_boulders_5.loc[inverse_vocabulary.loc[wrong_ids[~np.isin(wrong_ids, features['climb_token'].loc['finnfrasergrathwol'])]].values]['name'].tolist()

0.7142857142857143


['Squamish Days Traverse',
 'Alien Within',
 'Double Decker',
 'Child Abuse',
 'Tatonka',
 'The Fuzz',
 'Depths Direct',
 'Viper',
 'Titanic',
 'Storm Troopers',
 'Ramen Raw',
 'Missing Reaction',
 'Diabolica',
 "Tyler's Traverse",
 'Paperboy',
 'Skin Graft',
 'Summer Vacation',
 'Into The Light',
 'Mr. Bigglesworth',
 'ChossABlock',
 'Superfly Slab',
 'Born Too Slow',
 'Pyramid Arete',
 'Prime Time Sit-down',
 'Bert and Ernie Were Just Good Friends',
 'White Bread',
 'Hydrogen',
 'Minor Threat',
 'Doubt',
 'Green Tea',
 'Immunized',
 'Autobody',
 "Peelin' Monkey",
 'Lipsmack traverse',
 'Dog Days',
 'Handible Lecture',
 'Evil Empire',
 'Cave Dweller Center',
 'Scratching Post',
 "Quinn's Problem",
 'Empire Strikes Back',
 'Angel Wings',
 'FAROUK!',
 'Stupid Face',
 'Change Agent',
 'Sunset Crack',
 'The Warm Up Cave',
 'Driven Stand',
 'The Casualty Collector',
 'Aretes For Days']

## Explore Embeddings Space

In [1176]:
def cosine(v, m):
    # dot(a, b)/(norm(a)*norm(b))
    norm_v = np.sqrt(np.sum(v**2))
    norms_m = np.sqrt(np.sum(embeddings**2, axis=1))
    return np.squeeze(np.dot(v[np.newaxis, :], m.T) / norm_v / norms_m)

In [844]:
def l2(v, m):
    return np.sqrt(np.sum((v[np.newaxis, :] - m)**2, axis=1))

In [1169]:
params['params']['Embed_0']['embedding'].shape

(3014, 100)

In [1160]:
embeddings = np.array(best_params['params']['Embed_0']['embedding'])

In [1185]:
queries = ['Superfly', 'Pocket Problem', 'Trad Killer']
climb_ids = [bc_boulders_5[bc_boulders_5['name']==query].index[0] for query in queries]
q_tokens = vocabulary.loc[climb_ids].values
embeddings[q_tokens, :].shape

(3, 100)

In [1186]:
q_tokens

array([106,  43,  72])

In [1188]:
bc_boulders_5.loc[inverse_vocabulary.loc[np.argsort(cosine(embeddings[q_tokens[0], :], 
                                                       embeddings))[-10:-1]]]

Unnamed: 0_level_0,slug,name,area_id,type,grade_id,bolts,length,grade,description,# onsights,# redpoint,# flashes,# sends,areas_0_name,areas_1_name,areas_2_name,areas_0_slug,areas_1_slug,areas_2_slug,areas_3_name,areas_3_slug,areas_4_name,areas_4_slug,areas_5_name,areas_5_slug,areas_6_name,areas_6_slug,areas_7_name,areas_7_slug,areas_8_name,areas_8_slug,areas_9_name,areas_9_slug,areas_10_name,areas_11_name,areas_10_slug,areas_11_slug,ratings_min,ratings_max,ratings_mean,ratings_count,stars_isna,connected,V grade,onsight,flash,redpoint
climb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1
75565,killing-floor-left-jungle-apron-boulders-squam...,The Killing Floor Left,8709,boulder,13,0,,V1-,,3,13,5,21,Squamish,Apron Boulders,The Jungle,squamish-bc-canada,apron-boulders-squamish-bc-canada,jungle-apron-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,4.0,2.923077,13.0,8.0,True,0.75,3,5,13
98891,lick-gonzos-finger-poultry-boulder-area-giscom...,Lick Gonzo's Finger,5427,boulder,18,0,,V2,,0,0,5,5,Canada,British Columbia,Prince George,canada,bc-canada,prince-george-bc-canada,Giscome Boulders,giscome-boulders-prince-george-bc-canada,Poultry Boulder Area,poultry-boulder-area-giscome-boulders-prince-g...,,,,,,,,,,,,,,,2.0,4.0,3.0,5.0,0.0,True,2.0,0,5,0
119066,belly-flop-left-sandy-cove-vancouver-bc-canada,Belly Flop Left,8284,boulder,22,0,,V3,,0,5,1,6,Vancouver,Sandy Cove,,vancouver-bc-canada,sandy-cove-vancouver-bc-canada,,,,,,,,,,,,,,,,,,,,2.0,4.0,3.0,4.0,2.0,True,3.0,0,1,5
79629,oxygen-clean-boulders-north-walls-squamish-bc-...,oxygen,6433,boulder,22,0,,V3,,2,2,3,7,Squamish,North Walls,The Clean Boulders,squamish-bc-canada,north-walls-squamish-bc-canada,clean-boulders-north-walls-squamish-bc-canada,,,,,,,,,,,,,,,,,,,2.0,5.0,3.5,6.0,1.0,True,3.0,2,3,2
99927,black-mark-dyno-thighmaster-grand-wall-boulder...,Black Mark Dyno,7972,boulder,22,0,,V3,,1,9,2,12,Squamish,Grand Wall Boulders,Thighmaster,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,thighmaster-grand-wall-boulders-squamish-bc-ca...,,,,,,,,,,,,,,,,,,,1.0,5.0,3.111111,9.0,3.0,True,3.0,1,2,9
133986,easter-island-public-works-godman-creek-vancou...,Easter Island,10095,boulder,14,0,,V1,"Climb the opposing arêtes, high-stepping above...",2,1,2,5,Vancouver,Godman Creek,Public Works,vancouver-bc-canada,godman-creek-vancouver-bc-canada,public-works-godman-creek-vancouver-bc-canada,,,,,,,,,,,,,,,,,,,5.0,5.0,5.0,5.0,0.0,True,1.0,2,2,1
92507,death-cookies-grand-wall-boulders-squamish-bc-...,Death Cookies,4778,boulder,22,0,,V3,,1,3,1,5,Squamish,Grand Wall Boulders,,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,4.0,1.0,True,3.0,1,1,3
73394,shakedown-sit-down-grand-wall-boulders-squamis...,Shakedown Sit Down,4778,boulder,41,0,,V6,,0,13,1,14,Squamish,Grand Wall Boulders,,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,,2.0,4.0,3.0,11.0,3.0,True,6.0,0,1,13
97046,catch-that-cat-grand-wall-boulders-squamish-bc...,Catch That Cat,4778,boulder,21,0,,V3-,,1,4,0,5,Squamish,Grand Wall Boulders,,squamish-bc-canada,grand-wall-boulders-squamish-bc-canada,,,,,,,,,,,,,,,,,,,,2.0,3.0,2.25,4.0,1.0,True,2.75,1,0,4


In [1177]:
cosine(embeddings[q_tokens[0], :],  embeddings).shape

(3014,)