In [None]:
# IPython autoreload in mode 2: imported modules are re-imported before each
# cell execution, so edits to the local problems/metrics/utils modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
from collections import Counter
#
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
#
from tqdm import tqdm
#
from problems import get_problems, filter_problems, filter_problems_ge
from metrics import *
from utils import coords_map, grade_maps, get_board_setup, draw_moves, plot_mat, count_parameters, x_coords, y_coords, draw_coords

In [None]:
import warnings
# Show every warning each time it is triggered (no de-duplication).
warnings.filterwarnings('always')  # "error", "ignore", "always", "default", "module" or "once"

In [None]:
# --- Paths -------------------------------------------------------------------
P_ROOT = Path("./data")
P_PROB = P_ROOT / "problems"
P_BOARD = P_ROOT / "boards/moonboard.png"
P_HOLDS = P_ROOT / "boards/holds"
# Board setup selector, passed to get_problems() and get_board_setup().
setup_year = 2017
setup_angle = 45
#
# Length bounds passed to filter_problems_ge(); maxlen + 1 is also the padded
# sequence length (one extra slot for the leading grade token).
minlen = 5
maxlen = 12
#
# Grades the model is trained on, in the order given to grade_maps().
grade_names = ["6A+", "6B", "6B+", "6C", "6C+", "7A", "7A+", "7B", "7B+", "7C", "7C+"]
#grade_names = ["6B+", "6C", "6C+", "7A", "7A+", "7B", "7B+", "7C", "7C+", "8A", "8A+"]
#repsge = 10
# Per-grade threshold passed to filter_problems_ge().
# NOTE(review): presumably the minimum number of repeats a problem needs to be
# kept ("rep >= n") -- confirm against problems.filter_problems_ge.
grade_rep_ge = {
    "6A+": 20,
    "6B":  10,
    "6B+": 5,
    "6C":  5,
    "6C+": 5,
    "7A":5,
    "7A+":5,
    "7B": 5,
    "7B+": 5,
    "7C": 5,
    "7C+": 5,
}

In [None]:
# Load all problems for the configured board setup, then filter them by grade,
# per-grade threshold and length; the two prints show the size before/after.
probs = get_problems(P_PROB, setup_year, setup_angle)
print(len(probs))

probs = filter_problems_ge(probs, grade_rep_ge, grade_names, minlen, maxlen)
print(len(probs))

# GRADE MAPS
# grade name <-> class index, as produced by utils.grade_maps.
grade_to_num, num_to_grade = grade_maps(grade_names)

# COORD MAPS
# hold coordinate <-> class index; coords_map comes from utils.
coords_to_num = coords_map
num_to_coords = {v:k for k,v in coords_to_num.items()}

# SPEC MAPS
# Special tokens. The numbers here are local to this map only; the ids fed to
# the model come from tok_to_num below.
spec_to_num = {
    "[GRD]": 0,
    "[HLD]": 1,
    "[PAD]": 2
}
num_to_spec = {v:k for k,v in spec_to_num.items()}

# TOKEN MAPS
# Single vocabulary: hold coordinates first, then grades, then special tokens.
# Token ids are the positions in that concatenated list.
toks = list(coords_to_num.keys()) + list(grade_to_num.keys()) + list(spec_to_num.keys())
tok_to_num = {t:idx for idx, t in enumerate(toks)}
num_to_tok = {v:k for k,v in tok_to_num.items()}
#
n_holds = len(coords_to_num)
n_grades = len(grade_to_num)

In [None]:
# Count problems per grade index. Sorting first makes the Counter (and thus
# `grades` / `nprobs`) ordered by ascending grade index.
grades, nprobs = zip(*Counter(sorted([grade_to_num[p["grade"]] for p in probs])).items())
nprobs = np.array(nprobs)
# Relative frequency of each grade and its inverse (total / count).
# NOTE(review): neither grade_freqs nor class_weights is used again in the
# visible part of the notebook.
grade_freqs =  nprobs / nprobs.sum()
class_weights = nprobs.sum() / nprobs

# Bar chart of the number of problems per grade.
fig, axes = plt.subplots(1, 1, figsize=(16, 8))
plt.bar(grades, nprobs)
#plt.yscale("log")
plt.ylabel("#probs")
plt.xlabel("#grades")
plt.xticks(grades, [num_to_grade[n] for n in grades])
plt.show()
#
print(nprobs)

In [None]:
def paddify(sent, pad_to):
    """Return a new list: ``sent`` right-padded with ``"[PAD]"`` to length ``pad_to``.

    Asserts that ``sent`` has more than one token and is not longer than
    ``pad_to``. The input list is not modified.
    """
    n_tokens = len(sent)
    assert n_tokens > 1, "Sentency empty"
    assert n_tokens <= pad_to, "Sentences too long!, Increase padding!"
    n_missing = pad_to - n_tokens
    return sent + n_missing * ["[PAD]"]

def batch_paddify(sents, pad_to):
    """Apply ``paddify`` to every sentence in ``sents`` and return the list of results."""
    padded = []
    for sentence in sents:
        padded.append(paddify(sentence, pad_to))
    return padded

def pad_mask(sent):
    """Return a list of booleans, ``True`` where the token is ``'[PAD]'``."""
    return [tok == '[PAD]' for tok in sent]

def batch_pad_mask(sents):
    """Return one ``pad_mask`` per sentence in ``sents``."""
    return list(map(pad_mask, sents))

def endecode(sent, tok_map):
    """Map every token of ``sent`` through ``tok_map``.

    Works in both directions (token -> id or id -> token) depending on the
    mapping passed in. Raises ``KeyError`` for a token missing from the map.
    """
    mapped = []
    for tok in sent:
        mapped.append(tok_map[tok])
    return mapped

def batch_endecode(sents, tok_map):
    """Apply ``endecode`` with ``tok_map`` to every sentence in ``sents``."""
    converted = []
    for sentence in sents:
        converted.append(endecode(sentence, tok_map))
    return converted


def hold_mask(sent):
    """Return a list of booleans, ``True`` where the token is the ``"[HLD]"`` placeholder."""
    return [tok == "[HLD]" for tok in sent]

def batch_hold_mask(sents):
    """Return one ``hold_mask`` per sentence in ``sents``."""
    return list(map(hold_mask, sents))

In [None]:
# Turn every problem into a token "sentence": [grade, hold_1, ..., hold_n].
# NOTE(review): this cell is repeated verbatim at the top of the next cell,
# which rebuilds X from scratch -- this copy looks redundant.
X = []
for p in probs:
    grade = p["grade"].upper()
    moves = [move["description"].upper() for move in p["moves"]]
    X.append([grade] + moves)

In [None]:
# Turn every problem into a token "sentence": [grade, hold_1, ..., hold_n].
X = []
for p in probs:
    grade = p["grade"].upper()
    moves = [move["description"].upper() for move in p["moves"]]
    X.append([grade] + moves)
#
# Pad to a fixed length (maxlen holds + 1 grade token) and record which
# positions are padding (True = "[PAD]").
X = batch_paddify(X, maxlen + 1)
M = batch_pad_mask(X)
#
# Tokens -> integer ids from the shared vocabulary.
X = batch_endecode(X, tok_to_num)
#
# X: token ids, M: boolean padding mask; both have one row per problem.
X = np.array(X)
M = np.array(M)

In [None]:
# Random 80/20 train/validation split over problem indices.
r_train = 0.80
# Fixed seed so the partition is identical on every run. An unseeded split
# re-drawn while a trained model is still in the kernel would move former
# training problems into the validation set.
split_seed = 0
n_train = int(r_train * len(probs))
#
# One seeded permutation is already uniformly random; shuffling repeatedly
# adds nothing.
idcs = np.random.default_rng(split_seed).permutation(len(probs))
idcs_train = idcs[:n_train]
idcs_valid = idcs[n_train:]
#
X_train = X[idcs_train]
M_train = M[idcs_train]
#
X_valid = X[idcs_valid]
M_valid = M[idcs_valid]

In [None]:
def label_smoother(y_true, n_classes, n_smooth):
    """Build a smoothed one-hot target of length ``n_classes``.

    Class ``y_true`` gets weight 1; the classes at distance ``k`` on either
    side (``k = 1..n_smooth``) get weight ``1 / (2k)``. Neighbours that would
    fall outside ``[0, n_classes)`` are written into a margin and cut off.

    Returns a float ``torch`` tensor of shape ``(n_classes,)``.
    """
    # Work on a buffer with n_smooth extra slots on each side so neighbour
    # writes near the borders never go out of range.
    center = y_true + n_smooth
    padded = torch.zeros(n_classes + 2 * n_smooth)
    padded[center] = 1
    for offset in range(1, n_smooth + 1):
        weight = 1 / (2 * offset)
        padded[center + offset] = weight
        padded[center - offset] = weight
    return padded[n_smooth:len(padded) - n_smooth]


def label_ordinal(y_true, n_classes, n_smooth=0):
    """Build an ordinal ("thermometer") target of length ``n_classes``.

    Positions ``0..y_true`` are 1. The next ``n_smooth`` positions, as far as
    they fit, get the decaying weights ``1/2, 1/4, 1/6, ...``; everything after
    that stays 0.

    Returns a float NumPy array of shape ``(n_classes,)``.
    """
    first_soft = y_true + 1
    target = np.zeros(n_classes)
    target[:first_soft] = 1
    # Decaying tail, truncated to the number of slots left after the ones.
    n_room = n_classes - first_soft
    decay = 1 / (2 * np.arange(1, n_smooth + 1, 1))
    target[first_soft:first_soft + n_smooth] = decay[:n_room]
    return target

class HoldDataset(Dataset):
    """Dataset over encoded problems ``X`` and their padding masks ``M``.

    Both arrays are copied on construction, so later changes to the caller's
    arrays do not affect the dataset. If ``transform`` is given, each item is
    ``transform(X[idx], M[idx])``; otherwise it is the pair
    ``(X[idx] as long tensor, M[idx] as bool tensor)``.
    """

    def __init__(self, X, M, transform=None):
        self.transform = transform
        # Private copies of the padding mask and the token ids.
        self.M = M.copy()
        self.X = X.copy()

    def __len__(self):
        """Number of problems (rows of ``X``)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the (optionally transformed) sample at position ``idx``."""
        tokens, mask = self.X[idx], self.M[idx]
        if self.transform is None:
            return torch.Tensor(tokens).long(), torch.Tensor(mask).bool()
        return self.transform(tokens, mask)

In [None]:
def get_transform(num_to_tok, tok_to_num, grade_to_num, coords_to_num, n_classes, n_smooth, p_grade=0.5):
    """Build the per-sample masking transform used by ``HoldDataset``.

    The returned ``transform(x, m)`` masks exactly one position of the encoded
    problem ``x`` (``m`` is its padding mask, True = padding):

    * with probability ``p_grade`` the token at position 0 (treated as the
      grade) is replaced by ``[GRD]`` and the target is the grade class;
    * otherwise one position after the grade is replaced by ``[HLD]`` and the
      target is the hold class of the removed token.

    Parameters
    ----------
    num_to_tok, tok_to_num : dict
        Token id <-> token string vocabulary maps.
    grade_to_num, coords_to_num : dict
        Token string -> class index for grades / holds.
    n_classes : int
        Length of the ordinal grade target ``ys``.
    n_smooth : int
        Smoothing width forwarded to ``label_ordinal``.
    p_grade : float
        Probability of masking the grade. ``0`` never masks the grade and
        ``1`` always does.

    Returns
    -------
    callable
        ``transform(x, m) -> (y, ys, idx, is_grade, x, m)`` where ``y`` is the
        class index (shape ``(1,)``), ``ys`` the ordinal grade target (all
        zeros for hold samples), ``idx`` the masked position (shape ``(1,)``),
        ``is_grade`` a bool tensor (shape ``(1,)``), and ``x`` / ``m`` the
        masked ids and padding mask as tensors. The inputs are not modified.
    """
    def transform(x, m):
        x = x.copy()
        m = m.copy()
        # Strict "<": np.random.rand() draws from [0, 1), so "<=" could pick
        # the grade branch even for p_grade == 0 (when the draw is exactly 0).
        is_grade = bool(np.random.rand() < p_grade)
        if is_grade:
            y = grade_to_num[num_to_tok[x[0]]]
            idx = 0
            x[0] = tok_to_num['[GRD]']
            ys = label_ordinal(y, n_classes, n_smooth)
        else:
            # Number of non-padded tokens; positions 1..max_idx-1 are sampled.
            # assumes padding only appears at the end of the sequence
            max_idx = int(np.count_nonzero(np.logical_not(m)))
            idx = np.random.randint(1, max_idx)
            y = coords_to_num[num_to_tok[x[idx]]]
            x[idx] = tok_to_num['[HLD]']
            ys = torch.zeros(n_classes)

        # Tensorify
        y = torch.Tensor([y]).long()
        idx = torch.Tensor([idx]).long()
        is_grade = torch.Tensor([is_grade]).bool()
        x = torch.Tensor(x).long()
        m = torch.Tensor(m).bool()
        ys = torch.Tensor(ys).float()
        return y, ys, idx, is_grade, x, m

    return transform

In [None]:
# Training setup: 20% of the samples mask the grade, 80% mask a hold.
p_grade = 0.2
n_smooth = 0
batch_size = 256

transform = get_transform(num_to_tok, tok_to_num, grade_to_num, coords_to_num, n_grades, n_smooth, p_grade)

# Both datasets use the same random masking transform, so validation samples
# are re-masked randomly on every pass as well.
ds_train = HoldDataset(X_train, M_train, transform)
ds_valid = HoldDataset(X_valid, M_valid, transform)
#
# drop_last=True discards the final incomplete batch of each loader.
dl_train = DataLoader(ds_train, batch_size=batch_size, shuffle=True, drop_last=True)
dl_valid = DataLoader(ds_valid, batch_size=batch_size, shuffle=False, drop_last=True)
#
# Pull one batch as a smoke test; the names stay in the kernel namespace.
y, ys, idx, is_grade, x, m = next(iter(dl_train))

In [None]:
# Task-specific validation loaders: p_grade=0 yields hold-masked samples and
# p_grade=1 yields grade-masked samples only. Both use n_smooth=1.
transform_holds = get_transform(num_to_tok, tok_to_num, grade_to_num, coords_to_num, n_grades, 1, 0)
transform_grades = get_transform(num_to_tok, tok_to_num, grade_to_num, coords_to_num, n_grades, 1, 1)

ds_valid_holds = HoldDataset(X_valid, M_valid, transform_holds)
ds_valid_grades =HoldDataset(X_valid, M_valid, transform_grades)
#
# drop_last=True: the final incomplete validation batch is not evaluated.
dl_valid_holds =  DataLoader(ds_valid_holds, batch_size=batch_size, shuffle=False, drop_last=True)
dl_valid_grades =  DataLoader(ds_valid_grades, batch_size=batch_size, shuffle=False, drop_last=True)

In [None]:
class GradeTransformer(nn.Module):
    """Transformer encoder over problem tokens with two classification heads.

    ``forward`` only returns the encoded sequence; callers pick the position
    of interest and apply ``classifier_holds`` (raw scores over ``n_holds``
    classes) or ``classifier_grades`` (sigmoid outputs over ``n_grades``
    ordinal positions) themselves.

    No positional encoding is added to the embeddings.
    """

    def __init__(self, d_model, n_heads, n_layers, n_holds, n_grades, n_tok):
        super().__init__()
        # Keep the hyper-parameters around for inspection.
        self.d_model, self.n_heads = d_model, n_heads
        self.n_layers, self.n_tok = n_layers, n_tok

        # Token id -> d_model vector.
        self.embedder = nn.Embedding(n_tok, d_model)

        # Pre-norm encoder stack working on (batch, seq, feature) tensors.
        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=1024,
            batch_first=True,
            norm_first=True,
        )
        self.encoder = nn.TransformerEncoder(layer, num_layers=n_layers)

        # Hold head: linear scores, no bias.
        self.classifier_holds = nn.Sequential(
            nn.Linear(d_model, n_holds, bias=False),
        )
        # Grade head: linear + sigmoid, one output per ordinal grade position.
        self.classifier_grades = nn.Sequential(
            nn.Linear(d_model, n_grades, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x, src_key_padding_mask):
        """Embed token ids ``x`` and encode them; padded positions are masked out of attention."""
        embedded = self.embedder(x)
        return self.encoder(embedded, src_key_padding_mask=src_key_padding_mask)

In [None]:
# Model hyper-parameters.
d_model = 64
n_heads = 8
n_layers = 6
# Vocabulary / head sizes, recomputed from the token maps so this cell does
# not depend on the earlier n_holds / n_grades values.
n_tok = len(tok_to_num)
n_holds = len(coords_to_num)
n_grades = len(grade_to_num)
#
model = GradeTransformer(d_model, n_heads, n_layers, n_holds, n_grades, n_tok)
print(count_parameters(model))
# Bare expression: displays the module structure as the cell output.
model

In [None]:
def index_of_first_non_zero(x):
    """Return, per row, the index of the first non-zero entry minus one.

    Used to decode ordinal grade outputs: called with ``outputs <= 0.5``, the
    result is the last position before the outputs first drop to 0.5 or
    below, i.e. the predicted grade index.

    * first entry already non-zero -> ``-1`` (below the lowest grade)
    * no non-zero entry at all     -> ``n_cols - 1`` (the highest grade)

    The second case used to return ``-1`` as well, because ``argmax`` over an
    all-zero row is 0; that made the top grade impossible to predict.

    Parameters
    ----------
    x : torch.Tensor of shape (batch, n_cols); bool or numeric.

    Returns
    -------
    torch.Tensor of shape (batch, 1), dtype long.
    """
    n_cols = x.shape[1]
    # Strictly decreasing positive weights: among the non-zero entries of a
    # row, the first one gets the largest product, so argmax finds it.
    weights = torch.arange(n_cols, 0, -1, device=x.device)
    nonzero = x != 0
    first = torch.argmax(nonzero * weights, 1, keepdim=True)
    # Rows without any non-zero entry behave as if the first non-zero entry
    # sat just past the end of the row.
    has_any = nonzero.any(dim=1, keepdim=True)
    first = torch.where(has_any, first, torch.full_like(first, n_cols))
    return first - 1

def classify(model, y, ys, idx, is_grade, x, m):
    """Run one batch through ``model`` and compute the losses and accuracies of both tasks.

    ``is_grade`` splits the batch into grade-masked and hold-masked samples.
    For each group the encoder output at the masked position ``idx`` is fed to
    the matching head. Uses the module-level ``loss_fn_g``, ``loss_fn_h``,
    ``device`` and ``soft_acuracy``.

    Returns
    -------
    (loss_g, loss_h, acc0_g, acc1_g, acc0_h)
        Grade loss, hold loss, grade accuracy at tolerance 0 and 1, hold
        accuracy at tolerance 0. A group with no samples in the batch
        contributes a zero loss tensor and accuracies of 0.
    """
    hidden = model.forward(x, m)

    # ---- grade-masked samples ----
    if not torch.any(is_grade):
        loss_g = torch.Tensor([0.]).to(device)
        acc0_g, acc1_g = 0., 0.
    else:
        grade_true = y[is_grade]
        grade_target = ys[is_grade]
        grade_feats = hidden[is_grade]
        grade_pos = idx[is_grade]
        # One feature vector per sample: the encoder output at the masked slot.
        grade_feats = grade_feats[torch.arange(grade_feats.shape[0]), grade_pos, :]
        grade_out = model.classifier_grades(grade_feats)

        loss_g = loss_fn_g(grade_out, grade_target)
        with torch.no_grad():
            # Ordinal decoding: position before the outputs first drop to <= 0.5.
            grade_pred = index_of_first_non_zero(grade_out <= 0.5).flatten().detach().cpu()
            grade_true_cpu = grade_true.cpu()
            acc0_g = soft_acuracy(grade_true_cpu, grade_pred, tol=0)
            acc1_g = soft_acuracy(grade_true_cpu, grade_pred, tol=1)

    # ---- hold-masked samples ----
    is_hold = ~is_grade
    if not torch.any(is_hold):
        loss_h = torch.Tensor([0.]).to(device)
        acc0_h = 0.
    else:
        hold_true = y[is_hold]
        hold_feats = hidden[is_hold]
        hold_pos = idx[is_hold]
        hold_feats = hold_feats[torch.arange(hold_feats.shape[0]), hold_pos, :]
        hold_out = model.classifier_holds(hold_feats)

        loss_h = loss_fn_h(hold_out, hold_true)
        with torch.no_grad():
            hold_pred = torch.argmax(hold_out, dim=-1).detach().cpu()
            acc0_h = soft_acuracy(hold_true.cpu(), hold_pred, tol=0)

    return loss_g, loss_h, acc0_g, acc1_g, acc0_h

def get_outs(model, y, ys, idx, is_grade, x, m):
    """Run one batch through ``model`` and return head outputs and targets per task.

    Same batch split as ``classify`` but without losses or metrics.

    Returns
    -------
    (o_g, y_g, o_h, y_h)
        Grade-head outputs and grade targets for the grade-masked samples,
        hold-head outputs and hold targets for the hold-masked samples. The
        pair of a task with no samples in the batch is ``(None, None)``.
    """
    hidden = model.forward(x, m)

    def head_outputs(rows, head):
        # Select the samples in `rows`, take the encoder output at each
        # sample's masked position and feed it through `head`.
        feats = hidden[rows]
        positions = idx[rows]
        feats = feats[torch.arange(feats.shape[0]), positions, :]
        return head(feats)

    # Grade-masked samples
    o_g = y_g = None
    if torch.any(is_grade):
        y_g = y[is_grade]
        o_g = head_outputs(is_grade, model.classifier_grades)

    # Hold-masked samples
    is_hold = ~is_grade
    o_h = y_h = None
    if torch.any(is_hold):
        y_h = y[is_hold]
        o_h = head_outputs(is_hold, model.classifier_holds)

    return o_g, y_g, o_h, y_h

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU so
# the notebook still runs end to end.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# The loop below runs epochs 0..num_epochs inclusive (num_epochs + 1 passes).
num_epochs = 101

model = model.to(device)
#
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer=optimizer, gamma=0.98)
# classify() reads these two loss functions from the module namespace.
loss_fn_h = torch.nn.CrossEntropyLoss()
loss_fn_g = torch.nn.MSELoss()

# One row per validation pass: [epoch, grade acc@0, grade acc@1, hold acc].
epoch_acccs = []
for epoch_idx in range(num_epochs + 1):
    #
    model.train()
    desc = "Train[{:3}/{:3}]:".format(epoch_idx, num_epochs)
    pbar = tqdm(dl_train, bar_format=desc + ']{r_bar}')
    #
    for y, ys, idx, is_grade, x, m in pbar:
        # The transform returns shape-(1,) tensors per sample, so the batched
        # y / idx / is_grade are (batch, 1); squeeze them to (batch,).
        y = y.to(device).squeeze()
        ys = ys.to(device)
        idx = idx.to(device).squeeze()
        is_grade = is_grade.to(device).squeeze()
        x = x.to(device)
        m = m.to(device)
        #
        optimizer.zero_grad()
        #
        loss_g, loss_h, acc0_g, acc1_g, acc0_h = classify(model, y, ys, idx, is_grade, x, m)

        # The grade loss is weighted 10x relative to the hold loss.
        loss = 10 * loss_g + loss_h
        loss.backward()
        optimizer.step()

        pbar.set_postfix(
            {'L': loss.item(),
             'L_h': loss_h.item(),
             'L_g': loss_g.item(),
             'a@0': acc0_g,
             "a@1": acc1_g,
             "ah": acc0_h
            }
        )

    # Decay the learning rate once per epoch. The scheduler was created above
    # but never stepped, so the learning rate stayed at its initial value.
    lr_scheduler.step()

    # Validate every 10th epoch and after the last one.
    if epoch_idx % 10 == 0 or epoch_idx == num_epochs:
        model.eval()
        ACC0G = []
        ACC1G = []
        ACC0H = []
        for y, ys, idx, is_grade, x, m in dl_valid:
            y = y.to(device).squeeze()
            ys = ys.to(device)
            idx = idx.to(device).squeeze()
            is_grade = is_grade.to(device).squeeze()
            x = x.to(device)
            m = m.to(device)

            with torch.no_grad():
                _, _, acc0_g, acc1_g, acc0_h = classify(model, y, ys, idx, is_grade, x, m)
            ACC0G.append(acc0_g)
            ACC1G.append(acc1_g)
            ACC0H.append(acc0_h)
        # Unweighted mean of the per-batch accuracies.
        ACC0G = np.array(ACC0G).mean()
        ACC1G = np.array(ACC1G).mean()
        ACC0H = np.array(ACC0H).mean()
        epoch_acccs.append([epoch_idx, ACC0G, ACC1G, ACC0H])
        print("\tValid - a@0={:.3f} a@1={:.3f} ah={:.3f}".format(ACC0G,ACC1G,ACC0H))

In [None]:
# Validation curves. Columns of epoch_acccs (filled by the training cell):
# 0 = epoch, 1 = grade accuracy at tolerance 0, 2 = grade accuracy at
# tolerance 1, 3 = hold accuracy.
epoch_accs = np.array(epoch_acccs)
plt.plot(epoch_accs[:,0], epoch_accs[:,1], label="acc@0")
plt.plot(epoch_accs[:,0], epoch_accs[:,2], label="acc@1")
plt.plot(epoch_accs[:,0], epoch_accs[:,3], label="acc_hold")
plt.legend()
plt.show()

In [None]:
# Hold-prediction report on the validation set.
model.eval()
#
Y_h_t = []
Y_h_p = []
# Five passes over the loader: the masked hold position is drawn at random by
# the transform, so each pass evaluates different masked positions.
for _ in range(5):
    for y, ys, idx, is_grade, x, m in dl_valid_holds:
        y = y.to(device).squeeze()
        ys = ys.to(device)
        idx = idx.to(device).squeeze()
        is_grade = is_grade.to(device).squeeze()
        x = x.to(device)
        m = m.to(device)
        with torch.no_grad():
            _, _, o_h, y_h = get_outs(model, y, ys, idx, is_grade, x, m)
            # Predicted hold class = highest-scoring output of the hold head.
            Y_h_p.append(o_h.argmax(dim=-1).cpu().numpy())
            Y_h_t.append(y_h.cpu().numpy())
Y_h_t = np.concatenate(Y_h_t)
Y_h_p = np.concatenate(Y_h_p)
#
# soft_classification_report comes from the metrics star-import; the third
# argument is passed as 0 here.
# NOTE(review): presumably the tolerance, as in soft_acuracy -- confirm.
rep = soft_classification_report(Y_h_t, Y_h_p, 0,
                                 #target_names=list(coords_map.keys()),
                                 digits=3)
print(rep)

In [None]:
# Grade-prediction report on the validation set.
model.eval()
#
Y_g_t = []
Y_g_p = []
for _ in range(1):
    for y, ys, idx, is_grade, x, m in dl_valid_grades:
        y = y.to(device).squeeze()
        ys = ys.to(device)
        idx = idx.to(device).squeeze()
        is_grade = is_grade.to(device).squeeze()
        x = x.to(device)
        m = m.to(device)
        with torch.no_grad():
            o_g, y_g, _, _ = get_outs(model, y, ys, idx, is_grade, x, m)
            # Ordinal decoding of the sigmoid outputs (threshold 0.5).
            y_g_p = index_of_first_non_zero(o_g <= 0.5).flatten()
            Y_g_p.append(y_g_p.cpu().numpy())
            Y_g_t.append(y_g.cpu().numpy())
Y_g_t = np.concatenate(Y_g_t)
Y_g_p = np.concatenate(Y_g_p)
#
# One report per tolerance (0 and 1).
for tol in range(2):
    # The decoder can emit -1; in that case an extra "-1" label is prepended
    # so the label list still lines up with the classes that occur.
    if Y_g_p.min() == -1:
        target_names =["-1"] + grade_names
    else:
        target_names = grade_names
    rep = soft_classification_report(Y_g_t, Y_g_p, tol,  target_names=target_names, digits=3)
    print(rep)

# Embeddings

In [None]:
from mpl_toolkits import mplot3d
from sklearn.manifold import TSNE

## Grade Embeddings

In [None]:
# Look up the learned input embedding of every grade token.
grade_tok_nums = [tok_to_num[g] for g in grade_names]
grade_embeddings = model.embedder(torch.Tensor(grade_tok_nums).long().unsqueeze(0).to(device)).detach().cpu().squeeze().numpy()
# Displayed as the cell output.
grade_embeddings.shape

In [None]:
# Pairwise Euclidean distances between grade embeddings.
# NOTE(review): the matrix is sized with `grades` (the tuple created in the
# histogram cell) while the loops run over grade_embeddings (one row per
# entry of grade_names); the two lengths must agree.
GM = np.zeros((len(grades), len(grades)))
for i1, ge1 in enumerate(grade_embeddings):
    for i2, ge2 in enumerate(grade_embeddings):
        #d = 1 - np.abs(np.dot(ge1, ge2) / (np.linalg.norm(ge1) * np.linalg.norm(ge2)))
        d = np.linalg.norm(ge1 - ge2)
        GM[i1, i2] = d
plot_mat(GM, col_names=grade_names, row_names=grade_names, cmap="Greys", scale_factor=1)

## Hold Embeddings

In [None]:
def get_embedding(num):
    """Return the input embedding of token id ``num`` as a NumPy array.

    Reads the module-level ``model`` and ``device``.
    """
    token_id = torch.Tensor([num]).long().to(device)
    vector = model.embedder(token_id)
    return vector.squeeze().detach().cpu().numpy()

In [None]:
# Hold coordinate -> vocabulary token id -> learned embedding vector.
coords = list(coords_to_num.keys())
coords_to_emb_num = {c: tok_to_num[c] for c in coords}
coords_to_emb = {c: get_embedding(n) for c, n in coords_to_emb_num.items()}

In [None]:
# D is a grid of tiles: tile (query_y_idx, query_x_idx) has size
# dim_y x dim_x and holds the Euclidean embedding distance from the query
# hold to every hold on the board.
dim_x_tot = len(x_coords)**2
dim_y_tot = len(y_coords)**2
#
dim_x = len(x_coords)
dim_y = len(y_coords)

D = np.zeros((dim_y_tot, dim_x_tot))
#
for query_x_idx, query_x in enumerate(x_coords):
    for query_y_idx, query_y in enumerate(y_coords):
        query_coord = query_x + query_y
        query_emb = coords_to_emb[query_coord]
        #
        # NOTE: the loop variables x and y overwrite the batch tensors of the
        # same name left in the kernel by earlier cells.
        for x_idx, x in enumerate(x_coords):
            for y_idx, y in enumerate(y_coords):
                coord = x + y
                emb = coords_to_emb[coord]
                #
                #d = np.abs(np.dot(query_emb, emb) / (np.linalg.norm(query_emb) * np.linalg.norm(emb)))
                d = np.linalg.norm(query_emb - emb)
                #d = np.abs(np.dot(query_emb, emb))
                #
                # Position inside D: tile offset plus position within the tile.
                d_idx_y = dim_y * query_y_idx + y_idx
                d_idx_x = dim_x * query_x_idx + x_idx
                
                D[d_idx_y, d_idx_x] = d

In [None]:
# Show the tiled distance matrix with one tick at the centre of each tile.
sf = 2
plt.figure(figsize=(dim_x * sf, dim_y * sf))
plt.imshow(D, cmap="gray")
# The hard-coded tick positions assume 11 x-coordinates (tile width 11) and
# 18 y-coordinates (tile height 18).
plt.xticks(list(range(5, 121, 11)), x_coords)
plt.yticks(list(range(9, 324, 18)), y_coords)
plt.show()

# Generate New Boulders

In [None]:
def generate_new_coord(grade, coords, model):
    """Predict the next hold for a partial problem.

    Builds the sentence ``[grade] + coords + ["[HLD]"]`` and returns the hold
    predicted at every ``[HLD]`` position (normally just the appended one) as
    a list of coordinate strings. Reads the module-level ``maxlen``,
    ``tok_to_num``, ``num_to_coords`` and ``device``.
    """
    model.eval()
    sentence = [grade] + coords + ["[HLD]"]
    padded = batch_paddify([sentence], maxlen + 1)
    hold_flags = np.array(batch_hold_mask(padded))
    pad_flags = np.array(batch_pad_mask(padded))
    token_ids = np.array(batch_endecode(padded, tok_to_num))
    #
    token_ids = torch.Tensor(token_ids).long().to(device)
    hold_flags = torch.Tensor(hold_flags).bool().to(device)
    pad_flags = torch.Tensor(pad_flags).bool().to(device)

    with torch.no_grad():
        hidden = model.forward(token_ids, pad_flags)
        # Score only the placeholder positions and take the best class each.
        scores = model.classifier_holds(hidden[hold_flags])
        best = scores.argmax(dim=-1).cpu().numpy()
    return [num_to_coords[n] for n in best]


def generate_all_coord(grade, coords, model):
    """Predict a hold for every ``[HLD]`` placeholder and a grade for the sequence.

    Builds the sentence ``[grade] + coords`` (``coords`` may contain
    ``"[HLD]"`` placeholders). Reads the module-level ``maxlen``,
    ``tok_to_num``, ``num_to_coords``, ``num_to_grade`` and ``device``.

    Returns
    -------
    (holds_pred, grades_pred)
        ``holds_pred``: one coordinate string per placeholder, in sequence
        order. ``grades_pred``: one-element list with the grade decoded from
        position 0, or ``"UG"`` when the decoder yields -1 (same convention
        as ``grade_boulder``).
    """
    model.eval()
    X_gen = [[grade] + coords]
    X_gen = batch_paddify(X_gen, maxlen + 1)
    M_hld = batch_hold_mask(X_gen)
    M_pad = batch_pad_mask(X_gen)
    X_gen = batch_endecode(X_gen, tok_to_num)
    #
    X_gen = np.array(X_gen)
    M_hld = np.array(M_hld)
    M_pad = np.array(M_pad)
    #
    X_gen = torch.Tensor(X_gen).long().to(device)
    M_hld = torch.Tensor(M_hld).bool().to(device)
    M_pad = torch.Tensor(M_pad).bool().to(device)

    with torch.no_grad():
        out = model.forward(X_gen, M_pad)
        o_h = model.classifier_holds(out[M_hld])
        o_h = o_h.argmax(dim=-1).cpu().numpy()
        holds_pred = [num_to_coords[n] for n in o_h]
        #
        # The grade head emits ordinal sigmoid outputs, so it has to be
        # decoded by thresholding at 0.5 like everywhere else; argmax over
        # those outputs does not give a grade index.
        o_g = model.classifier_grades(out[:, 0, :])
        o_g = index_of_first_non_zero(o_g <= 0.5).flatten().cpu().numpy()
        grades_pred = [num_to_grade[n] if n >= 0 else "UG" for n in o_g]
    return holds_pred, grades_pred

def grade_boulder(coords, model, decode=True):
    """Predict the grade of a problem given its holds.

    Builds the sentence ``["[GRD]"] + coords``, runs the model and decodes the
    ordinal grade outputs at position 0 with a 0.5 threshold. Reads the
    module-level ``maxlen``, ``tok_to_num``, ``num_to_grade`` and ``device``.

    Parameters
    ----------
    coords : list of str
        Hold coordinate tokens.
    model : GradeTransformer
    decode : bool
        If True return the grade name, or ``"UG"`` when the decoded index is
        -1; if False return the raw integer index.
    """
    model.eval()
    X_gen = [["[GRD]"] + coords]
    X_gen = batch_paddify(X_gen, maxlen + 1)
    M_pad = batch_pad_mask(X_gen)
    X_gen = batch_endecode(X_gen, tok_to_num)
    #
    X_gen = np.array(X_gen)
    M_pad = np.array(M_pad)
    #
    X_gen = torch.Tensor(X_gen).long().to(device)
    M_pad = torch.Tensor(M_pad).bool().to(device)

    with torch.no_grad():
        out = model.forward(X_gen, M_pad)
        #
        # Only position 0 (the [GRD] token) is needed, so no hold mask is
        # built here.
        o_g = out[:,0,:]
        o_g = model.classifier_grades(o_g)
        y_g_p = index_of_first_non_zero(o_g <= 0.5)
    grade = y_g_p.cpu().item()
    if decode:
        if grade == -1:
            return "UG"
        grade = num_to_grade[grade]
    return grade

In [None]:
# Greedy generation demo: start from one hold, ask the model for the next hold
# six times (conditioned on grade 6B+), and after each step plot the board
# with the model's own grade estimate as the title.
X_gen = ["F5"]
grade = "6B+"
for _ in range(6):
    print(X_gen)
    H_new = generate_new_coord(grade, X_gen, model)
    # A fresh board image is loaded for every step.
    board = get_board_setup(P_BOARD, P_HOLDS, setup_year)
    X_gen = X_gen + H_new
    prob_board = draw_coords(board, X_gen)
    #
    grade_pred = grade_boulder(X_gen, model)
    #
    plt.figure(figsize=(5,9))
    plt.imshow(np.array(prob_board))
    plt.title(grade_pred)
    plt.show()

In [None]:
# Same greedy generation demo as the previous cell, conditioned on grade 7B
# and run for five steps.
X_gen = ["F5"]
grade = "7B"
for _ in range(5):
    print(X_gen)
    H_new = generate_new_coord(grade, X_gen, model)
    # A fresh board image is loaded for every step.
    board = get_board_setup(P_BOARD, P_HOLDS, setup_year)
    X_gen = X_gen + H_new
    prob_board = draw_coords(board, X_gen)
    #
    grade_pred = grade_boulder(X_gen, model)
    #
    plt.figure(figsize=(5,9))
    plt.imshow(np.array(prob_board))
    plt.title(grade_pred)
    plt.show()

In [None]:
# Grid of generated problems: one row per grade, one column per problem length
# (5, 7, 9 holds). Each problem starts as all "[HLD]" placeholders that are
# filled in left to right.
h_min = 5
h_max = 9 + 1
step = 2
# NOTE(review): this rebinds n_holds (the number of hold classes, an int) to a
# list of problem lengths; cells that expect the int must not run after this
# one without re-running the model-instantiation cell.
n_holds = list(range(h_min, h_max, step))

n_row = len(grade_names)
n_col = len(n_holds)
fig,axes = plt.subplots(n_row, n_col, figsize=(n_col * 11, n_row * 18))

for idx_row, grade in enumerate(grade_names):
    for idx_col, n_hold in enumerate(n_holds):
        X_gen = ["[HLD]"] * n_hold
        G = []
        for idx in range(len(X_gen)):
            # H_new[0] is the prediction for the first remaining placeholder,
            # which is position idx because earlier ones are already filled.
            H_new, grades_pred = generate_all_coord(grade, X_gen, model)
            board = get_board_setup(P_BOARD, P_HOLDS, setup_year)
            X_gen[idx] = H_new[0]
            # NOTE(review): X_gen[1:] drops the first hold and still contains
            # "[HLD]" placeholders for the unfilled positions -- confirm this
            # is the intended input for the grade estimate.
            grade_pred = grade_boulder(X_gen[1:], model)
            G.append(grade_pred)
        
        # Title: the grade estimate after each fill step.
        prob_board = draw_coords(board, X_gen)
        axes[idx_row][idx_col].imshow(np.array(prob_board))
        axes[idx_row][idx_col].set_title(G)
plt.show()

# Beamsearch

In [None]:
def get_coords(grade, coords, model):
    """Return the raw hold-head scores for every ``[HLD]`` placeholder.

    Builds the sentence ``[grade] + coords`` and returns a tensor with one row
    of scores per placeholder, in sequence order (no softmax, no argmax).
    Reads the module-level ``maxlen``, ``tok_to_num`` and ``device``.
    """
    model.eval()
    padded = batch_paddify([[grade] + coords], maxlen + 1)
    hold_flags = np.array(batch_hold_mask(padded))
    pad_flags = np.array(batch_pad_mask(padded))
    token_ids = np.array(batch_endecode(padded, tok_to_num))
    #
    token_ids = torch.Tensor(token_ids).long().to(device)
    hold_flags = torch.Tensor(hold_flags).bool().to(device)
    pad_flags = torch.Tensor(pad_flags).bool().to(device)

    with torch.no_grad():
        hidden = model.forward(token_ids, pad_flags)
        scores = model.classifier_holds(hidden[hold_flags])
    return scores

def sequence_probabililty(seq):
    """Score a sequence of probabilities by its mean log-probability.

    Higher is better; averaging (instead of summing) normalises for length.
    """
    log_probs = np.log(np.array(seq))
    return log_probs.mean()

def all_seqs_beamsearch(seq_len, grade, top_k, n_beams):
    """Generate hold sequences for ``grade`` with beam search.

    Starts from ``seq_len`` ``"[HLD]"`` placeholders and fills them left to
    right. At each position every beam is expanded with its ``top_k`` most
    probable holds, and the ``n_beams`` candidates with the highest mean
    log-probability are kept. Uses the module-level ``model``.

    Parameters
    ----------
    seq_len : int
        Number of holds to generate.
    grade : str
        Grade token the sequence is conditioned on.
    top_k : int
        Expansions per beam and step (clamped to the number of hold classes).
    n_beams : int
        Maximum number of beams kept after each step.

    Returns
    -------
    (all_probs, all_seqs)
        Parallel lists in ascending score order. ``all_probs[i]`` is the list
        of step probabilities of ``all_seqs[i]``; it starts with a dummy
        ``1.0``.
    """
    all_seqs = [["[HLD]"] * seq_len]
    all_probs = [[1.]]
    #
    for hold_idx in range(seq_len):
        #
        new_probs = []
        new_seqs = []
        #
        for seq, seq_prob in zip(all_seqs, all_probs):
            # Row 0 = first remaining placeholder = position hold_idx, because
            # all earlier positions are already filled.
            o_h = get_coords(grade, list(seq), model)[0]
            p_h = torch.softmax(o_h, dim=-1)
            #
            k = min(top_k, p_h.shape[-1])
            top_probs, top_idcs = torch.topk(p_h, k)
            top_probs = top_probs.cpu().numpy()
            top_idcs = top_idcs.cpu().numpy()
            # The hold head is trained on coords_to_num labels, so its class
            # indices must be decoded with num_to_coords. num_to_tok is only
            # equivalent if coords_map happens to number the holds 0..n-1 in
            # key order.
            top_holds = [num_to_coords[idx] for idx in top_idcs]
            #
            for new_hold, new_prob in zip(top_holds, top_probs):
                seq_new = list(seq)
                seq_new[hold_idx] = new_hold
                #
                seq_new_prob = list(seq_prob)
                seq_new_prob.append(new_prob)

                new_probs.append(seq_new_prob)
                new_seqs.append(seq_new)

        # Keep the n_beams best candidates (argsort is ascending).
        new_seqs_probs = [sequence_probabililty(seq) for seq in new_probs]
        new_idcs = np.argsort(new_seqs_probs)[-n_beams:]
        #
        all_probs = [new_probs[i] for i in new_idcs]
        all_seqs = [new_seqs[i] for i in new_idcs]
    return all_probs, all_seqs

In [None]:
# Beam-search demo for a single grade / length.
seq_len = 6
grade = "7C"
top_k = 100
n_beams = 300

# This cell used to contain a line-by-line copy of all_seqs_beamsearch's body;
# calling the function keeps the two from drifting apart.
all_probs, all_seqs = all_seqs_beamsearch(seq_len, grade, top_k, n_beams)

In [None]:
# Number of beams that survived the search (at most n_beams).
len(all_seqs)

# Generate
- 6a+ 6,7,8,
- 6b
- 6b+
- 6c
- 6c+
- 7a
- 7a+
- 7b
- 7b+
- 7c

In [None]:
# Generation plan: one (grade, seq_len, n_tot) tuple per batch of boulders --
# target grade, number of holds, and how many boulders to keep.
# NOTE(review): "6C+" and "7C+" are in grade_names but have no entry here --
# confirm this is intentional.
GEN_BOULDERS = [
    ("6A+", 6, 2),
    ("6A+", 7, 2),
    ("6A+", 8, 2),
    ("6A+", 9, 2),
    #
    ("6B", 6, 2),
    ("6B", 7, 2),
    #
    ("6B+", 6, 2),
    ("6B+", 7, 2),
    #
    ("6C", 5, 2),
    ("6C", 6, 2),
    ("6C", 7, 2),
    #
    ("7A", 5, 2),
    ("7A", 6, 2),
     #
    ("7A+", 5, 2),
    ("7A+", 6, 2),
    #
    ("7B", 5, 2),
    ("7B", 6, 2),  
    #
    ("7B+", 5, 2),
    ("7B+", 6, 2), 
    #
    ("7C", 5, 2),
    ("7C", 6, 2), 
]

In [None]:
# For every entry of GEN_BOULDERS: beam-search candidates, keep those whose
# predicted grade equals the target or is one grade harder, and pick n_tot of
# them at random.
top_k = 100
n_beams = 500
GEN_ALL = []
for grade, seq_len, n_tot in GEN_BOULDERS:
    print(grade, seq_len)
    all_probs, all_seqs = all_seqs_beamsearch(seq_len, grade, top_k, n_beams)
    #
    # De-duplicate beams that contain the same holds in a different order
    # (each surviving sequence is returned with its holds sorted).
    all_seqs = set([tuple(sorted(s)) for s in all_seqs])
    all_seqs = [list(s) for s in all_seqs]
    #
    seqs = []
    for seq in all_seqs:
        grade_pred = grade_boulder(seq, model)
        # "UG" means the decoder fell below the lowest grade.
        if grade_pred == "UG":
            continue
        if grade_to_num[grade_pred] - grade_to_num[grade] in (0, 1):
            seqs.append(seq)
    #
    n_pick = min(n_tot, len(seqs))
    if n_pick == 0:
        # No candidate matched the target grade for this entry.
        continue
    # Sample without replacement so the same boulder is never emitted twice
    # for one entry.
    for idx in np.random.choice(len(seqs), size=n_pick, replace=False):
        seq = seqs[idx]
        GEN_ALL.append((grade, seq))

In [None]:
# Plot every generated boulder with its target grade as the title.
for grade, seq in GEN_ALL:
    # Load a fresh board per figure, as the other plotting cells do, instead
    # of reusing a `board` left over from an earlier cell's loop; draw once.
    board = get_board_setup(P_BOARD, P_HOLDS, setup_year)
    prob_board = draw_coords(board, seq)
    plt.figure(figsize=(5,9))
    plt.imshow(np.array(prob_board))
    plt.title(grade)
    plt.show()

# Gen One Boulder

In [None]:
# Beam search for a single target: up to n_tot boulders of seq_len holds at
# the given grade (sampled in the next cell).
n_tot = 5
seq_len = 7
grade = "7A"
top_k = 100
n_beams = 500
all_probs, all_seqs = all_seqs_beamsearch(seq_len, grade, top_k, n_beams)
#
# De-duplicate beams that contain the same holds in a different order; each
# surviving sequence is returned with its holds sorted.
all_seqs = set([tuple(sorted(s)) for s in all_seqs])
all_seqs = [list(s) for s in all_seqs]

In [None]:
#
# Keep the candidates the model grades at least as hard as the target grade.
seqs = []
for seq in all_seqs:
    grade_pred = grade_boulder(seq, model)
    # grade_boulder returns "UG" when the decoder falls below the lowest
    # grade; "UG" is not a key of grade_to_num, so skip it instead of raising
    # a KeyError.
    if grade_pred == "UG":
        continue
    if grade_to_num[grade] <= grade_to_num[grade_pred]:
        seqs.append(seq)
#
# Plot up to n_tot distinct candidates (sampled without replacement).
n_pick = min(n_tot, len(seqs))
picked = np.random.choice(len(seqs), size=n_pick, replace=False) if n_pick > 0 else []
for idx in picked:
    seq = seqs[idx]
    # Load a fresh board per figure instead of relying on a `board` left over
    # from an earlier cell; draw once.
    board = get_board_setup(P_BOARD, P_HOLDS, setup_year)
    prob_board = draw_coords(board, seq)
    plt.figure(figsize=(5,9))
    plt.imshow(np.array(prob_board))
    plt.title("")
    plt.show()