In [1]:
import os
import pickle
import numpy as np
import tensorflow as tf
import math 
import re
import operator
import time
import json
import utils

from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt

from keras.models import load_model
from keras_preprocessing.sequence import pad_sequences

In [2]:
# Restore the trained name-generation model from its directory on disk.
MODEL_PATH = 'moonboard_names_model_128'
NAME_MODEL = models.load_model(f'{MODEL_PATH}/model.h5')

In [3]:

# Load the character vocabulary stored in the same directory as the model.
# A context manager is used so the file handle is closed deterministically.
with open(MODEL_PATH + '/params.json') as params_file:
    params = json.load(params_file)
# Fixed input width passed to pad_sequences before every prediction.
SEQUENCE_LENGTH = 128
char_to_int = params['char_to_int']
int_to_char = params['int_to_char']

In [4]:
# Encoded index of the '|' character; name_text stops generating as soon as
# this index is sampled.
END_TOKEN = char_to_int['|']

In [5]:
# Locate the scraped problem data relative to the current working directory.
cwd = os.getcwd()
print(cwd)
# Step up one level only when the notebook runs from a directory literally
# called 'names'. A plain str.replace('/names', '') would also strip that
# substring from unrelated path components (e.g. '/home/namesake/...').
parent_wd = os.path.dirname(cwd) if os.path.basename(cwd) == 'names' else cwd
raw_data_path = os.path.join(parent_wd, 'raw_data/moonGen_scrape_2016')

/Users/markliu/moonboard/names


In [6]:
# NOTE: unpickling can execute arbitrary code; only load scrape files you trust.
with open(raw_data_path, 'rb') as scrape_file:
    raw_data = pickle.load(scrape_file)

In [7]:
# Show the fourth scraped problem record to inspect its structure.
print(list(raw_data.values())[3])

{'problem_name': 'GULAG', 'info': ['micklevin', '1 climber has repeated this problem', '7A', 'Feet follow hands', ''], 'url': 'https://moonboard.com/Problems/View/367885/gulag', 'num_empty': 1, 'num_stars': 2, 'moves': [{'Id': 2119166, 'Description': 'A5', 'IsStart': True, 'IsEnd': False}, {'Id': 2119167, 'Description': 'B8', 'IsStart': False, 'IsEnd': False}, {'Id': 2119168, 'Description': 'D12', 'IsStart': False, 'IsEnd': False}, {'Id': 2119169, 'Description': 'F14', 'IsStart': False, 'IsEnd': False}, {'Id': 2119170, 'Description': 'I18', 'IsStart': False, 'IsEnd': True}], 'grade': '7A', 'UserGrade': None, 'isBenchmark': False, 'repeats': 1, 'ProblemType': None, 'IsMaster': False, 'setter': {'Id': '05f91964-f58c-4a49-af50-abec02f71300', 'Nickname': 'micklevin', 'Firstname': 'Mick', 'Lastname': 'Levin', 'City': 'Halifax', 'Country': 'Canada', 'ProfileImageUrl': '/Content/Account/Users/Profile/05f91964-f58c-4a49-af50-abec02f71300.jpg?637231996827206508', 'CanShareData': True}}


In [15]:
def convert_problem_to_input(holds):
    """Encode a problem's holds as the prompt string given to the name model.

    Args:
        holds: iterable of mappings, each with a 'Description' entry
            (for example 'A5').

    Returns:
        The hold descriptions joined by commas and followed by one trailing
        space, e.g. 'A5,B8,D12 '. The problem name itself is not appended
        here; an empty ``holds`` yields just ' '.
    """
    return ','.join(hold['Description'] for hold in holds) + ' '

In [16]:
# Encode the fourth scraped problem as a model prompt.
problem = list(raw_data.values())[3]

convert_problem_to_input(problem['moves'])

'A5,B8,D12,F14,I18 '

In [18]:
# Build prompts from the first five scraped problems.
texts = [convert_problem_to_input(entry['moves']) for entry in list(raw_data.values())[:5]]
print(len(texts))
print(texts)

5
['F5,G2,H10,B11,E15,D18,E8 ', 'A5,D6,E9,A9,C13,D15,A18 ', 'G4,H8,G10,K12,G15,K18 ', 'A5,B8,D12,F14,I18 ', 'F5,H8,H10,H13,E15,G18,I4 ']


In [11]:
def sample_from_prob_vec(pr, rel_threshold=0.1):
    """Sample an index from the first row of a batch of probability vectors.

    Entries smaller than ``rel_threshold`` times their own row's maximum are
    zeroed, the first row is renormalised to sum to 1, and one index is drawn
    from it with ``np.random.choice``.

    Args:
        pr: 2-D array-like indexed as (row, class). Only row 0 is sampled.
            The input is not modified.
        rel_threshold: fraction of the row maximum below which an entry is
            discarded before sampling.

    Returns:
        The sampled class index (a NumPy integer).

    Raises:
        ValueError: if the first row has no positive mass left to sample from.
    """
    # Work on a float copy so the caller's prediction array is left intact.
    probs = np.array(pr, dtype=float)
    # keepdims=True keeps the threshold as a column so it is compared per row;
    # a flat (rows,) vector would broadcast along the class axis instead.
    cutoff = probs.max(axis=1, keepdims=True) * rel_threshold
    probs[probs < cutoff] = 0

    first_row = probs[0]
    total = first_row.sum()
    if not total > 0:
        raise ValueError("probability vector has no positive mass to sample from")
    first_row = first_row / total

    idxs = np.arange(len(first_row))
    return np.random.choice(idxs, 1, p=first_row)[0]



In [24]:
def name_text(text, max_length=128):
    """Generate a name for an encoded prompt, one sampled character at a time.

    The prompt is repeatedly encoded, padded to SEQUENCE_LENGTH, fed to
    NAME_MODEL, and extended with a character drawn by sample_from_prob_vec
    until END_TOKEN is sampled or the encoded sequence grows beyond
    ``max_length``.

    Args:
        text: prompt string, e.g. the output of convert_problem_to_input.
        max_length: generation stops once the encoded sequence is longer than
            this many items.

    Returns:
        Only the generated characters, i.e. the text appended after ``text``.
    """
    generated = text
    while True:
        # NOTE(review): utils.texts_to_sequences is assumed to return one
        # encoded sequence per input text - confirm against utils.
        sequences = utils.texts_to_sequences([generated], char_to_int)
        # Check the length budget before predicting so the final iteration
        # does not pay for a model call whose result would be discarded.
        if len(sequences[0]) > max_length:
            break

        data = pad_sequences(sequences, maxlen=SEQUENCE_LENGTH)
        predictions = NAME_MODEL.predict(data)
        ch = sample_from_prob_vec(predictions)
        if ch == END_TOKEN:
            break
        generated += int_to_char[ch]
    return generated[len(text):]

In [25]:
# Generate one sampled name per prompt.
final_texts = list(map(name_text, texts))
print(final_texts)

['GOSTER PLASTANT', 'STRATO 1A', 'FAR PISS', 'FLOW', 'JUST DO WAT']
