In [None]:
import json
import os
from glob import glob

import numpy as np
import pandas as pd
from scipy.stats import rankdata
from tensorflow.keras.utils import to_categorical

In [None]:
def make_input(obses):
    """Encode the latest observation as a stack of 17 binary 7x11 planes.

    Planes are laid out relative to the observing player, i.e. the plane
    offset of player ``p`` is ``(p - obs['index']) % 4``:

    * 0-3   head cell of each goose
    * 4-7   last cell (tail tip) of each goose
    * 8-11  every cell occupied by each goose
    * 12-15 head cell of each goose in the previous observation
            (left empty when only one observation is supplied)
    * 16    food cells

    Args:
        obses: Non-empty sequence of observation dicts, oldest first. The
            last one must provide ``'index'``, ``'geese'`` and ``'food'``;
            the second to last (if any) must provide ``'geese'``. Cells are
            flat indices into the 7 * 11 board.

    Returns:
        ``np.int8`` array of shape ``(17, 7, 11)``.
    """
    latest = obses[-1]
    planes = np.zeros((17, 7 * 11), dtype=np.int8)

    def plane_offset(player):
        # Rotate player ids so that the observing player always lands on offset 0.
        return (player - latest['index']) % 4

    for player, cells in enumerate(latest['geese']):
        if len(cells) == 0:
            # An empty goose contributes nothing to any plane.
            continue
        offset = plane_offset(player)
        planes[offset, cells[0]] = 1            # head
        planes[4 + offset, cells[-1]] = 1       # tail tip
        planes[8 + offset, list(cells)] = 1     # whole body

    # Heads from the previous observation, still relative to the current player.
    if len(obses) > 1:
        for player, cells in enumerate(obses[-2]['geese']):
            if len(cells) == 0:
                continue
            planes[12 + plane_offset(player), cells[0]] = 1

    for food_cell in latest['food']:
        planes[16, food_cell] = 1

    return planes.reshape(-1, 7, 11)

In [None]:
def create_dataset_from_json(filepath, episode_agents=None, top_submissions=None):
    """Turn one episode JSON file into (state, action, reward) training arrays.

    The episode id is taken from the file name (``<EpisodeId>.json``). For every
    agent of that episode whose ``SubmissionId`` is listed in ``top_submissions``,
    one sample is produced per step ``i`` in which the agent's status is
    ``'ACTIVE'`` and the action recorded for it in step ``i + 1`` is not ``None``:

    * state  - ``make_input`` applied to the agent's observations up to step ``i``
      (``geese`` and ``food`` are copied from agent 0's observation, which is
      where the episode file stores them),
    * action - index of the move (NORTH=0, SOUTH=1, WEST=2, EAST=3),
    * reward - value from ``[-1, -0.333, 0.333, 1]`` selected by the agent's
      dense rank among the episode's final rewards (lowest rank -> -1).

    Args:
        filepath: Path of the episode JSON file.
        episode_agents: DataFrame with ``EpisodeId``, ``Index`` and
            ``SubmissionId`` columns. Defaults to the module-level ``epagents_df``.
        top_submissions: Container of accepted submission ids. Defaults to the
            module-level ``sub_to_score_top``.

    Returns:
        ``(X, y, z)`` as ``int8``, ``int8`` and ``float16`` arrays (all empty when
        no sample qualifies), or ``(None, None, None)`` when the episode cannot be
        processed (unexpected structure, non-numeric file name, ...).

    Raises:
        NameError: If a lookup table is neither passed in nor defined globally.
        OSError, json.JSONDecodeError: If the file cannot be read or parsed;
            these are raised before the best-effort section, as before.
    """
    # Resolve the lookup tables outside the try block: a missing global is a
    # notebook-ordering mistake and must not be reported as "bad episode".
    if episode_agents is None:
        episode_agents = epagents_df
    if top_submissions is None:
        top_submissions = sub_to_score_top

    # Context manager guarantees the handle is closed; thousands of episode
    # files are processed, so leaking one descriptor per call adds up.
    with open(filepath, 'r') as json_file:
        episode = json.load(json_file)

    try:
        X = []
        y = []
        z = []
        actions = {'NORTH':0, 'SOUTH':1, 'WEST':2, 'EAST':3}
        rewards = [-1, -0.333, 0.333, 1]
        # "<EpisodeId>.json" -> EpisodeId, independent of the path separator.
        episode_id = int(os.path.splitext(os.path.basename(filepath))[0])
        agents = episode_agents[episode_agents['EpisodeId'] == episode_id]
        steps = episode['steps']

        for index, sub in zip(agents['Index'].values, agents['SubmissionId'].values):
            if sub not in top_submissions:
                continue

            # Dense ranks start at 1; shift to a 0-based index into `rewards`.
            rank = rankdata(np.array(episode['rewards']), method='dense')[index] - 1
            obses = []
            for i in range(len(steps) - 1):
                if steps[i][index]['status'] != 'ACTIVE':
                    continue
                # The move chosen from the state at step i is stored in step i + 1.
                a = steps[i + 1][index]['action']
                if a is None:
                    continue
                step = steps[i]
                # Board contents are taken from agent 0's observation.
                step[index]['observation']['geese'] = step[0]['observation']['geese']
                step[index]['observation']['food'] = step[0]['observation']['food']
                obses.append(step[index]['observation'])
                y.append(actions[a])
                z.append(rewards[rank])

            # One state per collected observation, each with its history so far.
            for j in range(len(obses)):
                X.append(make_input(obses[:j + 1]))

        X = np.array(X, dtype=np.int8)
        y = np.array(y, dtype=np.int8)
        z = np.array(z, dtype=np.float16)

        return X, y, z
    except Exception:
        # Best effort: skip episodes that do not have the expected structure,
        # but let KeyboardInterrupt / SystemExit through so long runs can be stopped.
        return None, None, None

In [None]:
# Minimum `UpdatedScore` a submission must have (on its highest-EpisodeId row)
# for its games to be used as training data.
LOWEST_SCORE_THRESH = 1220
epagents_df = pd.read_csv('../input/meta-kaggle/EpisodeAgents.csv')

# One row per submission: the one with the highest EpisodeId.
max_df = (
    epagents_df
    .sort_values(by=['EpisodeId'], ascending=False)
    .groupby('SubmissionId')
    .head(1)
    .drop_duplicates()
    .reset_index(drop=True)
)
max_df = max_df[max_df['UpdatedScore'] >= LOWEST_SCORE_THRESH]
sub_to_score_top = max_df['SubmissionId'].values

# Episode files, highest file name first; scraper "output" files are skipped.
paths = [path for path in glob('../input/fork-of-simulations-episode-scraper-match-download/*.json') if 'output' not in path]
paths.sort(reverse=True)

# Upper bound on the number of samples; the buffers are preallocated once
# because the state array is several GB.
# size = 8500000
size = 5315649 # 1220  (presumably the count obtained with the 1220 threshold - TODO confirm)
X_train = np.empty((size, 17, 7, 11), dtype=np.int8)
y_train = np.empty(size, dtype=np.int8)
z_train = np.empty(size, dtype=np.float16)

n = 0  # number of buffer rows filled so far
for path in paths:
    X, y, z = create_dataset_from_json(path)
    # Skip unusable episodes (None) and episodes without qualifying samples
    # (empty 1-D arrays, which cannot be slice-assigned into the 4-D buffer).
    if X is not None and len(X) > 0:
        take = min(len(X), size - n)
        X_train[n:n + take] = X[:take]
        y_train[n:n + take] = y[:take]
        z_train[n:n + take] = z[:take]
        n += take
    filled = int(n / size * 50)
    print(f"\r[{'#' * filled + ' ' * (50 - filled)}] {n}/{size}", end="")
    if n >= size:
        break

# np.empty leaves unwritten rows uninitialised; drop them so that garbage is
# never saved or counted when fewer than `size` samples were available.
if n < size:
    print(f"\nOnly {n} of the requested {size} samples were found; trimming the arrays.")
    X_train = X_train[:n]
    y_train = y_train[:n]
    z_train = z_train[:n]

In [None]:
# Number of distinct moves (NORTH, SOUTH, WEST, EAST are encoded as 0-3).
NUM_ACTIONS = 4

# Persist the dataset next to the notebook.
np.save('states.npy', X_train)
# Pin the one-hot width: without `num_classes` it is inferred as max(label) + 1,
# so the saved shape would depend on which moves happen to occur in the data.
np.save('actions.npy', to_categorical(y_train, num_classes=NUM_ACTIONS, dtype='int8'))
np.save('rewards.npy', z_train)

In [None]:
# Sanity check: how many samples carry each reward value.
reward_values, reward_counts = np.unique(z_train, return_counts=True)
for value, count in zip(reward_values, reward_counts):
    print(f'{value: .3f} : {count: >11}')