```
# Fetch the game history for every game id from 10000 to 300000, running up to
# 32 wget processes in parallel. xargs substitutes each id for the literal
# token "game_id" (-I), so each response is written to <id>.json (-O).
# -nc avoids re-downloading over an existing file and -q silences wget's output.
# NOTE(review): the password is passed as a command-line argument and the
# variable is unquoted — confirm this is acceptable on the machine running it.
seq 10000 300000 | xargs -P 32 -I game_id wget https://rbc.jhuapl.edu/api/games/game_id/game_history --user SomeRegret --password $RC_PASSWORD_SR -nc -O game_id.json -q

# Remove zero-byte regular files under the current directory.
# Presumably these are left behind by requests that failed (e.g. ids with no
# game) — TODO confirm.
find . -empty -type f -delete

# Count the entries that remain in the current directory.
ls . | wc -l
```

In [None]:
from glob import glob
import json
from pathlib import Path

import chess
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

In [None]:
import dask
import dask.bag as db
from dask.distributed import Client, progress

In [None]:
# Connect a Dask distributed client configured for 6 workers with a single
# thread each; subsequent Dask computations in this notebook run through it.
client = Client(n_workers=6, threads_per_worker=1)
# Bare expression so the notebook renders the client summary.
client

In [None]:
def parse(path_and_text):
    """Decode one downloaded game file into its ``game_history`` dict.

    Args:
        path_and_text: A two-item sequence. Despite the parameter name, it is
            unpacked as ``(text, path)``: first the JSON document as a string,
            then the path of the file it came from.

    Returns:
        The ``"game_history"`` object from the JSON document, with an extra
        ``"id"`` key holding the integer parsed from the file name (the last
        ``/``-separated path component minus its final five characters,
        i.e. the ``.json`` suffix).
    """
    raw_json, source_path = path_and_text
    record = json.loads(raw_json)["game_history"]
    # Keep only the file name, then drop the trailing ".json" (5 characters).
    file_name = source_path.rsplit("/", 1)[-1]
    record["id"] = int(file_name[:-5])
    return record

In [None]:
def get_actions(history):
    """Serialize a game's senses and requested moves into one string.

    The ``"true"`` lists under ``history["senses"]`` and
    ``history["requested_moves"]`` are treated as White's and the ``"false"``
    lists as Black's. Tokens are emitted in the repeating order
    white sense, white move, black sense, black move, and emission stops as
    soon as the list whose turn it is has no more entries (tokens already
    produced in that round are kept).

    Token encoding:
        * sense: ``"00"`` for ``None``, otherwise ``chess.SQUARE_NAMES[square]``
        * move: ``"0000"`` for ``None``, otherwise ``move["value"]``

    Args:
        history: Mapping with ``"senses"`` and ``"requested_moves"`` entries,
            each holding ``"true"`` and ``"false"`` iterables.

    Returns:
        The tokens joined by single spaces (empty string if there are none).
    """

    def encode_sense(square):
        return chess.SQUARE_NAMES[square] if square is not None else "00"

    def encode_move(move):
        return move["value"] if move is not None else "0000"

    sense_white = iter(history["senses"]["true"])
    sense_black = iter(history["senses"]["false"])
    move_white = iter(history["requested_moves"]["true"])
    move_black = iter(history["requested_moves"]["false"])

    # One full round of play, in the order the tokens must appear.
    round_steps = (
        (sense_white, encode_sense),
        (move_white, encode_move),
        (sense_black, encode_sense),
        (move_black, encode_move),
    )

    # Unique marker so that a legitimate None entry is not mistaken for
    # "iterator exhausted".
    exhausted = object()
    tokens = []
    while True:
        for stream, encode in round_steps:
            entry = next(stream, exhausted)
            if entry is exhausted:
                return " ".join(tokens)
            tokens.append(encode(entry))

In [None]:
def flatten(game):
    """Reduce a parsed game record to one flat row.

    Args:
        game: Game-history mapping that provides ``"id"``, ``"white_name"``,
            ``"black_name"``, ``"winner_color"``, ``"win_reason"`` (itself a
            mapping with a ``"value"`` entry) and the fields read by
            ``get_actions``.

    Returns:
        A dict with the keys ``id``, ``white``, ``black``, ``winner``,
        ``timeout`` (``True`` when the win reason value equals ``"TIMEOUT"``)
        and ``actions`` (the string built by ``get_actions``), in that order.
    """
    # Output column -> source field, for the values that are copied verbatim.
    copied_fields = {
        "id": "id",
        "white": "white_name",
        "black": "black_name",
        "winner": "winner_color",
    }
    row = {column: game[field] for column, field in copied_fields.items()}

    reason = game["win_reason"]["value"]
    row["timeout"] = reason == "TIMEOUT"
    row["actions"] = get_actions(game)
    return row

In [None]:
# Build a lazy Dask bag over every downloaded JSON file. include_path=True
# makes each element carry the source file path alongside the text, which
# parse() needs in order to recover the game id from the file name.
b = db.read_text('../wgetGames/*.json', include_path=True)
# Bare expression so the notebook renders the bag.
b

In [None]:
# Pipeline: decode each file (parse), reduce it to a flat row (flatten),
# turn the bag of rows into a Dask dataframe, and index it by game id.
df = b.map(parse).map(flatten).to_dataframe().set_index("id")

In [None]:
# Preview the first five rows as a sanity check before the full computation.
df.head(5)

In [None]:
# Run the whole pipeline and replace the lazy Dask dataframe with its
# computed, in-memory result.
df = df.compute()

In [None]:
# Persist the computed table in two formats: a pickle and a CSV.
df.to_pickle("dask_histories.pkl")
df.to_csv("dask_histories.csv")