In [9]:
import datetime
import json
import requests
try:
    import _pickle as pickle
except:
    import pickle

from dateutil import relativedelta
from urllib import request

from typing import List, Text, Callable, Optional

In [10]:
class Game:
    """Summary of one game, built from a single decoded summary JSON object.

    Attributes:
        game_id: Value stored under the 'game' key.
        player_count: Value stored under the 'player_count' key.
        total_vp: Value stored under
            events -> faction -> all -> vp -> round -> all.
    """

    def __init__(self, json) -> None:
        """Extracts the fields used by this notebook from a decoded JSON dict.

        Args:
            json: A mapping with the keys read below. The parameter name shadows
                the `json` module inside this method only; it is kept so that
                existing keyword callers keep working.

        Raises:
            KeyError: If any of the expected keys is missing.
        """
        self.game_id = json['game']
        self.player_count = json['player_count']
        self.total_vp = json['events']['faction']['all']['vp']['round']['all']

    def averageVPPerPlayer(self) -> float:
        """Returns total_vp divided by player_count.

        A game with no players (a player_count of 0 or None) yields 0.0 instead
        of raising, so that a single degenerate record cannot abort a filter
        pass over many games.
        """
        if not self.player_count:
            return 0.0
        return self.total_vp / self.player_count

    def __str__(self) -> str:
        """Returns a multi-line, human-readable description of the game."""
        return """
        Game: %s
        Players: %s
        Total VP: %s
        """ % (self.game_id, self.player_count, self.total_vp)

    def __repr__(self) -> str:
        """Uses the same text as __str__ so lists of games print readably."""
        return self.__str__()

In [11]:
def fetchAllSummaryData(minDate: datetime.datetime, maxDate: datetime.datetime,
                       keepPredicate: Callable[[Game], bool], local: bool = False,
                       maxGames: Optional[int]=None) -> List[Game]:
    """Fetches the Games based on summary data.

    Months are visited one at a time, starting at maxDate's month and walking
    backwards to minDate's month (both inclusive). A month whose summary cannot
    be obtained is skipped.

    Args:
        minDate: The smallest date from which to fetch games (only the month matters)
        maxDate: The largest date from which to fetch games (only the month matters)
        keepPredicate: A callable. For memory efficiency, we can push down filtering on games
            using this function. If this function returns true, the Game is kept. Otherwise
            it is immediately discarded.
        local: If False, each month's summary is downloaded and then pickled to
            "snellman/summary-YYYY-MM.pkl". If True, nothing is downloaded and the
            summary is read back from that pickle file instead.
        maxGames: The maximum number of games to return. Games are retrieved from latest to
            oldest, date wise. None means no limit.

    Returns:
        A list of Game objects fetched from Terra Snellman.
    """
    BASE_URL = "https://terra.snellman.net/data/events"
    results: List[Game] = []
    # Compare (year, month) pairs so the day-of-month of either bound cannot
    # decide whether the oldest month is included.
    oldestMonth = (minDate.year, minDate.month)
    while ((maxDate.year, maxDate.month) >= oldestMonth
           and (maxGames is None or len(results) < maxGames)):
        month = maxDate.strftime("%Y-%m")
        filename = "snellman/summary-%s.pkl" % month
        address = "{}/{}.json".format(BASE_URL, month)
        # Step to the previous month up front so that every `continue` below
        # still advances the loop. datetime is immutable, so the caller's
        # object is not affected.
        maxDate -= relativedelta.relativedelta(months=1)
        if not local:
            try:
                with request.urlopen(address) as site:
                    data = json.loads(site.read().decode())
            except Exception as err:
                # Best effort: a month that cannot be downloaded or decoded is
                # skipped. `Exception` (not a bare except) lets Ctrl-C through.
                print("Skipping %s: %s" % (address, err))
                continue
            with open(filename, 'wb') as f:
                pickle.dump(data, f)
        else:
            try:
                with open(filename, 'rb') as f:
                    # NOTE(security): pickle.load can execute arbitrary code;
                    # only read cache files that this notebook wrote itself.
                    data = pickle.load(f)
            except FileNotFoundError:
                # Months skipped during download never get a cache file, so
                # skip them here as well instead of crashing.
                print("Skipping %s: no local cache at %s" % (month, filename))
                continue
        for obj in data:
            if maxGames is not None and len(results) >= maxGames:
                break  # Never return more than maxGames.
            game = Game(obj)
            if keepPredicate(game):
                results.append(game)
        del data  # Release the month's raw data before loading the next one.
        print("Collected %s games from %s." % (len(results), address))
    return results

In [12]:
def keepHighScoringGames(game: Game, minAverageVP: float = 120) -> bool:
    """Predicate that keeps games whose average VP per player is high enough.

    Args:
        game: The game to test.
        minAverageVP: Exclusive lower bound on game.averageVPPerPlayer().
            Defaults to 120; bind another value with functools.partial to use
            a different cut-off as a keepPredicate.

    Returns:
        True if the game's average VP per player is strictly greater than
        minAverageVP.
    """
    return game.averageVPPerPlayer() > minAverageVP

In [13]:
def downloadLogForGameAsSentence(game: Game) -> Text:
    """Downloads one game's ledger and flattens its commands into one string.

    The game is requested by POSTing its game_id to the view-game endpoint.
    Every ledger entry that has a 'commands' field is split on "." and all
    resulting fragments are stripped and joined with single spaces.

    Args:
        game: The game whose log should be downloaded (only game_id is used).

    Returns:
        A single space-separated string of the game's commands.

    Raises:
        requests.RequestException: On connection problems, on a timeout, or
            when the server answers with an HTTP error status.
        KeyError: If the decoded response has no 'ledger' entry.
    """
    kBaseUrl = "https://terra.snellman.net/app/view-game/"
    # requests waits forever by default; a timeout stops one stalled
    # connection from hanging a crawl over thousands of games.
    res = requests.post(kBaseUrl, data={'game': game.game_id}, timeout=60)
    # Fail loudly on HTTP errors instead of feeding an error page to the
    # JSON parser.
    res.raise_for_status()
    data = json.loads(res.content)
    commands = [command['commands'].split(".")
                for command in data['ledger']
                if 'commands' in command]
    gameSentence = " ".join(item.strip()
                            for sublist in commands
                            for item in sublist)
    return gameSentence

In [14]:
def fetchAllGameSetences(local: bool = False, saveEvery:int=1000) -> Text:
    """Downloads game data and dumps to disk.

    High-scoring games are selected via fetchAllSummaryData. Unless `local` is
    set, each selected game's log is downloaded and the resulting sentences are
    pickled in chunks of `saveEvery` games to
    "snellman/sentences-<chunkStart>-of-<totalGames>.pkl", where <chunkStart>
    is the index of the first game in the chunk. The chunks are then read back
    from disk and combined.

    Args:
        local: If True, nothing is downloaded; both the summaries and the
            sentence chunks are read from files written by an earlier run.
        saveEvery: Number of games per chunk file. Must be at least 1.

    Returns:
        All game sentences joined with newlines, one game per line.

    Raises:
        ValueError: If saveEvery is smaller than 1.
        FileNotFoundError: If an expected chunk file is missing (for example
            when running with local=True before any download has happened).
    """
    if saveEvery < 1:
        raise ValueError("saveEvery must be a positive integer, got %r" % (saveEvery,))
    OLDEST_DATE = datetime.datetime(year=2013, month=1,day=15)
    NEWEST_DATE = datetime.datetime.now()
    data = fetchAllSummaryData(OLDEST_DATE, NEWEST_DATE,
                               keepPredicate=keepHighScoringGames,
                               maxGames=20000, local=local)
    totalGames = len(data)
    chunkPattern = 'snellman/sentences-%s-of-%s.pkl'
    if not local:
        sentences = []
        chunkStart = 0
        for i, game in enumerate(data):
            sentences.append(downloadLogForGameAsSentence(game))
            # Parenthesised on purpose: `i + 1 % saveEvery` would parse as
            # `i + (1 % saveEvery)` and never trigger a save.
            chunkIsFull = (i + 1) % saveEvery == 0
            # Also flush after the last game so a trailing partial chunk is
            # not lost.
            if chunkIsFull or i + 1 == totalGames:
                with open(chunkPattern % (chunkStart, totalGames), 'wb') as f:
                    pickle.dump(sentences, f)
                sentences = []
                chunkStart = i + 1
    # Load it from disk, using the same chunk-start indices as the writer.
    allSentences = []
    for chunkStart in range(0, totalGames, saveEvery):
        with open(chunkPattern % (chunkStart, totalGames), 'rb') as f:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # read chunk files that this notebook wrote itself.
            allSentences.extend(pickle.load(f))
    # A single join keeps a newline between the last sentence of one chunk
    # and the first sentence of the next.
    return "\n".join(allSentences)
            

In [None]:
# Build the full text before opening the output file: mode "w" truncates the
# file immediately, so opening it first would leave an empty games.input
# behind if the long-running download failed part-way through.
gameText = fetchAllGameSetences(local=False)
with open("snellman/games.input", "w") as f:
    f.write(gameText)

Collected 965 games from https://terra.snellman.net/data/events/2019-01.json.
Collected 1768 games from https://terra.snellman.net/data/events/2018-12.json.
Collected 2620 games from https://terra.snellman.net/data/events/2018-11.json.
Collected 3524 games from https://terra.snellman.net/data/events/2018-10.json.
Collected 4444 games from https://terra.snellman.net/data/events/2018-09.json.
Collected 5369 games from https://terra.snellman.net/data/events/2018-08.json.
Collected 6333 games from https://terra.snellman.net/data/events/2018-07.json.
Collected 7286 games from https://terra.snellman.net/data/events/2018-06.json.
Collected 8337 games from https://terra.snellman.net/data/events/2018-05.json.
