In [None]:
import preprocessing

In [None]:
# import importlib
# importlib.reload(preprocessing)

In [None]:
# Bind the module's `categories` attribute to a notebook-level name for direct access.
categories = preprocessing.categories

In [None]:
from game import *

# Every generated game must contain at least one continent category.
constraints = [Constraint.category_at_least("continent", 1)]

# Letter-based categories are pretty boring when repeated, so each one may
# appear at most once per game.
constraints += [
    Constraint.category_at_most(letter_category, 1)
    for letter_category in (
        "capital_ending_letter",
        "capital_starting_letter",
        "ending_letter",
    )
]

# Pool of 1000 shuffled candidate games built under the constraints above.
games = [
    preprocessing.create_game(constraints, shuffle=True)
    for _ in range(1000)
]

In [None]:
def get_difficulty(game):
    """Collect the statistics used to rate the difficulty of a single game.

    Args:
        game: Object exposing ``rows`` and ``cols`` (concatenable sequences of
            ``(category, value)`` pairs, where each category has ``sets`` and
            ``difficulty`` attributes) and ``solutions`` (a sequence of rows,
            each row a sequence of sized cells).

    Returns:
        dict with the keys:
            ``cell_sizes``        -- number of solutions in every cell, row by row.
            ``cell_sizes_median`` -- median of ``cell_sizes``.
            ``num_unique``        -- number of cells with exactly one solution.
            ``cat_sizes``         -- ``len(cat.sets)`` for every row/column category.
            ``cat_sizes_median``  -- median of ``cat_sizes``.
            ``cat_difficulty``    -- sum of ``difficulty`` over all categories.

    Note:
        ``pd`` (pandas) must already be in scope; it is not imported in the
        visible cells -- presumably it arrives via ``from game import *``
        (TODO confirm).
    """
    # Only the category half of each (category, value) pair is needed here.
    game_cats = [cat for cat, _ in game.rows + game.cols]
    cat_sizes = [len(cat.sets) for cat in game_cats]

    # Flatten the solution grid into one list of per-cell solution counts.
    cell_sizes = [len(cell) for row in game.solutions for cell in row]
    num_unique = cell_sizes.count(1)
    cat_difficulty = sum(cat.difficulty for cat in game_cats)

    return {"cell_sizes": cell_sizes,
            "cell_sizes_median": pd.Series(cell_sizes).median(),
            "num_unique": num_unique,
            "cat_sizes": cat_sizes,
            # Median of the category sizes (previously computed from
            # cell_sizes by mistake, duplicating cell_sizes_median).
            "cat_sizes_median": pd.Series(cat_sizes).median(),
            "cat_difficulty": cat_difficulty}

# One row per generated game: the game object itself plus its difficulty
# statistics, followed by a combined "score" column.
info = pd.DataFrame(
    [{"game": candidate, **get_difficulty(candidate)} for candidate in games]
).assign(
    score=lambda frame: (
        -frame["num_unique"]
        + frame["cat_difficulty"]
        + frame["cell_sizes_median"]
        + frame["cat_sizes_median"]
    )
)

# Preview the first 30 games together with their statistics.
display(info.head(30))

# Idea: countries with a small population / size are harder to guess.
# Possible metric: for each cell compute log(sum of the populations of all its solutions),
# then take the median over all cells.

In [None]:
import matplotlib.pyplot as plt

# Histogram of the summed category difficulty across all generated games,
# drawn on the current axes with slightly narrowed bars.
plt.gca().hist(info["cat_difficulty"], rwidth=0.9)
plt.gca().set_title("Category difficulty sum")
plt.show()

In [None]:
# Set to True to write the generated games to data/games.json.
SAVE = False

if SAVE:
    # Serialize first so a failing to_json() cannot leave a truncated file behind.
    serialized_games = [game.to_json() for game in games]
    # The context manager closes (and flushes) the file even if json.dump raises.
    # NOTE(review): `json` is not imported in the visible cells -- presumably it
    # comes from `from game import *`; confirm.
    with open("data/games.json", mode="w", encoding="utf-8") as games_file:
        json.dump(serialized_games, games_file)