Australia/island: 'True' is already set as main value - swapping with 'False'
CI/name: 'Ivory Coast' is already set as main value - skipping
MK/name: 'North Macedonia' is already set as main value - skipping
PS/name: 'Palestinian Territory' is already set as main value - swapping with 'Palestine'
TR/name: 'Turkey' is already set as main value - swapping with 'Türkiye'
VA/name: 'Vatican' is already set as main value - skipping
US/name: 'United States' is already set as main value - skipping
CZ/name: 'Czechia' is already set as main value - swapping with 'Czech Republic'
Armenia/continent: 'AS' is already set as main value - skipping
Georgia/continent: 'AS' is already set as main value - skipping
Azerbaijan/continent: 'AS' is already set as main value - skipping
Trinidad and Tobago/continent: 'NA' is already set as main value - skipping
Panama/continent: 'NA' is already set as main value - skipping
Egypt/continent: 'AF' is already set as main value - skipping
Russia/continent: 'EU' is al

In [None]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import preprocessing
from difficulty import compute_game_difficulties, DifficultyLevel
from generator import Constraint, GameGenerator

# Short aliases for the `df` and `categories` objects exposed by the
# preprocessing module; the cells below refer to them by these names.
df, categories = preprocessing.df, preprocessing.categories

In [None]:
# from importlib import reload
# reload(preprocessing)

### Show all alternative values

In [None]:
# Columns holding alternative values are identified by their "_alt" suffix.
altcols = [col for col in df.columns if col.endswith("_alt")]
print("\nAll countries with alternative values:")
# Count the alternatives in every cell, column by column.
# `DataFrame.applymap` is deprecated since pandas 2.1 (FutureWarning), while
# `Series.map` behaves the same on old and new pandas versions.
alt_counts = df[altcols].apply(lambda column: column.map(len))
# Show only the rows with at least one alternative value in any "_alt" column.
display(df[alt_counts.sum(axis=1) > 0])

In [None]:
# Some categories are pretty boring to appear multiple times, so each of them
# is capped at one occurrence.
boring_categories = (
    "capital_ending_letter",
    "capital_starting_letter",
    "ending_letter",
)
constraints = [Constraint.category_at_most(name, 1) for name in boring_categories]
# Limit the number of cells a country can appear in (at most 3 per iso code).
constraints.extend(Constraint.solutions_at_most(df.iso.tolist(), 3))

# Per-category values handed to the generator.
# NOTE(review): presumably relative sampling weights (they do not sum to 1) —
# confirm against preprocessing.get_generator.
category_probs = {
    "continent": 4,
    "starting_letter": 3,
    "ending_letter": 1.5,
    "capital_starting_letter": 2,
    "capital_ending_letter": 0.5,
    "flag_colors": 3,
    "landlocked": 4,
    "island": 4,
}

# seed=None is passed explicitly; presumably this means the run is not
# reproducible — verify in preprocessing.get_generator.
generator = preprocessing.get_generator(
    constraints,
    category_probs,
    seed=None,
    selection_mode="shuffle_setkeys",
    uniform=False,
    shuffle=True,
)
# Materialise all 5000 sampled games so later cells can iterate them repeatedly.
games = [*generator.sample_games(n=5000, progress_bar=True)]
game_info = compute_game_difficulties(games)

In [None]:
# Hand the generated games to preprocessing.save_games under the label
# "occurence-limit" (spelling is part of the runtime string and kept as-is).
# NOTE(review): presumably this writes the games to disk — confirm in
# preprocessing.save_games.
preprocessing.save_games(games, "occurence-limit")

In [None]:
# Histogram (50 bins) of the `sample_tries` value recorded on every game.
tries_per_game = [g.sample_tries for g in games]
plt.hist(tries_per_game, bins=50, rwidth=0.9)
plt.title("Number of tries for game generation")
plt.show()

In [None]:
# Count how many games needed exactly i tries, for i = 0..99 (tries that never
# occurred get a count of 0).
tries_histogram = pd.Series([game.sample_tries for game in games]).value_counts()
sample_tries = np.array([tries_histogram.get(i, 0) for i in range(100)])
# p[i] = 1 - count[i + 1] / count[i], i.e. one minus the ratio of consecutive
# bins. Slicing pairs each bin with its predecessor directly, so no wrap-around
# ratio (bin 0 divided by bin 99) is computed and then thrown away.
# NOTE(review): presumably an estimate of the per-try success probability under
# a geometric model — confirm the intent.
# Empty bins make the ratio inf/nan by design; silence the resulting
# divide-by-zero warnings instead of flooding the cell output. Non-finite
# entries stay in `p` exactly as before.
with np.errstate(divide="ignore", invalid="ignore"):
    p = 1 - sample_tries[1:] / sample_tries[:-1]
# All points are drawn on the y = 0 line; only their x position carries data.
plt.scatter(x=p, y=np.zeros_like(p))
# Render explicitly, consistent with the other plotting cells.
plt.show()

In [None]:
# For every game, flatten the two levels of nesting in `game.solutions` into
# one flat list of entries and store how often the most frequent entry occurs.
game_info["max_occurences"] = game_info["game"].apply(
    lambda game: pd.Series(
        [entry for row in game.solutions for cell in row for entry in cell]
    ).value_counts().max()
)

In [None]:
# Summary statistics (count, mean, quartiles, ...) of the "max_occurences"
# column of game_info.
game_info["max_occurences"].describe()

In [None]:
# Histogram of the "max_occurences" column with unit-wide bins whose edges run
# from 0 to 9.
occurrence_bin_edges = range(10)
plt.hist(game_info["max_occurences"], bins=occurrence_bin_edges, rwidth=0.9)
plt.show()

In [None]:
# Inspect the 20 rows with the largest "max_cell_difficulty": print each game's
# raw data, then render it as a dataframe with solutions included.
hardest_rows = game_info.nlargest(20, "max_cell_difficulty")
for game in hardest_rows["game"]:
    print(game.data)
    solved_view = game.to_dataframe(solution=True)
    display(solved_view)

In [None]:
# Histogram (20 bins) of the "max_cell_difficulty" column of game_info.
max_difficulties = game_info["max_cell_difficulty"]
plt.hist(max_difficulties, bins=20, rwidth=0.9)
plt.title("Max cell difficulty")
plt.show()

In [None]:
# One point per game: average vs. maximum cell difficulty, coloured by the
# "level" column.
scatter = plt.scatter(
    x=game_info["avg_cell_difficulty"],
    y=game_info["max_cell_difficulty"],
    c=game_info["level"],
)
plt.title("Distribution of game difficulty")
plt.xlabel("Average cell difficulty")
plt.ylabel("Maximum cell difficulty")
# Both axes are pinned to the fixed range 0..10.
plt.xlim([0, 10])
plt.ylim([0, 10])
ax = plt.gca()
# Only the legend handles are taken from the scatter; the labels are written
# by hand.
# NOTE(review): this assumes exactly three levels appear in the data, in the
# order Easy < Medium < Hard — confirm against DifficultyLevel.
level_handles = scatter.legend_elements()[0]
legend = ax.legend(
    level_handles,
    ["Easy", "Medium", "Hard"],
    loc="lower right",
    title="Difficulty Level",
)
ax.add_artist(legend)
plt.show()

In [None]:
# Number of games per distinct value of the "level" column.
game_info["level"].value_counts()

In [None]:
# plt.hist(cell_info["content_difficulty"], bins=20, rwidth=.9)
# plt.title("Cell content difficulty")
# plt.show()

In [None]:
# Scatter of row difficulty vs. column difficulty per cell, coloured by the
# cell's content difficulty. The commented-out lines are alternative views.
# NOTE(review): `cell_info` is not assigned in any cell visible here (only
# `game_info` is) — confirm where it comes from, otherwise this cell raises
# NameError on a fresh run.
# plt.scatter(x=cell_info["row_col_difficulty"], y=cell_info["content_difficulty"])
plt.scatter(x=cell_info["row_difficulty"], y=cell_info["col_difficulty"], c=cell_info["content_difficulty"])
# plt.scatter(x=cell_info["row_col_difficulty"], y=cell_info["row_col_difficulty_harmonic"])
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Exploratory GDP / population scatter plots, all currently disabled.
# NOTE(review): with every scatter call commented out, this cell itself plots
# nothing before plt.show() — re-enable one of the lines to get a figure.
# plt.scatter(x=np.log(df["gdp"]) - np.log(df["population"]), y=df["gdp_per_capita"])
# plt.scatter(x=np.log(df["population"]), y=np.log(df["gdp"]), c=df["difficulty"])
# plt.scatter(x=np.log(df["population"]), y=np.log(df["gdp"]), c=df["difficulty"])
plt.show()