In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset from its parquet file (path is relative to the notebook).
dataset = pd.read_parquet("../data/dataset_depth8_Sam_Raph_Sothatsit6_Kaggle.parquet")
# Number of rows loaded.
len(dataset.index)

In [None]:
# Histogram of the dataset's columns, with `light_turn` cast to int32 first
# (presumably a boolean flag that would otherwise be left out -- TODO confirm).
dataset.astype({'light_turn': 'int32'}).hist(figsize=(20, 15))

In [None]:
# Lift the column limit so DataFrame displays show every column (global pandas option).
pd.set_option('display.max_columns', None)
# Summary statistics, with `light_turn` cast to int32 first
# (presumably so that describe() includes it -- TODO confirm).
dataset.astype({'light_turn': 'int32'}).describe()

In [None]:
# Lift the column limit so the sampled rows are shown with every column.
pd.set_option('display.max_columns', None)
# Peek at 10 random rows; the fixed seed keeps the output identical on every re-run.
dataset.sample(n=10, random_state=42)

In [None]:
# Column labels of the loaded dataset.
dataset.columns

In [None]:
# Data type of each column of the loaded dataset.
dataset.dtypes

In [None]:
# use a fixed scale (1 to 2.5 let's say)
#  DONE
# Flip the scale for ranking (lower is better)
#  DONE
# Show the data WITH the colors
#  DONE
import numpy as np

def _pad_to_board_rows(df_heatmap, n_rows=8):
    """Return ``df_heatmap`` padded with NaN rows so that it has ``n_rows`` rows."""
    if len(df_heatmap.index) >= n_rows:
        return df_heatmap

    board_rows = pd.Index(list(range(1, n_rows + 1)), name=df_heatmap.index.name)

    if df_heatmap.index.is_unique and df_heatmap.index.isin(board_rows).all():
        # Align by row label: a table that lacks e.g. row 1 or a middle row must
        # keep its remaining rows on their own board row instead of sliding up.
        return df_heatmap.reindex(board_rows).astype(float)

    # Labels are not a subset of 1..n_rows (e.g. a 0-based index): keep the
    # positional fill so such tables are still padded at the bottom.
    padded = pd.DataFrame(np.nan, index=board_rows, columns=df_heatmap.columns, dtype=float)
    padded.iloc[:len(df_heatmap.index), :] = df_heatmap.values
    return padded


def show_heatmap(df_heatmap, prefix, suffix, cmap, vrange=None, threshold = 0, invert_colorbar = False):
    """Draw one annotated heatmap of a per-tile table and show it.

    Parameters
    ----------
    df_heatmap : pandas.DataFrame
        Table to draw, one cell per tile (rows on the y axis, columns on the
        x axis). Tables with fewer than 8 rows are padded with NaN rows; the
        padding is aligned on the row labels when these all lie in 1..8.
    prefix, suffix : str
        Inserted into the title ``"{prefix} of pawn of Expectimax depth 8 {suffix}"``.
    cmap
        Colormap passed to ``imshow``.
    vrange : sequence of two numbers, optional
        ``(vmin, vmax)`` of the colour scale; ``None`` lets matplotlib autoscale.
    threshold : number
        Cells whose value is below this are annotated in white, others in black.
    invert_colorbar : bool
        If true, the colorbar's y axis is inverted.
    """
    if vrange is None:
        vrange = [None, None]
    fig, ax = plt.subplots()
    df_heatmap = _pad_to_board_rows(df_heatmap)
    im = ax.imshow(df_heatmap, cmap=cmap, interpolation='nearest', vmin=vrange[0], vmax=vrange[1])

    # Add colorbar
    cbar = ax.figure.colorbar(im, ax=ax)
    if invert_colorbar:
        cbar.ax.invert_yaxis()

    # Set axis labels (fixed 3 x 8 board: columns A-C, rows 1-8)
    ax.set_xticks(np.arange(3))
    ax.set_yticks(np.arange(8))
    ax.set_xticklabels(["A", "B", "C"])
    ax.set_yticklabels(list(np.arange(8) + 1))

    # Rotate x-axis labels
    plt.setp(ax.get_xticklabels(), rotation=0, ha="center", rotation_mode="anchor")

    # Annotate every cell that holds a value; empty (NaN) cells stay blank
    # instead of being labelled "nan".
    for i in range(len(df_heatmap.index)):
        for j in range(len(df_heatmap.columns)):
            value = df_heatmap.iloc[i, j]
            if pd.isna(value):
                continue
            color = 'white' if value < threshold else 'black'
            ax.text(j, i, round(value, 2), ha="center", va="center", color=color)

    # Add title
    ax.set_title(f"{prefix} of pawn of Expectimax depth 8 {suffix}")

    plt.show()

def gen_df(dataset, df_type):
    """Build per-tile pivot tables of column ``df_type``.

    Returns a dict with key ``'all'`` (every row of ``dataset``) and keys
    ``'1'`` .. ``'4'`` (only the rows whose ``roll`` equals that number).
    Each value is ``pivot_table(values=df_type, index='y', columns='x')``.
    """
    def pivot(frame):
        return frame.pivot_table(values=df_type, index='y', columns='x')

    tables = {'all': pivot(dataset)}
    for roll in (1, 2, 3, 4):
        tables[str(roll)] = pivot(dataset.query(f"roll == {roll}"))
    return tables

def show_all_df(dfs, prefix, all_rolls_string, specific_roll_string, cmap, ranges, **kwargs):
    """Call ``show_heatmap`` once per entry of ``dfs``.

    The entry keyed ``'all'`` gets ``all_rolls_string`` as title suffix; every
    other entry gets ``"{specific_roll_string} {key}"``. ``ranges`` and any
    extra keyword arguments are forwarded to ``show_heatmap`` unchanged.
    """
    for roll_key in dfs:
        if roll_key == 'all':
            suffix = all_rolls_string
        else:
            suffix = f"{specific_roll_string} {roll_key}"
        show_heatmap(dfs[roll_key], prefix, suffix, cmap, ranges, **kwargs)

In [None]:
# Pivot tables of `utility` per tile (rows = y, columns = x): one for all rolls, one per roll 1-4.
df_utility = gen_df(dataset, 'utility')

In [None]:
# Inspect the utility table computed over all rolls.
df_utility['all']

In [None]:
from matplotlib.colors import LinearSegmentedColormap
colors = [(1, 0, 0), (1, 1, 0), (0, 1, 0)] # red, yellow, green
cmap_name = 'my_colormap'
# cm: low values red, high values green.
cm = LinearSegmentedColormap.from_list(cmap_name, colors, N=256)
# icm: the same ramp reversed (low values green, high values red). It is built
# from a reversed copy so `colors` keeps matching its comment, and it gets its
# own name so the two colormaps can be told apart.
icm = LinearSegmentedColormap.from_list(f"{cmap_name}_r", list(reversed(colors)), N=256)

In [None]:
# One utility heatmap per table (all rolls, then each roll), on a fixed colour scale of -1.5 to 3.
show_all_df(df_utility, "Utility", "for all rolls", "for rolls of", cm, (-1.5, 3))

In [None]:
# Pivot tables of `rank` per tile (rows = y, columns = x): one for all rolls, one per roll 1-4.
df_rank = gen_df(dataset, 'rank')

In [None]:
# Rank heatmaps with the reversed colormap on a fixed 1 to 2.5 scale; the colorbar axis is inverted.
show_all_df(df_rank, "Rank", "for all rolls", "for rolls of", icm, (1, 2.5), invert_colorbar=True)

In [None]:
"""
Ways to define the early game:

- First player to get to the rosette
- Both players have 6 or more pawns left to play

Ways to define the end game:

- Score of one of the players is at least 6

"""

# Early game: both `dark_left` and `light_left` are greater than 4.
# NOTE(review): the note above says "6 or more pawns left", but this keeps rows
# with 5 as well (if the counts are integers) -- confirm which threshold is intended.
df_early_game = dataset.loc[dataset["dark_left"].gt(4) & dataset["light_left"].gt(4)]
df_early_game_rolls_rank = gen_df(df_early_game, 'rank')

# End game: `dark_score` or `light_score` is greater than 5.
df_end_game = dataset.loc[dataset["dark_score"].gt(5) | dataset["light_score"].gt(5)]
df_end_game_rolls_rank = gen_df(df_end_game, 'rank')

In [None]:
# Rank heatmaps restricted to the early-game rows, same colormap and scale as the overall rank heatmaps.
show_all_df(df_early_game_rolls_rank, "Rank", "in early game for all rolls", "in early game for rolls of", icm, (1, 2.5), invert_colorbar=True)

In [None]:
# red = end game is better than early game
# green = early game is better than end game

def diff_all_df(dict1, dict2):
    """Return ``{key: dict1[key] - dict2[key]}`` for every key of ``dict1``.

    Keys that exist only in ``dict2`` are ignored; a key missing from
    ``dict2`` raises ``KeyError``.
    """
    differences = {}
    for key, left in dict1.items():
        differences[key] = left - dict2[key]
    return differences

def add_all_df(dict1, scalar):
    """Return a new dict with ``scalar`` added to every value of ``dict1``."""
    shifted = {}
    for key, value in dict1.items():
        shifted[key] = value + scalar
    return shifted

# Heatmaps of (early-game rank - end-game rank), shifted by +0.39 so that the
# values sit roughly around 0. The 0.39 is described as the difference in
# averages of the early and end game; it looks like it comes from the
# roll == 2 means computed in the cells below (2.174 - 1.785) -- TODO confirm,
# and consider computing it instead of hard-coding it.
# threshold=-2 lies below the colour range, so annotations are drawn in black
# unless a value falls below -2.
show_all_df(
    dfs=add_all_df(diff_all_df(df_early_game_rolls_rank, df_end_game_rolls_rank), 0.39),
    prefix="Rank difference",
    all_rolls_string="in early game VS. end game for all rolls",
    specific_roll_string="in early game VS. end game for rolls of",
    cmap=icm,
    ranges=(-0.8, 0.8),
    invert_colorbar=True,
    threshold=-2,
)

In [None]:
# Mean rank over the early-game rows whose roll is 2.
df_early_game.loc[df_early_game["roll"] == 2, "rank"].mean()

In [None]:
# Mean rank over the end-game rows whose roll is 2.
df_end_game.loc[df_end_game["roll"] == 2, "rank"].mean()

In [None]:
# Difference of two hard-coded values (presumably the roll == 2 means printed by
# the two cells above -- TODO confirm). It is approximately 0.39, the offset
# added to the rank-difference heatmaps.
2.174 - 1.785