In [None]:
!pip install kaggle-environments --upgrade

In [None]:
from pprint import pprint
import os
import sys
from time import time, sleep
import json
from datetime import datetime
import warnings
from operator import itemgetter
from itertools import groupby, count
from collections import defaultdict, Counter

import numpy as np
import numba
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from kaggle_environments import (
    evaluate, make, utils,
    get_episode_replay, list_episodes, list_episodes_for_submission  # list_episodes_for_team is no longer available
)

SANTA2020_COMPETITION_ID = 24539
pd.options.display.max_rows = 1000
warnings.filterwarnings("ignore")

In [None]:
!wget 'https://www.kaggle.com/c/santa-2020/leaderboard.json?includeBeforeUser=true&includeAfterUser=false' -O leaderboard.json

In [None]:
# Read the leaderboard snapshot saved as leaderboard.json and flatten its two
# parts ("beforeUser" followed by "afterUser") into one list of team entries.
with open("leaderboard.json") as f:
    jsn = json.load(f)
leaderboard_data = [*jsn["beforeUser"], *jsn["afterUser"]]

In [None]:
# Load the Meta Kaggle tables and parse their date columns into datetimes.
df_episode_agents = pd.read_csv("../input/meta-kaggle/EpisodeAgents.csv")

df_episodes = pd.read_csv("../input/meta-kaggle/Episodes.csv")
for time_col in ("CreateTime", "EndTime"):
    # Episode timestamps carry a time of day.
    df_episodes[time_col] = pd.to_datetime(df_episodes[time_col], format="%m/%d/%Y %H:%M:%S")

df_teams = pd.read_csv("../input/meta-kaggle/Teams.csv")
for date_col in ("ScoreFirstSubmittedDate", "LastSubmissionDate", "MedalAwardDate"):
    # Team dates are day-resolution only.
    df_teams[date_col] = pd.to_datetime(df_teams[date_col], format="%m/%d/%Y")

In [None]:
# Episode ids in [first Santa 2020 episode id, newest id in the dump] that have
# no row in Episodes.csv; a later cell passes them to get_new_data().
santa_rows = df_episodes["CompetitionId"] == SANTA2020_COMPETITION_ID
l = df_episodes.loc[santa_rows, "Id"].min()
r = df_episodes["Id"].max()
known_ids = set(df_episodes["Id"].values)
missing_ids = sorted(set(range(l, r + 1)).difference(known_ids))
# Sanity bound on how many ids are missing.
assert len(missing_ids) <= 100000

In [None]:
%%time

# Accumulators filled by get_new_data(): raw episode records, per-agent records,
# and submissions / teams keyed by their id.
episodes, episode_agents = [], []
dict_submissions, dict_teams = {}, {}

def get_new_data(ids):
    """Fetch the episodes with the given ids and append them to the global accumulators.

    ``list_episodes(ids)`` is attempted up to three times. On success every
    returned episode is appended to ``episodes``, each of its agents (tagged
    with ``episodeId``) to ``episode_agents``, and the accompanying submissions
    (except those whose status is "error") and teams are stored in
    ``dict_submissions`` / ``dict_teams`` keyed by id.

    Args:
        ids: list of episode ids to request in one call.

    Returns:
        True if at least one episode was returned; False if all three attempts
        failed or the response contained no episodes.
    """
    for _ in range(3):
        try:
            res = list_episodes(ids)
        except Exception as exc:
            # Catch Exception instead of a bare ``except`` so KeyboardInterrupt /
            # SystemExit can still stop the download, and show what went wrong.
            print("ERROR!", repr(exc))
            sleep(60)
        else:
            # Pause after a successful request before the caller sends the next one.
            sleep(10)
            break
    else:
        # All three attempts raised.
        print("SKIPPED!")
        return False
    print(f'len(res["result"]["episodes"])={len(res["result"]["episodes"])}')
    if len(res["result"]["episodes"]) == 0:
        return False
    for episode in res["result"]["episodes"]:
        episodes.append(episode)
        for agent in episode["agents"]:
            # Record the parent episode on the agent so the agents can be flattened into a table.
            agent["episodeId"] = episode["id"]
            episode_agents.append(agent)
    for submission in res["result"]["submissions"]:
        if submission["status"] != "error":
            dict_submissions[submission["id"]] = submission
    for team in res["result"]["teams"]:
        dict_teams[team["id"]] = team
    return True

# Pass 1: request the ids missing from the Meta Kaggle dump, 1000 per call.
for idx_missing_ids in range(0, len(missing_ids), 1000):
    ids = missing_ids[idx_missing_ids:idx_missing_ids+1000]
    print(f"{ids[0]}-{ids[-1]}")
    get_new_data(ids)

# Pass 2: walk forward from the first id after the dump, 1000 ids per call,
# for at most 500000 ids.
l = df_episodes["Id"].max()+1
for id_ in range(l, l+500000, 1000):
    ids = list(range(id_, id_+1000))
    print(f"{ids[0]}-{ids[-1]}")
    updated = get_new_data(ids)
    # An empty/failed batch only ends the scan once past this hard-coded id.
    # NOTE(review): presumably an episode id known to exist when this was written — confirm.
    if ids[0] > 17759033 and not updated:
        print("finished!")
        break

In [None]:
# Number of episode records collected by get_new_data().
len(episodes)

In [None]:
# Lowest leaderboard score that still earned each medal, plus the colour used
# to draw each threshold line.
medal_colors = ["#B88121", "#838280", "#8E5B3D"]
gold_score, silver_score, bronze_score = (
    min(float(entry["score"]) for entry in leaderboard_data if entry["medal"] == medal)
    for medal in ("gold", "silver", "bronze")
)
medal_thresholds = [gold_score, silver_score, bronze_score]
medal_thresholds

In [None]:
def episode_type_str_to_int(t):
    """Translate an episode type name into its integer code.

    "public" maps to 1 and "validation" to 4; any other value yields the
    sentinel -100.
    """
    for type_name, type_code in (("public", 1), ("validation", 4)):
        if t == type_name:
            return type_code
    # Unrecognised episode type.
    return -100

# Shape the freshly fetched episodes like Episodes.csv: capitalise the first
# letter of each column name, convert the {"seconds": ...} timestamps to
# datetimes and the type names to integer codes, then keep the CSV's columns.
df_episodes_new = pd.DataFrame(episodes)
df_episodes_new.columns = [col[0].upper() + col[1:] for col in df_episodes_new.columns]
# NOTE(review): datetime.fromtimestamp() yields local time; confirm the kernel's
# timezone matches the timezone of the CSV timestamps.
df_episodes_new["CreateTime"] = df_episodes_new["CreateTime"].map(lambda x: datetime.fromtimestamp(x["seconds"]))
df_episodes_new["EndTime"] = df_episodes_new["EndTime"].map(lambda x: datetime.fromtimestamp(x["seconds"]))
df_episodes_new["Type"] = df_episodes_new["Type"].map(episode_type_str_to_int)
df_episodes_new = df_episodes_new[df_episodes.columns]

# Append to the CSV data; on duplicate Ids the fetched row wins (keep="last").
df_episodes = pd.concat([df_episodes, df_episodes_new])
df_episodes.drop_duplicates("Id", keep="last", inplace=True)
df_episodes.sort_values("Id", inplace=True)
df_episodes.reset_index(drop=True, inplace=True)

In [None]:
# Shape the fetched agent records like EpisodeAgents.csv.
df_episode_agents_new = pd.DataFrame(episode_agents)
df_episode_agents_new.columns = [col[0].upper() + col[1:] for col in df_episode_agents_new.columns]
# Fetched rows get the placeholder State -1.
df_episode_agents_new["State"] = -1
df_episode_agents_new = df_episode_agents_new[df_episode_agents.columns]

# Append to the CSV data; on duplicate Ids the fetched row wins (keep="last").
df_episode_agents = pd.concat([df_episode_agents, df_episode_agents_new])
df_episode_agents.drop_duplicates("Id", keep="last", inplace=True)
df_episode_agents.sort_values("Id", inplace=True)
df_episode_agents.reset_index(drop=True, inplace=True)

In [None]:
# Shape the fetched team records like Teams.csv, keeping only the columns the
# two tables have in common.
df_teams_new = pd.DataFrame(dict_teams.values())
df_teams_new.columns = [col[0].upper() + col[1:] for col in df_teams_new.columns]
# Teams without a last-submission timestamp get NaN instead of a datetime.
df_teams_new["LastSubmissionDate"] = df_teams_new["LastSubmissionDate"].map(lambda x: datetime.fromtimestamp(x["seconds"]) if x is not None else np.nan)
df_teams_new = df_teams_new[list(set(df_teams_new.columns) & set(df_teams.columns))]

# Append to the CSV data; on duplicate Ids the fetched row wins (keep="last").
df_teams = pd.concat([df_teams, df_teams_new])
df_teams.drop_duplicates("Id", keep="last", inplace=True)
df_teams.sort_values("Id", inplace=True)
df_teams.reset_index(drop=True, inplace=True)

In [None]:
# Keep only agent rows whose episode belongs to the Santa 2020 competition.
df_episode_agents = df_episode_agents[df_episode_agents["EpisodeId"].map(dict(df_episodes[["Id", "CompetitionId"]].values))==SANTA2020_COMPETITION_ID]
df_episode_agents.reset_index(inplace=True, drop=True)
# Keep only episodes with exactly two agent rows and sort by episode, so the two
# rows of an episode are adjacent (later cells pair consecutive rows).
df_episode_agents = df_episode_agents[df_episode_agents["EpisodeId"].map(df_episode_agents["EpisodeId"].value_counts())==2]
df_episode_agents.sort_values("EpisodeId", inplace=True)
df_episode_agents.reset_index(inplace=True, drop=True)
# Team of each agent's submission; -1 when the submission was not returned by the API.
df_episode_agents["TeamId"] = df_episode_agents["SubmissionId"].map(lambda x: dict_submissions[x]["teamId"] if x in dict_submissions else -1)

# One row per submission: its rating is the UpdatedScore of the last row kept
# for that submission; submissions whose rating is NaN are dropped.
df_submissions = df_episode_agents.drop_duplicates("SubmissionId", keep="last")[["SubmissionId", "TeamId", "UpdatedScore"]]
df_submissions.rename(columns={"UpdatedScore": "Rating"}, inplace=True)
df_submissions.reset_index(drop=True, inplace=True)
df_submissions = df_submissions[~df_submissions["Rating"].isna()]
df_submissions.sort_values("SubmissionId", inplace=True)
df_submissions.reset_index(drop=True, inplace=True)
# Submission timestamp from the API record; NaN when the submission is unknown.
df_submissions["SubmissionDate"] = df_submissions["SubmissionId"].map(lambda x: datetime.fromtimestamp(dict_submissions[x]["dateSubmitted"]["seconds"]) if x in dict_submissions else np.nan)

# Team id -> leaderboard rank; teams absent from the leaderboard get rank 99999.
dict_team_id_to_team_rank = defaultdict(lambda: 99999)
dict_team_id_to_team_rank.update({team["teamId"]: team["rank"] for team in leaderboard_data})

# Boxplot (last 30 agents for each team)

In [None]:
# Box plot + jittered scatter of the ratings of each top team's most recent
# `last_n` submissions, one row per leaderboard rank.
last_n = 30
max_rank = 100
# Rating axis: upper bound rounded up to the next multiple of 100, 1000 points wide.
max_rating = (int(df_submissions["Rating"].max()) // 100 + 1) * 100
min_rating = max_rating - 1000

plt.figure(figsize=(20, 50))
# Dashed vertical lines at the gold / silver / bronze score thresholds.
plt.vlines(medal_thresholds, 0, max_rank+1, medal_colors, linewidth=1.5, linestyles=(0, (3, 10)))

dict_submission_id_to_rating = dict(df_submissions[["SubmissionId", "Rating"]].values)
# Per team: the `last_n` largest submission ids (empty list for teams with no rated submission).
last_submission_ids_each_team = defaultdict(list)
last_submission_ids_each_team.update(dict(df_submissions.groupby("TeamId")["SubmissionId"].apply(lambda x: sorted(x)[-last_n:])))
last_submission_ratings_each_team = defaultdict(list)
last_submission_ratings_each_team.update({team_id: [dict_submission_id_to_rating[sub_id] for sub_id in sub_ids] for team_id, sub_ids in last_submission_ids_each_team.items()})
top_teams_last_submission_ratings = []
for team in leaderboard_data[:max_rank]:
    rank, score, team_id, team_name, n_agents = team["rank"], team["score"], team["teamId"], team["teamName"], team["entries"]
    ratings = last_submission_ratings_each_team[team_id]
    top_teams_last_submission_ratings.append(ratings)
    
    # Unseeded Gaussian jitter around the rank so overlapping points are visible.
    plt.scatter(ratings, np.random.randn(len(ratings))*0.15+rank)
    plt.annotate(f"{rank:3d}  {team_name}  {score}  {n_agents}", xy=(min_rating, rank), fontsize=20)
# NOTE(review): the boxes are drawn at boxplot's default positions while the scatter
# uses team["rank"]; this assumes the first max_rank entries have ranks 1..max_rank — confirm.
plt.boxplot(top_teams_last_submission_ratings, vert=False)
plt.xlim(min_rating, max_rating)
# Inverted y axis: rank 1 at the top.
plt.ylim(max_rank+1, 0)
plt.xticks(list(range(min_rating, max_rating+100, 100)))
plt.tick_params(labeltop=True)
plt.grid()
plt.show()

# Win rate (last 30 agents for each team)

## last 30 vs last 30

In [None]:
# Head-to-head record between the top `top_n` teams, counting only episodes in
# which both agents are among their team's most recent `last_n` submissions.
top_n = 25
last_n = 30

last_submission_ids_each_team = dict(df_submissions.groupby("TeamId")["SubmissionId"].apply(lambda x: sorted(x)[-last_n:]))

# Matrices are indexed [row team rank - 1, column team rank - 1].
wins = np.zeros((top_n, top_n))
loses = np.zeros((top_n, top_n))
draws = np.zeros((top_n, top_n))
top_team_ids = set(team["teamId"] for team in leaderboard_data[:top_n])
target_submission_ids = {sub_id for team_id, sub_ids in last_submission_ids_each_team.items() for sub_id in sub_ids if team_id in top_team_ids}
# zip(it, it) draws two consecutive rows from the same iterator, i.e. the two
# agents of one episode (the frame is sorted by EpisodeId with two rows each).
it = df_episode_agents.itertuples()
for agent1, agent2 in zip(it, it):
    assert agent1.EpisodeId == agent2.EpisodeId
    if agent1.SubmissionId in target_submission_ids and agent2.SubmissionId in target_submission_ids:
        team1, team2 = dict_submissions[agent1.SubmissionId]["teamId"], dict_submissions[agent2.SubmissionId]["teamId"]
        idx1, idx2 = dict_team_id_to_team_rank[team1] - 1, dict_team_id_to_team_rank[team2] - 1
        # Both rewards NaN, or equal rewards -> draw; a NaN reward on one side
        # only counts as a loss for that side.
        if (np.isnan(agent1.Reward) and np.isnan(agent2.Reward)) or agent1.Reward == agent2.Reward:
            draws[idx1, idx2] += 1.0
            draws[idx2, idx1] += 1.0
        elif np.isnan(agent2.Reward) or agent1.Reward > agent2.Reward:
            wins[idx1, idx2] += 1.0
            loses[idx2, idx1] += 1.0
        else:
            loses[idx1, idx2] += 1.0
            wins[idx2, idx1] += 1.0

plt.figure(figsize=(24, 18))
# Cell colour = (wins + draws/2) / games; pairs with no games divide 0/0 and give NaN.
plt.imshow((wins + draws*0.5) / (wins + loses + draws), cmap="RdBu_r", vmin=0, vmax=1)
for y in range(top_n):
    for x in range(top_n):
        # Cell label: wins/losses/draws of the row team against the column team.
        plt.annotate(f"{int(wins[y,x])}/{int(loses[y,x])}/{int(draws[y,x])}", xy=(x, y), ha="center", va="center")
team_names = [team["teamName"] for team in leaderboard_data[:top_n]]
plt.xticks(np.arange(top_n), team_names, rotation=90)
plt.yticks(np.arange(top_n), team_names)
plt.xlabel("last 30 agents")
plt.ylabel("last 30 agents")
plt.tick_params(labeltop=True, labelright=True)
plt.colorbar()
plt.show()

## last 30 vs all

In [None]:
# Record of each top team's most recent `last_n` submissions (rows) against all
# rated submissions of each top team (columns).
top_n = 25
last_n = 30

all_submission_ids_each_team = dict(df_submissions.groupby("TeamId")["SubmissionId"].apply(lambda x: sorted(x)))
last_submission_ids_each_team = {team_id: sub_ids[-last_n:] for team_id, sub_ids in all_submission_ids_each_team.items()}

wins = np.zeros((top_n, top_n))
loses = np.zeros((top_n, top_n))
draws = np.zeros((top_n, top_n))
top_team_ids = set(team["teamId"] for team in leaderboard_data[:top_n])
# _1: row side (recent submissions only); _2: column side (every submission).
target_submission_ids_1 = {sub_id for team_id, sub_ids in last_submission_ids_each_team.items() for sub_id in sub_ids if team_id in top_team_ids}
target_submission_ids_2 = {sub_id for team_id, sub_ids in all_submission_ids_each_team.items() for sub_id in sub_ids if team_id in top_team_ids}
# zip(it, it) pairs the two consecutive rows that belong to one episode.
it = df_episode_agents.itertuples()
for agent1_, agent2_ in zip(it, it):
    assert agent1_.EpisodeId == agent2_.EpisodeId
    # Try both orientations; each one only updates the [idx1, idx2] cell, because
    # the matrix is not symmetric (rows and columns use different submission sets).
    for agent1, agent2 in [[agent1_, agent2_], [agent2_, agent1_]]:
        if agent1.SubmissionId in target_submission_ids_1 and agent2.SubmissionId in target_submission_ids_2:
            team1, team2 = dict_submissions[agent1.SubmissionId]["teamId"], dict_submissions[agent2.SubmissionId]["teamId"]
            idx1, idx2 = dict_team_id_to_team_rank[team1] - 1, dict_team_id_to_team_rank[team2] - 1
            if (np.isnan(agent1.Reward) and np.isnan(agent2.Reward)) or agent1.Reward == agent2.Reward:
                draws[idx1, idx2] += 1.0
            elif np.isnan(agent2.Reward) or agent1.Reward > agent2.Reward:
                wins[idx1, idx2] += 1.0
            else:
                loses[idx1, idx2] += 1.0

plt.figure(figsize=(24, 18))
# Cell colour = (wins + draws/2) / games; pairs with no games divide 0/0 and give NaN.
plt.imshow((wins + draws*0.5) / (wins + loses + draws), cmap="RdBu_r", vmin=0, vmax=1)
for y in range(top_n):
    for x in range(top_n):
        plt.annotate(f"{int(wins[y,x])}/{int(loses[y,x])}/{int(draws[y,x])}", xy=(x, y), ha="center", va="center")
team_names = [team["teamName"] for team in leaderboard_data[:top_n]]
plt.xticks(np.arange(top_n), team_names, rotation=90)
plt.yticks(np.arange(top_n), team_names)
plt.xlabel("all agents")
plt.ylabel("last 30 agents")
plt.tick_params(labeltop=True, labelright=True)
plt.colorbar()
plt.show()

## last 30 vs rating

In [None]:
# Settings for "recent submissions of each top team vs. opponents binned by rating".
top_n = 100
last_n = 30
# Opponent ratings fall into n_classes bins of bin_size points, counted down from max_rating.
n_classes = 25
bin_size = 25
# Highest rating rounded up to the next multiple of bin_size.
max_rating = (int(df_submissions["Rating"].max()) // bin_size + 1) * bin_size

all_submission_ids_each_team = dict(df_submissions.groupby("TeamId")["SubmissionId"].apply(lambda x: sorted(x)))
last_submission_ids_each_team = {team_id: sub_ids[-last_n:] for team_id, sub_ids in all_submission_ids_each_team.items()}

def rating_to_class(rating):
    """Return the index of the rating bin that `rating` falls into.

    Bins are counted downwards from the notebook-level `max_rating` in steps of
    `bin_size`; anything that would land past the last bin is clamped to
    `n_classes - 1`.
    """
    bins_below_top = (max_rating - int(rating)) // bin_size
    return min(bins_below_top, n_classes - 1)

# Matrices are indexed [team rank - 1, opponent rating bin].
wins = np.zeros((top_n, n_classes))
loses = np.zeros((top_n, n_classes))
draws = np.zeros((top_n, n_classes))
top_team_ids = set(team["teamId"] for team in leaderboard_data[:top_n])
target_submission_ids_1 = {sub_id for team_id, sub_ids in last_submission_ids_each_team.items() for sub_id in sub_ids if team_id in top_team_ids}
# zip(it, it) pairs the two consecutive rows that belong to one episode.
it = df_episode_agents.itertuples()
for agent1_, agent2_ in zip(it, it):
    assert agent1_.EpisodeId == agent2_.EpisodeId
    # Consider each agent of the episode as the "row" side in turn.
    for agent1, agent2 in [[agent1_, agent2_], [agent2_, agent1_]]:
        if agent1.SubmissionId in target_submission_ids_1:
            team1 = dict_submissions[agent1.SubmissionId]["teamId"]
            # NOTE(review): the rating lookup raises KeyError if the opponent has no row in
            # df_submissions (e.g. its rating was NaN and got dropped) — confirm this cannot occur.
            idx1, idx2 = dict_team_id_to_team_rank[team1] - 1, rating_to_class(dict_submission_id_to_rating[agent2.SubmissionId])
            if (np.isnan(agent1.Reward) and np.isnan(agent2.Reward)) or agent1.Reward == agent2.Reward:
                draws[idx1, idx2] += 1.0
            elif np.isnan(agent2.Reward) or agent1.Reward > agent2.Reward:
                wins[idx1, idx2] += 1.0
            else:
                loses[idx1, idx2] += 1.0

plt.figure(figsize=(24, 80))
# Cell colour = (wins + draws/2) / games; cells with no games divide 0/0 and give NaN.
plt.imshow((wins + draws*0.5) / (wins + loses + draws), cmap="RdBu_r", vmin=0.0, vmax=1.0)
for y in range(top_n):
    for x in range(n_classes):
        text = f"{int(wins[y,x])}/{int(loses[y,x])}/{int(draws[y,x])}"
        # Labels of 9+ characters are wrapped so the draw count goes on a second line.
        if len(text) >= 9:
            text = f"{int(wins[y,x])}/{int(loses[y,x])}\n/{int(draws[y,x])}"
        plt.annotate(text, xy=(x, y), ha="center", va="center", ma="right")
team_names = [team["teamName"] for team in leaderboard_data[:top_n]]
# Ticks sit on the left edge of each column and show the bin's upper rating bound.
plt.xticks(np.arange(-0.5, n_classes, 1.0), np.arange(max_rating, max_rating-bin_size*n_classes, -bin_size))
plt.yticks(np.arange(top_n), team_names)
plt.xlabel("current rating")
plt.ylabel("last 30 agents")
plt.tick_params(labeltop=True, labelright=True)
plt.colorbar(aspect=60)
plt.show()

# Rating vs Submission date

In [None]:
# 20x5 grid of scatter plots (one per team ranked 1..100): rating of each
# submission against the date it was submitted.
plt.figure(figsize=(20, 70))
cmap = plt.get_cmap("tab10")
# Team id -> team name; NaN for teams that are not on the leaderboard.
dict_team_id_to_team_name = defaultdict(lambda: np.nan)
dict_team_id_to_team_name.update({team["teamId"]: team["teamName"] for team in leaderboard_data})

xlim = datetime(2020, 12, 9), datetime(2021, 2, 3)
max_rating = (int(df_submissions["Rating"].max()) // 100 + 1) * 100
min_rating = max_rating - 1000

# Sort submissions by their team's rank so groupby() sees each ranked team's
# submissions as one consecutive run.
for team_id, group in groupby(sorted(dict_submissions.values(), key=lambda x: dict_team_id_to_team_rank[x["teamId"]]), key=itemgetter("teamId")):
    rank = dict_team_id_to_team_rank[team_id]
    if not 1 <= rank <= 100:
        continue
    # The rank doubles as the subplot position.
    plt.subplot(20, 5, rank)
    plt.hlines(medal_thresholds, *xlim, medal_colors, linewidth=1.2, linestyles="solid")
    group = list(group)
    group = [sub for sub in group if sub["status"] != "error"]
    # NOTE(review): raises KeyError for a submission without a row in df_submissions
    # (no rated episode) — confirm every submission of a top-100 team has a rating.
    rates = [dict_submission_id_to_rating[sub["id"]] for sub in group]
    sub_datetimes = [datetime.fromtimestamp(sub["dateSubmitted"]["seconds"]) for sub in group]
    plt.scatter(sub_datetimes, rates, s=10, c=cmap((rank-1)%10))
    plt.xlim(*xlim)
    plt.ylim(min_rating, max_rating)
    team_name = dict_team_id_to_team_name[team_id]
    plt.title(f"{rank}  {team_name}")
    # Date ticks on the 1st and 16th of each month, shown as MM-DD.
    plt.gca().xaxis.set_major_locator(mdates.DayLocator(bymonthday=(1, 16)))
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%m-%d"))
    plt.grid()
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.3)
plt.show()

# Top agents

In [None]:
# Submissions rated 1350 or higher, ordered by rating; "Rank" is the 1-based
# position in that ordering.
df = df_submissions[df_submissions["Rating"] >= 1350]
# In-place operations on a filtered frame; pandas' SettingWithCopyWarning is
# hidden by the warnings filter set in the import cell.
df.sort_values("Rating", ascending=False, inplace=True)
df.reset_index(drop=True, inplace=True)
df["TeamName"] = df["TeamId"].map(dict_team_id_to_team_name)
df["Rank"] = np.arange(1, len(df)+1)

top_n = 100
last_n = 30
# Opponent ratings fall into n_classes bins of bin_size points, counted down from max_rating.
n_classes = 8
bin_size = 100
max_rating = (int(df_submissions["Rating"].max()) // bin_size + 1) * bin_size

all_submission_ids_each_team = dict(df_submissions.groupby("TeamId")["SubmissionId"].apply(lambda x: sorted(x)))
last_submission_ids_each_team = {team_id: sub_ids[-last_n:] for team_id, sub_ids in all_submission_ids_each_team.items()}

def rating_to_class(rating):
    """Return the rating-bin index for `rating`, clamped to the last bin.

    Uses the notebook-level `max_rating`, `bin_size` and `n_classes` as they
    are set when the function is called; bin 0 is the highest-rated bin.
    """
    offset_from_top = max_rating - int(rating)
    class_index = offset_from_top // bin_size
    last_class = n_classes - 1
    return last_class if class_index > last_class else class_index

# Tally wins / losses / draws of every top-rated submission against opponents
# grouped by rating bin. Matrices are indexed [agent rank - 1, opponent bin].
dict_submission_id_to_agent_rank = dict(df[["SubmissionId", "Rank"]].values)
n_targets = len(dict_submission_id_to_agent_rank)
wins = np.zeros((n_targets, n_classes), dtype=np.int64)
loses = np.zeros((n_targets, n_classes), dtype=np.int64)
draws = np.zeros((n_targets, n_classes), dtype=np.int64)
# zip(it, it) pairs the two consecutive rows that belong to one episode.
it = df_episode_agents.itertuples()
for agent1_, agent2_ in zip(it, it):
    assert agent1_.EpisodeId == agent2_.EpisodeId
    # Consider each agent of the episode as the "row" side in turn.
    for agent1, agent2 in [[agent1_, agent2_], [agent2_, agent1_]]:
        if agent1.SubmissionId in dict_submission_id_to_agent_rank:
            # NOTE(review): the rating lookup raises KeyError if the opponent has no row
            # in df_submissions — confirm this cannot occur.
            idx1, idx2 = dict_submission_id_to_agent_rank[agent1.SubmissionId] - 1, rating_to_class(dict_submission_id_to_rating[agent2.SubmissionId])
            if (np.isnan(agent1.Reward) and np.isnan(agent2.Reward)) or agent1.Reward == agent2.Reward:
                draws[idx1, idx2] += 1.0
            elif np.isnan(agent2.Reward) or agent1.Reward > agent2.Reward:
                wins[idx1, idx2] += 1.0
            else:
                loses[idx1, idx2] += 1.0

# For each rating bin add the three count columns plus a combined "w/l/d" text
# column; only the text columns are collected for display.
win_lose_draw_columns = []
for idx_classes in range(n_classes):
    # l / r are the upper / lower rating bound of the bin; the last bin is
    # open-ended, so its lower bound is rendered as an empty string.
    l = max_rating - idx_classes * bin_size
    r = l - bin_size if idx_classes != n_classes - 1 else ""
    df[f"vs_{l}-{r}_win"] = wins[:, idx_classes]
    df[f"vs_{l}-{r}_lose"] = loses[:, idx_classes]
    df[f"vs_{l}-{r}_draw"] = draws[:, idx_classes]
    col = f"vs_{l}-{r}"
    win_lose_draw_columns.append(col)
    df[col] = df[f"vs_{l}-{r}_win"].astype(str) + "/" + df[f"vs_{l}-{r}_lose"].astype(str) + "/" + df[f"vs_{l}-{r}_draw"].astype(str)

def win_rate_gradient(s, cmap="PuBu", low=0, high=0, text_color_threshold=0.408):
    """Build CSS that colours "wins/losses/draws" cells by their win rate.

    Adapted from ``Styler.background_gradient`` in pandas v1.2.1:
    https://github.com/pandas-dev/pandas/blob/v1.2.1/pandas/io/formats/style.py
    The colour scale is fixed to win rates in [0, 1] instead of the data's
    min/max, and each cell is parsed from a "w/l/d" string.

    Args:
        s: Series or DataFrame of "wins/losses/draws" strings (as passed by
            ``Styler.apply``).
        cmap: matplotlib colormap name.
        low, high: fractions by which the [0, 1] colour range is extended at
            the bottom / top before normalising.
        text_color_threshold: relative-luminance cutoff in [0, 1]; backgrounds
            darker than this get light text, others get black text.

    Returns:
        A list of CSS strings for 1-D input, otherwise a DataFrame of CSS
        strings with the same index and columns as ``s``.

    Raises:
        ValueError: if ``text_color_threshold`` is not a number in [0, 1].
    """
    if (
        not isinstance(text_color_threshold, (float, int))
        or not 0 <= text_color_threshold <= 1
    ):
        msg = "`text_color_threshold` must be a value from 0 to 1."
        raise ValueError(msg)

    # NOTE: `_mpl` is a private pandas helper that imports matplotlib lazily.
    with pd.io.formats.style._mpl(pd.io.formats.style.Styler.background_gradient) as (plt, colors):
        smin = 0.0
        smax = 1.0
        rng = smax - smin
        norm = colors.Normalize(smin - (rng * low), smax + (rng * high))
        def to_rate(win_lose_draw):
            w, l, d = map(np.float64, win_lose_draw.split("/"))
            # The tiny epsilons make a cell with no games evaluate to 0.5
            # instead of dividing by zero.
            rate = (w + d * 0.5 + 1e-100) / (w + l + d + 2e-100)
            return rate
        rgbas = plt.cm.get_cmap(cmap)(norm(np.vectorize(to_rate)(s)))

        def relative_luminance(rgba):
            # WCAG relative luminance. The whole quotient is raised to 2.4; the
            # pandas v1.2.1 code this was copied from had the parentheses misplaced
            # (only the divisor was exponentiated), which overstated luminance.
            r, g, b = (
                x / 12.92 if x <= 0.03928 else ((x + 0.055) / 1.055) ** 2.4
                for x in rgba[:3]
            )
            return 0.2126 * r + 0.7152 * g + 0.0722 * b

        def css(rgba):
            dark = relative_luminance(rgba) < text_color_threshold
            text_color = "#f1f1f1" if dark else "#000000"
            return f"background-color: {colors.rgb2hex(rgba)};color: {text_color};"

        if s.ndim == 1:
            return [css(rgba) for rgba in rgbas]
        else:
            return pd.DataFrame(
                [[css(rgba) for rgba in row] for row in rgbas],
                index=s.index,
                columns=s.columns,
            )

# Render the top-agent table: fixed-width ratings, submission ids as links to
# the episode dialog, dates as YYYY-MM-DD, and the per-bin "w/l/d" columns
# coloured by win rate via win_rate_gradient.
pd.options.display.max_colwidth = 50
pd.options.display.expand_frame_repr = False
df.set_index("Rank").reindex(columns=["Rating", "TeamName", "SubmissionDate", "SubmissionId"] + win_lose_draw_columns) \
.style.format({"Rating": lambda x: f"{x:7.2f}",
               "SubmissionId": lambda x: f'<a href="https://www.kaggle.com/c/santa-2020/leaderboard?dialog=episodes-submission-{x}">{x}</a>',
               "SubmissionDate": lambda x: x.strftime("%Y-%m-%d")}) \
.apply(win_rate_gradient, subset=win_lose_draw_columns, cmap="RdBu_r")
#.set_properties(subset=win_lose_draw_columns, width="50px")

In [None]:
# Per-team count of submissions in each `step`-wide rating bin, from max_rating
# down to min_rating, shown as a heat-mapped table ordered by team rank.
df = df_submissions.copy()
step = 50
max_rating = (int(df_submissions["Rating"].max()) // step + 1) * step
min_rating = 1000
bin_columns = []
for r in range(max_rating, min_rating, -step):
    # Boolean membership column for the half-open bin [r - step, r).
    col = f"{r}-{r-step}"
    df[col] = (r-step <= df["Rating"]) & (df["Rating"] < r)
    bin_columns.append(col)
# Summing the boolean columns per team gives the number of submissions in each bin.
df = df.groupby("TeamId")[bin_columns].sum()
df.reset_index(inplace=True)
df["TeamRank"] = df["TeamId"].map(dict_team_id_to_team_rank)
df["TeamName"] = df["TeamId"].map(dict_team_id_to_team_name)
df.sort_values("TeamRank", inplace=True)
df.reset_index(drop=True, inplace=True)
df = df.reindex(columns=["TeamRank", "TeamId", "TeamName", *bin_columns])
# Drop teams that have no submission in any displayed bin.
df = df[df[bin_columns].sum(1) >= 1]
df.set_index("TeamRank", inplace=True)
df.style.background_gradient(cmap="OrRd", axis=None, subset=bin_columns)

# Wooden spoon

In [None]:
# The ten lowest-rated submissions (tail of the descending sort), with team
# names and submission ids rendered as links to the episode dialog.
df_submissions["TeamName"] = df_submissions["TeamId"].map(dict_team_id_to_team_name)
df_submissions.sort_values("Rating", ascending=False).tail(10).reset_index(drop=True).reindex(columns=["Rating", "TeamName", "SubmissionId"]).style.format({"Rating": lambda x: f"{x:7.2f}", "SubmissionId": lambda x: f'<a href="https://www.kaggle.com/c/santa-2020/leaderboard?dialog=episodes-submission-{x}">{x}</a>'})

# LB progress

In [None]:
# Santa 2020 episodes in order of end time, widened with each agent's
# submission id and post-episode score (AgentN... columns for Index 0 and 1).
df = df_episodes[df_episodes["CompetitionId"]==SANTA2020_COMPETITION_ID]
df.sort_values("EndTime", inplace=True)
df.reset_index(drop=True, inplace=True)
for agent_index in range(2):
    df_agents = df_episode_agents[df_episode_agents["Index"]==agent_index]
    df_agents.set_index("EpisodeId", inplace=True)
    # Look up by episode id; episodes without a matching agent row get NaN.
    df[f"Agent{agent_index}SubmissionId"] = df["Id"].map(df_agents["SubmissionId"])
    df[f"Agent{agent_index}UpdatedScore"] = df["Id"].map(df_agents["UpdatedScore"])
df

In [None]:
from tqdm.notebook import tqdm
from heapq import heappush, heappop

# Replay the episodes in end-time order and record, per team, every change of
# the team's best submission rating as (rating, time) in "max_rating_trajectory".
#
# Per-team state:
#   dict_submission_id_to_rating  current rating of each of the team's submissions
#   heap_ratings                  max-heap (negated values) of ratings, may hold stale entries
#   counter_to_remove_from_heap   stale rating value -> number of copies still to discard
#   max_rating_trajectory         list of (best rating, episode end time)
team_data = {
    team_id: {
        "dict_submission_id_to_rating": {},
        "heap_ratings": [],
        "counter_to_remove_from_heap": Counter(),
        "max_rating_trajectory": [],
    } for team_id in dict_teams
}
episode_rows = zip(
    df["EndTime"].dt.to_pydatetime(),
    df["Agent0SubmissionId"].values,
    df["Agent0UpdatedScore"].values,
    df["Agent1SubmissionId"].values,
    df["Agent1UpdatedScore"].values,
)
for end_time, a0_sub_id, a0_score, a1_sub_id, a1_score in tqdm(episode_rows, total=len(df)):
    for sub_id, score in [[a0_sub_id, a0_score], [a1_sub_id, a1_score]]:
        if sub_id not in dict_submissions:
            continue
        # A missing score cannot be ordered: pushing NaN would break the heap
        # invariant and add a spurious trajectory point, so keep the previous rating.
        if pd.isna(score):
            continue
        team_id = dict_submissions[sub_id]["teamId"]
        if team_id not in team_data:
            continue
        team_dat = team_data[team_id]
        heap_ratings = team_dat["heap_ratings"]
        counter_to_remove_from_heap = team_dat["counter_to_remove_from_heap"]
        # Deliberately NOT named dict_submission_id_to_rating: that notebook-level
        # lookup is used by earlier cells and must not be rebound to a per-team dict.
        team_sub_ratings = team_dat["dict_submission_id_to_rating"]
        max_rating_trajectory = team_dat["max_rating_trajectory"]
        if sub_id not in team_sub_ratings:
            team_sub_ratings[sub_id] = score
            heappush(heap_ratings, -score)
        else:
            # Lazy deletion: mark the old rating as stale, then discard stale
            # entries for as long as one sits at the top of the heap.
            old_score = team_sub_ratings[sub_id]
            counter_to_remove_from_heap[old_score] += 1
            while len(heap_ratings) > 0 and -heap_ratings[0] in counter_to_remove_from_heap:
                max_score = -heap_ratings[0]
                if counter_to_remove_from_heap[max_score] == 1:
                    del counter_to_remove_from_heap[max_score]
                else:
                    counter_to_remove_from_heap[max_score] -= 1
                heappop(heap_ratings)

            team_sub_ratings[sub_id] = score
            heappush(heap_ratings, -score)
        # The heap top is now the team's current best rating; log it only when it changed.
        max_score = -heap_ratings[0]
        if len(max_rating_trajectory) == 0 or max_score != max_rating_trajectory[-1][0]:
            max_rating_trajectory.append((max_score, end_time))


In [None]:
# from bisect import insort
# all_trajectories = []
# for team_id, team_dat in team_data.items():
#     for score, end_time in team_dat["max_rating_trajectory"]:
#         all_trajectories.append((team_id, score, end_time))
# all_trajectories.sort(key=itemgetter(end_time))
# top_teams = []
# for team_id, score, end_time in all_trajectories:
#     if top_teams.append(())

In [None]:
# Interactive step chart of each team's best rating over time, limited to teams
# whose best rating reached 1350.
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objects as go
fig = go.Figure()
fig.update_layout(autosize=False, width=1500, height=800)
for team_id, team_dat in team_data.items():
    max_rating_trajectory = team_dat["max_rating_trajectory"]
    if len(max_rating_trajectory) == 0:
        continue
    xs = []
    ys = []
    # Duplicate every point, then shift the x values by one so that each rating
    # is held until the time of the next change (a step function).
    for y, x in max_rating_trajectory:
        ys.extend([y, y])
        xs.extend([x, x])
    del xs[0]
    # The last rating is extended to the time the cell is run.
    xs.append(datetime.now())
    if max(ys) < 1350:
        continue
    team_name = dict_team_id_to_team_name[team_id]
    fig.add_trace(go.Scatter(x=xs, y=ys, name=team_name))
iplot(fig)