> **<h1><center> Indoor Location & Navigation (leaderboard analysis) </center></h1>**

Updated on 18 April 2021

<h2><center> <img src="https://spreo.co/wp-content/uploads/2017/09/indoor-location-and-workplace-mapping-technology.png"></center></h2>

Leveraging and inspired by the analysis of @demche in the [Santa competition](https://www.kaggle.com/demche/santa-2020-who-s-lucky-eda), I am trying out a leaderboard analysis of the Indoor Location & Navigation competition using Kaggle Meta data.

In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import warnings
from kaggle_environments import list_episodes
from IPython.display import display, Markdown
# Notebook-wide settings: silence library warnings, then configure how pandas
# renders frames (thousands separator, two decimals, up to 200 rows).
warnings.filterwarnings('ignore')
pd.set_option("display.float_format", '{:,.2f}'.format)
pd.options.display.max_rows = 200

In [None]:
# Download the competition leaderboard as JSON and save it to leaderboard.json
# in the working directory (-O sets the output file name).
!wget "https://www.kaggle.com/c/indoor-location-navigation/leaderboard.json?includeBeforeUser=true&includeAfterUser=false" -O leaderboard.json

In [None]:
# Parse the downloaded leaderboard JSON into one row per team.
with open("leaderboard.json") as f:
    jsn = json.load(f)

leaderboard_columns = ["team_name", "team_id", "score", "n_agents", "team_rank"]
# Collect plain records and build the frame once: DataFrame.append was removed
# in pandas 2.0 and copied the whole frame on every iteration.
leaderboard_records = [
    {"team_name": user["teamName"],
     "team_id": user["teamId"],
     "score": user["score"],
     "n_agents": user["entries"],
     "team_rank": user["rank"]}
    for user in jsn["beforeUser"] + jsn["afterUser"]
]
# Passing `columns` keeps the expected schema even when the JSON lists no teams.
leaderboard = pd.DataFrame(leaderboard_records, columns=leaderboard_columns)
numeric_columns = ["score", "n_agents", "team_rank"]
leaderboard[numeric_columns] = leaderboard[numeric_columns].apply(pd.to_numeric)

# Medal thresholds are the scores at sorted positions 10 / 50 / 100 (ascending,
# i.e. the lowest scores come first). The index is reset and read with .iloc so
# the lookup is positional; indexing the sorted Series with [9] would be
# label-based and silently ignore the sort.
sorted_scores = leaderboard["score"].sort_values(ascending=True).reset_index(drop=True)


def _score_at_position(position):
    """Return the score at 0-based sorted `position`, or NaN if fewer teams exist."""
    if position < len(sorted_scores):
        return sorted_scores.iloc[position]
    return float("nan")


gold_min_score = _score_at_position(9)
silver_min_score = _score_at_position(49)
bronze_min_score = _score_at_position(99)

In [None]:
# Load the Meta Kaggle episode tables and keep only this competition's rows.
competition_id = 24539  # value of CompetitionId used to select rows below

all_episodes = pd.read_csv("../input/meta-kaggle/Episodes.csv")
is_competition = all_episodes["CompetitionId"] == competition_id

# `gaps`: episode ids between this competition's first episode and the newest id
# in the file that are absent from the CSV, newest first. They are looked up via
# the API later. Without any matching rows min() is NaN and range() would raise,
# so that case yields an empty list instead.
if is_competition.any():
    first_episode_id = int(all_episodes.loc[is_competition, "Id"].min())
    last_episode_id = int(all_episodes["Id"].max())
    gaps = sorted(
        set(range(first_episode_id, last_episode_id + 1)) - set(all_episodes["Id"].values),
        reverse=True,
    )
else:
    gaps = []

# .copy() makes the filtered frame independent before a column is reassigned on it.
episodes = all_episodes.loc[is_competition, ["Id", "CreateTime"]].copy()
episodes["CreateTime"] = pd.to_datetime(episodes["CreateTime"], format="%m/%d/%Y %H:%M:%S")
del all_episodes  # the full table is no longer needed

# One row per (episode, submission) with the episode's creation time attached.
episode_agents = pd.read_csv("../input/meta-kaggle/EpisodeAgents.csv")
episode_agents = pd.merge(episode_agents, episodes, left_on="EpisodeId", right_on="Id")
episode_agents = episode_agents[["EpisodeId", "CreateTime", "SubmissionId", "UpdatedScore"]].drop_duplicates()
episode_agents["date"] = episode_agents["CreateTime"].dt.date

# Filled in below: which team owns each submission and when it was submitted.
agents_mapping = pd.DataFrame(columns=["team_id", "submission_id", "submission_dt"])

# For every submission take its highest episode id, then ask the API about those
# episodes in batches to learn which team owns each submission.
episodes_to_consider = (
    episode_agents[episode_agents["EpisodeId"].isin(episodes["Id"])]
    .groupby(["SubmissionId"])["EpisodeId"]
    .max()
    .to_list()
)

batch_size = 1000
submission_records = []
for i in range(0, len(episodes_to_consider), batch_size):
    batch = episodes_to_consider[i:i + batch_size]
    try:
        resp = list_episodes(batch)
        for submission in resp["result"]["submissions"]:
            submission_records.append({
                "team_id": submission["teamId"],
                "submission_id": submission["id"],
                "submission_dt": datetime.datetime.strptime(submission["dateSubmitted"][:19], "%Y-%m-%dT%H:%M:%S"),
            })
    except Exception as ex:
        # Best effort: report the failed batch and carry on with the next one.
        # (No `del` of loop variables here: it raised NameError whenever a
        # response contained no submissions.)
        print("Error:", ex)
        continue

# Add all collected rows in one step; DataFrame.append was removed in pandas 2.0.
if submission_records:
    new_mapping_rows = pd.DataFrame(submission_records)
    if agents_mapping.empty:
        agents_mapping = new_mapping_rows
    else:
        agents_mapping = pd.concat([agents_mapping, new_mapping_rows], ignore_index=True)

def _with_new_rows(frame, records):
    """Return `frame` extended by `records` (a list of dicts, one per new row)."""
    if not records:
        return frame
    new_rows = pd.DataFrame(records)
    if frame.empty:
        return new_rows
    return pd.concat([frame, new_rows], ignore_index=True)


# Query the API for the episode ids in `gaps` and keep the episodes that belong
# to this competition, together with the submissions the API reports.
gap_competition_id = 24539
gap_batch_size = 1000
api_time_format = "%Y-%m-%dT%H:%M:%S"

gap_agent_records = []
gap_submission_records = []
for i in range(0, len(gaps), gap_batch_size):
    batch = gaps[i:i + gap_batch_size]
    try:
        resp = list_episodes(batch)
        result = resp["result"]
        if len(result["episodes"]) == 0:
            continue
        for episode in result["episodes"]:
            if episode["competitionId"] != gap_competition_id:
                continue
            # `datetime` is the module here, so the class must be spelled out;
            # datetime.strptime raised AttributeError and lost the whole batch.
            create_time = datetime.datetime.strptime(episode["createTime"][:19], api_time_format)
            for agent in episode["agents"]:
                gap_agent_records.append({
                    "EpisodeId": episode["id"],
                    "CreateTime": create_time,
                    "SubmissionId": agent["submissionId"],
                    "UpdatedScore": agent["updatedScore"],
                    # Same derivation as the `date` column of the CSV-based rows.
                    "date": create_time.date(),
                })
        # Read submissions from the API response; `episodes` is the DataFrame
        # and has no "result" key.
        for submission in result["submissions"]:
            gap_submission_records.append({
                "team_id": submission["teamId"],
                "submission_id": submission["id"],
                "submission_dt": datetime.datetime.strptime(submission["dateSubmitted"][:19], api_time_format),
            })
    except Exception as ex:
        # Best effort: report the failed batch and carry on with the next one.
        print("Error:", ex)
        continue

# Add the collected rows in one step; DataFrame.append was removed in pandas 2.0.
episode_agents = _with_new_rows(episode_agents, gap_agent_records)
agents_mapping = _with_new_rows(agents_mapping, gap_submission_records)
        
# Keep one mapping row per submission and only the episode rows of submissions
# whose team is known.
agents_mapping = agents_mapping.drop_duplicates(subset=["submission_id"])
episode_agents = episode_agents[episode_agents["SubmissionId"].isin(agents_mapping["submission_id"])]
episode_agents = episode_agents.drop_duplicates()

# For each submission take the row of its most recent episode; rows without an
# updated score are dropped.
latest_episode_rows = episode_agents.loc[episode_agents.groupby("SubmissionId")["CreateTime"].idxmax()]
agents = (
    latest_episode_rows
    .dropna(subset=["UpdatedScore"])
    .loc[:, ["SubmissionId", "UpdatedScore"]]
    .rename(columns={"SubmissionId": "submission_id", "UpdatedScore": "score"})
    .reset_index(drop=True)
)

# Attach the owning team (id, submission time) and then the team name.
agents = pd.merge(agents, agents_mapping, on="submission_id", how="left")
agents = agents.drop_duplicates(subset=["submission_id"])
agents = pd.merge(agents, leaderboard.loc[:, ["team_name", "team_id"]], on="team_id", how="left")


def _medal_for_score(score):
    """Return the medal tier whose threshold `score` meets, or "no medal"."""
    # The thresholds are taken from the leaderboard sorted ascending, so a lower
    # score ranks higher: a score earns a medal when it does not exceed the
    # tier's threshold. The previous `>=` test was inverted.
    if score <= gold_min_score:
        return "gold"
    if score <= silver_min_score:
        return "silver"
    if score <= bronze_min_score:
        return "bronze"
    return "no medal"


agents["medal"] = [_medal_for_score(x) for x in agents["score"]]

# 1. Score distribution

In [None]:
# Histogram of all team scores with the three medal thresholds marked.
fig, ax = plt.subplots(figsize=(25, 8))
# NaN scores are dropped because hist cannot derive bin edges from them.
ax.hist(leaderboard["score"].dropna(), color="lightsteelblue", bins=10)
# Label every threshold line so the legend has entries to show; calling legend
# with no labelled artists only drew an empty box and emitted a warning.
for threshold, line_color, line_label in [
    (gold_min_score, "gold", "gold threshold"),
    (silver_min_score, "silver", "silver threshold"),
    (bronze_min_score, "peru", "bronze threshold"),
]:
    ax.axvline(x=threshold, color=line_color, label=line_label)
ax.set_xlabel("Team score")
ax.set_ylabel("Number of teams")
ax.legend(title="Team score distribution (vertical lines are medal thresholds)", loc="upper center", title_fontsize=25)
plt.show()

In [None]:
# Same histogram restricted to teams scoring below the cutoff, so the region
# around the medal thresholds is easier to read.
score_cutoff = 4.5
scores_below_cutoff = leaderboard.loc[leaderboard["score"] < score_cutoff, "score"]

fig, ax = plt.subplots(figsize=(25, 8))
ax.hist(scores_below_cutoff, color="thistle", bins=10)
# Label every threshold line so the legend has entries to show; calling legend
# with no labelled artists only drew an empty box and emitted a warning.
for threshold, line_color, line_label in [
    (gold_min_score, "gold", "gold threshold"),
    (silver_min_score, "silver", "silver threshold"),
    (bronze_min_score, "peru", "bronze threshold"),
]:
    ax.axvline(x=threshold, color=line_color, label=line_label)
ax.set_xlabel("Team score")
ax.set_ylabel("Number of teams")
ax.legend(title="Team score distribution (teams with score <4.5, vertical lines are medal thresholds)", loc="upper center", title_fontsize=25)
plt.show()

In [None]:
# Summary statistics of the team scores, shown as the cell's output.
leaderboard.loc[:, "score"].describe()

# 2. Number of submissions for medal-winning teams

In [None]:
# Stacked histogram of submission counts for the gold, silver and bronze tiers.
# Teams are ordered by ascending score, the same ordering the medal thresholds
# are taken from (positions 10 / 50 / 100), and sliced positionally so the tiers
# are contiguous: 0-9 gold, 10-49 silver, 50-99 bronze. The earlier descending
# sort with [11:51] / [51:101] selected the other end of the table and skipped
# position 10.
submissions_by_rank = leaderboard.sort_values("score", ascending=True)["n_agents"]
submissions_per_tier = [
    submissions_by_rank.iloc[:10],
    submissions_by_rank.iloc[10:50],
    submissions_by_rank.iloc[50:100],
]

fig, ax = plt.subplots(figsize=(25, 8))
ax.hist(submissions_per_tier,
        label=["gold-winning team", "silver-winning team", "bronze-winning team"],
        color=["gold", "silver", "peru"], bins=50, stacked=True, alpha=0.7)
ax.set_xlabel("Number of submissions")
ax.set_ylabel("Number of teams")
ax.legend(title="Total number of submissions for medal-winning teams", loc="upper center", title_fontsize=20)
plt.show()