Data pack with various statistics regarding the Lux AI Challenge Season 1

In [None]:
import pandas as pd
import numpy as np
import os
import json
import datetime
import collections
import time

In [None]:
# Competition to analyse -- configure this to your needs. Choose one of:
#   'lux-ai-2021', 'hungry-geese', 'rock-paper-scissors', 'santa-2020',
#   'halite', 'google-football'
COMP = "lux-ai-2021"

In [None]:
# Base paths: the working directory and the meta-kaggle input directory
# that the CSV files are read from.
ROOT = "."
META = "../input/meta-kaggle/"

# Competition slug -> numeric id; the id is what gets compared against the
# CompetitionId column when filtering episodes.
COMPETITIONS = dict([
    ("lux-ai-2021", 30067),
    ("hungry-geese", 25401),
    ("rock-paper-scissors", 22838),
    ("santa-2020", 24539),
    ("halite", 18011),
    ("google-football", 21723),
])

Load the Meta Kaggle tables and keep only the rows that belong to our competition.

In [None]:
# Load Episodes: read only the two columns used here, print how long the read
# took, then keep just the episodes of the competition selected by COMP.
episodes_path = META + "Episodes.csv"
st = time.time()
episodes_df = pd.read_csv(episodes_path, usecols=['CompetitionId', 'Id'])
print(time.time() - st)
print(f'Episodes.csv: {len(episodes_df)} rows before filtering.')
competition_id = COMPETITIONS[COMP]
episodes_df = episodes_df.loc[episodes_df['CompetitionId'] == competition_id]
print(f'Episodes.csv: {len(episodes_df)} rows after filtering for {COMP}.')

In [None]:
# Load EpisodeAgents: read the four columns of interest, print how long the
# read took, then keep only rows whose EpisodeId is one of the episodes that
# survived the competition filter.
epagents_columns = ['EpisodeId', 'Reward', 'SubmissionId', 'UpdatedScore']
st = time.time()
epagents_df = pd.read_csv(META + "EpisodeAgents.csv", usecols=epagents_columns)
print(time.time() - st)
print(f'EpisodeAgents.csv: {len(epagents_df)} rows before filtering.')
in_competition = epagents_df['EpisodeId'].isin(episodes_df['Id'])
epagents_df = epagents_df.loc[in_competition]
print(f'EpisodeAgents.csv: {len(epagents_df)} rows after filtering for {COMP}.')

In [None]:
# Reward margin per episode: the second-listed agent's reward minus the
# first-listed agent's reward.
#
# Rows are paired by EpisodeId instead of by row parity ([1::2] - [0::2]).
# Parity pairing is only right when every episode contributes exactly two
# adjacent rows: one episode with a different row count shifts every later
# pair, and an odd total makes the subtraction fail on mismatched shapes.
# Episodes that do not have exactly two agent rows are left out here.
_episode_ids = epagents_df['EpisodeId']
_rows_per_episode = _episode_ids.map(_episode_ids.value_counts())
_two_agent_rows = epagents_df[_rows_per_episode == 2]

# 0 for the first row of an episode, 1 for the second (file order is kept).
_slot = _two_agent_rows.groupby('EpisodeId').cumcount()
_first_reward = _two_agent_rows[_slot == 0].set_index('EpisodeId')['Reward']
_second_reward = _two_agent_rows[_slot == 1].set_index('EpisodeId')['Reward']

# Line the first-agent rewards up with the second-agent rows explicitly so
# the element-wise subtraction always compares agents of the same episode.
score_diffs = \
    _second_reward.values - _first_reward.reindex(_second_reward.index).values

In [None]:
# the data pack!
# Entries annotated as website / internal / discord data are hand-entered
# figures; the remaining entries are computed from the frames built earlier.
data = {
    'num_of_episodes_run': len(episodes_df),
    'num_teams': 1186,  # pulled from website
    'num_competitors': 1464,  # pulled from website
    'num_valid_submissions': 22508,  # pulled from website
    'num_submissions': len(epagents_df['SubmissionId'].unique()),
    'num_public_notebooks': 126,  # pulled from website
    'peak_episodes_hour': 6000,  # internal data
    'discord_messages': 15666,  # discord data
    'kaggle_forum_posts': 220,  # pulled from website

    # Reward (the value used to decide win/tie/loss) is
    # 10000 * city_count + unit_count, so the city count is the reward
    # divided by 10000 and the unit count is the remainder.
    'most_cities_in_one_episode': int(epagents_df['Reward'].max() / 10000),
    'most_units_in_one_episode': int((epagents_df['Reward'] % 10000).max()),

    # number of matches where the two rewards differ by exactly 1,
    # i.e. the winner won by virtue of one more unit
    'num_close_wins': (np.abs(score_diffs) == 1).sum(),

    # based on npm downloads https://npm-stat.com/charts.html?package=lux-viewer-2021&from=2021-07-20&to=2022-01-19
    # The way replays are served on the kaggle website, a view usually
    # triggers a "download" increment on npm. This heavily underestimates
    # views: it counts fetches of the visualizer code, not every click.
    'num_times_kaggle_replay_watched': 269730,

    # number of times the organizers have hopelessly talked about carts,
    # approximately aggregated across discord, twitch, and kaggle forums
    'num_times_organizers_hopelessly_mentioned_carts': 86,
}

# One "name = value" line per statistic, in insertion order.
for k, v in data.items():
    print(f"{k} = {v}")