In [232]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact

import requests
from bs4 import BeautifulSoup

# Loading historical data

Loading json from Eric Wastl's github

In [216]:
#!curl https://raw.githubusercontent.com/topaz/aoc-tmp-stats/master/aoc-2015-2021-stats.json > history.json

In [217]:
# Load the historical statistics from the local JSON file.
# JSON text is UTF-8 by specification, so the encoding is passed explicitly
# instead of relying on the platform's default locale encoding.
with open("history.json", encoding="utf-8") as file:
    history = json.load(file)

In [227]:
def get_evo(year=2021, level=2, transform='identical'):
    """Return per-day leaderboard timings for one year and level.

    Reads the module-level ``history`` mapping, indexed as
    ``history[str(year)][str(day)][str(level)]`` -> list of times, for
    days 1 through 25.

    Parameters
    ----------
    year : int
        Year to look up in ``history``.
    level : int
        Puzzle level (part) to look up.
    transform : str
        ``'identical'`` (no change) or ``'log'`` (natural logarithm),
        applied to every value of the result.

    Returns
    -------
    pd.DataFrame
        Indexed by day (1..25) with columns ``'To 100'`` (100th entry),
        ``'To 50'`` (50th entry) and ``'Avg 100'`` (mean of the first 100
        entries), each divided by 60 (presumably seconds -> minutes).

    Raises
    ------
    KeyError
        If ``transform`` is not a known transformation, or the requested
        year/day/level is missing from ``history``.
    """
    funcs = {
        'identical': lambda x: x,
        'log': np.log
    }
    # Resolve the transformation first so an invalid name fails before any work.
    transform_func = funcs[transform]

    days = range(1, 26)
    year_data = history[str(year)]
    # Look each day's list up once instead of once per metric.
    times_per_day = [year_data[str(d)][str(level)] for d in days]

    evo_level = pd.DataFrame({
                    'To 100': [times[99] for times in times_per_day],
                    'To 50': [times[49] for times in times_per_day],
                    'Avg 100': [sum(times[:100]) / 100 for times in times_per_day]
                    },
                    index=days) / 60

    # Both transformations work on a whole DataFrame, so apply them in one
    # vectorised call rather than element by element (DataFrame.applymap is
    # deprecated in recent pandas versions).
    return transform_func(evo_level)

## Evolution of time to solve for a chosen advent

In [384]:
@interact
def plot_evo(year=range(2021,2014,-1), transform=['identical','log']):
    """Plot the per-day timing metrics of one year, one panel per level.

    Parameters
    ----------
    year : int
        Year to plot (chosen through the widget dropdown).
    transform : str
        Transformation passed on to ``get_evo`` ('identical' or 'log').
    """
    fig, axes = plt.subplots(1,2)
    fig.set_figwidth(15);
    # The frame returned by get_evo is already divided by 60, so the raw axis
    # is labelled in minutes (presumably the source data is in seconds).
    y_label = 'Time (minutes)' if transform == 'identical' else f'{transform}(time in minutes)'
    for level in [1,2]:
        ax = axes[level-1]
        evo_level = get_evo(year=year, level=level, transform=transform)
        evo_level.plot(ax=ax);
        # Label each panel so the two levels can be told apart at a glance.
        ax.set_title(f'{year} - level {level}')
        ax.set_xlabel('Day')
        ax.set_ylabel(y_label)

interactive(children=(Dropdown(description='year', options=(2021, 2020, 2019, 2018, 2017, 2016, 2015), value=2…

### Conclusions:
- Interestingly, for almost any given year, we see somewhat of an exponential trend over the days.
- This is confirmed when we do a log transformation: we see a somewhat linear trend.
- The first two years are a bit of an exception, especially for the first days. This is not surprising, because there were fewer contenders.
- We see similar patterns for level 1 and level 2. The year 2019 is a notable exception to this.
- After a peak of difficulty (mostly one, exceptionally two days), we see a return to an easier challenge.

In [318]:
# Find the first index whose time exceeds 60.0 and store that index as
# count_level_two_in_one_hour; the default of 100 is kept when no scanned
# time exceeds the threshold.
# NOTE(review): `count_level_one_in_one_hour` and `times` are only assigned in
# cells further down in the visible notebook, so this cell depends on
# out-of-order execution and would fail on a fresh kernel.
# NOTE(review): the scan starts at index 1 (times[0] is never checked) and its
# upper bound comes from the level-one count - confirm both are intended.
count_level_two_in_one_hour = 100
for i in range(1, count_level_one_in_one_hour+1):
    if times[i] > 60.0:  # presumably minutes, i.e. one hour - confirm
        print(times[i])
        count_level_two_in_one_hour = i
        break

60.25


In [275]:
# Display the current value of count_level_two_in_one_hour.
count_level_two_in_one_hour

35

## Compare the years to each other

In [386]:
@interact
def plot_to_100_over_years(level=[2,1], transform=['identical','log'],
                           metric=['To 100', 'To 50', 'Avg 100'],
                           plot=['evolution', 'cat', 'box', 'violin']):
    """Compare one metric across the years 2015-2021 in a chosen plot type.

    For every year the selected ``metric`` column of ``get_evo`` is collected
    into one frame (one column per year, named after the year), which is then
    drawn as a line plot ('evolution'), a seaborn catplot ('cat'), a boxplot
    ('box') or a violin plot ('violin'). Any other value of ``plot`` only
    prints a hint.
    """
    levels = pd.DataFrame({
        str(year): get_evo(year=year, level=level, transform=transform)[metric]
        for year in range(2015, 2022)
    })

    # One drawing routine per selectable plot kind.
    renderers = {
        'evolution': lambda frame: frame.plot(),
        'box': lambda frame: sns.boxplot(data=frame),
        'violin': lambda frame: sns.violinplot(data=frame),
        'cat': lambda frame: sns.catplot(data=frame),
    }
    draw = renderers.get(plot)
    if draw is None:
        print("Choose a plot")
    else:
        draw(levels);

interactive(children=(Dropdown(description='level', options=(2, 1), value=2), Dropdown(description='transform'…

### Conclusions:
- More or less the same evolution: difficulty increases as we get closer to the end, with an exception in the days just before the end. Well, we knew that already, but now it's (sort of) confirmed.
- 2015 and 2016 are a bit exceptional, especially at the beginning, but they also peak much higher. Probably just because there were fewer contenders.

# General conclusions

- Quid ChatGPT? Launched 30 November 2022. Need to check impact on 2022.
- We know people on the big leaderboard have been around for some time, and puzzles come back. And they know libraries to help with the hard stuff.


# Potential way to go

- Make a df with time info: over all puzzles, how much time did it take the n-th ranked person?
  - Columns: n-th ranked person for level 1 or 2
  - Rows: time for each challenge for every year for this rank
- Determine years to consider. Not 2015. 2016?
- Check rank of last one to solve within one hour, or 100th one (two ranks, one for level 1 and one for level 2)
- For this rank, check in the column in which quartile this would fit
- Based on quartile, assign difficulty category, or score

In [452]:
# Df with time info: one column per rank (1..100), one row per (day, year)
# combination for the years 2017-2021, values divided by 60.
level = 2
# Collect all columns first and build the frame in a single call; inserting
# 100 columns one at a time into an empty DataFrame fragments it internally.
rank_columns = {}
for n in range(0,100):
    rank_columns[n+1] = [history[str(year)][str(d)][str(level)][n] / 60 for d in range(1,26) for year in range(2017, 2022)]
time_df = pd.DataFrame(rank_columns)

time_df.describe()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
count,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,...,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0
mean,7.902933,9.0952,10.002267,10.560133,11.032533,11.4416,11.789333,12.2544,12.562533,12.829467,...,25.513867,25.638533,25.739467,25.855333,25.9432,26.0508,26.1508,26.2484,26.362667,26.475067
std,6.569183,7.368808,8.434585,8.934962,9.281389,9.621822,9.877485,10.569599,10.738282,10.923051,...,24.685732,24.871802,24.978787,25.106823,25.192324,25.282043,25.391473,25.484585,25.629629,25.754065
min,1.116667,1.2,1.216667,1.233333,1.3,1.433333,1.483333,1.5,1.533333,1.55,...,2.65,2.65,2.666667,2.666667,2.666667,2.683333,2.683333,2.7,2.716667,2.733333
25%,3.633333,4.133333,4.55,4.716667,4.966667,5.116667,5.183333,5.3,5.366667,5.65,...,9.85,9.85,9.883333,9.9,9.933333,10.166667,10.166667,10.216667,10.266667,10.283333
50%,5.666667,6.666667,7.116667,7.383333,7.683333,8.083333,8.2,8.733333,8.916667,9.166667,...,15.883333,15.933333,15.983333,16.016667,16.033333,16.1,16.1,16.15,16.233333,16.233333
75%,9.733333,13.183333,14.016667,14.883333,15.083333,15.55,15.85,16.283333,16.466667,16.5,...,33.033333,33.05,33.083333,33.183333,33.216667,33.3,33.383333,33.416667,33.483333,33.5
max,36.066667,36.916667,46.033333,47.65,47.766667,50.866667,51.183333,56.716667,56.966667,57.0,...,139.816667,140.133333,140.616667,140.966667,141.333333,141.4,142.433333,142.716667,143.2,143.283333


In [489]:
# Summary statistics of the natural log of every historical time.
time_df.apply(np.log).describe()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
count,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,...,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0,125.0
mean,1.772833,1.914599,1.997453,2.050433,2.095901,2.132337,2.162447,2.192438,2.219864,2.241697,...,2.870356,2.873855,2.877418,2.881387,2.884489,2.888509,2.892233,2.895802,2.899659,2.903437
std,0.776855,0.77727,0.790615,0.792403,0.790083,0.790119,0.791001,0.797705,0.795611,0.795447,...,0.856734,0.858176,0.858562,0.859162,0.859592,0.859866,0.859973,0.860193,0.860641,0.86105
min,0.110348,0.182322,0.196115,0.209721,0.262364,0.360003,0.394292,0.405465,0.427444,0.438255,...,0.97456,0.97456,0.980829,0.980829,0.980829,0.98706,0.98706,0.993252,0.999406,1.005522
25%,1.290151,1.419084,1.515127,1.551102,1.602749,1.632503,1.645448,1.667707,1.680207,1.731656,...,2.287471,2.287471,2.29085,2.292535,2.295896,2.319114,2.319114,2.32402,2.328902,2.330524
50%,1.734601,1.89712,1.962439,1.999225,2.039053,2.089804,2.104134,2.167147,2.187922,2.215574,...,2.76527,2.768413,2.771547,2.77363,2.77467,2.778819,2.778819,2.78192,2.787067,2.787067
75%,2.275556,2.578953,2.640247,2.700242,2.71359,2.744061,2.76317,2.790142,2.801338,2.80336,...,3.497517,3.498022,3.49903,3.502048,3.503052,3.505557,3.508057,3.509055,3.511048,3.511545
max,3.585369,3.608663,3.829366,3.863883,3.866328,3.929208,3.935414,4.038068,4.042466,4.043051,...,4.940332,4.942594,4.946038,4.948523,4.951121,4.951593,4.958874,4.960861,4.964242,4.964824


In [497]:
def difficulty_quartile(time, pos, historical):
    """Calculates the difficulty quartile
    Compares the time it took the contender at a given position
    to historical data for that same position.

    Parameters
    ----------
    time : float
        Time it took the contender at position ``pos``
    pos : int
        Position of the contender to evaluate; used as the column
        label to select from ``historical``
    historical : pd.DataFrame
        Historical times, one column per position

    Returns
    -------
    int (0 to 5)
        0: faster than the historical minimum (faster than ever)
        1 to 4: quartile of the historical range the time falls in
        (a time equal to the historical maximum counts as 4)
        5: slower than the historical maximum (slower than ever)
    """
    # Use the column for the requested position (the parameter), not whatever
    # happens to be bound to a module-level name.
    reference = historical[pos]
    q_min, q_25, q_50, q_75, q_max = reference.quantile([0.0, 0.25, 0.5, 0.75, 1.0])

    if time > q_max:
        return 5
    # The first upper bound the time stays strictly below determines the class.
    for difficulty, upper_bound in enumerate((q_min, q_25, q_50, q_75)):
        if time < upper_bound:
            return difficulty
    # q_75 <= time <= q_max, including a time exactly equal to the maximum.
    return 4

In [498]:
# Example: classify a time of 143 for position 100 against time_df.
difficulty_quartile(143, 100, time_df)

4

In [495]:
def difficulty_score(time, n, historical):
    """Calculates a difficulty score
    Compares the log of the time it took the n-th contender
    to historical data.

    Parameters
    ----------
    time : float
        Time it took the n-th contender
    n : int
        Position of the contender to evaluate; used as the column
        label to select from ``historical``
    historical : pd.DataFrame
        Historical data, one column per position

    Returns
    -------
    float
        Score: position of log(time) between the log of the historical
        minimum and the log of the historical maximum for this position,
        clamped to the range 0 (faster than ever) to 1 (slower than ever)
    """
    # Read from the frame that was passed in, not from a module-level table,
    # so the function scores against whatever history the caller supplies.
    rank_times = historical[n]
    log_min = np.log(rank_times.min())
    log_spread = np.log(rank_times.max()) - log_min
    score = (np.log(time) - log_min) / log_spread
    # Clamp to [0, 1] and always hand back a plain float.
    return float(min(max(0, score), 1))

In [496]:
# Example: score a time of 3 for position 100 against time_df.
difficulty_score(time=3, n=100, historical=time_df)

0.023511825920309277

# Scraping the AOC website

In [387]:
# today_data = requests.get(url="https://adventofcode.com/2022/leaderboard/day/22").text
# Read a saved copy of the page from disk instead of hitting the website.
# The encoding is given explicitly so decoding does not depend on the
# platform default. NOTE(review): assumes test.html was saved as UTF-8 - confirm.
with open("test.html", encoding="utf-8") as file:
    today_data = file.read()

In [388]:
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# is installed (and warns), so results could differ between environments.
# "html.parser" ships with the Python standard library.
soup = BeautifulSoup(today_data, "html.parser")

In [389]:
# Last 8 characters of the first child of every "leaderboard-time" element
# (presumably an "HH:MM:SS" stamp - confirm against the page markup).
times_text = [
    cell.contents[0][-8:]
    for cell in soup.find_all(class_="leaderboard-time")
]
# Convert each stamp to minutes: first field * 60 + second field + third field / 60.
times = [
    int(stamp[:2])*60 + int(stamp[3:5]) + int(stamp[6:])/60
    for stamp in times_text
]
# Rank number of every "leaderboard-position" element: drop the trailing
# character, strip the padding, parse as int.
positions = [
    int(cell.contents[0][:-1].strip())
    for cell in soup.find_all(class_="leaderboard-position")
]

In [390]:
# Split the scraped times into the two leaderboards. The first run of entries
# is treated as level two; it ends where the position number drops (presumably
# the ranking restarting for level one) or after at most 100 entries.
# The scan is bounded by the data actually present, so a page with fewer
# entries than expected (or exactly 100) no longer raises IndexError.
max_level_two = min(100, len(positions))
start_of_level_one = max_level_two
for i in range(max_level_two - 1):
    if positions[i+1] < positions[i]:
        start_of_level_one = i + 1
        break
level_two_results = times[:start_of_level_one]
print(level_two_results)
# Everything after the level-two block belongs to level one.
level_one_results = times[start_of_level_one:]
level_one_results

[25.933333333333334, 39.18333333333333, 39.45, 39.733333333333334, 40.916666666666664, 41.233333333333334, 41.333333333333336, 41.63333333333333, 41.93333333333333, 42.11666666666667, 42.9, 44.31666666666667, 47.5, 48.06666666666667, 48.166666666666664, 48.916666666666664, 49.166666666666664, 50.63333333333333, 51.38333333333333, 51.45, 51.56666666666667, 51.766666666666666, 52.45, 53.483333333333334, 54.733333333333334, 54.916666666666664, 55.333333333333336, 55.583333333333336, 55.85, 56.53333333333333, 56.78333333333333, 58.15, 58.416666666666664, 58.983333333333334, 59.43333333333333]


[7.766666666666667,
 8.166666666666666,
 9.883333333333333,
 9.966666666666667,
 10.05,
 10.25,
 10.533333333333333,
 10.55,
 10.6,
 10.833333333333334,
 11.083333333333334,
 11.133333333333333,
 11.166666666666666,
 11.45,
 11.916666666666666,
 12.183333333333334,
 12.566666666666666,
 12.783333333333333,
 12.85,
 12.916666666666666,
 13.133333333333333,
 13.183333333333334,
 13.366666666666667,
 13.4,
 13.616666666666667,
 13.65,
 13.783333333333333,
 14.016666666666667,
 14.066666666666666,
 14.15,
 14.2,
 14.4,
 14.45,
 14.483333333333333,
 14.516666666666667,
 14.55,
 14.633333333333333,
 14.65,
 14.666666666666666,
 14.733333333333333,
 14.766666666666667,
 14.866666666666667,
 14.95,
 14.983333333333333,
 15.133333333333333,
 15.583333333333334,
 15.666666666666666,
 15.7,
 15.95,
 16.033333333333335,
 16.116666666666667,
 16.133333333333333,
 16.383333333333333,
 16.433333333333334,
 16.45,
 16.516666666666666,
 16.566666666666666,
 16.6,
 16.666666666666668,
 16.68333333333333

In [391]:
# Number of entries found for each level, shown as (level one, level two).
count_level_one_in_one_hour, count_level_two_in_one_hour = (
    len(level_one_results),
    len(level_two_results),
)
(count_level_one_in_one_hour, count_level_two_in_one_hour)

(94, 35)