In [None]:
from contextlib import closing
import json
import os

import numpy as np
import pandas as pd
import requests as rq
from tqdm.auto import tqdm

In [None]:
# API token sent in the "Authorization: Token ..." header of every request.
# It is read from the CFP_SCIPY_TOKEN environment variable so that a real token
# never has to be saved inside the notebook; the original placeholder is kept
# as the fallback when the variable is not set.
TOKEN = os.environ.get("CFP_SCIPY_TOKEN", "PUT TOKEN HERE")

In [None]:
def fetch_sequence_cfp_scipy(url1, max_queries=50):
    """Fetch every page of a paginated JSON listing and return all its results.

    Each page is requested with an ``Authorization: Token <TOKEN>`` header built
    from the notebook-level ``TOKEN``. Every response must be a JSON object with
    a ``"results"`` list and a ``"next"`` entry; ``"next"`` is followed until it
    is empty.

    Parameters
    ----------
    url1 : str
        URL of the first page.
    max_queries : int, default 50
        Initial size of the progress bar, used until the first response that
        reports ``"count"`` allows a better estimate. It does not limit the
        number of requests.

    Returns
    -------
    list
        The ``"results"`` entries of all pages, concatenated in page order.

    Raises
    ------
    AssertionError
        If a response is not OK, lacks ``"results"`` or ``"next"``, or reports
        a ``"count"`` different from the first one seen.
    """
    sequence = []
    url = url1
    num_queries = 0
    num_results_expected = None

    with closing(tqdm(total=max_queries)) as progress:
        while True:
            response = rq.get(url, headers={"Authorization": f"Token {TOKEN}"})
            assert response.ok, f"GET {url} failed with HTTP status {response.status_code}"
            data = response.json()
            progress.update()
            num_queries += 1

            assert "results" in data
            assert "next" in data

            # Pages without "count" are tolerated; they simply cannot refine
            # the progress estimate or be cross-checked.
            if "count" in data:
                if num_results_expected is None:
                    num_results_expected = data["count"]
                    page_size = len(data["results"])
                    # An empty page gives no page size to divide by, so the
                    # initial progress-bar total is kept in that case.
                    if page_size:
                        progress.reset(int(np.ceil(num_results_expected / page_size)))
                        # reset() zeroes the counter; restore the requests already made.
                        progress.update(num_queries)
                else:
                    # The total must not change while we are paging.
                    assert num_results_expected == data["count"]

            sequence += data["results"]
            url = data["next"]
            if not url:
                break

    return sequence

In [None]:
# Download every page of the 2024 submissions listing as a list of raw records,
# then show how many were retrieved.
submissions_ = fetch_sequence_cfp_scipy("https://cfp.scipy.org/api/events/2024/submissions/")
len(submissions_)

In [None]:
# Download every page of the 2024 reviews listing as a list of raw records,
# then show how many were retrieved.
reviews_ = fetch_sequence_cfp_scipy("https://cfp.scipy.org/api/events/2024/reviews/")
len(reviews_)

In [None]:
# Tabulate the raw submission records, one row per record.
submissions = pd.DataFrame.from_records(submissions_)
submissions

In [None]:
# Flatten the submission type: when the value is a dict, keep only its "en"
# entry; any other value is left as it is, so re-running this cell is harmless.
submissions["submission_type"] = [
    entry["en"] if isinstance(entry, dict) else entry
    for entry in submissions["submission_type"]
]
submissions

In [None]:
# Keep only the proposals of type "Tutorial" whose state is "submitted".
# The copy makes `tutorials` independent from `submissions`.
tutorials = submissions[
    submissions["submission_type"].eq("Tutorial")
    & submissions["state"].eq("submitted")
].copy()
tutorials

In [None]:
# Save the selected tutorial proposals to tutorials.csv, without the index column.
tutorials.to_csv("tutorials.csv", index=False)

In [None]:
# Tabulate the raw review records, one row per record.
reviews = pd.DataFrame.from_records(reviews_)
reviews

In [None]:
# Coerce review scores to floats. pd.to_numeric parses numeric strings and
# numbers alike, and turns a missing score (None) into NaN instead of raising
# the TypeError that float(None) would.
reviews["score"] = pd.to_numeric(reviews["score"]).astype(float)
reviews

In [None]:
# For each reviewer, collect the 25%, 50% and 75% quantiles of the scores they
# gave, stored as a three-element list in the "quantiles" column.
score_quantiles = (
    reviews
    .groupby("user", as_index=False)
    .agg({"score": lambda scores: list(scores.quantile(q=[.25, .5, .75]))})
    .rename(columns={"score": "quantiles"})
)
score_quantiles

In [None]:
def normalize_by_quantiles(score, q_low, med, q_up):
    """Express `score` as a signed distance from `med`, in quartile-gap units.

    Scores at or below the median are scaled by the gap between `med` and
    `q_low`; scores above it by the gap between `q_up` and `med`. When the
    relevant gap is zero the function returns 0.0 rather than dividing by zero.

    Parameters
    ----------
    score : number
        The value to normalize.
    q_low, med, q_up : number
        Lower quartile, median and upper quartile used as the reference.

    Returns
    -------
    float
        Negative or zero for scores at or below `med`, positive above it.
    """
    at_or_below = score <= med
    # Pick the quartile on the same side of the median as the score.
    quartile = q_low if at_or_below else q_up
    if med == quartile:
        return 0.
    gap = (med - q_low) if at_or_below else (q_up - med)
    return (score - med) / gap

In [None]:
# Attach each reviewer's quantiles to their reviews, then add a "norm" column
# holding the score normalized against that reviewer's own quartiles.
reviews_norm = reviews[["submission", "text", "user", "score"]].merge(score_quantiles, on="user")
reviews_norm["norm"] = [
    normalize_by_quantiles(score, *quartiles)
    for score, quartiles in zip(reviews_norm["score"], reviews_norm["quantiles"])
]
reviews_norm

In [None]:
# Load the co-chair reviews from cochair-reviews.csv, discard every column whose
# name starts with "Unnamed", and rename the remaining headers to the short
# column names used in the rest of the notebook.
reviews_cochairs = (
    pd.read_csv("cochair-reviews.csv")
    .pipe(lambda df: df.drop(columns=[c for c in df.columns if c.startswith("Unnamed")]))
    .rename(columns={
        "Proposal ID": "submission",
        "Cochair Reviewer": "user_",
        "Score (-10/10)": "score",
        "Notes/Comments": "text",
    })
)
reviews_cochairs

In [None]:
# For each co-chair, collect the 25%, 50% and 75% quantiles of the scores they
# gave, stored as a three-element list in the "quantiles" column.
cochair_quantiles = (
    reviews_cochairs
    .groupby("user_", as_index=False)
    .agg({"score": lambda scores: list(scores.quantile(q=[.25, .5, .75]))})
    .rename(columns={"score": "quantiles"})
)
cochair_quantiles

In [None]:
# Attach each co-chair's quantiles to their reviews. A "quantiles" column left
# over from a previous run of this cell is dropped first so the merge does not
# produce suffixed duplicates.
reviews_cochairs = (
    reviews_cochairs
    .drop(columns="quantiles", errors="ignore")
    .merge(cochair_quantiles, on="user_")
)
# Score normalized against the co-chair's own quartiles.
reviews_cochairs["norm"] = [
    normalize_by_quantiles(score, *quartiles)
    for score, quartiles in zip(reviews_cochairs["score"], reviews_cochairs["quantiles"])
]
# Co-chair names get a trailing "*" in the "user" column.
reviews_cochairs["user"] = reviews_cochairs["user_"].map(lambda name: name + "*")
reviews_cochairs

In [None]:
# Load tags.csv; it is joined to the tutorials on the "code" column further down.
tags = pd.read_csv("tags.csv")
tags

In [None]:
# Build the review table for tutorials:
#   1. reduce each tutorial to its code, a comma-separated author list and title;
#   2. left-join the stacked reviewer and co-chair reviews on the proposal code,
#      so a tutorial without any review still keeps a row;
#   3. join the tags on "code" (default inner join).
tutorials_reviewed = (
    tutorials
    .assign(authors=lambda df: df["speakers"].map(
        lambda speakers: ', '.join(speaker["name"] for speaker in speakers)
    ))
    .loc[:, ["code", "authors", "title"]]
    .merge(
        pd.concat([
            reviews_norm[["submission", "user", "text", "score", "norm"]],
            reviews_cochairs[["submission", "user", "text", "score", "norm"]],
        ]),
        how="left",
        left_on="code",
        right_on="submission",
    )
    .drop(columns=["submission"])
    .merge(tags, on="code")
)
# Show the whole table with untruncated rows and cell contents.
with pd.option_context("display.max_rows", None, "display.max_colWidth", None):
    display(tutorials_reviewed)

In [None]:
from pprint import pprint

In [None]:
# Print a `decisions = {...}` template with one "what" placeholder per tutorial.
# The codes are taken from `tutorials_reviewed`, which already exists at this
# point, instead of `palmares`, which is only defined in a later cell and is
# therefore undefined on a fresh Restart & Run All.
print('decisions = {\n' + ',\n'.join(f'    "{code}": "what"' for code in tutorials_reviewed["code"].drop_duplicates()) + '\n}')

In [None]:
# Start with one row per distinct tutorial code, each carrying the placeholder
# decision "what".
decisions = (
    tutorials_reviewed
    .drop_duplicates(subset="code")
    .loc[:, ["code"]]
    .assign(decision="what")
)
decisions

In [None]:
# Newline held in a variable so it can be interpolated inside the f-string that
# generates the next `decisions` cell.
NL = "\n"

In [None]:
# Ranking table: one row per tutorial (code, authors, title, tags) with
#   - num_reviews: number of reviews whose "text" is not null,
#   - score: median of the raw scores,
#   - norm: mean of the per-reviewer normalized scores,
# joined with the current `decisions` and sorted by median score, best first.
# NOTE(review): because num_reviews counts "text", a review that has a score but
# no comment is presumably not counted; confirm this is intended.
palmares = (
    tutorials_reviewed
    .groupby(["code", "authors", "title", "tags"], as_index=False)
    .agg({"text": "count", "score": "median", "norm": "mean"})
    .rename(columns={"text": "num_reviews"})
    .merge(decisions, on="code")
    .sort_values("score", ascending=False)
    .reset_index(drop=True)
)
# Show every row of the ranking.
with pd.option_context("display.max_rows", None):
    display(palmares)

# Pre-fill the next input cell with code that rebuilds `decisions` as a
# DataFrame literal, one ("code", "decision") tuple per line in the ranking
# order above, so that the decisions can be edited by hand.
get_ipython().set_next_input(
    f"""\
decisions = pd.DataFrame([
{f',{NL}'.join(f'    ("{code}", "{decision}")' for code, decision in palmares[["code", "decision"]].itertuples(index=False))}
], columns=["code", "decision"])""")

In [None]:
# Save the ranking table to palmares.csv, without the index column.
palmares.to_csv("palmares.csv", index=False)

In [None]:
# Hand-edited decisions, one (proposal code, decision) tuple per line. The
# layout matches the text that the `palmares` cell passes to set_next_input, so
# this cell was presumably generated there and then edited; "what" is the
# placeholder for a proposal that has no decision yet.
decisions = pd.DataFrame([
    ("3DVH7S", "ACCEPT"),
    ("VQCXLP", "ACCEPT"),
    ("MTYGRK", "ACCEPT"),
    ("Y9GD78", "ACCEPT"),
    ("CLKG8E", "ACCEPT"),
    ("S8FKUN", "ACCEPT"),
    ("JRLMLD", "ACCEPT"),
    ("PQMQ3K", "ACCEPT"),
    ("8QVDUL", "ACCEPT"),
    ("ZHEB8N", "ACCEPT"),
    ("QRMWNB", "ACCEPT"),
    ("FUYG37", "what"),
    ("UKLNLQ", "what"),
    ("W3ZJWG", "what"),
    ("UUPUM8", "what"),
    ("9UU7AW", "what"),
    ("BARWGR", "what"),
    ("LDSV7R", "what"),
    ("JCCXZC", "what"),
    ("9FATTN", "what"),
    ("QLLABG", "what"),
    ("PNGX8L", "what"),
    ("NBMZ3G", "what"),
    ("BZGQMC", "what"),
    ("YJVQ9T", "what"),
    ("BWVYMV", "what"),
    ("BTG9U3", "what"),
    ("PLWHZF", "what"),
    ("QH8MNM", "what"),
    ("DTRQAF", "what"),
    ("GKGRWE", "what"),
    ("HHVZ9T", "what"),
    ("7UAFHL", "what"),
    ("9JLCAA", "what"),
    ("SNYJRW", "what"),
    ("DYNEPX", "what"),
    ("HPTCUP", "what"),
    ("GJFZV3", "what"),
    ("KLMZUG", "what"),
    ("KDWCUY", "what"),
    ("3YDRLD", "what"),
    ("ZMUX8C", "what"),
    ("WT8QQT", "what"),
    ("WSDHK7", "what"),
    ("NZDP7Z", "what"),
    ("P3PACX", "what"),
    ("LQKEWN", "what"),
    ("KAESWL", "what"),
    ("J3NGVU", "what"),
    ("DMJWHY", "what"),
    ("FYGNKM", "what"),
    ("9XGCGR", "what"),
    ("NXT98S", "what"),
    ("9AMDQ3", "what"),
    ("9AUABF", "what"),
    ("UYVW7H", "what"),
    ("VSRDS9", "what"),
    ("SMQHZ8", "what"),
    ("VTQMZH", "what"),
    ("XUD8XW", "what"),
    ("CRZE3B", "what"),
    ("R9MHPC", "what"),
    ("EB7WG7", "what"),
    ("BHLJ9Z", "what"),
    ("SFX8HC", "what"),
    ("USZGYZ", "what"),
    ("TLHY3K", "what"),
    ("T8KFM8", "what"),
    ("EJZTLX", "what"),
    ("QT9GBY", "what"),
    ("PSSASU", "what"),
    ("S33LXK", "what"),
    ("TPNZ8Y", "what"),
    ("JZHRA8", "what"),
    ("98U7KE", "what"),
    ("USLA8A", "what"),
    ("VQUVVU", "what"),
    ("NZMB8H", "what"),
    ("P8U3LX", "what"),
    ("VRKZPW", "what")
], columns=["code", "decision"])

In [None]:
# NOTE(review): this cell appears to repeat the previous `decisions` cell
# verbatim, so running it rebinds `decisions` to the same table. It is
# presumably a leftover from re-running the cell that pre-fills the next input;
# confirm and keep only one copy.
decisions = pd.DataFrame([
    ("3DVH7S", "ACCEPT"),
    ("VQCXLP", "ACCEPT"),
    ("MTYGRK", "ACCEPT"),
    ("Y9GD78", "ACCEPT"),
    ("CLKG8E", "ACCEPT"),
    ("S8FKUN", "ACCEPT"),
    ("JRLMLD", "ACCEPT"),
    ("PQMQ3K", "ACCEPT"),
    ("8QVDUL", "ACCEPT"),
    ("ZHEB8N", "ACCEPT"),
    ("QRMWNB", "ACCEPT"),
    ("FUYG37", "what"),
    ("UKLNLQ", "what"),
    ("W3ZJWG", "what"),
    ("UUPUM8", "what"),
    ("9UU7AW", "what"),
    ("BARWGR", "what"),
    ("LDSV7R", "what"),
    ("JCCXZC", "what"),
    ("9FATTN", "what"),
    ("QLLABG", "what"),
    ("PNGX8L", "what"),
    ("NBMZ3G", "what"),
    ("BZGQMC", "what"),
    ("YJVQ9T", "what"),
    ("BWVYMV", "what"),
    ("BTG9U3", "what"),
    ("PLWHZF", "what"),
    ("QH8MNM", "what"),
    ("DTRQAF", "what"),
    ("GKGRWE", "what"),
    ("HHVZ9T", "what"),
    ("7UAFHL", "what"),
    ("9JLCAA", "what"),
    ("SNYJRW", "what"),
    ("DYNEPX", "what"),
    ("HPTCUP", "what"),
    ("GJFZV3", "what"),
    ("KLMZUG", "what"),
    ("KDWCUY", "what"),
    ("3YDRLD", "what"),
    ("ZMUX8C", "what"),
    ("WT8QQT", "what"),
    ("WSDHK7", "what"),
    ("NZDP7Z", "what"),
    ("P3PACX", "what"),
    ("LQKEWN", "what"),
    ("KAESWL", "what"),
    ("J3NGVU", "what"),
    ("DMJWHY", "what"),
    ("FYGNKM", "what"),
    ("9XGCGR", "what"),
    ("NXT98S", "what"),
    ("9AMDQ3", "what"),
    ("9AUABF", "what"),
    ("UYVW7H", "what"),
    ("VSRDS9", "what"),
    ("SMQHZ8", "what"),
    ("VTQMZH", "what"),
    ("XUD8XW", "what"),
    ("CRZE3B", "what"),
    ("R9MHPC", "what"),
    ("EB7WG7", "what"),
    ("BHLJ9Z", "what"),
    ("SFX8HC", "what"),
    ("USZGYZ", "what"),
    ("TLHY3K", "what"),
    ("T8KFM8", "what"),
    ("EJZTLX", "what"),
    ("QT9GBY", "what"),
    ("PSSASU", "what"),
    ("S33LXK", "what"),
    ("TPNZ8Y", "what"),
    ("JZHRA8", "what"),
    ("98U7KE", "what"),
    ("USLA8A", "what"),
    ("VQUVVU", "what"),
    ("NZMB8H", "what"),
    ("P8U3LX", "what"),
    ("VRKZPW", "what")
], columns=["code", "decision"])