In [43]:
from collections import Counter
from functools import reduce
import requests
import os
from urllib.parse import urljoin
import pandas as pd
from tabulate import tabulate

In [44]:
def api_get(path, timeout=30):
    """
    Perform an authenticated GET against the pretalx API and return the decoded JSON body.

    Parameters:
        path: either a path relative to the API root (e.g. "events/<slug>/reviews")
            or an absolute URL, such as the "next" link of a paginated response.
            urljoin returns absolute URLs unchanged.
        timeout: seconds to wait for the server before requests gives up.
            Without a timeout a stalled connection would block the notebook forever.

    The API token is read from the TOKEN environment variable.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # NOTE: the base URL has no trailing slash, so urljoin replaces its last path
    # segment ("v2") with `path`; relative paths therefore resolve under
    # https://pretalx.com/api/ and not under /api/v2/.
    api_url = urljoin("https://pretalx.com/api/v2", path)
    resp = requests.get(
        api_url,
        headers={"Authorization": f"Token {os.getenv('TOKEN')}"},
        timeout=timeout,
    )
    # Fail loudly on auth/HTTP errors instead of handing an error payload to callers,
    # which would otherwise surface later as a confusing KeyError on "results".
    resp.raise_for_status()
    return resp.json()

In [49]:
def get_submissions():
    """
    Get all submissions, irrespective of status.

    Follows the "next" link of every response page, so an event with more
    submissions than fit in one page (limit=200) is still returned in full.

    Returns a dict mapping each submission's "code" to its API record.
    """
    submissions = {}
    url = "events/djangocon-europe-2023/submissions/?limit=200"
    while url:
        resp = api_get(url)
        for result in resp["results"]:
            submissions[result["code"]] = result
        # Same pagination convention get_reviews relies on: "next" holds the URL
        # of the following page and is falsy (or absent) on the last one.
        url = resp.get("next")
    return submissions

In [50]:
# Fetch the submissions once (network call); the result is a dict keyed by submission code.
all_submissions = get_submissions()

In [52]:
def get_reviews():
    """
    Collect every individual review for the event.

    Starts at the first page of the reviews endpoint and keeps following the
    "next" link of each response until the API reports no further page.
    Returns the accumulated review records as a single list.
    """
    first_page = 1
    next_url = f"events/djangocon-europe-2023/reviews?page={first_page}"
    collected = []
    while next_url:
        payload = api_get(next_url)
        collected.extend(payload["results"])
        # A falsy "next" marks the last page and ends the loop.
        next_url = payload["next"]
    return collected

In [None]:
# Download every individual review (one API request per result page).
reviews = get_reviews()

In [None]:
def get_reviews_by_submission(all_submissions, reviews):
    """
    Collate reviews by submission, and add in the submission URL, speaker info and title.
    Return a dict of review dicts, separated by submission type.

    Parameters:
        all_submissions: dict mapping submission code -> submission record. Each record
            is read for "submission_type" (a string, or a dict with an "en" entry),
            "speakers" (list of dicts with a "name"), "title" and "duration".
        reviews: list of review records, each read for "user", "submission" and "score".

    Returns:
        {"talks": {code: row}, "workshops": {code: row}} where each row holds the
        review URL, one "reviewer_N" score per reviewer, and the submission info.

    Reviewers are anonymised as reviewer_1, reviewer_2, ... numbered in order of
    first appearance in `reviews`, so the same input always produces the same
    column labels.

    Raises:
        KeyError: if a reviewed submission is missing from `all_submissions`.
        AssertionError: if a submission type is neither "Talk" nor "Workshop".
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; iterating a set of
    # strings instead would shuffle the reviewer numbering between kernel restarts.
    all_reviewers = dict.fromkeys(review["user"] for review in reviews)
    reviewer_key = {reviewer: f"reviewer_{i}" for i, reviewer in enumerate(all_reviewers, start=1)}

    reviews_by_submission = {}
    for review in reviews:
        code = review["submission"]
        reviewer_code = reviewer_key[review["user"]]
        reviews_by_submission.setdefault(
            code, {"url": f"https://pretalx.com/orga/event/djangocon-europe-2023/submissions/{code}/reviews"}
        )[reviewer_code] = review["score"]

    talks = {}
    workshops = {}
    for code, scores in reviews_by_submission.items():
        submission = all_submissions[code]
        # The type is either a plain string or a dict of translations; use the English name.
        submission_type = submission["submission_type"]
        if not isinstance(submission_type, str):
            submission_type = submission_type["en"]
        submission_info = {
            "speakers": ", ".join([speaker["name"] for speaker in submission["speakers"]]),
            "submission_type": submission_type,
            "title": submission["title"],
            "duration": submission["duration"],
        }
        review_data = {**scores, **submission_info}
        if submission_type == "Talk":
            talks[code] = review_data
        else:
            assert submission_type == "Workshop", (
                f"Unexpected submission type {submission_type!r} for submission {code}"
            )
            workshops[code] = review_data

    assert len(talks) + len(workshops) == len(reviews_by_submission), (
        "Every reviewed submission must be classified as a talk or a workshop"
    )
    return {"talks": talks, "workshops": workshops}


In [None]:
def summary_dataframe(input_dict):
    """
    Build a per-submission summary table from a review dict.

    Every key of `input_dict` becomes a row. Columns are ordered alphabetically and
    each "reviewer_*" column is converted to a numeric dtype. The following columns
    are then appended, all computed over the reviewer scores of a row:
    - mean, median, min, max
    - range (max - min)
    - reviewers: number of non-missing scores
    - min_counts / max_counts: how many reviewers gave the min / max score
    - decision_prelim: for rows whose scores are within one point of each other,
      "reject" when the median is below 3 and "accept" when it is at least 4;
      an empty string in every other case
    """
    frame = pd.DataFrame.from_dict(input_dict, orient="index")
    frame = frame.reindex(columns=sorted(frame.columns))

    score_cols = [name for name in frame.columns if name.startswith("reviewer_")]
    for name in score_cols:
        frame[name] = pd.to_numeric(frame[name])

    # Take the score columns once; every statistic below is derived from this slice.
    scores = frame.loc[:, score_cols]
    lowest = scores.min(axis=1, numeric_only=True)
    highest = scores.max(axis=1, numeric_only=True)
    frame = frame.assign(
        mean=scores.mean(axis=1, numeric_only=True),
        median=scores.median(axis=1, numeric_only=True),
        min=lowest,
        max=highest,
        range=highest - lowest,
        reviewers=scores.count(axis=1, numeric_only=True),
    )

    def count_matching(row, stat):
        # Number of reviewer scores in this row equal to the row's `stat` value.
        return sum(row[score_cols] == row[stat])

    frame["min_counts"] = frame.apply(lambda row: count_matching(row, "min"), axis=1)
    frame["max_counts"] = frame.apply(lambda row: count_matching(row, "max"), axis=1)

    def preliminary_decision(row):
        # Only decide automatically when all scores are within one point;
        # written as `not <=` so a missing (NaN) range also yields no decision.
        if not row["range"] <= 1:
            return ""
        if row["median"] < 3:
            return "reject"
        if row["median"] >= 4:
            return "accept"
        return ""

    frame["decision_prelim"] = frame.apply(preliminary_decision, axis=1)
    return frame


In [None]:
def decision_df(df, decision):
    """Return the rows of `df` whose "decision_prelim" value equals `decision`."""
    has_decision = df["decision_prelim"] == decision
    return df.loc[has_decision]

In [None]:
def disagreed_df(df):
    """Return the rows of `df` whose score "range" is 3 or more (no reviewer consensus)."""
    wide_spread = df["range"] >= 3
    return df.loc[wide_spread]

In [None]:
def summarise(df):
    """
    Print a plain-text report for a dataframe produced by summary_dataframe().

    The report covers:
    - the total number of submissions
    - submissions with consensus (score range <= 1), tabulated by median score
    - how many submissions are preliminarily accepted / rejected
    - speakers who appear on more than one accepted submission
    - submissions without consensus (score range >= 3), widest disagreement first

    Speakers are counted individually: the "speakers" column holds names joined
    with ", " (as built by get_reviews_by_submission), so it is split on that
    separator. Counting the joined string instead would miss a person who has one
    solo and one joint accepted submission. A name that itself contains ", " will
    be split as well.

    Returns None; all output goes to stdout.
    """
    print(f"Total number of submissions: {len(df)}\n\n")

    print("Submissions with consensus (all scores within one point)")
    print("==========================================================")
    agreed = df[df['range'] <= 1]

    print(f"Total: {agreed['range'].count()}\n")

    counter = Counter(agreed['median'])
    print(tabulate(sorted(counter.items()), headers=["Median score", "Count"]))

    accepted = decision_df(df, "accept")
    rejected = decision_df(df, "reject")
    print("\n")
    print(f"Preliminarily accepted (median score >= 4): {len(accepted)}\n")
    print(f"Rejected (median score < 3): {len(rejected)}\n")

    # Count every individual speaker, not each combination of co-speakers.
    speaker_counts = Counter(
        name
        for speakers in accepted["speakers"]
        for name in speakers.split(", ")
        if name
    )
    duplicate_speakers = {k: v for k, v in speaker_counts.items() if v > 1}
    if duplicate_speakers:
        print("Authors with more than one accepted submission:\n")
        print(tabulate(duplicate_speakers.items(), headers=["Name", "#"]))
    else:
        print("Authors with more than one accepted submission: None")

    print("\n")
    print("Submissions without consensus (scores range >= 3)")
    print("==========================================================")
    disagreed = disagreed_df(df)
    print(f"Total: {disagreed['range'].count()}\n")

    # Tuples sort by range first, then median, ...; reverse=True puts the widest
    # disagreement at the top of the table.
    disagreed_scores = sorted(
        zip(
            disagreed["range"],
            disagreed["median"],
            disagreed["min"],
            disagreed["min_counts"],
            disagreed["max"],
            disagreed["max_counts"],
            disagreed["reviewers"],
        ),
        reverse=True
    )

    table_rows = [
        (spread, median, f"{low} ({low_count})", f"{high} ({high_count})", n_reviewers)
        for spread, median, low, low_count, high, high_count, n_reviewers in disagreed_scores
    ]
    print(
        tabulate(
            table_rows,
            headers=["Range", "Median", "Min (# reviewers)", "Max (# reviewers)", "# reviewers"]
        )
    )
    

In [None]:
# Group the review scores per submission and split them into "talks" and "workshops".
reviews_by_submissions = get_reviews_by_submission(all_submissions, reviews)

In [None]:
# Per-talk score statistics and preliminary decisions.
talks_df = summary_dataframe(reviews_by_submissions["talks"])

In [None]:
# Per-workshop score statistics and preliminary decisions.
workshops_df = summary_dataframe(reviews_by_submissions["workshops"])

In [None]:
# Print the consensus / disagreement report for talks.
summarise(talks_df)


In [None]:
# Print the consensus / disagreement report for workshops.
summarise(workshops_df)

In [None]:
def export_df(df):
    """
    Export a summary dataframe to CSV files in the current working directory.

    Two files are written, named after the lower-cased submission type of the
    first row once the rows are sorted by median (e.g. "talk" -> "dce2023_talks_..."):
    - dce2023_<type>s_reviews.csv: all submissions, highest median first
    - dce2023_<type>s_reviews_without_consensus.csv: only rows with a score range >= 3

    Workshop exports additionally include the "duration" column. The dataframe
    index is not written to the files.

    An empty dataframe has no row to read the submission type from, so nothing
    is written and a short notice is printed instead.

    Returns None.
    """
    if df.empty:
        # df.iloc[0] below would otherwise fail with an opaque IndexError.
        print("No submissions to export")
        return

    ranked = df.sort_values("median", ascending=False)
    reviewer_cols = [col for col in ranked.columns if col.startswith("reviewer_")]
    submission_type = ranked.iloc[0].submission_type.lower()
    cols = ["title", "speakers", "url", "median", "min", "max", "range", "decision_prelim", *reviewer_cols]
    if submission_type == "workshop":
        # Duration goes right after the speakers column.
        cols.insert(2, "duration")

    # Named `export_frame` so the local does not shadow this function's own name.
    export_frame = ranked[cols]
    export_frame.to_csv(f"dce2023_{submission_type}s_reviews.csv", index=False)

    disagreed_export_df = disagreed_df(export_frame)
    disagreed_export_df.to_csv(f"dce2023_{submission_type}s_reviews_without_consensus.csv", index=False)

In [None]:
# Writes dce2023_talks_reviews.csv and dce2023_talks_reviews_without_consensus.csv.
export_df(talks_df)

In [None]:
# Writes dce2023_workshops_reviews.csv and dce2023_workshops_reviews_without_consensus.csv.
export_df(workshops_df)