In [1]:
from collections import Counter
from functools import reduce
import json
import os
from pathlib import Path
from urllib.parse import urljoin
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import pandas as pd
import requests
from tabulate import tabulate

In [2]:
def api_get(path, timeout=30):
    """
    GET a pretalx API resource and return the decoded JSON body.

    Parameters:
        path: either a path relative to the API root (for example
            "events/<event-slug>/reviews") or an absolute URL such as the "next"
            link of a paginated response. urljoin returns absolute URLs unchanged.
        timeout: seconds to wait for the server before giving up.

    The API token is read from the TOKEN environment variable.

    Raises requests.HTTPError for 4xx/5xx responses, so authentication or
    permission problems surface here instead of as a KeyError on the error payload
    further downstream.
    """
    # NOTE: the base URL has no trailing slash, so urljoin replaces its last path
    # segment ("v2") when `path` is relative; relative paths therefore resolve to
    # https://pretalx.com/api/<path>.
    api_url = urljoin("https://pretalx.com/api/v2", path)
    resp = requests.get(
        api_url,
        headers={"Authorization": f"Token {os.getenv('TOKEN')}"},
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json()

In [3]:
def get_submissions():
    """
    Get all submissions, irrespective of status, as a dict keyed by submission code.

    The result is cached in submissions.json in the working directory: when that
    file exists it is returned as-is and no request is made; otherwise the
    submissions are downloaded and the file is written. Delete the file to force a
    fresh download.
    """
    submissions_path = Path("submissions.json")
    if submissions_path.exists():
        # Use a context manager so the cache file handle is closed promptly.
        with submissions_path.open() as cached:
            return json.load(cached)

    submissions = {}
    url = "events/djangocon-europe-2023/submissions/?limit=200"
    # Follow the pagination links so that events with more submissions than fit on
    # one page are not silently truncated.
    while url:
        resp = api_get(url)
        submissions.update({result["code"]: result for result in resp["results"]})
        url = resp.get("next")

    with submissions_path.open("w") as out:
        json.dump(submissions, out)
    return submissions

In [4]:
# Every submission keyed by its code (read from submissions.json when that cache exists).
all_submissions = get_submissions()

In [5]:
def export_submissions():
    """
    Write submitted.csv with one row per submission whose state is "submitted".

    Columns are the submission code (as the index column, labelled "code"), the
    comma-separated speaker names, the title and the abstract.
    """
    submissions = get_submissions()
    for_export = {
        subm['code']: {
            "speakers": ", ".join(speaker["name"] for speaker in subm["speakers"]),
            "title": subm["title"],
            "abstract": subm["abstract"],
        } for subm in submissions.values() if subm["state"] == "submitted"
    }
    df = pd.DataFrame.from_dict(for_export, orient='index')
    # Name the index column: categorised_submissions() reads the categorised copy of
    # this file with index_col="code", which needs a "code" header to exist.
    df.to_csv("submitted.csv", index_label="code")

In [6]:
def get_reviews():
    """
    Download every individual review for the event.

    Requests the first page and then keeps following the "next" link of each
    response until it is empty, concatenating the "results" of all pages.
    """
    first_page = 1
    collected = []
    next_url = f"events/djangocon-europe-2023/reviews?page={first_page}&limit=100"
    while next_url:
        payload = api_get(next_url)
        collected.extend(payload["results"])
        # A falsy "next" means this was the last page and ends the loop.
        next_url = payload["next"]
    return collected

In [7]:
# All individual reviews, fetched page by page from the API (no local cache).
reviews = get_reviews()

In [8]:
def categorised_submissions():
    """
    Load submitted_with_categories.csv as a dataframe indexed by its 'code' column.

    The file is expected to be the export_submissions() output after a 'category'
    column has been added to it by hand, outside this notebook.
    """
    categories = pd.read_csv(
        "submitted_with_categories.csv",
        index_col="code",
    )
    return categories
    

In [38]:
def get_reviews_by_submission(all_submissions, reviews):
    """
    Collate reviews by submission, and add in the submission URL, speaker info and title, and category.

    Parameters:
        all_submissions: dict of submission dicts keyed by submission code.
        reviews: iterable of review dicts with "user", "submission" and "score" keys.

    Returns a dict with three entries:
        "talks":     {code: review dict} for submissions of type "Talk"
        "workshops": {code: review dict} for submissions of type "Workshop"
        "reviewers": {reviewer: "reviewer_<n>"} alias used for the score columns

    Each review dict holds one "reviewer_<n>" score per reviewer who reviewed that
    submission, plus "url", "speakers", "submission_type", "title", "duration" and
    "category".

    Raises KeyError when a reviewed submission is missing from `all_submissions` or
    from the categorised CSV.
    """
    categories_df = categorised_submissions()

    # Sort before numbering. Iterating a bare set of strings follows hash order,
    # which can differ between interpreter runs, so without the sort "reviewer_3"
    # could refer to a different person every time the notebook is re-run.
    all_reviewers = sorted({review["user"] for review in reviews}, key=str)
    reviewer_key = {reviewer: f"reviewer_{i}" for i, reviewer in enumerate(all_reviewers, start=1)}

    reviews_by_submission = {}
    for review in reviews:
        code = review["submission"]
        reviewer_code = reviewer_key[review["user"]]
        reviews_by_submission.setdefault(
            code, {"url": f"https://pretalx.com/orga/event/djangocon-europe-2023/submissions/{code}/reviews"}
        )[reviewer_code] = review["score"]

    talks = {}
    workshops = {}
    for code, scores in reviews_by_submission.items():
        submission = all_submissions[code]
        # The type is either a plain string or a dict of translations; use the English name.
        raw_type = submission["submission_type"]
        submission_type = raw_type if isinstance(raw_type, str) else raw_type["en"]
        submission_info = {
            "speakers": ", ".join([speaker["name"] for speaker in submission["speakers"]]),
            "submission_type": submission_type,
            "title": submission["title"],
            "duration": submission["duration"],
            "category": categories_df.loc[code].category
        }
        # Submission info wins over the collated scores if a key were ever shared.
        review_data = {**scores, **submission_info}
        if submission_type == "Talk":
            talks[code] = review_data
        else:
            assert submission_type == "Workshop", f"Unexpected submission type {submission_type!r} for {code}"
            workshops[code] = review_data

    assert len(talks) + len(workshops) == len(reviews_by_submission), "Every reviewed submission must be a talk or a workshop"
    return {"talks": talks, "workshops": workshops, "reviewers": reviewer_key}


In [10]:
def summary_dataframe(input_dict):
    """
    Build a per-submission summary dataframe from a {code: review dict} mapping.

    Columns are sorted alphabetically, the "reviewer_*" score columns are converted
    to numbers, and these columns are appended (in this order):
    - mean, median, min, max: statistics over the reviewer scores
    - range: max minus min
    - reviewers: number of reviewers who gave a score
    - min_counts / max_counts: number of reviewers who gave the min / max score
    - decision_prelim: "accept", "reject" or "" (see autodecide below)
    """
    frame = pd.DataFrame.from_dict(input_dict, orient='index')
    frame = frame.reindex(columns=sorted(frame.columns))

    reviewer_cols = [name for name in frame.columns if name.startswith("reviewer_")]
    frame = frame.assign(**{name: pd.to_numeric(frame[name]) for name in reviewer_cols})

    # All statistics are row-wise over the reviewer score columns only.
    scores = frame.loc[:, reviewer_cols]
    lowest = scores.min(axis=1, numeric_only=True)
    highest = scores.max(axis=1, numeric_only=True)
    frame = frame.assign(
        mean=scores.mean(axis=1, numeric_only=True),
        median=scores.median(axis=1, numeric_only=True),
        min=lowest,
        max=highest,
        range=highest - lowest,
        reviewers=scores.count(axis=1, numeric_only=True),
    )

    def count_scores_equal_to(stat):
        """Return a row function counting the reviewer scores equal to row[stat]."""
        def counter(row):
            return sum(row[reviewer_cols] == row[stat])
        return counter

    frame["min_counts"] = frame.apply(count_scores_equal_to("min"), axis=1)
    frame["max_counts"] = frame.apply(count_scores_equal_to("max"), axis=1)

    def autodecide(row):
        """Decide only when all scores are within one point of each other."""
        if not row["range"] <= 1:
            return ""
        median = row["median"]
        if median < 3:
            return "reject"
        return "accept" if median >= 4 else ""

    frame["decision_prelim"] = frame.apply(autodecide, axis=1)
    return frame


In [11]:
def decision_df(df, decision):
    """Return the rows of `df` whose "decision_prelim" value equals `decision`."""
    matches = df["decision_prelim"].eq(decision)
    return df.loc[matches]

In [12]:
def disagreed_df(df):
    """Return the rows of `df` whose "range" is 3 or more (no reviewer consensus)."""
    wide_spread = df["range"].ge(3)
    return df.loc[wide_spread]

In [13]:
def summarise(df):
    """
    Print a plain-text summary of a dataframe produced by summary_dataframe().

    Sections printed, in order:
    - total number of submissions
    - submissions with consensus (range <= 1): total and a count per median score
    - number of preliminary accepts and rejects
    - speakers appearing on more than one preliminarily accepted submission
    - submissions without consensus (range >= 3): total and a table of their score
      spread, sorted in descending order
    """
    print(f"Total number of submissions: {len(df)}\n\n")

    print("Submissions with consensus (all scores within one point)")
    print("==========================================================")
    agreed = df[df['range'] <= 1]

    print(f"Total: {agreed['range'].count()}\n")

    counter = Counter(agreed['median'])
    print(tabulate(sorted(counter.items()), headers=["Median score", "Count"]))

    accepted = decision_df(df, "accept")
    rejected = decision_df(df, "reject")
    print("\n")
    print(f"Preliminarily accepted (median score >= 4): {len(accepted)}\n")
    print(f"Rejected (median score < 3): {len(rejected)}\n")

    # "speakers" is the joined speaker string, so co-presented submissions count as
    # a separate entry from the same person's solo submissions.
    counter = Counter(accepted["speakers"])
    duplicate_speakers = {k: v for k, v in counter.items() if v > 1}
    if duplicate_speakers:
        print("Authors with more than one accepted submission:\n")
        print(tabulate(duplicate_speakers.items(), headers=["Name", "#"]))
    else:
        print("Authors with more than one accepted submission: None")

    print("\n")
    print("Submissions without consensus (scores range >= 3)")
    print("==========================================================")
    disagreed = disagreed_df(df)
    print(f"Total: {disagreed['range'].count()}\n")

    # Sort by range first, then median, and so on, largest first.
    disagreed_scores = sorted(
        zip(
            disagreed["range"],
            disagreed["median"],
            disagreed["min"],
            disagreed["min_counts"],
            disagreed["max"],
            disagreed["max_counts"],
            disagreed["reviewers"],
        ),
        reverse=True
    )

    print(
        tabulate(
            [
                (score_range, median, f"{low} ({low_count})", f"{high} ({high_count})", reviewer_count)
                for score_range, median, low, low_count, high, high_count, reviewer_count in disagreed_scores
            ],
            headers=["Range", "Median", "Min (# reviewers)", "Max (# reviewers)", "# reviewers"]
        )
    )
    

In [14]:
def summarise_md(df):
    """
    Display a Markdown summary of a dataframe produced by summary_dataframe().

    The heading is taken from the submission_type of the first row, so `df` must
    contain at least one row. Sections rendered, in order: total submissions,
    submissions with consensus (range <= 1) with a count per median score,
    preliminary accept/reject counts, speakers with more than one accepted
    submission, and submissions without consensus (range >= 3) with their score
    spread sorted in descending order.
    """
    def show(text):
        """Render one Markdown fragment as its own display output."""
        display(Markdown(text))

    def table(header_line, divider_line, records):
        """Build a Markdown table: header, divider, then one |a|b|...| line per record."""
        body = "\n".join(
            "|" + "|".join(str(cell) for cell in record) + "|" for record in records
        )
        return f"{header_line}\n{divider_line}\n{body}"

    show(f"# {df.iloc[0].submission_type}s")
    show(f"Total number of submissions: {len(df)}\n\n")

    show("## Submissions with consensus (all scores within one point)")
    consensus = df[df['range'] <= 1]
    show(f"Total: {consensus['range'].count()}\n")

    median_tally = Counter(consensus['median'])
    show(table("|Median score|Count|", "|--|--|", sorted(median_tally.items())))

    accepted = decision_df(df, "accept")
    rejected = decision_df(df, "reject")
    show(f"Preliminarily accepted (median score >= 4): {len(accepted)}\n")
    show(f"Rejected (median score < 3): {len(rejected)}\n")

    # Keep only speaker strings that occur on more than one accepted submission.
    repeat_speakers = {
        name: times for name, times in Counter(accepted["speakers"]).items() if times > 1
    }
    if not repeat_speakers:
        show("Authors with more than one accepted submission: None")
    else:
        show("Authors with more than one accepted submission:")
        show(table("|Name|#|", "|--|--|", sorted(repeat_speakers.items())))

    show("## Submissions without consensus (scores range >= 3)")
    contested = disagreed_df(df)
    show(f"Total: {contested['range'].count()}\n")

    stat_columns = ("range", "median", "min", "min_counts", "max", "max_counts", "reviewers")
    # Tuples sort by range first, then median, and so on, largest first.
    ranked = sorted(zip(*(contested[name] for name in stat_columns)), reverse=True)
    spread_rows = [
        (spread, median, f"{low} ({low_n})", f"{high} ({high_n})", n_reviewers)
        for spread, median, low, low_n, high, high_n, n_reviewers in ranked
    ]
    show(
        table(
            "|Range|Median|Min (# reviewers)|Max (# reviewers)|# reviewers|",
            "|---|---|---|---|---|",
            spread_rows,
        )
    )
    
    

In [40]:
# Scores collated per submission, split into "talks" and "workshops", plus the
# "reviewers" mapping from each reviewer to their reviewer_<n> alias.
reviews_by_submissions = get_reviews_by_submission(all_submissions, reviews)

In [16]:
# Per-talk score statistics and preliminary decision.
talks_df = summary_dataframe(reviews_by_submissions["talks"])

In [17]:
# Per-workshop score statistics and preliminary decision.
workshops_df = summary_dataframe(reviews_by_submissions["workshops"])

In [18]:
# Render the Markdown summary for talks.
summarise_md(talks_df)


# Talks

Total number of submissions: 162



## Submissions with consensus (all scores within one point)

Total: 57


|Median score|Count|
|--|--|
|1.0|5|
|2.0|9|
|2.5|1|
|3.0|5|
|3.5|1|
|4.0|32|
|4.5|1|
|5.0|3|

Preliminarily accepted (median score >= 4): 36


Rejected (median score < 3): 15


Authors with more than one accepted submission:

|Name|#|
|--|--|
|Paolo Melchiorre|2|

## Submissions without consensus (scores range >= 3)

Total: 34


|Range|Median|Min (# reviewers)|Max (# reviewers)|# reviewers|
|---|---|---|---|---|
|4.0|4.0|1.0 (1)|5.0 (1)|6|
|4.0|4.0|1.0 (1)|5.0 (1)|5|
|4.0|3.0|1.0 (1)|5.0 (1)|6|
|4.0|3.0|1.0 (1)|5.0 (1)|5|
|4.0|3.0|1.0 (1)|5.0 (1)|5|
|4.0|3.0|1.0 (1)|5.0 (1)|5|
|3.0|4.0|2.0 (2)|5.0 (1)|5|
|3.0|4.0|2.0 (1)|5.0 (2)|5|
|3.0|4.0|2.0 (1)|5.0 (1)|5|
|3.0|4.0|2.0 (1)|5.0 (1)|5|
|3.0|4.0|1.0 (1)|4.0 (4)|5|
|3.0|4.0|1.0 (1)|4.0 (4)|5|
|3.0|4.0|1.0 (1)|4.0 (3)|5|
|3.0|4.0|1.0 (1)|4.0 (3)|5|
|3.0|4.0|1.0 (1)|4.0 (3)|5|
|3.0|3.5|1.0 (1)|4.0 (3)|6|
|3.0|3.0|2.0 (2)|5.0 (1)|5|
|3.0|3.0|2.0 (2)|5.0 (1)|5|
|3.0|3.0|2.0 (1)|5.0 (1)|5|
|3.0|3.0|2.0 (1)|5.0 (1)|5|
|3.0|3.0|1.0 (1)|4.0 (2)|6|
|3.0|3.0|1.0 (1)|4.0 (2)|5|
|3.0|3.0|1.0 (1)|4.0 (2)|5|
|3.0|3.0|1.0 (1)|4.0 (1)|6|
|3.0|3.0|1.0 (1)|4.0 (1)|6|
|3.0|3.0|1.0 (1)|4.0 (1)|5|
|3.0|3.0|1.0 (1)|4.0 (1)|5|
|3.0|3.0|1.0 (1)|4.0 (1)|5|
|3.0|3.0|1.0 (1)|4.0 (1)|5|
|3.0|2.0|2.0 (3)|5.0 (1)|5|
|3.0|2.0|1.0 (2)|4.0 (1)|6|
|3.0|2.0|1.0 (1)|4.0 (1)|6|
|3.0|2.0|1.0 (1)|4.0 (1)|5|
|3.0|2.0|1.0 (1)|4.0 (1)|5|

In [19]:
# Render the Markdown summary for workshops.
summarise_md(workshops_df)

# Workshops

Total number of submissions: 22



## Submissions with consensus (all scores within one point)

Total: 5


|Median score|Count|
|--|--|
|2.0|1|
|4.0|3|
|5.0|1|

Preliminarily accepted (median score >= 4): 4


Rejected (median score < 3): 1


Authors with more than one accepted submission: None

## Submissions without consensus (scores range >= 3)

Total: 7


|Range|Median|Min (# reviewers)|Max (# reviewers)|# reviewers|
|---|---|---|---|---|
|4.0|3.5|1.0 (1)|5.0 (1)|6|
|3.0|4.0|2.0 (1)|5.0 (1)|6|
|3.0|4.0|1.0 (1)|4.0 (3)|5|
|3.0|3.0|1.0 (2)|4.0 (2)|5|
|3.0|2.5|1.0 (1)|4.0 (1)|6|
|3.0|2.0|1.0 (1)|4.0 (2)|5|
|3.0|1.0|1.0 (3)|4.0 (1)|5|

In [20]:
def export_df(df):
    """
    Write the review summary in `df` to two CSV files, highest median first.

    The file names are derived from the lower-cased submission_type of the first
    row after sorting: dce2023_<type>s_reviews.csv holds every submission and
    dce2023_<type>s_reviews_without_consensus.csv only those with a range >= 3.
    Workshop exports additionally include the "duration" column. The index
    (submission code) is not written.
    """
    ranked = df.sort_values("median", ascending=False)
    kind = ranked.iloc[0].submission_type.lower()
    score_cols = [name for name in ranked.columns if name.startswith("reviewer_")]

    leading = ["title", "speakers"]
    if kind == "workshop":
        # Duration sits right after the speakers for workshops.
        leading.append("duration")
    selected = leading + [
        "category", "url", "median", "min", "max", "range", "decision_prelim",
    ] + score_cols

    full_export = ranked[selected]
    full_export.to_csv(f"dce2023_{kind}s_reviews.csv", index=False)

    contested_export = disagreed_df(full_export)
    contested_export.to_csv(f"dce2023_{kind}s_reviews_without_consensus.csv", index=False)

In [21]:
# Write the review CSVs (all, and without consensus) for talks.
export_df(talks_df)

In [22]:
# Write the review CSVs (all, and without consensus) for workshops.
export_df(workshops_df)

In [54]:
def plot_reviews(df):
    """
    Show a histogram of scores with one series per "reviewer_*" column of `df`.

    The plot title is the submission_type of the first row with an "s" appended,
    and the figure is resized to 12 x 8 inches before being shown.
    """
    score_columns = [name for name in df.columns if name.startswith("reviewer_")]
    per_reviewer_scores = [df[name] for name in score_columns]

    plt.hist(per_reviewer_scores, rwidth=2)
    plt.legend(score_columns)
    plt.xticks(ticks=[1, 2, 3, 4, 5])
    plt.ylabel("# submissions")
    plt.xlabel("Score")
    plt.title(f"{df.iloc[0].submission_type}s")
    plt.gcf().set_size_inches(12, 8)
    plt.show()