---
title: Current Software Review Stats
subtitle: pyOpenSci Peer Review Summary Stats
license:
  code: BSD-3-Clause
---

This is a workflow that collates all GitHub issues associated with our reviews.

In [1]:
# https://github.com/ryantam626/jupyterlab_code_formatter
import warnings
from datetime import datetime, timezone

import altair as alt
import pandas as pd
import pytz
from pyosmeta import ProcessIssues
from pyosmeta.github_api import GitHubAPI

from pyosmetrics.functions import count_edits_by_quarter

# Suppress all warnings
# NOTE(review): this blanket filter silences every warning category, which
# also hides any pandas warnings raised by later cells -- consider narrowing
# it to the specific categories that are known to be noisy.
warnings.filterwarnings("ignore")

In [2]:
# Open Reviews

In [3]:
# Get a list of reviews submitted to us.
# Only issues carrying one of the labels below are requested, so this
# potentially doesn't include issues that were deemed out of scope.
github_api = GitHubAPI(
    org="pyopensci",
    repo="software-submission",
    labels=[
        "0/seeking-editor",
        "1/editor-assigned",
        "2/seeking-reviewers",
        "3/reviewers-assigned",
        "4/reviews-in-awaiting-changes",
        "5/awaiting-reviewer-response",
        "7/under-joss-review",
        "8/joss-review-complete",
        "New Submission!",
    ],
)
process_review = ProcessIssues(github_api)
issues = process_review.get_issues()
# parse_issues returns the parsed reviews plus a second value kept as `errors`
# (presumably the issues that could not be parsed -- confirm in pyosmeta).
reviews, errors = process_review.parse_issues(issues)

# One record per review, keyed by the package name
review_table = [
    {
        "package_name": name,
        "created_at": review.created_at,
        "date_closed": review.closed_at,
        "editor": review.editor.github_username,
        "labels": review.labels,
    }
    for name, review in reviews.items()
]

reviews_df = pd.DataFrame(review_table)

today = datetime.now(timezone.utc)

# Open reviews are the ones without a close date. The table is built in a
# single chain that returns a new frame, instead of assigning columns onto a
# filtered slice of `reviews_df` (which triggers pandas' chained-assignment
# warning and relies on view/copy semantics).
open_reviews = (
    reviews_df.loc[reviews_df["date_closed"].isna()]
    .drop(columns=["date_closed"])
    .assign(
        # keyword order matters: days_open is computed from the full
        # timestamp before created_at is reduced to a plain date
        days_open=lambda df: (today - df["created_at"]).dt.days,
        created_at=lambda df: df["created_at"].dt.date,
    )
)
total_open = len(open_reviews)

## Current open reviews & total days open

pyOpenSci currently has **{eval}`total_open`** total open submissions.

In [4]:
# Display the table of currently open reviews
open_reviews

Unnamed: 0,package_name,created_at,editor,labels,days_open
0,Solar Data Tools,2024-08-17,TBD,[0/seeking-editor],11
1,THzTools,2024-08-01,TBD,[0/seeking-editor],27
2,BlackMarblePy,2024-07-18,yeelauren,[1/editor-assigned],41
3,MontePy,2024-07-01,kellyrowland,[3/reviewers-assigned],58
4,Great Tables,2024-06-14,Batalex,[1/editor-assigned],75
5,Stingray,2024-06-14,hamogu,"[2/seeking-reviewers, astropy]",75
6,Fluidimage,2024-05-30,TBD,[0/seeking-editor],90
7,astrodata,2024-05-13,hamogu,"[3/reviewers-assigned, astropy]",106
8,QuadratiK,2024-05-13,isabelizimm,[4/reviews-in-awaiting-changes],107
9,PyPartMC,2024-05-03,russbiggs,[1/editor-assigned],117


In [5]:
# Fetch the presubmission inquiries: issues labelled "presubmission"
gh_presubmissions = GitHubAPI(
    org="pyopensci",
    repo="software-submission",
    labels=["presubmission"],
)
# NOTE: `process_review` and `errors` are rebound here, replacing the values
# assigned when the full reviews were fetched.
process_review = ProcessIssues(gh_presubmissions)
pre_issues = process_review.get_issues()
pre_submissions, errors = process_review.parse_issues(pre_issues)

# One record per presubmission, keyed by the package name
pre_submission_table = [
    dict(
        package_name=pkg,
        created_at=inquiry.created_at,
        date_closed=inquiry.closed_at,
        labels=inquiry.labels,
    )
    for pkg, inquiry in pre_submissions.items()
]

In [6]:
# Tabulate every presubmission (open and closed) and count them
presubmission_df = pd.DataFrame(data=pre_submission_table)
all_presubmissions = presubmission_df.shape[0]

In [7]:
# Get all currently open presubmissions (those without a close date)
today = datetime.now(timezone.utc)

# Built in a single chain that returns a new frame, rather than assigning
# columns onto a filtered slice of `presubmission_df` (which triggers pandas'
# chained-assignment warning and relies on view/copy semantics).
open_presubmissions = (
    presubmission_df.loc[presubmission_df["date_closed"].isna()]
    .drop(columns=["date_closed"])
    .assign(
        # keyword order matters: days_open is computed from the full
        # timestamp before created_at is reduced to a plain date
        days_open=lambda df: (today - df["created_at"]).dt.days,
        created_at=lambda df: df["created_at"].dt.date,
    )
    # Newest first; ignore_index renumbers the rows *after* sorting so the
    # displayed index is sequential
    .sort_values(by="created_at", ascending=False, ignore_index=True)
)

# Kept under its own name so the open-review count stored in `total_open`
# is not overwritten by the presubmission count.
total_open_presubmissions = len(open_presubmissions)

## All presubmissions

There are **{eval}`all_presubmissions`** total presubmissions to date, including closed presubmissions. 

## Currently open software presubmission inquiries

* It could be useful to grab the most recent comments on each
* It would also be useful to grab the GitHub usernames of everyone involved in each discussion and credit them. For example, in one presubmission the astropy editors and Alex are involved.

There are **{eval}`len(open_presubmissions)` presubmission requests** currently open. 


In [8]:
# Render table of strictly open presubmissions
open_presubmissions

Unnamed: 0,package_name,created_at,labels,days_open
0,gentropy,2024-05-24,[presubmission],96
1,GALAssify,2024-05-24,[presubmission],96


In [9]:
# Static list of all editors, updated 7/13/2024
# TODO: get this list of current editors dynamically
all_editors = [
    "cmarmo",
    "dhomeier",
    "ocefpaf",
    "NikleDave",
    "SimonMolinsky",
    "Batalex",
    "sneakers-the-rat",
    "tomalrussel",
    "ctb",
    "mjhajharia",
    "hamogu",
    "isabelizimm",
    "yeelauren",
    "banesullivan",
]

# Every known editor starts out with zero open submissions
submissions_per_editor = dict.fromkeys(all_editors, 0)

# Editors of open submissions, skipping the "TBD" placeholder used when no
# editor has been assigned yet
busy_editors = open_reviews.loc[open_reviews["editor"] != "TBD", "editor"]

# Tally open submissions per editor. An editor who is missing from the static
# list above is added on first sight so their load is still counted.
# TODO: output such editors to an error log
for editor in busy_editors:
    submissions_per_editor[editor] = submissions_per_editor.get(editor, 0) + 1

# Table of all editors and their number of open submissions, least busy first
editor_activity_df = (
    pd.DataFrame(
        list(submissions_per_editor.items()), columns=["editor", "num_submissions"]
    )
    .sort_values(by="num_submissions")
    .reset_index(drop=True)
)

# Counts of unavailable (at least one open submission) and available editors
num_busy_editors = int((editor_activity_df["num_submissions"] > 0).sum())
num_available_editors = len(editor_activity_df) - num_busy_editors

## Available editors

There are currently **{eval}`num_available_editors` available editors** and **{eval}`num_busy_editors` editors who are assigned to a submission**.

In [10]:
# Display the editor table: one row per editor with their number of open
# submissions, sorted by that number in ascending order
editor_activity_df

Unnamed: 0,editor,num_submissions
0,ocefpaf,0
1,NikleDave,0
2,SimonMolinsky,0
3,banesullivan,0
4,tomalrussel,0
5,mjhajharia,0
6,cmarmo,1
7,russbiggs,1
8,kellyrowland,1
9,isabelizimm,1


In [11]:
# Get all currently closed / approved issues

# calculate time that they were in review.

## Editors load

In [12]:
# Placeholder editor values that are excluded from the per-editor statistics
ignore_editors = ["TBD"]
# Distinct editor values found in the reviews table
editors = reviews_df["editor"].unique()

In [13]:
# Number of edits per editor and quarter, as a one-column frame indexed by
# (editor, quarter). Placeholder editors are removed from the first index
# level; errors="ignore" keeps the cell from raising KeyError when a
# placeholder such as "TBD" does not occur in the data at all.
n_edits = (
    count_edits_by_quarter(reviews_df)["n_edits"]
    .to_frame()
    .drop(index=ignore_editors, level=0, errors="ignore")
)
n_edits

Unnamed: 0_level_0,Unnamed: 1_level_0,n_edits
editor,created_at,Unnamed: 2_level_1
Batalex,2024Q2,1
NimaSarajpoor,2024Q2,1
cmarmo,2024Q2,1
ctb,2024Q1,1
dhomeier,2024Q1,1
hamogu,2024Q2,2
isabelizimm,2024Q2,1
kellyrowland,2024Q3,1
lwasser,2019Q1,1
lwasser,2019Q4,1


In [14]:
# Shared axis domains, so that every per-editor plot uses the same scale

# Time axis: from the start of the quarter containing the earliest submission
# to the end of the current quarter
earliest_submission = reviews_df["created_at"].min()
min_date = earliest_submission.to_period("Q").start_time
current_quarter = pd.Timestamp.today().to_period("Q")
end_of_this_quarter = current_quarter.end_time
time_domain = (min_date, end_of_this_quarter)

# Count axis: from zero to the largest number of edits in any quarter
max_edits = n_edits.values.max()
n_edits_domain = (0, max_edits)

In [15]:
# Independent copy of the reviews table with `created_at` exposed as `Date`,
# the field name used by the chart encodings
edits = reviews_df.rename(columns={"created_at": "Date"}).copy()

In [16]:
# Build one bar chart per editor (edits per quarter), all sharing the same
# x and y domains, then stack them vertically.
charts = []
for editor in editors:
    if editor not in ignore_editors:
        # Rows of the edits table that belong to this editor
        editor_edits = edits[edits["editor"] == editor]

        x_encoding = alt.X("yearquarter(Date):T").scale(domain=time_domain)
        y_encoding = alt.Y(
            "count(package_name)", title="Number of edits per quarter"
        ).scale(domain=n_edits_domain)

        chart = (
            alt.Chart(editor_edits)
            .mark_bar()
            .encode(
                x=x_encoding,
                y=y_encoding,
                tooltip=["yearquarter(Date)", "count(package_name)"],
            )
            .properties(
                title=alt.TitleParams(text=f"{editor}", fontSize=18),
                width=600,
                height=200,
            )
        )
        charts.append(chart)

full_chart = alt.vconcat(*charts)
full_chart.show()