---
title: Current Software Review Stats
subtitle: pyOpenSci Peer Review Summary Stats
license:
  code: BSD-3-Clause
---

This is a workflow that collects all GitHub issues associated with our reviews.

In [1]:
# https://github.com/ryantam626/jupyterlab_code_formatter
# TODO: calculate days open in the script that grabs the data!!
import os
import warnings
from datetime import datetime, timezone

import altair as alt
import pandas as pd
import pytz
from pyosmeta import ProcessIssues
from pyosmeta.github_api import GitHubAPI

from pyosmetrics.functions import count_edits_by_quarter

# Suppress all warnings
warnings.filterwarnings("ignore")
# Make tables nicer
pd.set_option("display.max_colwidth", None)
today = datetime.now(timezone.utc)

In [2]:
# Get all current reviews
reviews = pd.read_csv(
    "../../_data/review_submissions.csv",
    parse_dates=["date_opened", "date_closed", "last_comment_date"],
)
reviews["date_opened"] = reviews["date_opened"]
reviews["last_comment_date"] = reviews["last_comment_date"]
reviews = reviews.rename(
    columns={
        "package_name": "Name",
        "date_opened": "Date Opened",
        "date_closed": "Date Closed",
        "issue_num": "Issue",
        "description": "Description",
        "categories": "Categories",
        "last_comment_date": "Last Comment",
        "last_comment_user": "Last User to Comment",
    }
)

In [3]:
open_reviews = reviews[reviews["Date Closed"].isna()]
open_reviews.drop(columns=["Date Closed","Unnamed: 0"], inplace=True)
total_open = len(open_reviews)

In [4]:
open_reviews["Days Open"] = (today - open_reviews["Date Opened"]).dt.days
open_reviews["Date Opened"] = open_reviews["Date Opened"].dt.date
open_reviews["Last Comment"] = open_reviews["Last Comment"].dt.date

In [5]:
open_reviews["editor"]

0                  TBD
1                  TBD
2                  TBD
3                  TBD
4                  TBD
5                  TBD
6                  TBD
7                  TBD
8                  TBD
9           tkoyama010
10              cmarmo
12           yeelauren
13        kellyrowland
14             Batalex
16              cmarmo
17              hamogu
18         isabelizimm
19              cmarmo
20                 TBD
21                 TBD
22       NimaSarajpoor
24                 TBD
27                 ctb
28            dhomeier
30          zeitsperre
33    sneakers-the-rat
37         isabelizimm
40           yeelauren
Name: editor, dtype: object

## Current open reviews & total days open

pyOpenSci currently has **{eval}`total_open`** total open submissions.

## Packages that need editors

The packages below need an editor before the review can begin.

In [6]:
seeking_editor = open_reviews[
    open_reviews["labels"].str.contains("0/seeking-editor", na=False)
].copy()
seeking_editor.drop(
    columns=[ "labels", "status"], inplace=True
)
seeking_editor.reset_index(drop=True, inplace=True)

In [7]:
seeking_editor.style.set_properties(
    **{"text-align": "left", "white-space": "normal"}
)

Unnamed: 0,Name,editor,eic,Date Opened,Issue,Description,Categories,Last Comment,Last User to Comment,Days Open
0,pymultifit,TBD,coatless,2025-01-21,233,A python library for fitting data with multiple models.,['data-processing-munging'],2025-01-31,syedalimohsinbukhari,16
1,BlockingPy,TBD,coatless,2025-01-09,232,Blocking records for record linkage and deduplication with Approximate Nearest Neighbor algorithms.;,['data-processing-munging'],2025-01-21,T-Strojny,28
2,PIVA,TBD,coatless,2025-01-04,231,Visualization and analysis toolkit for experimental data from Angle-Resolved Photoemission Spectroscopy (ARPES),"['data-extraction', 'data-visualization']",2025-01-23,coatless,33
3,GREOPy,TBD,coatless,2024-12-21,227,Calculate relativistic light rays sent by an emitter to a receiver in the presence of a gravitational field.,['data-processing-munging'],2025-01-31,coatless,47
4,GALAssify,TBD,SimonMolinsky,2024-09-30,214,A Python package for visually classifying astronomical objects,"['data-validation-testing', 'data-visualization']",2024-12-02,SimonMolinsky,129
5,neonutilities,TBD,cmarmo,2024-09-25,213,neonutilities is a package for accessing and wrangling data generated and published by the National Ecological Observatory Network.,"['data-retrieval', 'data-processing-munging']",2024-12-09,cklunch,134
6,disdrodb,zeitsperre,isabelizimm,2024-01-18,156,disdrodb - A software for the decentralized archiving and standardization of global disdrometer data,"['data-retrieval', 'data-processing-munging', 'data-deposition']",2025-02-06,lwasser,385


## Seeking reviewers

These are reviews that are paused because we are searching for reviewers. 

In [8]:
seeking_reviewers = open_reviews[
    open_reviews["labels"].str.contains("2/seeking-reviewers", na=False)
].copy()
seeking_reviewers

Unnamed: 0,Name,editor,eic,Date Opened,labels,Issue,Description,Categories,status,Last Comment,Last User to Comment,Days Open
10,Solar Data Tools,cmarmo,cmarmo,2024-08-17,['2/seeking-reviewers'],210,Library of tools for analyzing photovoltaic power time-series data.,"['data-retrieval', 'data-extraction', 'data-processing-munging', 'data-visualization']",under-review,2025-02-04,cmarmo,173


In [9]:
presubmissions = pd.read_csv(
    "../../_data/review_presubmissions.csv",
    parse_dates=["date_opened", "date_closed", "last_comment_date"],
)
presubmissions["date_opened"] = presubmissions["date_opened"]
presubmissions["last_comment_date"] = presubmissions["last_comment_date"]
presubmissions = presubmissions.rename(
    columns={
        "package_name": "Name",
        "date_opened": "Date Opened",
        "date_closed": "Date Closed",
        "issue_num": "Issue",
        "description": "Description",
        "categories": "Categories",
        "last_comment_date": "Last Comment",
        "last_comment_user": "Last User to Comment",
    }
)

all_presubmissions = len(presubmissions)

In [10]:
# Get all currently open presubmissions
open_presubmissions = presubmissions[presubmissions["Date Closed"].isna()]
today = datetime.now(timezone.utc)
open_presubmissions["days_open"] = (
    today - open_presubmissions["Date Opened"]
).dt.days
open_presubmissions["Date Opened"] = open_presubmissions["Date Opened"].dt.date
open_presubmissions["Date Opened"] = open_presubmissions["Last Comment"].dt.date
open_presubmissions.reset_index(drop=True, inplace=True)

total_open = len(open_presubmissions)

# Cleanup
open_presubmissions.sort_values(by="Date Opened", ascending=False, inplace=True)
open_presubmissions.drop(columns=["Date Closed", "Unnamed: 0", "editor"], inplace=True)

## All presubmissions

There are **{eval}`all_presubmissions`** total presubmissions to date, including closed presubmissions.

## Currently open software presubmission inquiries

* It could be useful to grab the most recent comments on each
* It would also be useful to grab the gh usernames of all people involved in the discussion and credit them. So for one i see astropy editors + alex being involved.

There are **{eval}`len(open_presubmissions)` presubmission requests** currently open.


In [11]:
open_presubmissions

Unnamed: 0,Name,eic,Date Opened,labels,Issue,Description,Categories,status,Last Comment,Last User to Comment,days_open
1,VARGRAM,coatless,2025-02-06,"['presubmission', '⌛ pending-maintainer-response']",225,A Python visualization tool for genomic surveillance,"['data-processing-munging', 'data-visualization']",presubmission,2025-02-06 09:22:06+00:00,cjpalpallatoc,51
0,Litrepl,coatless,2025-02-05,"['presubmission', 'currently-out-of-scope']",226,A tool for code snippet evaluation in Markdown/LaTeX documents.,['workflow-automation'],presubmission,2025-02-05 08:00:21+00:00,coatless,50
2,c4dynamics,coatless,2025-02-05,"['presubmission', '⌛ pending-maintainer-response']",224,Python framework for algorithms of dynamic systems,[],presubmission,2025-02-05 07:38:25+00:00,coatless,52


## Editorial team status

In [12]:
# Static list of all editors, updated 7/13/2024
# Let's pull this list from unique people above who are active. 
# TODO: get this list of current editors dynamically
all_editors = [
    "cmarmo",
    "dhomeier",
    "ocefpaf",
    "NikleDave",
    "SimonMolinsky",
    "Batalex",
    "sneakers-the-rat",
    "tomalrussel",
    "ctb",
    "mjhajharia",
    "hamogu",
    "isabelizimm",
    "yeelauren",
    "banesullivan",
]
all_editors_df = pd.DataFrame({"editor": all_editors})
all_editors_df

Unnamed: 0,editor
0,cmarmo
1,dhomeier
2,ocefpaf
3,NikleDave
4,SimonMolinsky
5,Batalex
6,sneakers-the-rat
7,tomalrussel
8,ctb
9,mjhajharia


In [13]:
# Create table of editors who are currently assigned to an open submission
busy_editors = open_reviews.loc[open_reviews.editor != "TBD", ["editor"]].value_counts().reset_index()
busy_editors

Unnamed: 0,editor,count
0,cmarmo,3
1,isabelizimm,2
2,yeelauren,2
3,Batalex,1
4,NimaSarajpoor,1
5,ctb,1
6,dhomeier,1
7,hamogu,1
8,kellyrowland,1
9,sneakers-the-rat,1


In [14]:
all_editor_activity = all_editors_df.merge(busy_editors, on="editor", how="outer").fillna(0)

all_editor_activity["count"]= all_editor_activity["count"].astype(int)
all_editor_activity.sort_values(by="count", inplace=True)
all_editor_activity = all_editor_activity.reset_index(drop=True)

### Who is currently available? 

Below are editors that currently do not have submissions assigned to them.

In [15]:
# Get counts of available and unavailable editors
available_editors = all_editor_activity[all_editor_activity["count"] == 0]
all_busy_editors = all_editor_activity[all_editor_activity["count"] != 0]

num_available_editors = len(available_editors)
available_editors

Unnamed: 0,editor,count
0,NikleDave,0
1,tomalrussel,0
2,SimonMolinsky,0
3,banesullivan,0
4,ocefpaf,0
5,mjhajharia,0


## Available editors

There are currently **{eval}`len(available_editors)` available editors** and **{eval}`len(all_busy_editors)` editors who are assigned to atleast one software review**.

In [17]:
# Display editor table
all_editor_activity

Unnamed: 0,editor,count
0,NikleDave,0
1,tomalrussel,0
2,SimonMolinsky,0
3,banesullivan,0
4,ocefpaf,0
5,mjhajharia,0
6,Batalex,1
7,tkoyama010,1
8,sneakers-the-rat,1
9,kellyrowland,1


In [None]:
# TODO: calculate time that they were in review.

## Editors load

Next, we look at the load of each editor over time to get a sense of their time demands. 

NOTE: The data below represents all editors over time, not just currently active editors

In [18]:
# Get a list of all editors over time that have supported pyOpenSci
ignore_editors = ["TBD"]
# ignore lwasser and xmnlab to bring min date to a more recent date
ignore_editors += ["lwasser", "xmnlab"]
editors = [
    editor
    for editor in pd.unique(reviews.editor)
    if editor not in ignore_editors
]
editors

['tkoyama010',
 'cmarmo',
 'banesullivan',
 'yeelauren',
 'kellyrowland',
 'Batalex',
 'hamogu',
 'isabelizimm',
 'NimaSarajpoor',
 'dhomeier',
 'ctb',
 'zeitsperre',
 'ocefpaf',
 'sneakers-the-rat',
 'SimonMolinsky',
 'tomalrussell',
 'snacktavish',
 'arianesasso',
 'mjhajharia',
 'NickleDave',
 'jbencook',
 'jlpalomino',
 'luizirber']

In [19]:
n_edits = count_edits_by_quarter(reviews)["n_edits"].to_frame()


KeyError: 'n_edits'

In [None]:
for editor in ignore_editors:
    n_edits.drop(editor, inplace=True)
n_edits

In [None]:
edits = reviews_df.rename(columns=dict(created_at="Date")).copy()

In [None]:
charts = [
    alt.Chart(edits.loc[edits.editor == editor])
    .mark_bar(color="purple")
    .encode(
        x=alt.X("yearquarter(Date):T"),
        y=alt.Y("count(package_name)", title="Number of edits per quarter"),
        tooltip=["yearquarter(Date)", "count(package_name)"],
    )
    .properties(
        title=alt.TitleParams(
            text=f"{editor}",
            fontSize=18,
            orient="right",
            angle=0,
            align="right",
        ),
        width=600,
        height=200,
    )
    for editor in editors
]

full_chart = alt.vconcat(*charts).resolve_scale(x="shared", y="shared")
full_chart.show()