diff --git a/.github/workflows/update-pr-data.yml b/.github/workflows/update-pr-data.yml index 3710323..6e016d5 100644 --- a/.github/workflows/update-pr-data.yml +++ b/.github/workflows/update-pr-data.yml @@ -45,8 +45,14 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.PROJECTS_READ }} run: python scripts/get-sprint-data.py - #- name: Update editors - # run: python scripts/get-editors.py + - name: Update editors + if: github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + ( github.event_name == 'pull_request' && + github.event.pull_request.head.repo.full_name == github.repository ) + env: + GITHUB_TOKEN: ${{ secrets.PYOS_GHA_TEAMS_READ }} + run: python scripts/get-editors.py - name: get-review-contributors run: python scripts/get-review-contributors.py - name: get-package-data diff --git a/_data/editorial_team_domains.csv b/_data/editorial_team_domains.csv index 9e2e332..6c2849d 100644 --- a/_data/editorial_team_domains.csv +++ b/_data/editorial_team_domains.csv @@ -1,18 +1,18 @@ -gh_username,active,first_name,last_name,country,state,OS,Domain_areas,Description,technical_areas -ab93,yes,Avik,Basu,United States,California,"Mac, Linux","NLP, text analysis, Linguistics, Mathematics, Statistics, ML, AI, Computer sciences, Education","Deep Learning, time series, industry data science, deep unsupervised learning, ML in finance ","Data visualization, Data munging, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Docker, Tool usability / accessibility, Python best practices" -banesullivan,yes,Bane,Sullivan,United States,California,Mac,"Spatial data, spatial analysis, GIS, Geosciences / earth science, 3D visualization","Remote sensing of the environment and subsurface, developer advocacy, data science, 3D visualization","Data visualization, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Docker, Tool 
usability / accessibility" -batalex,yes,Alexandre,Batisse,France,,,"Statistics, ML, AI, Computer sciences, Bioinformatics","I work as a Data Scientist on health care data. I conduct epidemiology studies and maintain private packages (analytics, dataviz).","Data visualization, Data extraction & retrieval, Data munging, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Docker" -cmarmo,yes,Chiara,Marmo,United States,Hawaii,Linux,"Spatial data, spatial analysis, GIS, Space sciences, Geosciences / earth science, Astronomy","Data processing in Astronomy, Planetary Sciences, Geospatial data. Standard development, interoperability.","Data extraction & retrieval, Data munging, Data deposition, Documentation quality, Continuous Integration" -coatless,yes,James,Balamuta,United States,California,"Mac, Linux","NLP, text analysis, Spatial data, spatial analysis, GIS, Mathematics, Statistics, ML, AI, Computer sciences, Bioinformatics, Education","Latent variable modeling, restricted latent class models, deep learning, computational statistics, psychometrics, item response theory, biostatistics, genomics","Data visualization, Data extraction & retrieval, Data munging, Data deposition, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Web scraping, Security, Docker, Tool usability / accessibility" -crhea93,yes,Carter,Rhea,Canada,,,"NLP, text analysis, Spatial data, spatial analysis, GIS, Physics, Mathematics, Statistics, ML, AI, Computer sciences, Hydrology, Space sciences, Geosciences / earth science",Astronomical Image and Spectral Pipeline and Analysis,"Data visualization, Data extraction & retrieval, Documentation quality, Unit Testing, Web API's, Tool usability / accessibility" -ctb,yes,Titus,,,,,,, -eliotwrobson,yes,Eliot,Robson,United States,Illinois,"Windows, Linux","Mathematics, Computer sciences, Education","Algorithms, 
specifically involving randomness, geometry, and graph theory.","Data visualization, Data extraction & retrieval, Data munging, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming" -hamogu,yes,Hans Moritz,Günther,United States,MA,"Mac, Linux","Physics, Astronomy","Astronomy wit ha focus on star formation and high-energy observations, also instrument development","Data visualization, Data extraction & retrieval, Data munging, Data deposition, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming" -haozeke,yes,Rohit,Goswami,Switzerland,Vaud,,"Physics, Chemistry, Mathematics, Statistics, ML, AI","Transition state searches, kinetic monte carlo, excited state calculations, heavy element (relativistic) calculations, Gaussian Process Regression, Bayesian Hierarchical models, Numerical lasing studies, molecular dynamics","Data visualization, Data extraction & retrieval, Data munging, Data deposition, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Docker" -jonas-eschle,yes,Jonas,Eschle,Switzerland,Geneva,,"Physics, Statistics, ML, AI",statistical analysis of physics at CERN,"Data visualization, Data extraction & retrieval, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming" -julimillan,Yes,Julieta,Millan,Argentina,Buenos Aires,,"Statistics, ML, AI, Ecology / Biology","Biology, neuroscience, industry data science","Data visualization, Data extraction & retrieval, Python package structure, Documentation quality, Object oriented programming, Tool usability / accessibility" -mjhajharia,,Meenal,,,,,,, -simonmolinsky,yes,Simon,Molinsky,,,,,, -slobentanzer,,Sebastian,Lobentanzer,,,,,, -tkoyama010,yes,Tetsuo,Koyama,Japan,Tokyo,Linux,"Physics, Mathematics",Scientific computing,"Data visualization, Python package structure, 
Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Tool usability / accessibility" -yeelauren,yes,Lauren,Yee,Canada,Ontario,"Windows, Mac, Linux","Spatial data, spatial analysis, GIS, Statistics, ML, AI, Ecology / Biology, Epidemiology, Geosciences / earth science","data scientist, consultant, machine learning and remote sensing, ecology based projects, computer vision, deep learning","Data visualization, Data extraction & retrieval, Data munging, Documentation quality, Web scraping, Docker, Tool usability / accessibility" \ No newline at end of file +gh_username,active,first_name,last_name,country,state,OS,Domain_areas,Description,technical_areas +ab93,yes,Avik,Basu,United States,California,"Mac, Linux","NLP, text analysis, Linguistics, Mathematics, Statistics, ML, AI, Computer sciences, Education","Deep Learning, time series, industry data science, deep unsupervised learning, ML in finance ","Data visualization, Data munging, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Docker, Tool usability / accessibility, Python best practices" +banesullivan,yes,Bane,Sullivan,United States,California,Mac,"Spatial data, spatial analysis, GIS, Geosciences / earth science, 3D visualization","Remote sensing of the environment and subsurface, developer advocacy, data science, 3D visualization","Data visualization, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Docker, Tool usability / accessibility" +batalex,yes,Alexandre,Batisse,France,,,"Statistics, ML, AI, Computer sciences, Bioinformatics","I work as a Data Scientist on health care data. 
I conduct epidemiology studies and maintain private packages (analytics, dataviz).","Data visualization, Data extraction & retrieval, Data munging, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Docker" +cmarmo,yes,Chiara,Marmo,United States,Hawaii,Linux,"Spatial data, spatial analysis, GIS, Space sciences, Geosciences / earth science, Astronomy","Data processing in Astronomy, Planetary Sciences, Geospatial data. Standard development, interoperability.","Data extraction & retrieval, Data munging, Data deposition, Documentation quality, Continuous Integration" +coatless,yes,James,Balamuta,United States,California,"Mac, Linux","NLP, text analysis, Spatial data, spatial analysis, GIS, Mathematics, Statistics, ML, AI, Computer sciences, Bioinformatics, Education","Latent variable modeling, restricted latent class models, deep learning, computational statistics, psychometrics, item response theory, biostatistics, genomics","Data visualization, Data extraction & retrieval, Data munging, Data deposition, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Web scraping, Security, Docker, Tool usability / accessibility" +crhea93,yes,Carter,Rhea,Canada,,,"NLP, text analysis, Spatial data, spatial analysis, GIS, Physics, Mathematics, Statistics, ML, AI, Computer sciences, Hydrology, Space sciences, Geosciences / earth science",Astronomical Image and Spectral Pipeline and Analysis,"Data visualization, Data extraction & retrieval, Documentation quality, Unit Testing, Web API's, Tool usability / accessibility" +ctb,yes,Titus,,,,,,, +eliotwrobson,yes,Eliot,Robson,United States,Illinois,"Windows, Linux","Mathematics, Computer sciences, Education","Algorithms, specifically involving randomness, geometry, and graph theory.","Data visualization, Data extraction & retrieval, Data munging, Python package structure, Documentation 
quality, Unit Testing, Continuous Integration, Object oriented programming" +hamogu,yes,Hans Moritz,Günther,United States,MA,"Mac, Linux","Physics, Astronomy","Astronomy wit ha focus on star formation and high-energy observations, also instrument development","Data visualization, Data extraction & retrieval, Data munging, Data deposition, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming" +haozeke,yes,Rohit,Goswami,Switzerland,Vaud,,"Physics, Chemistry, Mathematics, Statistics, ML, AI","Transition state searches, kinetic monte carlo, excited state calculations, heavy element (relativistic) calculations, Gaussian Process Regression, Bayesian Hierarchical models, Numerical lasing studies, molecular dynamics","Data visualization, Data extraction & retrieval, Data munging, Data deposition, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Docker" +jonas-eschle,yes,Jonas,Eschle,Switzerland,Geneva,,"Physics, Statistics, ML, AI",statistical analysis of physics at CERN,"Data visualization, Data extraction & retrieval, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming" +julimillan,Yes,Julieta,Millan,Argentina,Buenos Aires,,"Statistics, ML, AI, Ecology / Biology","Biology, neuroscience, industry data science","Data visualization, Data extraction & retrieval, Python package structure, Documentation quality, Object oriented programming, Tool usability / accessibility" +mjhajharia,,Meenal,,,,,,, +simonmolinsky,yes,Simon,Molinsky,,,,,, +slobentanzer,,Sebastian,Lobentanzer,,,,,, +tkoyama010,yes,Tetsuo,Koyama,Japan,Tokyo,Linux,"Physics, Mathematics",Scientific computing,"Data visualization, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Tool usability / accessibility" +yeelauren,yes,Lauren,Yee,Canada,Ontario,"Windows, 
Mac, Linux","Spatial data, spatial analysis, GIS, Statistics, ML, AI, Ecology / Biology, Epidemiology, Geosciences / earth science","data scientist, consultant, machine learning and remote sensing, ecology based projects, computer vision, deep learning","Data visualization, Data extraction & retrieval, Data munging, Documentation quality, Web scraping, Docker, Tool usability / accessibility" diff --git a/_data/emeritus_editor_domains.csv b/_data/emeritus_editor_domains.csv index f4a06e7..01ffef7 100644 --- a/_data/emeritus_editor_domains.csv +++ b/_data/emeritus_editor_domains.csv @@ -1,5 +1,8 @@ gh_username,active,first_name,last_name,country,state,OS,Domain_areas,Description,technical_areas -isabelizimm,yes,Isabel,Zimmerman,USA,Florida,Mac,"Statistics, ML, AI, Computer sciences",building IDEs and MLOps Python frameworks,"Data visualization, Data extraction & retrieval, Python package structure, Documentation quality, Continuous Integration" -lwasser,,,,,,,,, -nimasarajpoor,,,,,,,,, -sneakers-the-rat,yes,Jonny,Saunders,US,CA,"Mac, Linux","NLP, text analysis, Linguistics, Ecology / Biology, Bioinformatics, Bibliometrics (Library science, scientific literature access), Social Sciences","Formerly auditory neuroscience, animal models of phonetics. Realtime experimental hardware/software, low-resource computing, data modeling and schema creation. Currently peer-to-peer social/data systems.","Data visualization, Data extraction & retrieval, Data munging, Data deposition, Python package structure, Documentation quality, Unit Testing, Continuous Integration, Object oriented programming, Web API's, Web scraping, Security, Tool usability / accessibility" +cmarmo,False,Chiara,Marmo,United States,Hawaii,Linux,"Spatial data, spatial analysis, GIS, Space sciences, Geosciences / earth science, Astronomy","Data processing in Astronomy, Planetary Sciences, Geospatial data. 
Standard development, interoperability.","Data extraction & retrieval, Data munging, Data deposition, Documentation quality, Continuous Integration" +isabelizimm,False,,,,,,,, +lwasser,False,,,,,,,, +nickledave,False,,,,,,,, +nimasarajpoor,False,,,,,,,, +sneakers-the-rat,False,,,,,,,, +xmnlab,False,,,,,,,, diff --git a/peer-review/editorial-dashboard.qmd b/peer-review/editorial-dashboard.qmd index 93b40ea..14ab0c7 100644 --- a/peer-review/editorial-dashboard.qmd +++ b/peer-review/editorial-dashboard.qmd @@ -109,17 +109,22 @@ active_editor_counts = ( .rename_axis("gh_username") .reset_index(name="count") ) + ``` ```{python} # Open editor team data # The total list is derived from the scripts/editors.py script which parses the (active) editorial team data. +# The data opened in this section is collected using the scripts/get-editors.py workflow. editor_path = Path.cwd().parents[0] / "_data" / "editorial_team_domains.csv" all_editors = pd.read_csv(editor_path) all_editors = all_editors[["gh_username","first_name","Domain_areas","Description","technical_areas"]] +emeritus_editor_path = Path.cwd().parents[0] / "_data" / "emeritus_editor_domains.csv" +emeritus_editors = pd.read_csv(emeritus_editor_path) + editor_activity = ( all_editors.merge(active_editor_counts, on="gh_username", how="left") .fillna({"count": 0}) @@ -133,15 +138,31 @@ busy_editors = editor_activity[editor_activity["count"] != 0] num_available_editors = len(available_editors) # Rename and cleanup -editor_activity=editor_activity.rename(columns={"count": "active review count"}) +editor_activity = editor_activity.rename(columns={"count": "active review count"}) col = editor_activity.pop("active review count") editor_activity.insert(1, "active review count", col) -``` +# Next, compare editor activity to editors that want to offboard after that review +offboard_usernames = emeritus_editors["gh_username"] +active_editors = open_reviews["editor"].dropna() +active_editors = active_editors[active_editors != 
"TBD"] +active_editors = active_editors.astype(str).str.split(r"[ ,&]", n=1).str[0] + +# TODO: note that blockingpy by data need to be updated - carter should be first here... +# TODO - filter reviews df by editors that are in the emeritus df (emeritus_editors) +offboard_usernames = emeritus_editors["gh_username"] + +# This doesn't work - pick up here +# offboard_reviews = reviews[editor.isin(offboard_usernames)].copy() +# offboard_reviews + +# Active editors are ones that are currently leading reviews. But some may intend to offboard after. Generate a list of reviews lead by those +offboarding_editor_reviews = open_reviews[open_reviews["editor"].isin(offboard_usernames)].copy() +``` -## Row {height=.5%} +## Row {height=auto} ```{python} #| content: valuebox @@ -176,29 +197,42 @@ dict( ) ``` -## Row {height=1%} +## Row {height=5%} + ```{python} -#| title: "pyOpenSci Active Editorial Team" +#| title: "pyOpenSci Current Active Editorial Review Counts" show(editor_activity) ``` -## Row {height=1%} +## Row {height=auto} + +```{python} +#| title: "Editors Leading Reviews That Are or Have Offboarded After" + +# TODO: The table below is a good start but it should capture AMS lead by Nima as well. And the one that Chiara still has opened. +print("Below are reviews that the peer review lead and Editor in Chief should watch. 
We may need to assign an additional editor the ensure these reviews moves forward.") + +offboarding_editor_reviews = offboarding_editor_reviews.drop(columns=["date_accepted", "Categories"], errors="ignore") +offboarding_editor_reviews["Date Opened"] = pd.to_datetime(offboarding_editor_reviews["Date Opened"]).dt.strftime("%Y-%b-%d") + +offboarding_editor_reviews + +``` + +## Row {height=auto} ```{python} #| title: "Busy editors running reviews " show(busy_editors) ``` -## Row {height=.8%} +## Row {height=auto} ```{python} #| title: "Available Editors" show(available_editors) ``` - - ```{python} -# TODO: make this focus only on current open reviews vs all reviews over time. # Get a list of all editors over time that have supported pyOpenSci ignore_editors = ["TBD"] @@ -234,23 +268,23 @@ edits = reviews.rename(columns={"Date Opened": "Date"}).copy() ```{python} -# TODO: If this uses open_reviews it's only showing current load -# if it uses the reviews df it's showing reviews all time 2019 to present. 
open_reviews has a slightly different structure +# Ensure datetime and a clean quarter label +# 2023 is when we started running again with funding edits = reviews[["editor", "Name", "Date Opened"]] edits = edits.rename(columns={"Date Opened": "Date", "Name":"package_name"}) edits = edits[edits["editor"] != "TBD"] +edits["Date"] = pd.to_datetime(edits["Date"]) +edits["Year"] = edits["Date"].dt.year +edits = edits[edits["Year"] >= 2023] ``` -## Editor availability +## Editor Activity by Quarter -## Row {height=6%} +## Row {height=auto} ```{python} -# Cleanup -# Ensure datetime and a clean quarter label -edits["Date"] = pd.to_datetime(edits["Date"]) -edits["Year"] = edits["Date"].dt.year +# Add quarter counts edits["QuarterNum"] = edits["Date"].dt.quarter edits["QuarterLabel"] = edits["Year"].astype(str) + " Q" + edits["QuarterNum"].astype(str) @@ -276,6 +310,7 @@ df_full = ( .reindex(full_index, fill_value=0) .reset_index() ) + ``` ```{python} @@ -285,7 +320,7 @@ df["QuarterLabel"] = pd.Categorical(df["QuarterLabel"], categories=quarter_order facet_wrap = 2 num_editors = len(df["editor"].unique()) num_rows = (num_editors + facet_wrap - 1) // facet_wrap -row_height = 400 +row_height = 300 fig = px.bar( df, @@ -296,8 +331,8 @@ fig = px.bar( facet_col_spacing=0.06, facet_col_wrap=facet_wrap, color_discrete_sequence=["indigo"], - labels={"count": "Number of edits", "QuarterLabel": "Quarter"}, - title="Editor Activity by Quarter (Current Editor Team)", + labels={"count": "Number of edits"}, + title="Review Count by Quarter (Since 2023)", height=row_height * num_rows, width=1200, ) @@ -308,16 +343,16 @@ fig = fig.for_each_annotation( fig = fig.update_xaxes( tickangle=45, tickfont=dict(size=10), - title_text="Quarter", showticklabels=True ) fig = fig.update_yaxes( dtick=1, tickformat=",d", - title_text="Number of edits", + title_text="Review Count", range=[0, 4] ) fig = fig.update_layout( + xaxis_title="", showlegend=False, margin=dict(t=80), title_font_size=24, @@ -326,3 
+361,9 @@ fig = fig.update_layout( fig.show() ``` + +## Row {height=auto} + +## Summary + +This is the end of the dashboard. \ No newline at end of file diff --git a/scripts/get-editors.py b/scripts/get-editors.py index 405f089..7207323 100644 --- a/scripts/get-editors.py +++ b/scripts/get-editors.py @@ -1,23 +1,34 @@ """This script updates our editorial team csv file with the most current editors. -1. It parses a partially manually created list of editors found int he csv -file: `_data/editorial_team_domains`. This csv was initially created by -manually adding editor names to the file with domain areas from our google sheet. -The (private) google sheet collects what domains they can support when they -apply to be an editor -2. It then hits the github api to return the list of gh usernames from the editorial team on GitHub -When we onboard a new editor, we add them to that team so they have proper permissions in repos in our org. -The GitHub team data are grabbed using graphQL. - -3. Finally, this script merges the data parsed from the team with the csv file. - -The output is a csv file called _data/editorial_team_domains.csv that can be -used to parse editor data. +1. It parses a manually created list of editors found in the csv +file: `_data/editorial_team_domains.csv`. This csv was created by +manually adding editor names to the file with domain areas from our Google Sheet. +The (private) Google Sheet is generated from a Google Form that collects editor +expertise and domains when they apply to be an editor with us. +2. The script uses the GitHub API to return the list of GitHub usernames from +the editorial team on GitHub (see): + * https://github.com/orgs/pyOpenSci/teams/emeritus-editors/members + * https://github.com/orgs/pyOpenSci/teams/editorial-board/members +When we onboard a new editor, we add them to the editorial-board GitHub team so +they have proper permissions in repositories in our organization. 
When an editor +wishes to step down, we move them to the emeritus-editors team. However, they +may still be active in reviews so we keep them on the editorial-board team until +they have completed all of their reviews. + +The GitHub team data are collected using the GitHub GraphQL interface. + +3. Finally, this script merges the data parsed from the team with the .csv file. + +This script creates two .csv files. The _data/editorial_team_domains.csv contains +all currently "active" editors. _data/emeritus_editor_domains.csv contains +editors that are either fully offboarded or intend to offboard after their currently +active reviews. TODO: * it would be good to find a more automated way to get the domain data from our -google sheet. one way to do this would be to create a new spreadsheet that -pulls from our editor signup but only contains gh username and then the domain areas. +Google Sheet. One way to do this would be to create a new spreadsheet that +pulls from our editor signup but only contains gh username and then the domain +areas. """