In [None]:
# Notebook parameters.
# Name of the PyPI project whose download statistics are fetched and plotted.
pypi_name = "pyomo"
# Optional bounds of the reporting window. Both values are forwarded unchanged
# to pypistats.overall(); None presumably leaves that side unrestricted
# (confirm against the pypistats documentation).
start_date = end_date = None

# PyPI downloads

This Jupyter notebook fetches download statistics from PyPI, which is used to distribute the software, and plots them. PyPI reports downloads made by "mirrors" separately; these are duplicate copies of the software rather than real downloads, so we exclude them from the counts.

*Note: The statistics maintained by PyPI go back 180 days, so this script would need to be run periodically to track more than ~6 months of time.*


In [None]:
import numpy as np
import pandas as pd
import pypistats

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

## Fetch statistics from PyPI
Next, we fetch the data from PyPI using the `pypistats` package. This returns a pandas DataFrame that we manipulate to keep only the downloads without mirrors, sort them by date, and calculate the cumulative number of downloads.

In [None]:
# Ask pypistats for daily totals, returned as a pandas DataFrame.
overall_daily = pypistats.overall(
    pypi_name,
    total="daily",
    format="pandas",
    start_date=start_date,
    end_date=end_date,
)

# Keep only the rows whose category is "without_mirrors", then index the
# result by date and sort on that index so the running total below is
# accumulated in date order.
is_without_mirrors = overall_daily["category"].isin(["without_mirrors"])
data = overall_daily[is_without_mirrors].set_index("date").sort_index()
data = data.assign(cumulative_downloads=data["downloads"].cumsum())
data

## Generate plot
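Now we plot the data, with cumulative downloads on the primary y-axis and daily downloads on the secondary y-axis, and display the figure below. Note that the cell below only displays the figure; it does not write it to a file.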

In [None]:
def _make_multi_axes(
        df,
        x: str,
        y: str,
        y_secondary: str,
    ):
    """Build a two-trace line chart with ``y`` and ``y_secondary`` on separate y-axes.

    Args:
        df: Table supporting ``df[name]`` lookups for ``x``, ``y`` and
            ``y_secondary``.
        x: Name of the column supplying the x values shared by both traces.
        y: Name of the column drawn against the primary (non-secondary) y-axis.
        y_secondary: Name of the column drawn against the secondary y-axis.

    Returns:
        The figure created by ``make_subplots`` with both traces added.
    """
    figure = make_subplots(specs=[[{"secondary_y": True}]])
    shared_x = df[x]

    # Primary-axis series first, then the secondary-axis one; each trace is
    # labelled with the name of the column it shows.
    for column, use_secondary in ((y, False), (y_secondary, True)):
        figure.add_trace(
            go.Scatter(
                x=shared_x,
                y=df[column],
                mode="lines+markers",
                name=column,
            ),
            secondary_y=use_secondary,
        )
    return figure

# `data` is indexed by date (the fetch cell calls set_index("date")), so "date"
# is not a column and looking it up inside _make_multi_axes would raise a
# KeyError. reset_index() turns the index back into a regular "date" column
# for plotting only; `data` itself is left unchanged.
(
    data
    .reset_index()
    .pipe(
        _make_multi_axes,
        x="date",
        y="cumulative_downloads",
        y_secondary="downloads",
    )
    .update_layout(
        title=f"Download counts for package: {pypi_name}"
    )
)

In [None]:
# Temporarily lift pandas' row-display limit so every row of `data` is shown;
# the previous setting is restored when the context exits.
show_all_rows = pd.option_context("display.max_rows", None)
with show_all_rows:
    display(data)