Method that analyses a Pandas dataframe of Project Releases. It will return the summary of the project's release info.

In [1]:
import numpy as np
from scipy.stats import gmean, kurtosis, skew

np.seterr(divide = 'ignore') 

def get_project_release_metrics(project_release_df, owner, repo_name, language):

    real_release_date_diffs = project_release_df["created_at"].diff().apply(lambda x: x/np.timedelta64(1, "D")).fillna(0).tolist()[1:]

    release_dates_diffs_min = min(real_release_date_diffs)
    release_dates_diffs_max = max(real_release_date_diffs)
    release_dates_diffs_mean = np.mean(real_release_date_diffs)

    try:
        release_dates_diffs_gmean = gmean(real_release_date_diffs)
    except:
        release_dates_diffs_gmean = None
    try:
        release_dates_diffs_skewness = skew(real_release_date_diffs)
    except:
        release_dates_diffs_skewness = None
    try:
        release_dates_diffs_kurtosis = kurtosis(real_release_date_diffs)
    except:
        release_dates_diffs_kurtosis = None
    try:    
        release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean
    except:
        release_dates_diffs_cv = None

    return {"owner": owner, "repo": repo_name, "release_count": len(project_release_df.index), "language": language, 
                        "release_dates_diffs": real_release_date_diffs, 
                    "release_dates_diffs_min": release_dates_diffs_min, "release_dates_diffs_max": release_dates_diffs_max, 
                    "release_dates_diffs_mean": release_dates_diffs_mean, "release_dates_diffs_gmean": release_dates_diffs_gmean, 
                    "release_dates_diffs_skewness": release_dates_diffs_skewness, 
                    "release_dates_diffs_kurtosis": release_dates_diffs_kurtosis, "release_dates_diffs_cv": release_dates_diffs_cv}


Collect the all projects with at least 5 releases using pandas DataFrames.

In [2]:
import pandas as pd
import glob
from datetime import datetime

languages = ["go", "java", "python", "ruby"]
releases_folder = "releases"

all_language_releases = []
real_language_releases = []
newer_real_lanaguage_releases_2021 = []
newer_real_lanaguage_releases_2022 = []

for language in languages:

    total_releases = 0
    more_than_2_releases = 0
    more_than_2_real_releases = 0
    more_than_2_newer_real_releases_2021 = 0
    more_than_2_newer_real_releases_2022 = 0

    # Grab all the files from within the folder, analyze the releases
    files = glob.glob(f"{releases_folder}/{language}/*_releases.jsonl")

    for file in files:

        total_releases+=1
        owner, repo_name = file.replace("_releases.jsonl", "").split(f"{releases_folder}/{language}\\")[1].split("_", 1)

        # Dataframe of all the releases
        all_releases = pd.read_json(file, lines=True).sort_values(by="created_at")
        # all_releases.sort_values(by=['created_at'])

        # Find the index of all prereleases then drop them from the dataframe - giving us real releases
        real_releases = all_releases.copy(deep=True)
        indexPrereleases = real_releases[ (all_releases['prerelease'] == True)].index
        real_releases.drop(indexPrereleases, inplace=True)

        # Find the index of all releases that are older than 2021 - then remove them from the dataset
        newer_releases = real_releases.copy(deep=True)
        old_indexes = newer_releases[(newer_releases['created_at'].dt.year < 2021)].index
        newer_releases.drop(old_indexes, inplace=True)

        # Find the index of all releases that are older than 2022 - then remove them from the dataset
        newer_releases_2022 = real_releases.copy(deep=True)
        old_indexes_2022 = newer_releases_2022[(newer_releases_2022['created_at'].dt.year < 2022)].index
        newer_releases_2022.drop(old_indexes_2022, inplace=True)

        # Collect metrics for all projects, and all releases - we need at least 2
        if len(all_releases.index) >= 2:
            more_than_2_releases+=1
            project_release_metrics = get_project_release_metrics(all_releases, owner, repo_name, language)
            all_language_releases.append(project_release_metrics)

        # Group metrics for all projects, with at least 2 "real releases" - no prereleases are considered for this grouping
        if len(real_releases.index) >= 2:
            more_than_2_real_releases+= 1
            
            project_release_metrics = get_project_release_metrics(real_releases, owner, repo_name, language)
            real_language_releases.append(project_release_metrics)

        if len(newer_releases.index) >= 2:
            more_than_2_newer_real_releases_2021+= 1

            project_release_metrics = get_project_release_metrics(newer_releases, owner, repo_name, language)
            newer_real_lanaguage_releases_2021.append(project_release_metrics)

        if len(newer_releases_2022.index) >= 2:
            more_than_2_newer_real_releases_2022+= 1

            project_release_metrics = get_project_release_metrics(newer_releases_2022, owner, repo_name, language)
            newer_real_lanaguage_releases_2022.append(project_release_metrics)

    print(f'{language} has a total of {total_releases} projects with releases.')
    print(f'{language} has {more_than_2_releases} projects, with more than 2 releases total.')
    print(f'{language} has {more_than_2_real_releases} projects, with more than 2 releases that are not prereleases.')
    print(f'{language} has {more_than_2_newer_real_releases_2021} projects, with more than 2 releases that are not prereleases, since 2021.')
    print(f'{language} has {more_than_2_newer_real_releases_2022} projects, with more than 2 releases that are not prereleases, since 2022.')

  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean
  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean
  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean
  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean


go has a total of 593 projects with releases.
go has 559 projects, with more than 2 releases total.
go has 543 projects, with more than 2 releases that are not prereleases.
go has 358 projects, with more than 2 releases that are not prereleases, since 2021.
go has 243 projects, with more than 2 releases that are not prereleases, since 2022.


  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean
  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean


java has a total of 1317 projects with releases.
java has 1190 projects, with more than 2 releases total.
java has 1152 projects, with more than 2 releases that are not prereleases.
java has 617 projects, with more than 2 releases that are not prereleases, since 2021.
java has 367 projects, with more than 2 releases that are not prereleases, since 2022.


  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean
  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean
  release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean


python has a total of 2330 projects with releases.
python has 2085 projects, with more than 2 releases total.
python has 2012 projects, with more than 2 releases that are not prereleases.
python has 1203 projects, with more than 2 releases that are not prereleases, since 2021.
python has 749 projects, with more than 2 releases that are not prereleases, since 2022.
ruby has a total of 457 projects with releases.
ruby has 397 projects, with more than 2 releases total.
ruby has 394 projects, with more than 2 releases that are not prereleases.
ruby has 188 projects, with more than 2 releases that are not prereleases, since 2021.
ruby has 112 projects, with more than 2 releases that are not prereleases, since 2022.


In [3]:
def showDataFrame(releases_grouping):

    releases = pd.DataFrame(releases_grouping, columns=releases_grouping[0].keys())
    display(releases)
    return releases

all_releases = showDataFrame(all_language_releases)
real_releases = showDataFrame(real_language_releases)
real_releases_since_2021 = showDataFrame(newer_real_lanaguage_releases_2021)
real_releases_since_2022 = showDataFrame(newer_real_lanaguage_releases_2022)

Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,1dustindavis,gorilla,28,go,"[0.12423611111111112, 2.1910648148148146, 2.98...",0.002836,326.710486,44.178622,3.426886,2.426329e+00,4.980683,1.829152
1,8treenet,freedom,23,go,"[15.05900462962963, 8.676886574074073, 7.09667...",0.085775,278.917766,35.677011,13.702392,3.155394e+00,9.974249,1.662474
2,a8m,documentdb,5,go,"[111.67515046296296, 315.0367939814815, 31.587...",31.587199,664.260185,280.639832,164.832100,6.203533e-01,-1.155636,0.870864
3,abhi,libcni,3,go,"[93.98319444444445, 216.09219907407407]",93.983194,216.092199,155.037697,142.509772,0.000000e+00,-2.000000,0.393804
4,abiosoft,ishell,3,go,"[1091.229976851852, 0.02636574074074074]",0.026366,1091.229977,545.628171,5.363869,2.752415e-16,-2.000000,0.999952
...,...,...,...,...,...,...,...,...,...,...,...,...
4226,yoshoku,rumale,10,ruby,"[50.351226851851855, 12.845046296296296, 36.02...",12.845046,324.441875,65.737301,36.030668,2.249610e+00,3.440039,1.428417
4227,yujinakayama,guard-rubocop,12,ruby,"[3.795150462962963, 6.819120370370371, 44.1720...",3.795150,2754.523414,277.390703,30.969192,2.842263e+00,6.087186,2.824980
4228,yuki24,rambulance,17,ruby,"[2.067662037037037, 75.40505787037037, 12.6325...",1.480775,513.022454,174.705454,54.857229,8.270768e-01,-0.919225,1.050759
4229,zendesk,active_record_shards,10,ruby,"[17.87295138888889, 76.02751157407407, 131.086...",0.558657,131.086829,53.009042,31.050529,7.218432e-01,0.170475,0.675557


Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,1dustindavis,gorilla,3,go,"[277.06662037037034, 37.93743055555556]",37.937431,277.066620,157.502025,102.524122,-2.724347e-16,-2.000000,0.759131
1,8treenet,freedom,23,go,"[15.05900462962963, 8.676886574074073, 7.09667...",0.085775,278.917766,35.677011,13.702392,3.155394e+00,9.974249,1.662474
2,a8m,documentdb,5,go,"[111.67515046296296, 315.0367939814815, 31.587...",31.587199,664.260185,280.639832,164.832100,6.203533e-01,-1.155636,0.870864
3,abhi,libcni,3,go,"[93.98319444444445, 216.09219907407407]",93.983194,216.092199,155.037697,142.509772,0.000000e+00,-2.000000,0.393804
4,abiosoft,ishell,3,go,"[1091.229976851852, 0.02636574074074074]",0.026366,1091.229977,545.628171,5.363869,2.752415e-16,-2.000000,0.999952
...,...,...,...,...,...,...,...,...,...,...,...,...
4096,yoshoku,rumale,10,ruby,"[50.351226851851855, 12.845046296296296, 36.02...",12.845046,324.441875,65.737301,36.030668,2.249610e+00,3.440039,1.428417
4097,yujinakayama,guard-rubocop,12,ruby,"[3.795150462962963, 6.819120370370371, 44.1720...",3.795150,2754.523414,277.390703,30.969192,2.842263e+00,6.087186,2.824980
4098,yuki24,rambulance,17,ruby,"[2.067662037037037, 75.40505787037037, 12.6325...",1.480775,513.022454,174.705454,54.857229,8.270768e-01,-0.919225,1.050759
4099,zendesk,active_record_shards,10,ruby,"[17.87295138888889, 76.02751157407407, 131.086...",0.558657,131.086829,53.009042,31.050529,7.218432e-01,0.170475,0.675557


Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,1dustindavis,gorilla,2,go,[37.93743055555556],37.937431,37.937431,37.937431,37.937431,0.000000e+00,-3.000000,0.000000
1,8treenet,freedom,6,go,"[40.74811342592593, 69.89112268518518, 99.1161...",40.748113,278.917766,114.372296,91.884554,1.305288e+00,0.013935,0.738590
2,abhi,libcni,3,go,"[93.98319444444445, 216.09219907407407]",93.983194,216.092199,155.037697,142.509772,0.000000e+00,-2.000000,0.393804
3,abiosoft,ishell,2,go,[0.02636574074074074],0.026366,0.026366,0.026366,0.026366,0.000000e+00,-3.000000,0.000000
4,achannarasappa,ticker,47,go,"[3.146863425925926, 0.01, 0.5738773148148149, ...",0.002836,103.541759,12.533555,1.311554,2.333644e+00,4.266249,2.075749
...,...,...,...,...,...,...,...,...,...,...,...,...
2361,XeroAPI,xero-ruby,33,ruby,"[6.703368055555556, 5.041284722222223, 25.0371...",0.000000,76.875162,20.525638,0.000000,1.443763e+00,1.992260,0.855222
2362,xotahal,fastlane-plugin-semantic_release,3,ruby,"[232.67243055555556, 337.6452199074074]",232.672431,337.645220,285.158825,280.286878,-8.051372e-16,-2.000000,0.184060
2363,yoshoku,rumale,7,ruby,"[13.034502314814814, 30.746145833333333, 19.11...",13.034502,324.441875,82.069331,40.468766,1.623337e+00,0.862968,1.351707
2364,yuki24,rambulance,2,ruby,[144.2115162037037],144.211516,144.211516,144.211516,144.211516,0.000000e+00,-3.000000,0.000000


Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,achannarasappa,ticker,5,go,"[10.256284722222222, 60.02443287037037, 103.54...",10.256285,103.541759,46.676710,30.103997,4.407178e-01,-1.432272,0.821419
1,adjust,rmq,5,go,"[4.130902777777778, 2.0283680555555557, 161.80...",2.028368,161.802025,47.313001,13.034412,1.112311e+00,-0.702475,1.405982
2,aerospike,aerospike-client-go,12,go,"[0.020416666666666666, 0.0, 83.23706018518519,...",0.000000,83.237060,19.547164,0.000000,1.415755e+00,1.314876,1.258561
3,airbrake,gobrake,6,go,"[64.82053240740741, 21.4290625, 4.851111111111...",4.851111,74.097060,46.181097,31.856266,-4.727250e-01,-1.578934,0.599218
4,akamensky,argparse,3,go,"[5.904409722222222, 5.488900462962963]",5.488900,5.904410,5.696655,5.692865,-6.383988e-15,-2.000000,0.036470
...,...,...,...,...,...,...,...,...,...,...,...,...
1466,uploadcare,uploadcare-ruby,3,ruby,"[5.190798611111111, 89.80664351851851]",5.190799,89.806644,47.498721,21.590929,2.882347e-16,-2.000000,0.890717
1467,webtranslateit,webtranslateit,9,ruby,"[70.4894212962963, 100.06560185185185, 18.0344...",0.214398,100.065602,28.304683,9.793007,1.203767e+00,-0.173407,1.210033
1468,whomwah,rqrcode,2,ruby,[164.9725462962963],164.972546,164.972546,164.972546,164.972546,0.000000e+00,-3.000000,0.000000
1469,XeroAPI,xero-ruby,11,ruby,"[35.17753472222222, 6.931261574074074, 30.0672...",6.931262,50.090197,28.004525,25.032196,1.480967e-02,-0.180034,0.406329


In [4]:
from plotly.express import box

def showBoxPlot(release_df, plot_title):
    fig = box(release_df, x="language", y="release_dates_diffs_gmean", color="language", title=plot_title,
          labels={"language": "Language", "release_dates_diffs_gmean": "Geometric release interval mean"}, height=500, log_y=True)
    fig.update_xaxes(tickangle=30).update_traces(boxpoints=False)
    fig.show()

showBoxPlot(all_releases, "all_releases")
showBoxPlot(real_releases, "real_releases")
showBoxPlot(real_releases_since_2021, "real_releases_since_2021")
showBoxPlot(real_releases_since_2022, "real_releases_since_2022")

In [5]:
from plotly.express import violin

def plotViolin (release_df, plot_title):
    fig = violin(release_df, x="language", y="release_dates_diffs_gmean", color="language", title=plot_title,
             labels={"language": "Language", "release_dates_diffs_gmean": "Geometric release interval mean"}, height=500, log_y=False)
    fig.update_xaxes(tickangle=30)
    fig.show()

plotViolin(all_releases, "all_releases")
plotViolin(real_releases, "real_releases")
plotViolin(real_releases_since_2021, "real_releases_since_2021")
plotViolin(real_releases_since_2022, "real_releases_since_2022")

In [6]:
from plotly.express import box

def boxPlot(release_df, plot_title):
    fig_all = box(release_df, x="language", y="release_dates_diffs_gmean", color="language", title=plot_title,
          labels={"language": "Language", "release_dates_diffs_gmean": "Geometric release interval mean"}, height=500, log_y=True)
    fig_all.update_xaxes(tickangle=30)
    fig_all.show()

boxPlot(all_releases, "all_releases")
boxPlot(real_releases, "real_releases")
boxPlot(real_releases_since_2021, "real_releases_min_5_since_2021")
boxPlot(real_releases_since_2021, "real_releases_min_5_since_2022")

Now that we have analyzed the releases, lets categorize them based on their average release cadence.
We'll start looking the average for all real releases, in repositories that have at least 5 releases.

In [7]:
# Release categorization for the repositories with at least 5 real releases over their lifetime
very_rapid_limit = 7
rapid_limit = 30
modern_limit = 90

very_rapid_releases = real_releases[real_releases.release_dates_diffs_mean < very_rapid_limit]
rapid_releases = real_releases[(real_releases.release_dates_diffs_mean > very_rapid_limit) & (real_releases.release_dates_diffs_mean < rapid_limit)]
modern_releases = real_releases[(real_releases.release_dates_diffs_mean > rapid_limit) & (real_releases.release_dates_diffs_mean < modern_limit)]
slow_releases = real_releases[(real_releases.release_dates_diffs_mean > modern_limit)]

display(slow_releases)

Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,1dustindavis,gorilla,3,go,"[277.06662037037034, 37.93743055555556]",37.937431,277.066620,157.502025,102.524122,-2.724347e-16,-2.000000,0.759131
2,a8m,documentdb,5,go,"[111.67515046296296, 315.0367939814815, 31.587...",31.587199,664.260185,280.639832,164.832100,6.203533e-01,-1.155636,0.870864
3,abhi,libcni,3,go,"[93.98319444444445, 216.09219907407407]",93.983194,216.092199,155.037697,142.509772,0.000000e+00,-2.000000,0.393804
4,abiosoft,ishell,3,go,"[1091.229976851852, 0.02636574074074074]",0.026366,1091.229977,545.628171,5.363869,2.752415e-16,-2.000000,0.999952
5,abutaha,aws-es-proxy,7,go,"[92.51953703703704, 659.0286111111111, 498.567...",61.294468,659.028611,287.901985,202.390270,5.639021e-01,-1.254768,0.773000
...,...,...,...,...,...,...,...,...,...,...,...,...
4090,whomwah,rqrcode,5,ruby,"[597.832025462963, 111.99232638888888, 169.036...",111.992326,597.832025,260.958319,207.869047,1.107945e+00,-0.698698,0.750280
4092,wirecardBrasil,moip-sdk-ruby,7,ruby,"[733.8703125, 55.35835648148148, 16.7895601851...",4.019965,733.870312,201.870108,67.254115,1.336050e+00,0.273483,1.264354
4097,yujinakayama,guard-rubocop,12,ruby,"[3.795150462962963, 6.819120370370371, 44.1720...",3.795150,2754.523414,277.390703,30.969192,2.842263e+00,6.087186,2.824980
4098,yuki24,rambulance,17,ruby,"[2.067662037037037, 75.40505787037037, 12.6325...",1.480775,513.022454,174.705454,54.857229,8.270768e-01,-0.919225,1.050759


In [8]:
import plotly.graph_objects as go

presentation_colors = ['rgb(192, 192, 192)', 'rgb(132, 200, 226)', 'rgb(0, 57, 94)','rgb(49, 57, 77)']
language_labels = ['<b>Rapid+</b>', '<b>Rapid</b>', '<b>Modern</b>', '<b>Slow</b>']

def release_cadence_categorized_across_languages(title, very_rapid_df, rapid_df, modern_df, slow_df, width_param):
    
    values = [ len(very_rapid_df.index), len(rapid_df.index), len(modern_df.index), len(slow_df.index)]

    fig = go.Figure(data=[go.Pie(labels=language_labels, values=values, hole=.3, marker_colors=presentation_colors)])
    fig.update_layout(title_text=f'<b>{title}</b>', title_font_family="Times New Roman", font_family="Times New Roman", font=dict(
            family="Times New Roman, monospace",
            size=18,
            color="Black"
        ))
    fig.update_layout(margin=dict(t=70, b=0, l=0, r=0))
    fig.update_layout(height=600, width=width_param)
    fig.show()

release_cadence_categorized_across_languages('Release Cadence Categorized Across Languages - All Time', very_rapid_releases, 
    rapid_releases, modern_releases, slow_releases, 700)

Lanaguage Specific Breakdown of the Release Cadence Categorization

In [9]:
from plotly.subplots import make_subplots
def language_categorization_values(very_rapid_df, rapid_df, modern_df, slow_df, language):
    return [ len(very_rapid_df[(very_rapid_df.language == language)].index), len(rapid_df[(rapid_df.language == language)].index), 
        len(modern_df[(modern_df.language == language)].index), len(slow_df[(slow_df.language == language)].index)]

def release_cadence_categorized_by_language(title, very_rapid_df, rapid_df, modern_df, slow_df, width_param):

    java_values = language_categorization_values(very_rapid_df, rapid_df, modern_df, slow_df, "java")
    ruby_values = language_categorization_values(very_rapid_df, rapid_df, modern_df, slow_df, "ruby")
    go_values = language_categorization_values(very_rapid_df, rapid_df, modern_df, slow_df, "go")
    python_values = language_categorization_values(very_rapid_df, rapid_df, modern_df, slow_df, "python")

    # 2 x 2 subplot
    #fig = make_subplots(rows=2, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}], [{'type':'domain'}, {'type':'domain'}]], subplot_titles=['Java', 'Ruby', 'Go', 'Python'])

    # 4 x 1 subplot
    fig = make_subplots(rows=1, cols=4, specs=[[{'type':'domain'}, {'type':'domain'},{'type':'domain'}, {'type':'domain'}]], subplot_titles=['Java', 'Ruby', 'Go', 'Python'])


    fig.update_layout(title_text='<b>Release Cadence Categorized Across Languages</b>', title_font_family="Times New Roman", font_family="Times New Roman", font=dict(
            family="Times New Roman, monospace",
            size=18,
            color="Black"
        ))

    fig.add_trace(go.Pie(labels=language_labels, values=java_values, hole=.3, name="<b>Java</b>", marker_colors=presentation_colors), 1, 1)
    fig.add_trace(go.Pie(labels=language_labels, values=ruby_values, hole=.3, name="<b>Ruby</b>", marker_colors=presentation_colors), 1, 2)
    fig.add_trace(go.Pie(labels=language_labels, values=go_values, hole=.3, name="<b>Go</b>", marker_colors=presentation_colors), 1, 3)
    fig.add_trace(go.Pie(labels=language_labels, values=python_values, hole=.3, name="<b>Python</b>", marker_colors=presentation_colors), 1, 4)

    fig.update_layout(title_text=f'<b>{title}</b>', title_font_family="Times New Roman", font_family="Times New Roman", font=dict(
            family="Times New Roman, monospace",
            size=16,
            color="Black"))
    fig.update_layout(margin=dict(t=70, b=0, l=20, r=10))
    fig.update_layout(height=500, width=width_param)

    fig.show()

release_cadence_categorized_by_language('Release Cadence Categorized by Language - All Time', very_rapid_releases,
    rapid_releases, modern_releases, slow_releases, 1400)

Now we've successfully categorized the projects - what if we only looked at the releases in the last 2 years? CI/CD has become more widely used in the last 2 years, lets see if we can see a trend

Lets see the analysis for the last 2 years worth of releases.

In [10]:
# Find the index of all releases that are older than 2021 - then remove them from the dataset
very_rapid_releases_2021 = real_releases_since_2021[real_releases_since_2021.release_dates_diffs_mean < very_rapid_limit]
rapid_releases_2021 = real_releases_since_2021[(real_releases_since_2021.release_dates_diffs_mean > very_rapid_limit) & (real_releases_since_2021.release_dates_diffs_mean < rapid_limit)]
modern_releases_2021 = real_releases_since_2021[(real_releases_since_2021.release_dates_diffs_mean > rapid_limit) & (real_releases_since_2021.release_dates_diffs_mean < modern_limit)]
slow_releases_2021 = real_releases_since_2021[(real_releases_since_2021.release_dates_diffs_mean > modern_limit)]

#display(very_rapid_releases_2021)

release_cadence_categorized_across_languages('Release Cadence Categorized Across Languages - Since 2021',
    very_rapid_releases_2021, rapid_releases_2021, modern_releases_2021, slow_releases_2021, 750)

release_cadence_categorized_by_language('Release Cadence Categorized by Language - Since 2021',
    very_rapid_releases_2021, rapid_releases_2021, modern_releases_2021, slow_releases_2021, 1400)

In [11]:
rapid_plus_releases_2022 = real_releases_since_2022[real_releases_since_2022.release_dates_diffs_mean < very_rapid_limit]
rapid_releases_2022 = real_releases_since_2022[(real_releases_since_2022.release_dates_diffs_mean > very_rapid_limit) & (real_releases_since_2022.release_dates_diffs_mean < rapid_limit)]
modern_releases_2022 = real_releases_since_2022[(real_releases_since_2022.release_dates_diffs_mean > rapid_limit) & (real_releases_since_2022.release_dates_diffs_mean < modern_limit)]
slow_releases_2022 = real_releases_since_2022[(real_releases_since_2022.release_dates_diffs_mean > modern_limit)]

#display(very_rapid_releases_2022)

release_cadence_categorized_across_languages('Release Cadence Categorized Across Languages - Since 2022',
    rapid_plus_releases_2022, rapid_releases_2022, modern_releases_2022, slow_releases_2022, 750)

release_cadence_categorized_by_language('Release Cadence Categorized by Language - Since 2022',
    rapid_plus_releases_2022, rapid_releases_2022, modern_releases_2022, slow_releases_2022, 1400)

Write all the categorized releases to a CSV file so they can be compared against the biterms.

In [12]:
import os

pd.options.mode.chained_assignment = None
cadence = 'cadence'
release_categorization_folder = 'release_categorization'

def print_releases_to_file(file_name, rapid_plus_df, rapid_df, modern_df, slow_df):

    all_releases_file = f'{release_categorization_folder}/{file_name}'

    rapid_plus_df[cadence] = 'rapid+'
    rapid_df[cadence] = 'rapid'
    modern_df[cadence] = 'modern'
    slow_df[cadence] = 'slow'

    all_releases_results_df = pd.concat([rapid_plus_df, rapid_df, modern_df, slow_df])

    headers_to_print = ["owner", "repo", "release_count", "language", "cadence"]

    if not os.path.exists(release_categorization_folder):
        os.makedirs(release_categorization_folder)

    if os.path.exists(all_releases_file):
        os.remove(all_releases_file)

    all_releases_results_df.to_csv(all_releases_file, columns=headers_to_print)

print_releases_to_file('all_releases_results.csv', very_rapid_releases, rapid_releases, modern_releases, slow_releases)


Write the 2021, and 2022 categorized releases to a CSV file so they can be compared against the biterms.

In [13]:
print_releases_to_file('releases_results_2021.csv', very_rapid_releases_2021, rapid_releases_2021, modern_releases_2021, slow_releases_2021)
print_releases_to_file('releases_results_2022.csv', rapid_plus_releases_2022, rapid_releases_2022, modern_releases_2022, slow_releases_2022)