Method that analyses a Pandas dataframe of Project Releases. It will return the summary of the project's release info.

In [26]:
import numpy as np
from scipy.stats import gmean, kurtosis, skew

np.seterr(divide = 'ignore') 

def get_project_release_metrics(project_release_df, owner, repo_name, language):

    real_release_date_diffs = project_release_df["created_at"].diff().apply(lambda x: x/np.timedelta64(1, "D")).fillna(0).tolist()[1:]

    release_dates_diffs_min = min(real_release_date_diffs)
    release_dates_diffs_max = max(real_release_date_diffs)
    release_dates_diffs_mean = np.mean(real_release_date_diffs)

    try:
        release_dates_diffs_gmean = gmean(real_release_date_diffs)
    except:
        release_dates_diffs_gmean = None
    try:
        release_dates_diffs_skewness = skew(real_release_date_diffs)
    except:
        release_dates_diffs_skewness = None
    try:
        release_dates_diffs_kurtosis = kurtosis(real_release_date_diffs)
    except:
        release_dates_diffs_kurtosis = None
    try:    
        release_dates_diffs_cv = np.std(real_release_date_diffs)/release_dates_diffs_mean
    except:
        release_dates_diffs_cv = None

    #release_dates = [date[:-6] for date in real_releases["created_at"].astype(str).tolist()] "release_dates": release_dates,
    return {"owner": owner, "repo": repo_name, "release_count": len(project_release_df.index), "language": language, 
                        "release_dates_diffs": real_release_date_diffs, 
                    "release_dates_diffs_min": release_dates_diffs_min, "release_dates_diffs_max": release_dates_diffs_max, 
                    "release_dates_diffs_mean": release_dates_diffs_mean, "release_dates_diffs_gmean": release_dates_diffs_gmean, 
                    "release_dates_diffs_skewness": release_dates_diffs_skewness, 
                    "release_dates_diffs_kurtosis": release_dates_diffs_kurtosis, "release_dates_diffs_cv": release_dates_diffs_cv}


Collect the all projects with at least 5 releases using pandas DataFrames.

In [27]:
import pandas as pd
import glob
from datetime import datetime

languages = ["go", "java", "python", "ruby"]
releases_folder = "releases"

all_language_releases = []
all_language_releases_min_5 = []
all_real_language_releases_min_5 = []
all_newer_lanaguage_release_min_5 = []

for language in languages:

    total_releases = 0
    more_than_5_releases = 0
    more_than_5_real_releases = 0
    more_than_5_newer_real_releases = 0

    # Grab all the files from within the folder, analyze the releases
    files = glob.glob(f"{releases_folder}/{language}/*_releases.jsonl")

    for file in files:

        total_releases+=1
        owner, repo_name = file.replace("_releases.jsonl", "").split(f"{releases_folder}/{language}\\")[1].split("_", 1)

        # Dataframe of all the releases
        all_releases = pd.read_json(file, lines=True).sort_values(by="created_at")
        # all_releases.sort_values(by=['created_at'])

        # Find the index of all prereleases then drop them from the dataframe - giving us real releases
        real_releases = all_releases.copy(deep=True)
        indexPrereleases = real_releases[ (all_releases['prerelease'] == True)].index
        real_releases.drop(indexPrereleases, inplace=True)

        # Find the index of all releases that are older than 2021 - then remove them from the dataset
        newer_releases = all_releases.copy(deep=True)
        newer_releases.drop(pd.date_range('2000-1-1', '2021-1-1'), errors='ignore', inplace= True)

        # Collect metrics for all projects, and all releases - we need at least 2
        if len(all_releases.index) >= 2:
            project_release_metrics = get_project_release_metrics(all_releases, owner, repo_name, language)
            all_language_releases.append(project_release_metrics)

        # Group metrics for all projects with at least 5 releases
        if len(all_releases.index) >= 5:
            more_than_5_releases += 1
            all_language_releases_min_5.append(project_release_metrics)

        # Group metrics for all projects, with at least 5 "real releases" - no prereleases are considered for this grouping
        if len(real_releases.index) >= 5:
            more_than_5_real_releases+= 1
            
            project_release_metrics = get_project_release_metrics(real_releases, owner, repo_name, language)
            all_real_language_releases_min_5.append(project_release_metrics)

        if len(newer_releases.index) >= 5:
            more_than_5_newer_real_releases+= 1

            project_release_metrics = get_project_release_metrics(newer_releases, owner, repo_name, language)
            all_newer_lanaguage_release_min_5.append(project_release_metrics)


    print(f'{language} has a total of {total_releases} projects with releases.')
    print(f'{language} has {more_than_5_releases} projects, with more than 5 releases total.')
    print(f'{language} has {more_than_5_real_releases} projects, with more than 5 releases that are not prereleases.')
    print(f'{language} has {more_than_5_newer_real_releases} projects, with more than 5 releases that are not prereleases, since 2021.')


invalid value encountered in double_scalars


invalid value encountered in double_scalars



go has a total of 593 projects with releases.
go has 467 projects, with more than 5 releases total.
go has 447 projects, with more than 5 releases that are not prereleases.
go has 467 projects, with more than 5 releases that are not prereleases, since 2021.



invalid value encountered in double_scalars


invalid value encountered in double_scalars



java has a total of 1317 projects with releases.
java has 999 projects, with more than 5 releases total.
java has 950 projects, with more than 5 releases that are not prereleases.
java has 999 projects, with more than 5 releases that are not prereleases, since 2021.



invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars



python has a total of 2330 projects with releases.
python has 1621 projects, with more than 5 releases total.
python has 1544 projects, with more than 5 releases that are not prereleases.
python has 1621 projects, with more than 5 releases that are not prereleases, since 2021.
ruby has a total of 457 projects with releases.
ruby has 291 projects, with more than 5 releases total.
ruby has 287 projects, with more than 5 releases that are not prereleases.
ruby has 291 projects, with more than 5 releases that are not prereleases, since 2021.


In [30]:
def showDataFrame(releases_grouping):

    releases = pd.DataFrame(releases_grouping, columns=releases_grouping[0].keys())
    display(releases)
    return releases

all_releases = showDataFrame(all_language_releases)
all_releases_min_5 = showDataFrame(all_language_releases_min_5)
real_releases_min_5 = showDataFrame(all_real_language_releases_min_5)
real_newer_releases_min_5 = showDataFrame(all_newer_lanaguage_release_min_5)

Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,1dustindavis,gorilla,28,go,"[0.12423611111111112, 2.1910648148148146, 2.98...",0.002836,326.710486,44.178622,3.426886,2.426329e+00,4.980683,1.829152
1,8treenet,freedom,23,go,"[15.05900462962963, 8.676886574074073, 7.09667...",0.085775,278.917766,35.677011,13.702392,3.155394e+00,9.974249,1.662474
2,a8m,documentdb,5,go,"[111.67515046296296, 315.0367939814815, 31.587...",31.587199,664.260185,280.639832,164.832100,6.203533e-01,-1.155636,0.870864
3,abhi,libcni,3,go,"[93.98319444444445, 216.09219907407407]",93.983194,216.092199,155.037697,142.509772,0.000000e+00,-2.000000,0.393804
4,abiosoft,ishell,3,go,"[1091.229976851852, 0.02636574074074074]",0.026366,1091.229977,545.628171,5.363869,2.752415e-16,-2.000000,0.999952
...,...,...,...,...,...,...,...,...,...,...,...,...
4226,yoshoku,rumale,10,ruby,"[50.351226851851855, 12.845046296296296, 36.02...",12.845046,324.441875,65.737301,36.030668,2.249610e+00,3.440039,1.428417
4227,yujinakayama,guard-rubocop,12,ruby,"[3.795150462962963, 6.819120370370371, 44.1720...",3.795150,2754.523414,277.390703,30.969192,2.842263e+00,6.087186,2.824980
4228,yuki24,rambulance,17,ruby,"[2.067662037037037, 75.40505787037037, 12.6325...",1.480775,513.022454,174.705454,54.857229,8.270768e-01,-0.919225,1.050759
4229,zendesk,active_record_shards,10,ruby,"[17.87295138888889, 76.02751157407407, 131.086...",0.558657,131.086829,53.009042,31.050529,7.218432e-01,0.170475,0.675557


Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,1dustindavis,gorilla,28,go,"[0.12423611111111112, 2.1910648148148146, 2.98...",0.002836,326.710486,44.178622,3.426886,2.426329,4.980683,1.829152
1,8treenet,freedom,23,go,"[15.05900462962963, 8.676886574074073, 7.09667...",0.085775,278.917766,35.677011,13.702392,3.155394,9.974249,1.662474
2,a8m,documentdb,5,go,"[111.67515046296296, 315.0367939814815, 31.587...",31.587199,664.260185,280.639832,164.832100,0.620353,-1.155636,0.870864
3,abutaha,aws-es-proxy,13,go,"[1.1247569444444445, 92.51953703703704, 75.422...",0.611875,498.567627,144.044722,39.040102,1.019148,-0.007584,1.048869
4,achannarasappa,ticker,51,go,"[0.7200462962962964, 0.010092592592592592, 0.2...",0.002836,103.541759,11.606095,1.153167,2.478227,5.011811,2.167268
...,...,...,...,...,...,...,...,...,...,...,...,...
3373,yoshoku,rumale,10,ruby,"[50.351226851851855, 12.845046296296296, 36.02...",12.845046,324.441875,65.737301,36.030668,2.249610,3.440039,1.428417
3374,yujinakayama,guard-rubocop,12,ruby,"[3.795150462962963, 6.819120370370371, 44.1720...",3.795150,2754.523414,277.390703,30.969192,2.842263,6.087186,2.824980
3375,yuki24,rambulance,17,ruby,"[2.067662037037037, 75.40505787037037, 12.6325...",1.480775,513.022454,174.705454,54.857229,0.827077,-0.919225,1.050759
3376,zendesk,active_record_shards,10,ruby,"[17.87295138888889, 76.02751157407407, 131.086...",0.558657,131.086829,53.009042,31.050529,0.721843,0.170475,0.675557


Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,8treenet,freedom,23,go,"[15.05900462962963, 8.676886574074073, 7.09667...",0.085775,278.917766,35.677011,13.702392,3.155394,9.974249,1.662474
1,a8m,documentdb,5,go,"[111.67515046296296, 315.0367939814815, 31.587...",31.587199,664.260185,280.639832,164.832100,0.620353,-1.155636,0.870864
2,abutaha,aws-es-proxy,7,go,"[92.51953703703704, 659.0286111111111, 498.567...",61.294468,659.028611,287.901985,202.390270,0.563902,-1.254768,0.773000
3,achannarasappa,ticker,47,go,"[3.146863425925926, 0.01, 0.5738773148148149, ...",0.002836,103.541759,12.533555,1.311554,2.333644,4.266249,2.075749
4,adjust,rmq,5,go,"[4.130902777777778, 2.0283680555555557, 161.80...",2.028368,161.802025,47.313001,13.034412,1.112311,-0.702475,1.405982
...,...,...,...,...,...,...,...,...,...,...,...,...
3223,xotahal,fastlane-plugin-semantic_release,23,ruby,"[0.05168981481481481, 67.12336805555556, 0.087...",0.001377,337.645220,57.694966,2.960668,1.821206,2.295284,1.584347
3224,yoshoku,rumale,10,ruby,"[50.351226851851855, 12.845046296296296, 36.02...",12.845046,324.441875,65.737301,36.030668,2.249610,3.440039,1.428417
3225,yujinakayama,guard-rubocop,12,ruby,"[3.795150462962963, 6.819120370370371, 44.1720...",3.795150,2754.523414,277.390703,30.969192,2.842263,6.087186,2.824980
3226,yuki24,rambulance,17,ruby,"[2.067662037037037, 75.40505787037037, 12.6325...",1.480775,513.022454,174.705454,54.857229,0.827077,-0.919225,1.050759


Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
0,1dustindavis,gorilla,28,go,"[0.12423611111111112, 2.1910648148148146, 2.98...",0.002836,326.710486,44.178622,3.426886,2.426329,4.980683,1.829152
1,8treenet,freedom,23,go,"[15.05900462962963, 8.676886574074073, 7.09667...",0.085775,278.917766,35.677011,13.702392,3.155394,9.974249,1.662474
2,a8m,documentdb,5,go,"[111.67515046296296, 315.0367939814815, 31.587...",31.587199,664.260185,280.639832,164.832100,0.620353,-1.155636,0.870864
3,abutaha,aws-es-proxy,13,go,"[1.1247569444444445, 92.51953703703704, 75.422...",0.611875,498.567627,144.044722,39.040102,1.019148,-0.007584,1.048869
4,achannarasappa,ticker,51,go,"[0.7200462962962964, 0.010092592592592592, 0.2...",0.002836,103.541759,11.606095,1.153167,2.478227,5.011811,2.167268
...,...,...,...,...,...,...,...,...,...,...,...,...
3373,yoshoku,rumale,10,ruby,"[50.351226851851855, 12.845046296296296, 36.02...",12.845046,324.441875,65.737301,36.030668,2.249610,3.440039,1.428417
3374,yujinakayama,guard-rubocop,12,ruby,"[3.795150462962963, 6.819120370370371, 44.1720...",3.795150,2754.523414,277.390703,30.969192,2.842263,6.087186,2.824980
3375,yuki24,rambulance,17,ruby,"[2.067662037037037, 75.40505787037037, 12.6325...",1.480775,513.022454,174.705454,54.857229,0.827077,-0.919225,1.050759
3376,zendesk,active_record_shards,10,ruby,"[17.87295138888889, 76.02751157407407, 131.086...",0.558657,131.086829,53.009042,31.050529,0.721843,0.170475,0.675557


In [31]:
from plotly.express import box

def showBoxPlot(release_df, plot_title):
    fig = box(release_df, x="language", y="release_dates_diffs_gmean", color="language", title=plot_title,
          labels={"language": "Language", "release_dates_diffs_gmean": "Geometric release interval mean"}, height=500, log_y=True)
    fig.update_xaxes(tickangle=30).update_traces(boxpoints=False)
    fig.show()

showBoxPlot(all_releases, "all_releases")
showBoxPlot(all_releases_min_5, "all_releases_min_5")
showBoxPlot(real_releases_min_5, "real releases min 5")

In [33]:
from plotly.express import violin

def plotViolin (release_df, plot_title):
    fig = violin(release_df, x="language", y="release_dates_diffs_gmean", color="language", title="plot_title",
             labels={"language": "Language", "release_dates_diffs_gmean": "Geometric release interval mean"}, height=500, log_y=False)
    fig.update_xaxes(tickangle=30)
    fig.show()

plotViolin(all_releases, "all_releases")
plotViolin(all_releases_min_5, "all_releases_min_5")
plotViolin(real_releases_min_5, "real_releases_min_5")

In [34]:
from plotly.express import box

def boxPlot(release_df, plot_title):
    fig_all = box(release_df, x="language", y="release_dates_diffs_gmean", color="language", title=plot_title,
          labels={"language": "Language", "release_dates_diffs_gmean": "Geometric release interval mean"}, height=500, log_y=True)
    fig_all.update_xaxes(tickangle=30)
    fig_all.show()

boxPlot(all_releases, "all_releases")
boxPlot(all_releases_min_5, "all_releases_min_5")
boxPlot(real_releases_min_5, "real_releases_min_5")

Now that we have analyzed the releases, lets categorize them based on their average release cadence.

In [40]:
# Release categorization for the repositories with at least 5 real releases over their lifetime
very_rapid_releases = real_releases_min_5[real_releases_min_5.release_dates_diffs_mean < 7]
rapid_releases = real_releases_min_5[(real_releases_min_5.release_dates_diffs_mean > 7) & (real_releases_min_5.release_dates_diffs_mean < 30)]
modern_releases = real_releases_min_5[(real_releases_min_5.release_dates_diffs_mean > 30) & (real_releases_min_5.release_dates_diffs_mean < 90)]
slow_releases = real_releases_min_5[(real_releases_min_5.release_dates_diffs_mean > 90)]

display(slow_releases)



Unnamed: 0,owner,repo,release_count,language,release_dates_diffs,release_dates_diffs_min,release_dates_diffs_max,release_dates_diffs_mean,release_dates_diffs_gmean,release_dates_diffs_skewness,release_dates_diffs_kurtosis,release_dates_diffs_cv
1,a8m,documentdb,5,go,"[111.67515046296296, 315.0367939814815, 31.587...",31.587199,664.260185,280.639832,164.832100,0.620353,-1.155636,0.870864
2,abutaha,aws-es-proxy,7,go,"[92.51953703703704, 659.0286111111111, 498.567...",61.294468,659.028611,287.901985,202.390270,0.563902,-1.254768,0.773000
7,agola-io,agola,5,go,"[26.004074074074072, 90.97255787037037, 419.22...",26.004074,419.226019,221.551727,136.495502,0.007564,-1.844016,0.751436
10,akamensky,argparse,9,go,"[53.79297453703704, 32.94158564814815, 176.180...",5.488900,349.382616,122.068271,53.782796,0.771231,-0.982603,1.015151
11,akavel,rsrc,5,go,"[1778.572789351852, 237.4616898148148, 0.01234...",0.012350,1778.572789,526.939028,26.298580,1.108515,-0.703483,1.380757
...,...,...,...,...,...,...,...,...,...,...,...,...
3218,westonganger,protected_attributes_continued,13,ruby,"[2.123611111111111, 0.945324074074074, 28.0572...",0.945324,601.886123,149.002860,37.979865,1.314972,0.662111,1.227019
3219,whomwah,rqrcode,5,ruby,"[597.832025462963, 111.99232638888888, 169.036...",111.992326,597.832025,260.958319,207.869047,1.107945,-0.698698,0.750280
3221,wirecardBrasil,moip-sdk-ruby,7,ruby,"[733.8703125, 55.35835648148148, 16.7895601851...",4.019965,733.870312,201.870108,67.254115,1.336050,0.273483,1.264354
3225,yujinakayama,guard-rubocop,12,ruby,"[3.795150462962963, 6.819120370370371, 44.1720...",3.795150,2754.523414,277.390703,30.969192,2.842263,6.087186,2.824980
