First off, just read the data

In [1]:
import pandas as pd

In [2]:
# Load the table of measured code metrics; the path is relative to the working directory.
RESULTS_CSV = "results.csv"
df = pd.read_csv(RESULTS_CSV)

In [26]:
# Make IPython display the value of every top-level expression in a cell,
# not just the last one (the default setting is "last_expr").
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [6]:
# Show the loaded metrics table.
df

Unnamed: 0,App,Key,Logical Lines of Code,Halstead Volume,Halstead Difficulty,Halstead Effort,Maintainability Index,Cyclomatic Complexity,Cognitive Complexity,Number of Words
0,insurance_claims,fbp_app_min,210,543.658677,6.210526,3376.406518,43.3,1.934783,0.424242,846
1,insurance_claims,fbp_app_data,239,610.424232,6.190476,3778.816672,40.8,2.125,0.657143,974
2,insurance_claims,fbp_app_ml,263,543.658677,6.848214,3723.091116,41.85,2.294118,0.918919,1124
3,insurance_claims,soa_app_min,118,137.548875,2.0,275.09775,59.23,1.388889,0.444444,466
4,insurance_claims,soa_app_data,131,155.114511,2.0,310.229021,57.55,1.4,0.45,513
5,insurance_claims,soa_app_ml,184,226.997735,2.068966,469.650487,51.36,1.541667,0.75,706
6,mblogger,fbp_app_min,172,155.114511,2.0,310.229021,44.1,1.810811,0.307692,529
7,mblogger,fbp_app_data,275,372.494509,3.0,1117.483526,31.51,1.931034,0.525,830
8,mblogger,fbp_app_ml,340,560.046581,4.066667,2277.522764,26.5,1.970149,0.851064,1055
9,mblogger,soa_app_min,143,155.114511,2.0,310.229021,54.83,1.777778,1.05556,555


Some metrics scale with the size of the codebase, while others don't. For the former we calculate the relative change between stages (as a fraction, e.g. 0.14 means a 14% increase); for the latter we calculate the absolute difference.

In [46]:
# Metrics whose change between stages is reported as an absolute difference.
absolute_diff_metrics = [
    "Halstead Difficulty",
    "Maintainability Index",
    "Cyclomatic Complexity",
    "Cognitive Complexity",
]

# Metrics whose change between stages is reported as a relative change.
percentage_diff_metrics = [
    "Logical Lines of Code",
    "Halstead Volume",
    "Halstead Effort",
    "Number of Words",
]

Here we calculate the differences and re-organize the dataframe a bit.

In [47]:
def stage_changes(group, pct_metrics, abs_metrics, paradigm):
    """Return the stage-to-stage metric changes for one (app, paradigm) group.

    Parameters
    ----------
    group : pd.DataFrame
        Rows of a single app and paradigm, ordered by development stage.
        Must contain a "Key" column and every column named in
        ``pct_metrics`` and ``abs_metrics``.
    pct_metrics : list of str
        Columns for which the relative change (``pct_change``) is computed.
    abs_metrics : list of str
        Columns for which the absolute difference (``diff``) is computed.
    paradigm : str
        Label written to the "Paradigm" column of the result.

    Returns
    -------
    pd.DataFrame
        One row per pair of consecutive stages, indexed like the later row
        of each pair, with columns "Paradigm", "Keys", the ``pct_metrics``
        and then the ``abs_metrics``.
    """
    changes = pd.concat(
        [group[pct_metrics].pct_change(), group[abs_metrics].diff()],
        axis=1,
    )
    # The first row of a group has no predecessor, so its changes are NaN.
    # Copy so that inserting columns never touches a view of another frame.
    changes = changes.iloc[1:].copy()

    # The stage is the last "_"-separated token of Key, e.g. "fbp_app_min" -> "min".
    stages = group["Key"].str.split("_").str[-1].tolist()
    stage_pairs = [f"{prev}/{cur}" for prev, cur in zip(stages, stages[1:])]

    changes.insert(0, "Keys", stage_pairs)
    changes.insert(0, "Paradigm", paradigm)
    return changes


# The paradigm is the first "_"-separated token of Key, e.g. "fbp_app_min" -> "fbp".
# Reading it from the data (instead of relying on row positions) keeps the labels
# correct regardless of how many apps the CSV contains or how they are ordered.
labelled_df = df.assign(Paradigm=df["Key"].str.split("_").str[0])

unexpected_paradigms = set(labelled_df["Paradigm"]) - {"fbp", "soa"}
assert not unexpected_paradigms, f"unexpected paradigm prefixes in Key: {unexpected_paradigms}"

# sort=False keeps the groups in the order they first appear in the file.
changes_by_group = [
    (paradigm, stage_changes(group, percentage_diff_metrics, absolute_diff_metrics, paradigm))
    for (_, paradigm), group in labelled_df.groupby(["App", "Paradigm"], sort=False)
]

all_dfs = [changes for _, changes in changes_by_group]
fbp_dfs = [changes for paradigm, changes in changes_by_group if paradigm == "fbp"]
soa_dfs = [changes for paradigm, changes in changes_by_group if paradigm == "soa"]

# All fbp groups first, then all soa groups; apps stay in file order within each.
all_changes_df = pd.concat(fbp_dfs + soa_dfs, axis=0)
all_changes_df

Unnamed: 0,Paradigm,Keys,Logical Lines of Code,Halstead Volume,Halstead Effort,Number of Words,Halstead Difficulty,Maintainability Index,Cyclomatic Complexity,Cognitive Complexity
1,fbp,min/data,0.138095,0.122808,0.119183,0.1513,-0.02005,-2.5,0.190217,0.232901
2,fbp,data/ml,0.100418,-0.109376,-0.014747,0.154004,0.657738,1.05,0.169118,0.261776
7,fbp,min/data,0.598837,1.401416,2.602124,0.568998,1.0,-12.59,0.120224,0.217308
8,fbp,data/ml,0.236364,0.503503,1.038082,0.271084,1.066667,-5.01,0.039115,0.326064
13,fbp,min/data,0.171533,0.223463,0.451884,0.207113,1.094453,-4.47,0.272904,0.653398
14,fbp,data/ml,0.121495,0.148057,0.157376,0.126516,0.056465,-2.71,0.023392,0.00976
4,soa,min/data,0.110169,0.127705,0.127705,0.100858,0.0,-1.68,0.011111,0.005556
5,soa,data/ml,0.40458,0.46342,0.513883,0.376218,0.068966,-6.19,0.141667,0.3
10,soa,min/data,0.020979,0.0,0.0,0.010811,0.0,-0.36,0.0,0.0
11,soa,data/ml,0.239726,0.353839,0.353839,0.254902,0.0,-3.61,-0.082126,-0.142517


In the resulting dataframe, each row holds the measured changes for one app and paradigm, and each column holds one metric. The "Keys" column indicates which pair of stages is compared. E.g. "min/data" means that the changes are measured for the transition from the "min" stage to the "data" stage.

Let's look at individual stages, or rather pairs of stages.

In [51]:
# Split the combined table by the stage pair recorded in the "Keys" column.
stage_pair = all_changes_df["Keys"]
min_data_changes_df = all_changes_df.loc[stage_pair.eq('min/data')]
data_ml_changes_df = all_changes_df.loc[stage_pair.eq('data/ml')]

In [52]:
# Rows of all_changes_df whose "Keys" value is 'min/data'.
min_data_changes_df

Unnamed: 0,Paradigm,Keys,Logical Lines of Code,Halstead Volume,Halstead Effort,Number of Words,Halstead Difficulty,Maintainability Index,Cyclomatic Complexity,Cognitive Complexity
1,fbp,min/data,0.138095,0.122808,0.119183,0.1513,-0.02005,-2.5,0.190217,0.232901
7,fbp,min/data,0.598837,1.401416,2.602124,0.568998,1.0,-12.59,0.120224,0.217308
13,fbp,min/data,0.171533,0.223463,0.451884,0.207113,1.094453,-4.47,0.272904,0.653398
4,soa,min/data,0.110169,0.127705,0.127705,0.100858,0.0,-1.68,0.011111,0.005556
10,soa,min/data,0.020979,0.0,0.0,0.010811,0.0,-0.36,0.0,0.0
16,soa,min/data,0.276923,0.213559,0.376629,0.287152,0.494426,-4.26,-0.002564,-0.00256


In [53]:
# Rows of all_changes_df whose "Keys" value is 'data/ml'.
data_ml_changes_df

Unnamed: 0,Paradigm,Keys,Logical Lines of Code,Halstead Volume,Halstead Effort,Number of Words,Halstead Difficulty,Maintainability Index,Cyclomatic Complexity,Cognitive Complexity
2,fbp,data/ml,0.100418,-0.109376,-0.014747,0.154004,0.657738,1.05,0.169118,0.261776
8,fbp,data/ml,0.236364,0.503503,1.038082,0.271084,1.066667,-5.01,0.039115,0.326064
14,fbp,data/ml,0.121495,0.148057,0.157376,0.126516,0.056465,-2.71,0.023392,0.00976
5,soa,data/ml,0.40458,0.46342,0.513883,0.376218,0.068966,-6.19,0.141667,0.3
11,soa,data/ml,0.239726,0.353839,0.353839,0.254902,0.0,-3.61,-0.082126,-0.142517
17,soa,data/ml,0.118072,0.09925,0.106652,0.102826,0.028107,-2.58,0.054945,0.12637
