In [14]:
import os
# NOTE(review): absolute, machine-specific working directory; relative paths
# used after this call resolve against it, so the notebook only runs where
# this directory exists.
os.chdir("/home/jovyan/work/JSS20221/1-Projects")

In [1]:
from __future__ import division
import pandas as pd
from scipy.stats import wilcoxon

In [2]:
def cliffsDelta(lst1, lst2, **dull):
    """Compute Cliff's delta for two samples and label its magnitude.

    Delta is ``(more - less) / (len(lst1) * len(lst2))`` where, over all pairs
    ``(x, y)`` with ``x`` from ``lst1`` and ``y`` from ``lst2``, ``more`` counts
    the pairs with ``x > y`` and ``less`` the pairs with ``x < y``.

    :param lst1: first sample; must support ``len()`` and ``sorted()``.
    :param lst2: second sample; same requirements as ``lst1``.
    :param dull: optional threshold overrides passed as the keywords
        ``small``, ``medium`` and ``large``; keys that are not supplied keep
        their default value.
    :return: tuple ``(delta, size)`` where ``size`` is the label returned by
        ``lookup_size`` for ``delta``.
    :raises ValueError: if either sample is empty.
    """
    # effect sizes from (Hess and Kromrey, 2004)
    thresholds = {'small': 0.147, 'medium': 0.33, 'large': 0.474}
    # Merge instead of replace so a partial override (e.g. only ``small``)
    # does not leave lookup_size without the remaining keys.
    thresholds.update(dull)

    m, n = len(lst1), len(lst2)
    if m == 0 or n == 0:
        # Delta is undefined here (the denominator m*n would be zero).
        raise ValueError("cliffsDelta requires two non-empty samples")

    lst2 = sorted(lst2)
    j = more = less = 0
    # Both samples are walked in ascending order, so ``j`` only moves forward.
    for repeats, x in runs(sorted(lst1)):
        # After this loop j == number of lst2 values strictly below x.
        while j < n and lst2[j] < x:
            j += 1
        more += j*repeats
        # Skip the values equal to x; ties count for neither side.
        while j < n and lst2[j] == x:
            j += 1
        # The remaining n - j values of lst2 are strictly above x.
        less += (n - j)*repeats
    d = (more - less) / (m*n)
    return d, lookup_size(d, thresholds)


In [3]:
def lookup_size(delta: float, dull: dict) -> str:
    """
    Translate an effect size into a qualitative label.

    Only the magnitude of ``delta`` is considered. The label is 'negligible'
    below ``dull['small']``, 'small' from there up to ``dull['medium']``,
    'medium' up to ``dull['large']`` and 'large' from ``dull['large']`` on.
    When none of the ranges matches (for instance ``delta`` is NaN) the
    function returns None.

    :type delta: float
    :type dull: dict, a dictionary of small, medium, large thresholds.
    """
    magnitude = abs(delta)
    if magnitude < dull['small']:
        return 'negligible'
    # Each entry is (label / lower-bound key, upper-bound key).
    for label, upper_key in (('small', 'medium'), ('medium', 'large')):
        if dull[label] <= magnitude < dull[upper_key]:
            return label
    return 'large' if magnitude >= dull['large'] else None

In [4]:
def runs(lst):
    """Iterator, chunks repeated values.

    Yields one ``(count, value)`` tuple per run of consecutive equal items in
    ``lst``, in order of appearance. An empty ``lst`` yields nothing.

    :param lst: iterable of mutually comparable items.
    """
    count = 0       # length of the run currently being collected
    current = None  # value of that run; meaningful only while count > 0
    for value in lst:
        if count and value != current:
            # The run ended on the previous item: emit it and start over.
            yield count, current
            count = 0
        current = value
        count += 1
    if count:
        # Emit the trailing run; skipped when the input was empty.
        yield count, current

In [8]:
def analyze(title, x, y, alpha=0.05):
    """Print a significance report for two samples.

    Runs ``wilcoxon(x, y)`` and prints ``title`` followed by the verdict. When
    the p-value is at most ``alpha`` the p-value and the result of
    ``cliffsDelta(x, y)`` are printed as well. The report always ends with a
    ``########`` separator line.

    NOTE(review): scipy.stats.wilcoxon is the paired signed-rank test and
    expects ``x`` and ``y`` of equal length, whereas cliffsDelta compares the
    samples as unpaired groups. Confirm that pairing is intended; for
    independent groups scipy.stats.mannwhitneyu is the unpaired counterpart.

    :param title: heading printed on the first line of the report.
    :param x: first sample, passed unchanged to ``wilcoxon`` and ``cliffsDelta``.
    :param y: second sample, passed unchanged to ``wilcoxon`` and ``cliffsDelta``.
    :param alpha: significance level the p-value is compared against
        (default 0.05).
    :return: None; the report is written to standard output.
    """
    rst = wilcoxon(x, y)
    print(title)
    if rst.pvalue <= alpha:
        print("Significant difference")
        print("p-value %.16f" % rst.pvalue)
        print("effect size: ", end="")
        print(cliffsDelta(x, y))
    else:
        print("No significant difference")
    print("########")

## RQ1

In [20]:
# Metrics table for the Android samples; path is relative to the working directory.
android_samples = pd.read_csv("5-UnderstandMetrics/android_understandmetrics_output.csv")

In [21]:
# Metrics table for the conventional Android projects; path is relative to the working directory.
android_general_projects = pd.read_csv("5-UnderstandMetrics/conventional_android_understandmetrics_output.csv")

In [22]:
# Metrics table for the Spring samples; path is relative to the working directory.
spring_samples = pd.read_csv("5-UnderstandMetrics/spring_understandmetrics_output.csv")

In [23]:
# Metrics table for the conventional Spring projects; path is relative to the working directory.
spring_general_projects = pd.read_csv("5-UnderstandMetrics/conventional_spring_understandmetrics_output.csv")

### Number of Files

In [25]:
# Report on the numberOfJavaFiles column: Android samples vs. conventional Android projects.
analyze("Android Number of Java Files", android_samples["numberOfJavaFiles"], android_general_projects["numberOfJavaFiles"])

Android Number of Java Files
Significant difference
p-value 0.0000000000024141
effect size: (-0.42671745867768596, 'medium')
########


In [26]:
# Report on the numberOfJavaFiles column: Spring samples vs. conventional Spring projects.
analyze("Spring Number of Java Files", spring_samples["numberOfJavaFiles"], spring_general_projects["numberOfJavaFiles"])

Spring Number of Java Files
Significant difference
p-value 0.0000000000541972
effect size: (-0.9824561403508771, 'large')
########


### Lines of Code per file

In [28]:
# Derived column: code lines per Java file (CountLineCode / numberOfJavaFiles).
# NOTE(review): a row with numberOfJavaFiles == 0 would produce inf/NaN here — confirm none exist.
android_samples["codeLinePerFile"] = android_samples["CountLineCode"]/android_samples["numberOfJavaFiles"]

In [29]:
# Derived column: code lines per Java file (CountLineCode / numberOfJavaFiles).
android_general_projects["codeLinePerFile"] = android_general_projects["CountLineCode"]/android_general_projects["numberOfJavaFiles"]

In [30]:
# Derived column: code lines per Java file (CountLineCode / numberOfJavaFiles).
spring_samples["codeLinePerFile"] = spring_samples["CountLineCode"]/spring_samples["numberOfJavaFiles"]

In [31]:
# Derived column: code lines per Java file (CountLineCode / numberOfJavaFiles).
spring_general_projects["codeLinePerFile"] = spring_general_projects["CountLineCode"]/spring_general_projects["numberOfJavaFiles"]

In [32]:
# Report on the derived codeLinePerFile column: Android samples vs. conventional Android projects.
analyze("Android Lines of code per file", android_samples["codeLinePerFile"], android_general_projects["codeLinePerFile"])

Android Lines of code per file
No significant difference
########


In [33]:
# Report on the derived codeLinePerFile column: Spring samples vs. conventional Spring projects.
analyze("Spring Lines of code per file", spring_samples["codeLinePerFile"], spring_general_projects["codeLinePerFile"])

Spring Lines of code per file
Significant difference
p-value 0.0000000006516612
effect size: (-0.8307171437365343, 'large')
########


### Relative comment lines

In [44]:
# Derived column: comment lines per Java file (CountLineComment / numberOfJavaFiles).
# NOTE(review): the column is named "RelativeLineCode" although it holds comment lines, not code lines.
android_samples["RelativeLineCode"] = android_samples["CountLineComment"]/android_samples["numberOfJavaFiles"]

In [45]:
# Derived column: comment lines per Java file (CountLineComment / numberOfJavaFiles).
# NOTE(review): the column is named "RelativeLineCode" although it holds comment lines, not code lines.
android_general_projects["RelativeLineCode"] = android_general_projects["CountLineComment"]/android_general_projects["numberOfJavaFiles"]

In [46]:
# Derived column: comment lines per Java file (CountLineComment / numberOfJavaFiles).
# NOTE(review): the column is named "RelativeLineCode" although it holds comment lines, not code lines.
spring_samples["RelativeLineCode"] = spring_samples["CountLineComment"]/spring_samples["numberOfJavaFiles"]

In [47]:
# Derived column: comment lines per Java file (CountLineComment / numberOfJavaFiles).
# NOTE(review): the column is named "RelativeLineCode" although it holds comment lines, not code lines.
spring_general_projects["RelativeLineCode"] = spring_general_projects["CountLineComment"]/spring_general_projects["numberOfJavaFiles"]

In [48]:
# Report on the derived RelativeLineCode column: Android samples vs. conventional Android projects.
analyze("Android Relative Commented Line of Code", android_samples["RelativeLineCode"], android_general_projects["RelativeLineCode"])

Android Relative Commented Line of Code
Significant difference
p-value 0.0000000000000000
effect size: (0.897630423553719, 'large')
########


In [49]:
# Report on the derived RelativeLineCode column: Spring samples vs. conventional Spring projects.
analyze("Spring Relative Commented Line of Code", spring_samples["RelativeLineCode"], spring_general_projects["RelativeLineCode"])

Spring Relative Commented Line of Code
Significant difference
p-value 0.0000000085975876
effect size: (-0.7466912896275777, 'large')
########


### Cyclomatic Complexity per method

In [50]:
android_samples["cyclomaticComplexity"] = android_general_projects["SumCyclomaticStrict"]/android_general_projects["CountDeclMethod"]

In [51]:
android_general_projects["cyclomaticComplexity"] = android_samples["SumCyclomaticStrict"]/android_samples["CountDeclMethod"]

In [52]:
# Derived column: cyclomatic complexity per method (SumCyclomaticStrict / CountDeclMethod).
spring_samples["cyclomaticComplexity"] = spring_samples["SumCyclomaticStrict"]/spring_samples["CountDeclMethod"]

In [53]:
# Derived column: cyclomatic complexity per method (SumCyclomaticStrict / CountDeclMethod).
spring_general_projects["cyclomaticComplexity"] = spring_general_projects["SumCyclomaticStrict"]/spring_general_projects["CountDeclMethod"]

In [54]:
# Report on the cyclomaticComplexity columns of the two Android frames.
analyze("Android Cyclomatic Complexity per method", android_samples["cyclomaticComplexity"], android_general_projects["cyclomaticComplexity"])

Android Cyclomatic Complexity per method
Significant difference
p-value 0.0004454254074278
effect size: (-0.7153279958677686, 'large')
########


In [55]:
# Report on the cyclomaticComplexity columns of the two Spring frames.
analyze("Spring Cyclomatic Complexity per method", spring_samples["cyclomaticComplexity"], spring_general_projects["cyclomaticComplexity"])

Spring Cyclomatic Complexity per method
Significant difference
p-value 0.0000163051367161
effect size: (-0.6051092643890428, 'large')
########
