In [306]:
from __future__ import division
import pandas as pd
from scipy.stats import wilcoxon

# Métodos auxiliares

In [307]:
def cliffsDelta(lst1, lst2, **dull):
    """Compute Cliff's delta between two samples and label its magnitude.

    Delta is (#pairs with x > y  -  #pairs with x < y) / (len(lst1) * len(lst2))
    for x drawn from ``lst1`` and y drawn from ``lst2``.

    :param lst1: first sample (any sized iterable of mutually comparable values).
    :param lst2: second sample.
    :param dull: optional keyword overrides for the ``small``, ``medium`` and
        ``large`` thresholds; any key not supplied keeps its default value.
    :returns: tuple ``(delta, size)`` where ``size`` is the label produced by
        ``lookup_size`` for ``delta``.
    :raises ValueError: if either sample is empty (delta is undefined).
    """
    # Default effect-size thresholds from (Hess and Kromrey, 2004); caller
    # overrides are merged on top so a partial override does not drop a key.
    thresholds = {'small': 0.147, 'medium': 0.33, 'large': 0.474}
    thresholds.update(dull)

    m, n = len(lst1), len(lst2)
    if m == 0 or n == 0:
        raise ValueError("cliffsDelta requires two non-empty samples")

    lst2 = sorted(lst2)
    j = more = less = 0
    # Walk lst1 in ascending order, one run of equal values at a time, so the
    # cursor j into the sorted lst2 only ever moves forward.
    for repeats, x in runs(sorted(lst1)):
        # After this loop j is the number of lst2 values strictly below x.
        while j <= (n - 1) and lst2[j] < x:
            j += 1
        more += j*repeats
        # Skip ties; after this loop n - j is the number of lst2 values above x.
        while j <= (n - 1) and lst2[j] == x:
            j += 1
        less += (n - j)*repeats
    d = (more - less) / (m*n)
    size = lookup_size(d, thresholds)
    return d, size

In [308]:
def lookup_size(delta: float, dull: dict) -> str:
    """Map an effect size to a qualitative label.

    :param delta: effect size; only its absolute value is used.
    :param dull: dictionary with ``small``, ``medium`` and ``large`` thresholds.
    :returns: ``'negligible'``, ``'small'``, ``'medium'`` or ``'large'``.
    :raises ValueError: if ``delta`` cannot be compared with the thresholds
        (e.g. it is NaN), instead of silently returning ``None``.
    """
    magnitude = abs(delta)
    # Each check is only reached when the previous one failed, so the lower
    # bound of every band is already implied.
    if magnitude < dull['small']:
        return 'negligible'
    if magnitude < dull['medium']:
        return 'small'
    if magnitude < dull['large']:
        return 'medium'
    if magnitude >= dull['large']:
        return 'large'
    # Every comparison above is False only when a NaN is involved.
    raise ValueError("cannot classify effect size %r" % (delta,))

In [309]:
def runs(lst):
    """Yield ``(count, value)`` for each run of consecutive equal items.

    :param lst: any iterable; items are compared with ``!=``.
    :returns: generator of ``(run_length, value)`` tuples in input order.
        An empty input yields nothing.
    """
    count = 0
    for item in lst:
        # A differing item closes the run collected so far.
        if count and item != current:
            yield count, current
            count = 0
        current = item
        count += 1
    # Flush the final run; skipped entirely when the input was empty.
    if count:
        yield count, current

In [310]:
def analyze(x, y, title=None, alpha=0.05):
    """Run a Wilcoxon signed-rank test on ``x`` and ``y`` and print a report.

    When the p-value is at most ``alpha`` the p-value and the result of
    ``cliffsDelta(x, y)`` are printed; otherwise only a "not significant"
    message is printed. A separator line always ends the report.

    :param x: first sample, passed to ``scipy.stats.wilcoxon``.
    :param y: second sample, passed to ``scipy.stats.wilcoxon``.
    :param title: optional heading printed before the report. The previous
        version printed an undefined global ``title`` in the non-significant
        branch, which raised ``NameError``.
    :param alpha: significance level (default 0.05).
    :returns: None; the report is written to stdout.

    NOTE(review): ``wilcoxon`` is a paired test; if ``x`` and ``y`` are
    independent samples a different test may be more appropriate -- confirm.
    """
    rst = wilcoxon(x, y)
    if title is not None:
        print(title)
    if rst.pvalue <= alpha:
        print("Diferenca significante")
        print("p-value %.16f" % rst.pvalue)
        print("effect size: ", end="")
        print(cliffsDelta(x, y))
    else:
        print("Diferenca não significante")
    print("########")


# RQ2

## Delay

### Android

In [311]:
# Read column 1 of the Android delay samples and discard rows with missing values.
samples = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic\\frameworkCodeSamples\\Graficos\\BoxplotDelayAtualizarOsSamples\\usandoTodosOsPontosSemMedia\\android.csv",
    usecols=[1],
).dropna()

In [312]:
# Read column 4 of the Android delay data for the general projects.
general_projects = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic2\\analiseDosProjetosGerais\\RQ2\\delay\\android.csv",
    usecols=[4],
)

In [313]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# Step 1 doubles the frame, step 2 appends the first 164 rows of the doubled frame.
# NOTE(review): presumably this pads general_projects to the length of `samples`
# because the paired Wilcoxon test needs equal-length inputs -- confirm; the
# duplicated rows are not independent observations.
# Re-running this cell without reloading general_projects grows the frame again.
general_projects = pd.concat([general_projects, general_projects], ignore_index=True)
general_projects = pd.concat([general_projects, general_projects[:164]], ignore_index=True)

In [314]:
# Compare the "delay" column of the Android samples with that of the general projects.
analyze(samples["delay"], general_projects["delay"])

Diferenca significante
p-value 0.0000000000000000
effect size: (-0.5633886255924171, 'large')
########


### Spring

In [315]:
# Read column 1 of the Spring delay samples and discard rows with missing values.
samples = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic\\frameworkCodeSamples\\Graficos\\BoxplotDelayAtualizarOsSamples\\usandoTodosOsPontosSemMedia\\spring.csv",
    usecols=[1],
).dropna()

In [316]:
# Read column 4 of the Spring delay data for the general projects.
general_projects = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic2\\analiseDosProjetosGerais\\RQ2\\delay\\spring.csv",
    usecols=[4],
)

In [317]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# The original doubled the frame four times in a row, i.e. 16 copies in total.
# NOTE(review): presumably this pads general_projects towards the length of
# `samples` for the paired Wilcoxon test -- confirm; the duplicated rows are not
# independent observations.
# Re-running this cell without reloading general_projects grows the frame again.
general_projects = pd.concat([general_projects] * 16, ignore_index=True)

In [318]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# Appends the first 164 rows of the frame to itself.
# NOTE(review): 164 is presumably the remainder needed to match len(samples) -- confirm.
general_projects = pd.concat([general_projects, general_projects[:164]], ignore_index=True)

In [319]:
# Compare the "delay" column of the Spring samples with that of the general projects.
analyze(samples["delay"], general_projects["delay"])

Diferenca significante
p-value 0.0000000000000000
effect size: (-0.5043995258140997, 'large')
########


## Imports distinct

### Android

In [320]:
# Read column 5 of the Android import counts for the samples.
samples = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic\\frameworkCodeSamples\\Graficos\\boxplotNumeroDeImports\\importsRelativosAoNumeroDeJava\\android.csv",
    usecols=[5],
)

In [321]:
# Read column 2 of the Android import counts for the general projects.
general_projects = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic2\\analiseDosProjetosGerais\\RQ4\\numeroDeImports\\android.csv",
    usecols=[2],
)

In [322]:
# Compare the samples' "apenas do framework distinto" column with the general
# projects' "imports" column (Android).
analyze(samples["apenas do framework distinto"], general_projects["imports"])

Diferenca significante
p-value 0.0000000015835930
effect size: (0.42842846074380164, 'medium')
########


### Spring

In [323]:
# Read column 5 of the Spring import counts for the samples.
samples = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic\\frameworkCodeSamples\\Graficos\\boxplotNumeroDeImports\\importsRelativosAoNumeroDeJava\\spring.csv",
    usecols=[5],
)

In [324]:
# Read column 2 of the Spring import counts for the general projects.
general_projects = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic2\\analiseDosProjetosGerais\\RQ4\\numeroDeImports\\spring.csv",
    usecols=[2],
)

In [325]:
# Compare the samples' "apenas do framework distinto" column with the general
# projects' "imports" column (Spring).
analyze(samples["apenas do framework distinto"], general_projects["imports"])

Diferenca significante
p-value 0.0000000001319190
effect size: (0.8910433979686058, 'large')
########


## Contributors inside

### Android

In [326]:
# Read column 6 of the Android maintainer data for the samples; the file uses a
# comma as the decimal separator.
samples = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic\\frameworkCodeSamples\\Graficos\\BoxplotRazaoDosMantenedoresEmComum\\novos\\comAnonimos\\android.csv",
    usecols=[6],
    decimal=",",
)

In [327]:
# Read columns 3 and 4 of the Android maintainer data for the general projects and
# add "common/sample" = contribuidores_comum / contribuidores_do_projeto.
general_projects = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic2\\analiseDosProjetosGerais\\RQ4\\mantenedores\\android.csv",
    usecols=[3, 4],
).assign(**{
    "common/sample": lambda frame: frame["contribuidores_comum"] / frame["contribuidores_do_projeto"],
})

In [328]:
# Compare the "common/sample" ratio of the Android samples with that of the general projects.
analyze(samples["common/sample"], general_projects["common/sample"])

Diferenca significante
p-value 0.0000078644330038
effect size: (0.1437887396694215, 'negligible')
########


### Spring

In [329]:
# Read column 6 of the Spring maintainer data for the samples; the file uses a
# comma as the decimal separator.
samples = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic\\frameworkCodeSamples\\Graficos\\BoxplotRazaoDosMantenedoresEmComum\\novos\\comAnonimos\\spring.csv",
    usecols=[6],
    decimal=",",
)

In [330]:
# Read columns 3 and 4 of the Spring maintainer data for the general projects and
# add "common/sample" = contribuidores_comum / contribuidores_do_projeto.
general_projects = pd.read_csv(
    "C:\\Users\\Gabriel\\Documents\\gabrielsmenezes\\ic2\\analiseDosProjetosGerais\\RQ4\\mantenedores\\spring.csv",
    usecols=[3, 4],
).assign(**{
    "common/sample": lambda frame: frame["contribuidores_comum"] / frame["contribuidores_do_projeto"],
})

In [331]:
# Compare the "common/sample" ratio of the Spring samples with that of the general projects.
analyze(samples["common/sample"], general_projects["common/sample"])

Diferenca significante
p-value 0.0000000000610239
effect size: (0.9778393351800554, 'large')
########
