In [None]:
import pandas as pd

Loading lists of samples

In [None]:
# Forward slashes are understood by Python on Windows as well as on POSIX
# systems, so the notebook is no longer tied to Windows path separators.
SAMPLES_DIR = "../../1-GettingQuestions/samplesWithQuestions/"

# One CSV per framework; the "path" column of each is read further down.
androidListSamples = pd.read_csv(SAMPLES_DIR + "androidSamples.csv")
awsListSamples = pd.read_csv(SAMPLES_DIR + "awsSamples.csv")
azureListSamples = pd.read_csv(SAMPLES_DIR + "azureSamples.csv")
springListSamples = pd.read_csv(SAMPLES_DIR + "springSamples.csv")

In [None]:
def getSampleDataframe(name):
    """Load the metrics CSV of one sample and return its monthly averages.

    The file ``../../2-ExtractingMetrics/metrics/<name>.csv`` is read, the rows
    whose ``type`` is ``"average"`` are kept, and their ``numberJavaFiles`` and
    ``CountDeclClass`` columns are overwritten position-wise with the values of
    the ``"absolute"`` rows. ``readability`` is then rescaled by
    ``numberJavaFiles / CountDeclClass``. The rows are averaged per
    (year, month) of ``commitDate``, aligned on the months 2013-04 .. 2020-09,
    forward-filled, and rows in which every column is NaN are dropped.

    Parameters
    ----------
    name : str
        Location of the sample below the metrics directory, without the
        ``.csv`` suffix. Both ``/`` and ``\\`` are accepted as separators.

    Returns
    -------
    pandas.DataFrame
        Numeric metrics indexed by a two-level (year, month) index.
    """
    # Forward slashes work on Windows and POSIX alike.
    name = name.replace("\\", "/")
    sample = pd.read_csv("../../2-ExtractingMetrics/metrics/" + name + ".csv", parse_dates=True)

    absolute = sample[sample["type"] == "absolute"]
    # Copy so the column assignments below write to an independent frame
    # instead of a slice of `sample` (avoids SettingWithCopyWarning).
    sampleAverage = sample[sample["type"] == "average"].copy()

    # Assumes one "absolute" row per "average" row, in the same order
    # (the values are assigned by position) -- TODO confirm.
    sampleAverage["numberJavaFiles"] = absolute["numberJavaFiles"].values
    sampleAverage["CountDeclClass"] = absolute["CountDeclClass"].values
    sampleAverage["readability"] = (
        sampleAverage["readability"] * sampleAverage["numberJavaFiles"]
    ) / sampleAverage["CountDeclClass"]

    # The last six characters of the date text are cut off before parsing;
    # presumably a UTC offset such as "+02:00" -- verify against the data.
    sampleAverage["commitDate"] = pd.to_datetime(sampleAverage["commitDate"].astype(str).str[:-6])
    sampleAverage = sampleAverage.set_index("commitDate").dropna(axis=1, how="all")

    # `type` (and any other text column) cannot be averaged; pandas >= 2.0
    # raises on it instead of silently dropping it, so restrict to numbers.
    sampleAverage = sampleAverage.groupby(
        [sampleAverage.index.year, sampleAverage.index.month]
    ).mean(numeric_only=True)

    # Column-less frame that only carries every (year, month) of the study
    # period. An offset object is used because the "M" alias is deprecated.
    generic = pd.DataFrame(
        index=pd.date_range(
            start="2013-04-15",
            end="2020-09-30",
            freq=pd.offsets.MonthEnd(),
            name="commitDate",
        )
    )
    generic = generic.groupby([generic.index.year, generic.index.month]).sum()

    # Months without commits inherit the previous month's values; months
    # before the first commit stay all-NaN and are removed.
    return pd.concat([generic, sampleAverage], axis=1).ffill().dropna(how="all")

For each framework, load the metrics of every sample and group them by month

In [None]:
# Stack the monthly frames of every sample listed in the "path" column,
# one stacked frame per framework (rows are appended, i.e. axis 0).
androidSamples = pd.concat(list(map(getSampleDataframe, androidListSamples["path"])))
awsSamples = pd.concat(list(map(getSampleDataframe, awsListSamples["path"])))
azureSamples = pd.concat(list(map(getSampleDataframe, azureListSamples["path"])))
springSamples = pd.concat(list(map(getSampleDataframe, springListSamples["path"])))

In [None]:
# Collapse the stacked per-sample rows into a single row per index pair
# (levels 0 and 1) by averaging every column.
androidSamples, awsSamples, azureSamples, springSamples = (
    stacked.groupby(level=[0, 1]).mean()
    for stacked in (androidSamples, awsSamples, azureSamples, springSamples)
)

In [None]:
def generateStats(dataframe, framework):
    """Write per-column summary statistics to ``<framework>_stats.csv``.

    For every column of ``dataframe`` the minimum, maximum, mean, median and
    standard deviation are computed and stored as the columns ``min``,
    ``max``, ``avg``, ``med`` and ``std`` of the output file; each row of the
    file corresponds to one column of ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to summarise.
    framework : str
        Prefix of the output file name.
    """
    lows = dataframe.min()
    summary = pd.DataFrame(
        {
            "min": lows,
            "max": dataframe.max(),
            "avg": dataframe.mean(),
            "med": dataframe.median(),
            "std": dataframe.std(),
        },
        # Rows follow the labels produced by min(), as every statistic is
        # aligned on that index.
        index=lows.index,
    )
    summary.to_csv(framework + "_stats.csv")

Obtaining stats for each framework

In [None]:
# Obtaining stats: writes one <framework>_stats.csv file per framework.
generateStats(androidSamples, "android")
generateStats(awsSamples, "aws")
generateStats(azureSamples, "azure")
generateStats(springSamples, "spring")

Loading questions

In [None]:
# Forward slashes are understood by Python on Windows as well as on POSIX
# systems, so the notebook is no longer tied to Windows path separators.
QUESTIONS_DIR = "../../1-GettingQuestions/questions/"

# One CSV of questions per framework.
androidQuestions = pd.read_csv(QUESTIONS_DIR + "androidQuestions.csv")
awsQuestions = pd.read_csv(QUESTIONS_DIR + "awsQuestions.csv")
azureQuestions = pd.read_csv(QUESTIONS_DIR + "azureQuestions.csv")
springQuestions = pd.read_csv(QUESTIONS_DIR + "springQuestions.csv")

In [None]:
def groupBy(dataframe):
    """Sum the rows of ``dataframe`` per (year, month) of ``creationDate``.

    A ``questions`` column holding 1 for every row is added before summing,
    so its monthly total is the number of rows created in that month. The
    ``creationDate`` column itself is not part of the result.

    The input frame is left untouched; all work happens on a derived frame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``creationDate`` column that ``pd.to_datetime`` can
        parse.

    Returns
    -------
    pandas.DataFrame
        Column sums indexed by a two-level (year, month) index.
    """
    created = pd.to_datetime(dataframe["creationDate"])
    counted = (
        dataframe
        .drop(columns="creationDate")
        .assign(questions=1)
        # A Series key is used by position and names the new index.
        .set_index(created)
    )
    return counted.groupby([counted.index.year, counted.index.month]).sum()

In [None]:
# Replace each raw question table by its per-month aggregation.
androidQuestions, awsQuestions, azureQuestions, springQuestions = (
    groupBy(rawQuestions)
    for rawQuestions in (androidQuestions, awsQuestions, azureQuestions, springQuestions)
)

In [None]:
# Turn the monthly question counts into running totals, frame by frame.
for monthlyQuestions in (androidQuestions, awsQuestions, azureQuestions, springQuestions):
    monthlyQuestions["questions"] = monthlyQuestions["questions"].cumsum()

Merging metrics and questions

In [None]:
def merging(dataframe1, dataframe2):
    """Join two frames column-wise and fill the gaps.

    The frames are concatenated along the columns (rows are aligned on the
    index). Missing values are first forward-filled down each column; values
    that are still missing, i.e. those before a column's first entry, become 0.

    Parameters
    ----------
    dataframe1, dataframe2 : pandas.DataFrame
        Frames to combine.

    Returns
    -------
    pandas.DataFrame
        The combined frame without missing values.
    """
    combined = pd.concat([dataframe1, dataframe2], axis=1)
    # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
    return combined.ffill().fillna(0)

In [None]:
# Combine metrics and questions per framework.
# NOTE(review): android is the only frame that keeps its (2020, 9) row;
# the other three drop it -- confirm this difference is intentional.
android = merging(androidSamples, androidQuestions)
aws, azure, spring = (
    merging(metrics, questions).drop((2020, 9))
    for metrics, questions in (
        (awsSamples, awsQuestions),
        (azureSamples, azureQuestions),
        (springSamples, springQuestions),
    )
)

Saving

In [None]:
def save(dataframe, framework):
    """Write ``dataframe`` as CSV to the file ``<framework>.csv``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to store.
    framework : str
        File name without the ``.csv`` suffix.
    """
    destination = framework + ".csv"
    dataframe.to_csv(destination)

In [None]:
# Persist every merged frame next to the notebook as <framework>.csv.
save(dataframe=android, framework="android")
save(dataframe=aws, framework="aws")
save(dataframe=azure, framework="azure")
save(dataframe=spring, framework="spring")