In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
os.chdir("/content/drive/MyDrive/VEM2021-Matheus")

In [4]:
import pandas as pd

In [7]:
# Read one CSV per framework from the "1-ExtrairDadosDePR" folder; the frames
# are bound to the names on the left in the same order as the paths below.
android, aws, azure, spring = (
    pd.read_csv(csv_path)
    for csv_path in (
        "1-ExtrairDadosDePR/android.csv",
        "1-ExtrairDadosDePR/aws.csv",
        "1-ExtrairDadosDePR/azure.csv",
        "1-ExtrairDadosDePR/spring.csv",
    )
)

In [8]:
# Stack the four per-framework frames into a single frame with a fresh
# 0..n-1 row index.
# NOTE(review): the name `all` shadows Python's builtin all() for the rest of
# the notebook; it is kept because later cells refer to it.
all = pd.concat(
    (android, aws, azure, spring),
    ignore_index=True,
)

## Number of Code Samples

In [9]:
android["sample"].value_counts()

googlesamples/easypermissions                                           114
googlesamples/google-services                                           101
googlesamples/mlkit                                                      57
googlesamples/unity-jar-resolver                                         57
googlearchive/androidtv-Leanback                                         51
                                                                       ... 
googlearchive/android-RepeatingAlarm                                      1
googlearchive/android-ui-toolkit-demos                                    1
googlearchive/android-RuntimePermissionsBasic                             1
googlearchive/solutions-apache-hive-and-pig-on-google-compute-engine      1
googlesamples/maps-deckgl-scatterplot-example                             1
Name: sample, Length: 204, dtype: int64

In [None]:
aws["sample"].value_counts()

aws-samples/aws-greengrass-lambda-functions       4310
aws-samples/iot-reference-architectures           2489
aws-samples/eks-workshop                           855
aws-samples/aws-secure-environment-accelerator     629
aws-samples/aws-reinvent-2019-trivia-game          400
                                                  ... 
aws-samples/jsii-native-python                       1
aws-samples/kda-flink-app-autoscaling                1
aws-samples/eb-wif-sample                            1
aws-samples/amazon-congnito-user-pool-exporter       1
aws-samples/aws-mnpbatch-template                    1
Name: sample, Length: 1221, dtype: int64

In [None]:
azure["sample"].value_counts()

Azure-Samples/MyDriving                                                             413
Azure-Samples/openhack-devops-proctor                                               320
Azure-Samples/Cognitive-Services-Voice-Assistant                                    308
Azure-Samples/azure-cli-samples                                                     294
Azure-Samples/openhack-devops-team                                                  269
                                                                                   ... 
Azure-Samples/batchai-dotnet-run-batchai-job                                          1
Azure-Samples/app-service-java-cosmosdb-mongodb-apis                                  1
Azure-Samples/app-service-dotnet-configure-deployment-sources-for-web-apps-async      1
Azure-Samples/iot-edge-industrial-configs                                             1
Azure-Samples/app-service-java-manage-authentication-for-web-apps                     1
Name: sample, Length: 1386, dtype: int64

In [None]:
spring["sample"].value_counts()

spring-guides/tut-spring-security-and-angular-js    76
spring-guides/gs-spring-boot                        76
spring-guides/gs-crud-with-vaadin                   74
spring-guides/gs-rest-service                       64
spring-guides/tut-spring-boot-oauth2                48
                                                    ..
spring-guides/gs-spring-cloud-task                   1
spring-guides/tut-spring-webflux-kotlin-rsocket      1
spring-guides/gs-spring-boot-kubernetes              1
spring-guides/gs-cloud-circuit-breaker               1
spring-guides/spring-guides-ci                       1
Name: sample, Length: 83, dtype: int64

In [14]:
all["sample"].value_counts()

aws-samples/aws-greengrass-lambda-functions         4310
aws-samples/iot-reference-architectures             2489
aws-samples/eks-workshop                             855
aws-samples/aws-secure-environment-accelerator       629
Azure-Samples/MyDriving                              413
                                                    ... 
Azure-Samples/hdinsight-diagnostic-scripts             1
Azure-Samples/virtual-machines-powershell-create       1
aws-samples/aws-hpc-workshops                          1
aws-samples/vpn-gateway-strongswan                     1
aws-samples/aws-application-auto-scaling-kinesis       1
Name: sample, Length: 2894, dtype: int64

## Analysis at Pull Request level

### Open vs. Closed Pull Requests

In [13]:
android["status"].value_counts()

closed    1491
open       166
Name: status, dtype: int64

In [None]:
aws["status"].value_counts()

closed    22785
open       1410
Name: status, dtype: int64

In [None]:
azure["status"].value_counts()

closed    13828
open       1685
Name: status, dtype: int64

In [None]:
spring["status"].value_counts()

closed    1427
open       134
Name: status, dtype: int64

In [15]:
all["status"].value_counts()

closed    39531
open       3395
Name: status, dtype: int64

### Merged vs. Not Merged Pull Requests (closed pull requests only)

In [16]:
# Keep only the rows whose "status" is "closed"; the merged / not-merged
# tallies in the following cells are taken over these subsets.
android_closed = android.loc[android["status"].eq("closed")]
aws_closed = aws.loc[aws["status"].eq("closed")]
azure_closed = azure.loc[azure["status"].eq("closed")]
spring_closed = spring.loc[spring["status"].eq("closed")]
all_closed = all.loc[all["status"].eq("closed")]

In [17]:
android_closed["merged"].value_counts()

True     956
False    535
Name: merged, dtype: int64

In [None]:
aws_closed["merged"].value_counts()

True     15510
False     7275
Name: merged, dtype: int64

In [None]:
azure_closed["merged"].value_counts()

True     11855
False     1973
Name: merged, dtype: int64

In [None]:
spring_closed["merged"].value_counts()

True     952
False    475
Name: merged, dtype: int64

In [18]:
all_closed["merged"].value_counts()

True     29273
False    10258
Name: merged, dtype: int64

## Analysis at code sample level

In [19]:
def set_index(dataframe):
  """Promote the "sample" column of `dataframe` to its index, in place.

  After the call the frame is indexed by the former "sample" values (the
  index is named "sample") and no longer has a "sample" column. Nothing is
  returned. Calling it a second time on the same frame raises KeyError
  because the column is gone.
  """
  # Passing the column label lets pandas move the column into the index and
  # drop it from the columns in a single step.
  dataframe.set_index("sample", inplace=True)

In [20]:
# Re-index each framework frame by its "sample" column. The frames are
# mutated in place, so this cell cannot be re-run without reloading them.
for _frame in (android, aws, azure, spring):
  set_index(_frame)

In [21]:
def calculate_stats(dataframe):
  """Add per-sample pull-request counters to `dataframe`, in place.

  `dataframe` must be indexed by sample (one row per pull request, index
  labels may repeat) and have "status", "merged" and "framework" columns.
  Four float columns are written; every row of a sample receives that
  sample's totals:

  - closed_count:     rows whose status is "closed"
  - open_count:       rows whose status is "open"
  - merged_count:     rows whose merged is True
  - not_merged_count: rows whose merged is False

  Only rows with a non-null "framework" value are counted. Nothing is
  returned.

  NOTE(review): not_merged_count covers every row with merged == False,
  whatever its status; restrict it to closed rows if that is the intent.
  """
  # The counts were originally taken with .count() on the "framework"
  # column, which skips nulls, so each condition is masked by its presence.
  has_framework = dataframe["framework"].notna()
  conditions = {
    "closed_count": dataframe["status"].eq("closed"),
    "open_count": dataframe["status"].eq("open"),
    "merged_count": dataframe["merged"].eq(True),
    # Previously this repeated the merged == True test, so the column was
    # always identical to merged_count.
    "not_merged_count": dataframe["merged"].eq(False),
  }
  for column, condition in conditions.items():
    hits = (condition & has_framework).astype("float64")
    # Summing the 0/1 hits within each index label and broadcasting the
    # total back to every row replaces the per-sample Python loop.
    totals = hits.groupby(level=0).transform("sum")
    # Assign positionally: the index may contain duplicate labels.
    dataframe[column] = totals.to_numpy()

In [22]:
# Attach the per-sample counters to each framework frame (in place).
for _frame in (android, aws, azure, spring):
  calculate_stats(_frame)

In [23]:
def aggregate(dataframe):
  """Return a new frame with one row per sample holding column means.

  Rows of `dataframe` are grouped by "sample" and every numeric (or boolean)
  column is averaged within its group. The "merge user cargo" and "merged"
  columns are removed from the result when present. The input frame is not
  modified.
  """
  # numeric_only=True is required on pandas >= 2.0, where a bare .mean()
  # raises TypeError on text columns such as "status" instead of silently
  # skipping them.
  means = dataframe.groupby(by="sample").mean(numeric_only=True)
  # errors="ignore": a column that the numeric filter already left out must
  # not turn into a KeyError here.
  return means.drop(columns=["merge user cargo", "merged"], errors="ignore")

In [24]:
android = aggregate(android)

In [None]:
aws = aggregate(aws)

In [None]:
azure = aggregate(azure)

In [None]:
spring = aggregate(spring)

In [25]:
android.to_csv("2-AgregarDadosPorCodeSample/android.csv")

In [None]:
aws.to_csv("2-AgregarDadosPorCodeSample/aws.csv")

In [None]:
azure.to_csv("2-AgregarDadosPorCodeSample/azure.csv")

In [None]:
spring.to_csv("2-AgregarDadosPorCodeSample/spring.csv")