In [1]:
import os
import time
import requests
import pandas as pd
import numpy as np
import csv

# SonarQube Script

In [8]:
def doesExist(linkApache, linkNotApache, headers):
    '''
    Returns whichever of the two issue-search links reports a non-zero 'total'.

    The 'apache_'-prefixed link is tried first; the unprefixed link is only
    requested when the first one reports no results.

    PARAMS: - linkApache: Issue-search URL whose project key carries the 'apache_' prefix.
            - linkNotApache: Issue-search URL whose project key has no 'apache_' prefix.
            - headers: HTTP headers sent with each API request.

    RETURNS: The first link whose JSON response has a non-zero 'total',
             or "" when neither link does.
    '''
    for link in (linkApache, linkNotApache):
        response = requests.get(link, headers=headers)

        try:
            payload = response.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML error page): treat as "no results".
            continue

        # An error payload has no 'total' key; treat it the same as zero results.
        if isinstance(payload, dict) and payload.get('total', 0) != 0:
            return link

    # The project doesn't exist in SQ under either key.
    return ""

In [12]:
def sonarCrawler(token, projectfullname, sonarQubeTable):
    '''
    Downloads the issues of one project that were created before 2019-07-01
    and appends them as rows to the CSV file.

    PARAMS: - token: API token placed in the Authorization header.
            - projectfullname: Project name provided by projects API in SQ.
            - sonarQubeTable: CSV path for results (rows are appended).

    RETURNS: True when at least one non-empty page of issues was fetched,
             False otherwise (including when the project is not found in SQ).

    RAISES: requests.HTTPError when a page request returns an HTTP error status.
    '''
    # NOTE(review): the 'token <value>' scheme is kept as in the original;
    # confirm it is the scheme the SonarCloud Web API expects.
    headers = {'Authorization': f'token {token}'}

    # Last page that is requested. Presumably mirrors the API's cap on the
    # number of retrievable results -- TODO confirm.
    maxPages = 100

    issueCount = 0
    projectissuedataitems = []

    # Only issues that got created before 19/07/01 will be considered as that will be the start of the control window.
    linkApache = f"https://sonarcloud.io/api/issues/search?projects=apache_{projectfullname}&createdBefore=2019-07-01"
    linkNotApache = f"https://sonarcloud.io/api/issues/search?projects={projectfullname}&createdBefore=2019-07-01"

    # Checks if the project exists in SQ
    projectLink = doesExist(linkApache, linkNotApache, headers)

    if projectLink == "": # The project doesn't exist in SQ.
        print(f"Project: {projectfullname} doesn't use sonarQube")
        return False

    for page in range(1, maxPages + 1):

        # API call for the project's issues through pagination specified in the URL link.
        theResult = requests.get(f"{projectLink}&p={page}", headers=headers)
        # Fail with a clear HTTP error instead of a KeyError on 'issues' below.
        theResult.raise_for_status()
        theItemListPerPage = theResult.json()['issues']

        # An empty page means every issue has been read.
        if not theItemListPerPage:
            break

        print(f"{projectfullname} page {page}")
        for item in theItemListPerPage:
            updateDate = item.get('updateDate')
            projectissuedataitems.append({
                'project_name': item['project'],
                'issue_id': item['key'],
                'issue': item.get('message'),
                'created_at': pd.to_datetime(item['creationDate'], utc=True),
                # A missing update date is stored as NaN.
                'updated_at': np.nan if updateDate is None else pd.to_datetime(updateDate, utc=True),
                'issueType': item['type'],
                'severity': item['severity'],
            })
        issueCount += len(theItemListPerPage)

    print(f"Total issues scanned: {issueCount} for project {projectfullname}")

    outputFeatures = ['project_name', 'issue_id', 'created_at', 'updated_at', 'issueType', 'severity']

    # All issues collected for the current project are appended to the CSV file,
    # opening the file once instead of once per row.
    if projectissuedataitems:
        with open(sonarQubeTable, "a", encoding='utf-8', newline='') as file:
            writer = csv.writer(file, delimiter=',')
            writer.writerows([x[y] for y in outputFeatures] for x in projectissuedataitems)

    return issueCount > 0

# NOTE: A project is only included in the dataset if it registered issue activity before 07-2019.

In [10]:
def sonarProjectCrawler(csvFilePath, sonarQubeToken, organization):
    '''
    Lists the projects of an organization in SQ and runs the scraping function
    sonarCrawler on each of them, collecting the issues in one CSV file.

    PARAMS: - csvFilePath: Path in the disk for the csv file (overwritten on every call).
            - sonarQubeToken: SQ token.
            - organization: In our case, apache.

    RETURNS: List of the unique values found in the 'project_name' column of the CSV.

    NOTE: We do not loop over different pages as APACHE so far only has 418 projects registered in SQ.
    '''
    # NOTE(review): when the GITHUB_TOKEN environment variable is set it takes
    # precedence over sonarQubeToken -- confirm this is intended.
    token = os.getenv('GITHUB_TOKEN', sonarQubeToken)
    headers = {'Authorization': f'token {token}'}

    # API call for the list of projects.
    request = requests.get(f"https://sonarcloud.io/api/components/search?organization={organization}&qualifiers=TRK&p=1&ps=500", headers=headers)
    requestJson = request.json()

    # Creation of the CSV file. The header goes to the path passed in as a
    # parameter, so the function does not depend on a global variable.
    features = ['project_name', 'issue_id', 'creation_date', 'lastUpdate_date', 'issueType', 'severity']
    with open(csvFilePath, "w", encoding='utf-8', newline='') as file:
        writer = csv.writer(file, delimiter=',')
        writer.writerow(features)

    # Execution of the issue scraping through the list of projects.
    for apacheProject in requestJson['components']:

        # Pause only after projects for which issues were actually downloaded.
        if sonarCrawler(token, apacheProject['key'], csvFilePath):
            time.sleep(15)

    # Time for all the data to be stored.
    time.sleep(10)
    projectsSQ = pd.read_csv(csvFilePath)
    projectNames = list(projectsSQ.project_name.unique())

    return projectNames

In [13]:
# Crawl configuration: organization, API token and location of the output CSV.
organization = 'apache'
sonarQubeToken = ""
sonarQubeTable = "/home/mikel/Desktop/project1/sonarQubeData/sonarQubeIssueDatesFixed.csv"

# Download the issues of every project of the organization into the CSV.
apacheProjects = sonarProjectCrawler(
    csvFilePath=sonarQubeTable,
    sonarQubeToken=sonarQubeToken,
    organization=organization,
)

# So far, this pipeline yields 251 projects that registered some issue activity before 07/19.

Project: commons-statistics doesn't use sonarQube
commons-numbers page 1
Total issues scanned: 14 for project commons-numbers
commons-geometry page 1
Total issues scanned: 7 for project commons-geometry
Project: apache_sling-org-apache-sling-bnd-plugin-headers-parameters-remove doesn't use sonarQube
Project: directory-api-parent doesn't use sonarQube
cxf page 1
cxf page 2
cxf page 3
cxf page 4
cxf page 5
cxf page 6
cxf page 7
cxf page 8
cxf page 9
cxf page 10
cxf page 11
cxf page 12
cxf page 13
cxf page 14
cxf page 15
cxf page 16
cxf page 17
cxf page 18
cxf page 19
cxf page 20
cxf page 21
cxf page 22
cxf page 23
cxf page 24
cxf page 25
cxf page 26
cxf page 27
cxf page 28
cxf page 29
cxf page 30
cxf page 31
cxf page 32
cxf page 33
cxf page 34
cxf page 35
cxf page 36
cxf page 37
cxf page 38
cxf page 39
cxf page 40
cxf page 41
cxf page 42
cxf page 43
cxf page 44
cxf page 45
cxf page 46
cxf page 47
cxf page 48
cxf page 49
cxf page 50
cxf page 51
cxf page 52
cxf page 53
cxf page 54
cxf page