# Configuration

Settings to be configured per individual. 

TODO: configure these settings outside of the notebook so they don't mess with source control. (environment variables?)

In [None]:
# get an auth token using the steps here: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token
# Preferred: export it as the GITHUB_TOKEN environment variable before starting Jupyter, so the
# secret never appears in the notebook.
# Alternatively set it in this cell, then delete it to avoid accidentally committing it.
import os

authtoken = ''
if not authtoken:
    # Nothing was pasted above: fall back to the environment (empty string when unset).
    authtoken = os.environ.get('GITHUB_TOKEN', '')

In [None]:
import os

# Folder where downloaded test-result files are kept between notebook runs.
resultsDownloadLocation = 'c:\\temp\\testResults'
# exist_ok=True creates the folder only when missing, without a separate existence check,
# so re-running this cell is always safe.
os.makedirs(resultsDownloadLocation, exist_ok=True)


# Loading Data

In [None]:
import requests

def getRuns():
    """Fetch recent runs of the aggregate-test-results workflow from the GitHub REST API.

    The request asks for up to 30 runs (`per_page=30`) and authenticates with the
    notebook-level `authtoken`.

    Returns:
        The `workflow_runs` list from the response JSON.

    Raises:
        RuntimeError: if the API answers with any status code other than 200.
    """
    runsResponse = requests.get(
        "https://api.github.com/repos/microsoft/vscode-jupyter/actions/workflows/aggregate-test-results.yml/runs?per_page=30",
        headers={
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {authtoken}",
            },
    )

    if runsResponse.status_code != 200:
        print(f"Error {runsResponse.status_code}")
        # Raising a bare string is itself a TypeError in Python 3; raise a real exception.
        raise RuntimeError("Error getting runs")

    # Parse the body once and reuse it for both the message and the return value.
    workflowRuns = runsResponse.json()["workflow_runs"]
    print(f"Found {len(workflowRuns)} runs")

    return workflowRuns

# Fetch the workflow runs once; the artifact download loop later iterates over this list.
runs = getRuns()

In [None]:
from datetime import datetime

from datetime import timezone

# Map of file name -> last-modified time for every result file already on disk.
# Times are stored as naive UTC datetimes because shouldDownload compares them with GitHub's
# "...Z" (UTC) timestamps parsed into naive datetimes; datetime.fromtimestamp without a tz
# would return local time and skew that comparison by the machine's UTC offset.
alreadyDownloaded = {}
for file in os.listdir(resultsDownloadLocation):
    path = os.path.join(resultsDownloadLocation, file)
    lastModified = datetime.fromtimestamp(os.path.getmtime(path), tz=timezone.utc).replace(tzinfo=None)
    alreadyDownloaded[file] = lastModified

print(f"Already downloaded {len(alreadyDownloaded)} result files, they will be skipped unless there is a newer version")

def shouldDownload(name, timestamp):
    """Decide whether the result file `name` from a run at `timestamp` should be saved.

    Args:
        name: file name, used as the key into the module-level `alreadyDownloaded` dict.
        timestamp: run time as a string in the form "%Y-%m-%dT%H:%M:%SZ".

    Returns:
        True when no version of `name` is known yet or the known version is older than
        `timestamp`; False otherwise.

    Side effects:
        Every accepted download is recorded in `alreadyDownloaded`, including names seen for
        the first time, so a later call for the same name with an older timestamp is rejected
        instead of overwriting the newer file.
    """
    fileDate = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ")
    knownDate = alreadyDownloaded.get(name)
    if knownDate is not None and not (knownDate < fileDate):
        return False
    alreadyDownloaded[name] = fileDate
    return True
    

In [None]:
import zipfile
import json
import io

def getArtifactData(id):
    """Download the zip archive of one workflow artifact.

    Args:
        id: artifact id, inserted into the GitHub REST API artifact URL.

    Returns:
        The raw response body (bytes) of the `/zip` endpoint.

    Raises:
        RuntimeError: if the API answers with any status code other than 200, so an error
            body is never handed to the caller as if it were zip data.
    """
    testResultsResponse = requests.get(
        f"https://api.github.com/repos/microsoft/vscode-jupyter/actions/artifacts/{id}/zip",
        headers={
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {authtoken}",
        },
    )

    if testResultsResponse.status_code != 200:
        print(f"Error {testResultsResponse.status_code} getting artifact {id}")
        raise RuntimeError(f"Error {testResultsResponse.status_code} getting artifact {id}")

    return testResultsResponse.content

def saveResultsFile(zipData, timeStamp):
    """Extract result files from an artifact zip into `resultsDownloadLocation`.

    Args:
        zipData: bytes of a zip archive.
        timeStamp: timestamp string passed through to `shouldDownload` for each member.

    Each archive member is written only when `shouldDownload(name, timeStamp)` returns True.
    Returns None.
    """
    with zipfile.ZipFile(io.BytesIO(zipData)) as artifact:
        for name in artifact.namelist():
            print(f'checking {name} at {timeStamp}')
            if shouldDownload(name, timeStamp):
                content = artifact.read(name)
                print(f"    saving {name}")
                # os.path.join picks the right separator for the current OS instead of a
                # hard-coded backslash.
                with open(os.path.join(resultsDownloadLocation, name), 'wb') as f:
                    f.write(content)

# Walk every run, list its artifacts and save any result files that are new or updated.
print(f"Getting artifacts from {len(runs)} runs")
for run in runs:
    artifactUrl = run["artifacts_url"]
    print(f"Getting artifacts from {artifactUrl} from {run['created_at']}")
    # Send the token here as well, matching the other GitHub requests in this notebook.
    artifactsResponse = requests.get(
        artifactUrl,
        headers={
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {authtoken}",
        },
    )

    # Fail with the HTTP status rather than a KeyError on a body that has no "artifacts" key.
    if artifactsResponse.status_code != 200:
        raise RuntimeError(
            f"Error {artifactsResponse.status_code} listing artifacts from {artifactUrl}"
        )

    artifacts = artifactsResponse.json()["artifacts"]
    for artifact in artifacts:
        rawData = getArtifactData(artifact["id"])
        # saveResultsFile returns nothing, so its result is not kept.
        saveResultsFile(rawData, run["created_at"])

In [None]:
import pandas as pd

# Read every downloaded result file into its own DataFrame, then combine them.
testResults = []
for file in os.listdir(resultsDownloadLocation):
    # os.path.join keeps path handling consistent with the download bookkeeping cell and
    # avoids a hard-coded backslash separator.
    with open(os.path.join(resultsDownloadLocation, file), 'r') as f:
        testResults.append(pd.read_json(f))

df = pd.concat(testResults)
# strip off the time to help grouping, but keep the full value as datetime type in "datetime"
parsedDates = pd.to_datetime(df["date"])  # parse once, reuse for both columns
df["datetime"] = parsedDates
df["date"] = parsedDates.dt.date

df.head()

# Reporting

In [None]:
from datetime import date, timedelta
# Count failures per test over the last 7 days and show the 20 most frequent.
recentFailures = df[df['date'] > date.today() - timedelta(days=7)]
recentFailures = recentFailures[recentFailures['status'] == 'failed'].dropna()
# Name the aggregate column directly; this replaces the inplace rename whose None result was
# being bound to a misspelled variable.
recentFailures = recentFailures.groupby(['testName']).agg(failureCount=('testName', 'count'))

recentFailures.sort_values(by=['failureCount'], ascending=False).head(20)

In [None]:
import matplotlib.pyplot as plt

# Test to drill into; change this to inspect a different test.
testName= 'Cells from python files and the input box are executed in correct order'

testData = df.where(df['testName'] == testName).dropna()
passes = testData.where(testData['status'] == 'passed').dropna()
fails = testData.where(testData['status'] == 'failed').dropna()
totalRuns = len(passes) + len(fails)
# Avoid ZeroDivisionError when the test has no recorded passes or failures.
successRate = len(passes) / totalRuns if totalRuns else float('nan')
print(f"'{testName}' failed {len(fails)} times between {testData['date'].min()} and {testData['date'].max()}")
print(f"Success rate: {successRate}")

# Boolean indicator columns so that summing per day yields pass/fail counts.
testData['fail'] = testData['status'] == 'failed'
testData['pass'] = testData['status'] == 'passed'

# Sum only the indicator columns: summing every column also tries to add up the "datetime"
# column, which pandas 2.x rejects with a TypeError.
passfailcounts = testData.groupby(['date'])[['pass', 'fail']].sum()

passfailcounts.sort_values(by=['date'], ascending=False)

# line chart not working
# ax=testData.plot(kind='line', x='date', y='pass', color='Green')

# ax2=testData.plot(kind='line', x='date', y='fail', secondary_y=True,color='Red', ax=ax)

# ax.set_ylabel('Passes')
# ax2.set_ylabel('Failures')
# plt.tight_layout()
# plt.show()

In [None]:
# Ten most recent failed runs of the selected test, newest first, printed with their run URL.
failedRows = testData.where(testData['status'] == 'failed').dropna()
reportColumns = ['date', 'status', 'scenario', 'runUrl']
failures = failedRows[reportColumns].sort_values(by=['date'], ascending=False).head(10)

for rowIndex, failure in failures.iterrows():
    print(f"{failure['date']} - {failure['scenario']}\n{failure['runUrl']}")