## Distribution Monitoring Data Retrieval

This Jupyter Notebook can be used to extract metrics for monitoring purposes, mainly based on the PackageDistro repository from the gap-system organisation on GitHub. As the distribution of GAP is managed through this repository, pulling and analysing data from its current release workflows can be useful in obtaining an overview for the purpose of redistribution. Because the PackageDistro repository contains the metadata of all GAP packages, information on GAP package distribution can be retrieved from it automatically.

In [None]:
# Import required modules and libraries
import os
import sys
import json
import requests
from datetime import datetime
from github import RateLimitExceededException

# Put the parent of the notebook's working directory on the module search
# path so that the project-script imports that follow can be resolved
cwd = os.getcwd()
parent_dir = os.path.split(cwd)[0]
sys.path.append(parent_dir)

# Import modules from other project scripts
from data_constants import *


### Updates to Package Distribution

To check for detailed updates to packages distributed under GAP, the PackageDistro repository scans for updates to GAP packages hosted on GitHub. As such, extracting, analysing and combining this information can provide data on which packages could and should be considered for a new release of the system. By comparing the versions in the latest release to what is currently in the repository, the user can obtain some predictions as to what the next released GAP version will look like. Running the script will export the data to a 'monitoring_data.json' file in the 'collected_data' folder, displaying the results of the generated data per package.

In [None]:
# Notebook-wide handles shared by the cells below.
# `g`, ORG_NAME_SYSTEM and DISTRO_REPO are not defined in this notebook;
# presumably they are provided by the data_constants star import -- verify there.
org = g.get_organization(ORG_NAME_SYSTEM)
repo = org.get_repo(DISTRO_REPO)
# Label names that export_monitoring_data() hands to get_open_pull_requests()
labels = [
    "automatic pr",
    "new package",
    "update package",
]


##### Functions to Retrieve Monitoring Metrics

In [None]:
def get_latest_release(timeout: float = 30) -> tuple:
    """Get the latest release information for GAP and its associated commit,
    based on the PackageDistro repository in the GAP-system organization on GitHub.

    Args:
        timeout (float, optional): Seconds to wait for the GitHub API before
            giving up. Defaults to 30. Without a timeout, `requests` may
            block indefinitely on an unresponsive connection.

    Returns:
        tuple: The latest release version of GAP (the release's "name" field)
            and its associated commit (the release's "target_commitish" field).
            Either element is None if the field is missing from the response.

    Raises:
        requests.exceptions.RequestException: If there is an error making the HTTP request
            or the server answers with an error status.
    """
    # NOTE(review): GitHub documents "target_commitish" as a branch name or a
    # commit SHA; if releases are cut from a branch this is the branch name,
    # not a pinned commit -- confirm this is the intended reference.
    repo_url = "https://api.github.com/repos/gap-system/PackageDistro/releases/latest"

    try:
        response = requests.get(repo_url, timeout=timeout)
        response.raise_for_status()
        latest_release = response.json()
    except requests.exceptions.RequestException as e:
        # Chain explicitly so the original error stays visible as the cause
        raise requests.exceptions.RequestException(
            f"Error retrieving latest release: {str(e)}"
        ) from e

    latest_version = latest_release.get("name")
    version_commit = latest_release.get("target_commitish")
    return latest_version, version_commit
    

In [None]:
def get_version_from_meta(meta_json_url: str, timeout: float = 30) -> str:
    """Get the listed version from a meta.json file.

    Args:
        meta_json_url (str): The URL to the meta.json file.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Defaults to 30.

    Returns:
        str: The version listed in the meta.json file under the "Version" key,
            or None if the file has no such key.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out,
            or the server answers with an error status (e.g. the file does not exist).
    """
    response = requests.get(meta_json_url, timeout=timeout)
    # Fail with a clear HTTP error instead of trying to parse an error page as JSON
    response.raise_for_status()
    meta_json = response.json()
    return meta_json.get("Version")


In [None]:
def get_meta(branch: str, timeout: float = 30) -> list:
    """Get all meta.json files and versions based on the branch.

    Lists the entries of the `packages` folder of the PackageDistro repository
    at the given ref and, for every directory found there, reads the version
    from that package's meta.json file.

    Args:
        branch (str): The branch name to retrieve meta.json files from.
        timeout (float, optional): Seconds to wait for the folder listing
            before giving up. Defaults to 30.

    Returns:
        list: List of tuples containing package names and their corresponding versions.

    Raises:
        requests.exceptions.RequestException: If a request fails, times out,
            or the server answers with an error status.
    """
    api_url = f"https://api.github.com/repos/gap-system/PackageDistro/contents/packages?ref={branch}"
    response = requests.get(api_url, timeout=timeout)
    # An error response (unknown ref, rate limit, ...) carries a JSON object
    # rather than a folder listing; stop here instead of iterating over its keys
    response.raise_for_status()
    package_folders = response.json()

    meta_json_data = []
    for folder in package_folders:
        # Only directories are packages; skip plain files in the folder
        if folder.get("type") != "dir":
            continue
        package_name = folder.get("name")
        meta_json_url = f"https://raw.githubusercontent.com/gap-system/PackageDistro/{branch}/packages/{package_name}/meta.json"
        version = get_version_from_meta(meta_json_url)
        meta_json_data.append((package_name, version))

    return meta_json_data


In [None]:
def get_open_pull_requests(labels: list = None, timeout: float = 30) -> list:
    """Retrieve open pull requests with specified labels.

    The GitHub "List pull requests" endpoint does not document a label query
    parameter, so all open pull requests are fetched (following pagination)
    and the label filter is applied locally.

    Args:
        labels (list, optional): Label names to filter pull requests by. A pull
            request matches when it carries at least one of these labels.
            Defaults to None, which (like an empty list) disables filtering.
        timeout (float, optional): Seconds to wait for each API request before
            giving up. Defaults to 30.

    Returns:
        list: Pull requests matching the specified labels.

    Raises:
        requests.exceptions.RequestException: If there is an error making the HTTP request
            or the server answers with an error status.
    """
    api_url = "https://api.github.com/repos/gap-system/PackageDistro/pulls"
    wanted_labels = set(labels or [])

    # Ask for the largest page size and keep following the "next" link so the
    # result is not silently truncated to the first page
    params = {
        "state": "open",
        "per_page": 100
    }

    pull_requests = []
    next_url = api_url
    while next_url:
        response = requests.get(next_url, params=params, timeout=timeout)
        response.raise_for_status()
        pull_requests.extend(response.json())
        next_url = response.links.get("next", {}).get("url")
        # The "next" URL already contains the full query string
        params = None

    if not wanted_labels:
        return pull_requests

    return [
        pr for pr in pull_requests
        if wanted_labels.intersection(label.get("name") for label in pr.get("labels", []))
    ]


In [None]:
def _package_names_from_pull_requests(pull_requests: list) -> list:
    """Return the sorted, de-duplicated second segment of each pull request's head branch name."""
    package_names = set()
    for pr in pull_requests:
        segments = pr["head"]["ref"].split("/")
        # Branch names without a "/" carry no second segment to use as a
        # package name; skip them instead of failing with an IndexError
        if len(segments) > 1:
            package_names.add(segments[1])
    # Sorted so that repeated exports produce the same file for the same data
    return sorted(package_names)


def export_monitoring_data() -> None:
    """Export the monitoring data to a JSON file, while instructing the program to sleep for the
    duration of the time it takes for the GitHub API calls limit to reset in the event that it runs out.

    The exported file 'collected_data/monitoring_data.json' contains:
        - 'packages_with_different_versions': packages whose version on the main
          branch differs from the version at the latest release.
        - 'all_previous_and_maybe_next': package names taken from the head
          branches of all open pull requests.
        - 'previous_and_maybe_next_labels': package names taken from the head
          branches of open pull requests filtered by the global `labels`.

    Args:
        None.

    Returns:
        None.
    """
    while True:
        try:
            # Get latest release for GAP, its commit and meta.json files and versions for the latest release
            latest_meta = get_meta(get_latest_release()[1])

            # Get meta.json files and versions for the main branch
            main_meta = get_meta("main")

            # Compare versions and collect the packages whose versions differ
            # For the packages with different version, the package in the main branch will be the new version in the next release
            main_versions = dict(main_meta)
            packages_with_different_versions = [
                {
                    'package_name': package_name,
                    'latest_version': latest_version,
                    'main_branch_version': main_versions[package_name]
                }
                for package_name, latest_version in latest_meta
                if package_name in main_versions and main_versions[package_name] != latest_version
            ]

            # Get all packages that might be in the next release based on open PRs, regardless of labels
            all_maybe_next = _package_names_from_pull_requests(get_open_pull_requests())

            # Find the packages in unmerged PRs, as these may be in the next release but have not yet been merged
            # Only retrieve PRs with specified labels and extract the package names, as these labels indicate release relation
            in_latest_release_and_maybe_next = _package_names_from_pull_requests(
                get_open_pull_requests(labels)
            )

            data_folder = "collected_data"
            data = {
                'packages_with_different_versions': packages_with_different_versions,
                'all_previous_and_maybe_next': all_maybe_next,
                'previous_and_maybe_next_labels': in_latest_release_and_maybe_next
            }

            # Create the output folder on first use so the export does not fail on a fresh checkout
            os.makedirs(data_folder, exist_ok=True)
            file_path = os.path.join(data_folder, "monitoring_data.json")

            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=4)

            print("Distro data has been exported to the 'monitoring_data' file in the 'collected_data' folder.")
            break

        except RateLimitExceededException:
            # NOTE(review): the helper functions called above issue their HTTP
            # calls through `requests`, which does not raise PyGithub's
            # RateLimitExceededException -- confirm this handler can trigger.
            remaining_requests, _ = g.rate_limiting
            reset_time = g.rate_limiting_resettime
            if remaining_requests < 100:
                wait_until_reset(reset_time)


##### Get and Export Monitoring Metrics

In [None]:
# Run the export: collects the version differences between the latest release
# and the main branch together with the package names found in open pull
# requests, and writes them to 'collected_data/monitoring_data.json'
export_monitoring_data()
