## GAP Data Analytics, Package Actions

This Jupyter Notebook investigates tests, actions and workflows for the GAP packages hosted on GitHub.

In [None]:
# Import required libraries and packages
import os
import sys
import re
import requests
import json
from collections import Counter

# Get current working directory and append parent directory for module imports
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
sys.path.append(parent_dir)

# Import modules from other project scripts
from data_constants import *

In [None]:
# Define repositories that are public for gap-packages organisation on GitHub
# NOTE(review): `g` and `ORG_NAME_PACKAGES` are not defined in this notebook;
# presumably both come from the `from data_constants import *` line above and
# `g` is an authenticated GitHub API client - confirm in data_constants.
org = g.get_organization(ORG_NAME_PACKAGES)
# `repos` is the collection iterated by the analysis cells below; only
# repositories of type "public" are requested.
repos = org.get_repos(type="public")

In [None]:
# Get test information based on what is listed in GitHub Actions CI.yml files.
# Results of this cell:
#   count_repos_with_ci_file - number of repositories that have a CI.yml
#                              (any letter case) in .github/workflows
#   version_info             - list of (package name, version) pairs, one per
#                              distinct "stable-X.Y" mention in that file
count_repos_with_ci_file = 0
version_info = []

# Captures the "X.Y" part of names such as "stable-4.12". The pattern is the
# same for every repository, so it is defined once outside the loop.
pattern = r"stable-(\d+\.\d+)"

# Seconds to wait for the CI file download. Without a timeout a single stalled
# connection would block the whole scan indefinitely.
download_timeout = 30

for repo in repos:
    repo_name = repo.name

    try:
        # Check if the repository has a .github directory at its root
        contents = repo.get_contents("")
        has_workflows = any(content.name == ".github" and content.type == "dir" for content in contents)
        if not has_workflows:
            continue

        # A directory listing comes back as a list; anything else is skipped
        workflows_contents = repo.get_contents(".github/workflows")
        if not isinstance(workflows_contents, list):
            continue

        # Locate the CI file in a single pass (file name compared case-insensitively)
        ci_file = next((file for file in workflows_contents if file.name.lower() == "ci.yml"), None)
        if ci_file is None:
            continue
        count_repos_with_ci_file += 1

        # Get the contents of the CI file; a failed download is reported by the
        # handler below instead of being parsed as if it were the CI file
        response = requests.get(ci_file.download_url, timeout=download_timeout)
        response.raise_for_status()
        ci_file_contents = response.text

        # Extract version numbers from the CI file contents
        matches = re.findall(pattern, ci_file_contents)

        # Add version info to the list along with the package name. A version
        # that is mentioned several times in one file is recorded only once
        # (first-seen order kept), so later per-version counts are per package
        # rather than per mention.
        for version in dict.fromkeys(matches):
            version_info.append((repo_name, version))

    except Exception as e:
        # Best-effort scan: report the failing repository and carry on
        print(f"Error occurred while analyzing repository '{repo_name}': {str(e)}")

print(f"Number of repositories with 'CI.yml' file: {count_repos_with_ci_file}")

# Print the versions each package was tested on in the CI file
if version_info:
    print("Versions each package was tested on in the 'CI.yml' files:")
    for package, version in version_info:
        print(f"Package: {package} | Version: {version}")
else:
    print("No version info found in 'CI.yml' files.")

In [None]:
# Rank the versions found in the CI.yml files by how often they occur and
# list the packages recorded for each one
if not version_info:
    print("No version info found in 'CI.yml' files.")
else:
    print("Version info found in 'CI.yml' files:")

    # Tally the occurrences of each version
    version_counts = Counter(ver for _, ver in version_info)

    # Highest count first; versions with equal counts keep first-seen order
    sorted_versions = version_counts.most_common()

    # Collect the package names per version in one pass over the data
    packages_by_version = {}
    for package, ver in version_info:
        packages_by_version.setdefault(ver, []).append(package)

    for version, count in sorted_versions:
        packages = packages_by_version[version]
        package_list = ", ".join(packages)
        print(f"Version: {version} | Count: {count} | Packages: {package_list}")

In [None]:
# Get test information based on what is listed in the PackageInfo.g files.
# Confirm that all repositories with a PackageInfo.g file also have
# dependencies, as this is where test info would be.
# Results of this cell:
#   count_repos_with_packageinfo     - repositories with a PackageInfo.g at the root
#   count_packages_with_dependencies - those whose file contains the text "Dependencies"
#   gap_versions                     - list of (package name, version) pairs
count_repos_with_packageinfo = 0
count_packages_with_dependencies = 0

# List to hold the tested GAP versions
gap_versions = []

# Captures the trailing run of digits and dots inside the quoted value of a
# `GAP := "..."` entry, e.g. "4.10" from `GAP := ">= 4.10"`. Defined once
# because it is the same for every repository.
version_pattern = r'GAP\s+:=\s+"[^"]*?([\d.]+)"'

for repo in repos:
    repo_name = repo.name

    try:
        # Check if the repository has a PackageInfo.g file
        contents = repo.get_contents("", ref="HEAD")
        packageinfo_file = next((file for file in contents if file.name.lower() == "packageinfo.g"), None)
        if packageinfo_file:
            count_repos_with_packageinfo += 1

            # Read the contents of the PackageInfo.g file. Undecodable bytes
            # are replaced rather than raising, so a file that is not valid
            # UTF-8 is still analysed; the text searched for below is plain
            # ASCII and is unaffected by the replacement.
            packageinfo_content = packageinfo_file.decoded_content.decode("utf-8", errors="replace")

            # Check if the PackageInfo.g file contains the "Dependencies" section
            if "Dependencies" in packageinfo_content:
                count_packages_with_dependencies += 1

            # Extract the tested GAP version from the PackageInfo.g file
            version_match = re.search(version_pattern, packageinfo_content)
            if version_match:
                gap_version = version_match.group(1)
                gap_versions.append((repo_name, gap_version))

    except Exception as e:
        # Best-effort scan: report the failing repository instead of hiding
        # the error, then continue with the next one
        print(f"Error occurred while analyzing repository '{repo_name}': {str(e)}")

print(f"Number of repositories with 'PackageInfo.g' file: {count_repos_with_packageinfo}")
print(f"Number of packages with 'Dependencies' section: {count_packages_with_dependencies}")

if gap_versions:
    print("Tested GAP Versions:")
    for package, version in gap_versions:
        print(f"Package: {package} | Tested GAP Version: {version}")
else:
    print("No Tested GAP Versions found in 'PackageInfo.g' files.")

In [None]:
# Display alternative 1: Export collected data to JSON file to store them for later use and better overview
# Create a dictionary to hold the version testing data. Each package maps to
# {"CI_Version": [...], "PackageInfo_Version": [...]}.
version_testing_data = {}

# Add version info from CI.yml files to the dictionary
for package, version in version_info:
    # setdefault creates the empty record the first time a package is seen
    entry = version_testing_data.setdefault(package, {"CI_Version": [], "PackageInfo_Version": []})
    entry["CI_Version"].append(version)

# Add GAP version info from PackageInfo.g files to the dictionary
for package, version in gap_versions:
    entry = version_testing_data.setdefault(package, {"CI_Version": [], "PackageInfo_Version": []})
    entry["PackageInfo_Version"].append(version)

# Define the path for the JSON file (relative to the current working directory)
json_path = os.path.join("collected_data", "version_testing.json")

# Make sure the output directory exists; otherwise open() below fails with
# FileNotFoundError on a fresh checkout
os.makedirs(os.path.dirname(json_path), exist_ok=True)

# Write the data to the JSON file
with open(json_path, "w", encoding="utf-8") as json_file:
    json.dump(version_testing_data, json_file, indent=4)

print(f"Version testing data exported to: {json_path}")