## GAP Data Analytics, Package Actions

This Jupyter Notebook investigates tests, actions and workflows for the GAP packages hosted on GitHub.

In [None]:
# Import required libraries and packages
import os
import sys
import re
import requests
import json
from collections import Counter

# Get current working directory and append its parent directory to sys.path so
# that modules located there can be imported.
# NOTE(review): presumably `data_constants` (imported below) lives in that
# parent directory — confirm against the project layout.
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
sys.path.append(parent_dir)

# Import modules from other project scripts
from data_constants import *

In [None]:
# Define repositories that are public for gap-packages organisation on GitHub
# NOTE(review): `g` and `ORG_NAME_PACKAGES` are not defined in this notebook;
# presumably they come from the `data_constants` star import (an authenticated
# GitHub client and the organisation name) — confirm.
org = g.get_organization(ORG_NAME_PACKAGES)
repos = org.get_repos(type="public")

In [None]:
# Define function to check for a test directory and if it's empty
def check_for_tst_dir(repo):
    """Report whether *repo* has a top-level ``tst`` directory and whether it is empty.

    Args:
        repo: Object exposing ``get_contents(path)`` whose entries carry
            ``type``, ``name`` and ``path`` attributes (presumably a PyGithub
            ``Repository`` — verify against caller).

    Returns:
        A ``(tst_dir_exists, tst_dir_empty)`` tuple of booleans. The second
        value can only be ``True`` when the first one is.
    """
    root_entries = repo.get_contents("")  # listing of the repository root

    # Only the first root entry that is a directory named "tst" is considered.
    tst_entry = next(
        (entry for entry in root_entries if entry.type == "dir" and entry.name == "tst"),
        None,
    )
    if tst_entry is None:
        return False, False

    tst_listing = repo.get_contents(tst_entry.path)
    return True, len(tst_listing) == 0

In [None]:
# Define function to count repositories with test directories
def repos_with_tst_dir(repos):
    """Count the repositories in *repos* that have a top-level ``tst`` directory.

    Args:
        repos: Iterable of repository objects accepted by ``check_for_tst_dir``.

    Returns:
        The number of repositories for which ``check_for_tst_dir`` reports
        that the directory exists (empty or not).
    """
    # Index 0 of the helper's result is the "directory exists" flag.
    return sum(1 for repo in repos if check_for_tst_dir(repo)[0])

In [None]:
# Define function to count tst directories that contain at least one .tst file
def count_tst_files(repos):
    """Count repositories whose ``tst`` directory directly contains a ``.tst`` file.

    Args:
        repos: Iterable of repository objects accepted by ``check_for_tst_dir``
            that also expose ``get_contents(path)``.

    Returns:
        The number of repositories with a non-empty ``tst`` directory holding
        at least one file whose name ends in ``.tst``. Subdirectories of
        ``tst`` are not searched.
    """
    matching_dirs = 0
    for repo in repos:
        has_dir, is_empty = check_for_tst_dir(repo)
        if not has_dir or is_empty:
            continue

        entries = repo.get_contents("tst")  # listing of the tst directory
        if any(entry.type == "file" and entry.name.endswith(".tst") for entry in entries):
            matching_dirs += 1
    return matching_dirs

In [None]:
# Define function to retrieve version information from CI.yml files
def ci_version_testing(repos):
    """Collect the GAP stable-branch versions referenced in each repository's CI.yml.

    For every repository that has a ``.github`` directory, the
    ``.github/workflows`` listing is searched for a file named ``ci.yml``
    (case-insensitive). The file is downloaded and scanned for
    ``stable-X.Y`` references.

    Args:
        repos: Iterable of repository objects exposing ``name`` and
            ``get_contents(path)`` (presumably PyGithub ``Repository``
            objects — verify against caller).

    Returns:
        A tuple ``(repos_with_ci_file, ci_tested_version, repos_without_ci_tests)``:
        the number of repositories with a ci.yml file, a dict mapping
        repository name to the list of ``X.Y`` strings found (duplicates
        kept, in file order), and the names of repositories whose ci.yml
        contains no ``stable-X.Y`` reference.

    Any exception raised while analysing a repository is reported with
    ``print`` and the repository is skipped; the function itself does not raise
    for per-repository failures.
    """
    stable_version = re.compile(r"stable-(\d+\.\d+)")  # compiled once, reused per repo
    repos_with_ci_file = 0
    ci_tested_version = {}
    repos_without_ci_tests = []
    for repo in repos:
        repo_name = repo.name
        try:
            contents = repo.get_contents("")
            has_workflows = any(
                content.name == ".github" and content.type == "dir" for content in contents
            )
            if not has_workflows:
                continue

            workflows_contents = repo.get_contents(".github/workflows")
            if not isinstance(workflows_contents, list):
                continue

            # Single pass: locate the CI file instead of scanning twice.
            ci_file = next(
                (file for file in workflows_contents if file.name.lower() == "ci.yml"),
                None,
            )
            if ci_file is None:
                continue
            repos_with_ci_file += 1

            # A timeout keeps one stalled download from hanging the whole run,
            # and the status check stops an HTTP error page from being scanned
            # as if it were the workflow file.
            response = requests.get(ci_file.download_url, timeout=30)
            response.raise_for_status()

            matches = stable_version.findall(response.text)
            if matches:
                ci_tested_version[repo_name] = matches
            else:
                repos_without_ci_tests.append(repo_name)
        except Exception as e:
            # Best-effort analysis: report the failure and move on to the next repo.
            print(f"Error occurred while analyzing repository '{repo_name}': {str(e)}")
    return repos_with_ci_file, ci_tested_version, repos_without_ci_tests


In [None]:
# Define function to retrieve GAP version information from PackageInfo.g files
def pkginfo_version_testing(repos):
    """Collect the required GAP version declared in each repository's PackageInfo.g.

    Args:
        repos: Iterable of repository objects exposing ``name`` and
            ``get_contents(path, ref=...)``, whose entries carry ``name`` and
            ``decoded_content`` (presumably PyGithub objects — verify against
            caller).

    Returns:
        A tuple ``(repos_with_pkginfo_file, repos_with_dependencies, pkg_tested_version)``:
        the number of repositories with a root-level ``PackageInfo.g``
        (case-insensitive name match), how many of those files contain the
        text ``Dependencies``, and a list of ``(repo_name, gap_version)``
        pairs for files where a ``GAP := "...<version>"`` entry was found.

    Any exception raised while analysing a repository is reported with
    ``print`` and the repository is skipped.
    """
    # Captures the trailing dotted number inside the quoted value of `GAP := "..."`.
    version_pattern = re.compile(r'GAP\s+:=\s+"[^"]*?([\d.]+)"')
    repos_with_pkginfo_file = 0
    repos_with_dependencies = 0
    pkg_tested_version = []
    for repo in repos:
        repo_name = repo.name
        try:
            contents = repo.get_contents("", ref="HEAD")
            pkginfo_file = next(
                (file for file in contents if file.name.lower() == "packageinfo.g"),
                None,
            )
            if not pkginfo_file:
                continue
            repos_with_pkginfo_file += 1

            pkginfo_content = pkginfo_file.decoded_content.decode("utf-8")
            if "Dependencies" in pkginfo_content:
                repos_with_dependencies += 1

            version_match = version_pattern.search(pkginfo_content)
            if version_match:
                pkg_tested_version.append((repo_name, version_match.group(1)))
        except Exception as e:
            # Report instead of silently dropping the repository, matching the
            # error handling used for the CI.yml analysis.
            print(f"Error occurred while analyzing repository '{repo_name}': {str(e)}")
    return repos_with_pkginfo_file, repos_with_dependencies, pkg_tested_version

In [None]:
# Run each analysis over `repos` and print a one-line summary per result.
# Every helper called here iterates the full repository collection again and
# calls repo.get_contents(), so this cell issues many API requests.

# Retrieve repositories with test directories count
repos_with_tests = repos_with_tst_dir(repos)
print(f"Total repositories with test directories: {repos_with_tests}")

# Retrieve tst directories with .tst files count
tst_dirs_with_files = count_tst_files(repos)
print(f"Number of tst directories with .tst files: {tst_dirs_with_files}")

# Retrieve CI.yml file information
repos_with_ci_file, ci_tested_version, repos_without_ci_tests = ci_version_testing(repos)
print(f"Number of repositories with CI.yml file: {repos_with_ci_file}")
# repos_without_ci_tests lists repositories that have a ci.yml file in which no
# "stable-X.Y" reference was found; it is only printed when non-empty.
num_packages_without_tests = len(repos_without_ci_tests)
if num_packages_without_tests > 0:
    print(f"Packages without any test data in their CI.yml files {repos_without_ci_tests}")

# Retrieve GAP version information from PackageInfo.g files
# (ci_tested_version and pkg_tested_version are reused by the export cell that follows)
repos_with_pkginfo_file, repos_with_dependencies, pkg_tested_version = pkginfo_version_testing(repos)
print(f"Number of repositories with 'PackageInfo.g' file: {repos_with_pkginfo_file}")
print(f"Number of packages with 'Dependencies' section: {repos_with_dependencies}")

In [None]:
# Create a dictionary to hold the version testing data, keyed by package
# (repository) name. Each value is a dict that may contain the keys
# "ci_file_version", "pkginfo_version" and "tst_file_count".
version_testing_data = {}

# Add version info from CI.yml files to the dictionary
for package, versions in ci_tested_version.items():
    if versions:
        version_testing_data[package] = {"ci_file_version": versions}

# Add GAP version info from PackageInfo.g files to the dictionary
# (stored as a one-element list, mirroring the list used for the CI versions)
for package, version in pkg_tested_version:
    if version:
        version_testing_data.setdefault(package, {})["pkginfo_version"] = [version]

# Add the .tst file count for repositories with a non-empty tst directory.
# setdefault runs for every repository, so each one gets an entry in the
# output even when no data was collected for it (an empty object).
for repo in repos:
    test_exists, test_empty = check_for_tst_dir(repo)
    package = repo.name
    version_data = version_testing_data.setdefault(package, {})
    if test_exists and not test_empty:
        contents = repo.get_contents("tst")
        tst_file_count = sum(1 for item in contents if item.type == "file" and item.name.endswith(".tst"))
        version_data["tst_file_count"] = tst_file_count

# Define the path for the JSON file (relative to the current working directory)
json_path = os.path.join("collected_data", "testing_data.json")

# Make sure the output directory exists; otherwise open() raises
# FileNotFoundError and the data gathered above is lost.
os.makedirs(os.path.dirname(json_path), exist_ok=True)

# Write the data to the JSON file
with open(json_path, "w", encoding="utf-8") as json_file:
    json.dump(version_testing_data, json_file, indent=4)

print(f"Version testing data exported to: {json_path}")