In [1]:
import pandas as pd
import json
from mapping import get_technology

EXCLUDED_DIRS = ("docs/", "data/", "lib/", "benchmark/", "annotations/", "examples/")

df = pd.read_csv("../data/technology_composition/project_technologies.csv")

df_sample = df.sample(50)

df_sample.to_csv("../data/technology_composition/project_technologies_sample.csv", index=False)


In [4]:
project_sample = df_sample["project"].tolist()

file_type_files = []

for project in project_sample:
    project_file = f"../data/projects_last_commit/{project}_last_commit.json"
    with open(project_file, "r", encoding="utf-8") as f:
        data = json.load(f)
        commit_data = data["latest_commit_data"]["network_data"]

        for config_file in commit_data["config_file_data"]:
            if config_file["file_path"].startswith(EXCLUDED_DIRS):
                continue

            if config_file["concept"] in ["yaml"]:
                tech = get_technology(config_file["file_path"])
                if not tech:
                    file_type_files.append(config_file["file_path"])

First iteration: 21
Second iteration: 

In [5]:
print(file_type_files)

['artifacthub-repo.yml', 'charts/budibase/Chart.yaml', 'charts/budibase/templates/alb-ingress.yaml', 'charts/budibase/templates/app-service-deployment.yaml', 'charts/budibase/templates/app-service-hpa.yaml', 'charts/budibase/templates/app-service-service.yaml', 'charts/budibase/templates/automation-worker-service-deployment.yaml', 'charts/budibase/templates/automation-worker-service-hpa.yaml', 'charts/budibase/templates/couchdb-backup.yaml', 'charts/budibase/templates/minio-data-persistentvolumeclaim.yaml', 'charts/budibase/templates/minio-service-deployment.yaml', 'charts/budibase/templates/minio-service-service.yaml', 'charts/budibase/templates/proxy-service-deployment.yaml', 'charts/budibase/templates/proxy-service-hpa.yaml', 'charts/budibase/templates/proxy-service-service.yaml', 'charts/budibase/templates/redis-data-persistentvolumeclaim.yaml', 'charts/budibase/templates/redis-service-deployment.yaml', 'charts/budibase/templates/redis-service-service.yaml', 'charts/budibase/templa

In [None]:
from mapping import get_technology

tests = [
    ("packages/vite/tsconfig.base.json", "tsconfig"),
    ("ci/k8s/jobset.yaml", "kubernetes"),
    ("docker-compose.server.example.yml", "docker-compose"),
    ("js/tsconfig.eslint.json", "eslint"),
    (".github/ISSUE_TEMPLATE/bug_report.yaml", "github"),
    ("docs/reference/docker_compose_run.yaml", "docker-compose"),
    ("apps/api/pnpm-lock.yaml", "pnpm"),
    ("apps/redis/fly.toml", "flyio"),
    ("apps/ui/ingestion-ui/package-lock.json", "npm"),
    ("docker-compose.yaml", "docker-compose"),
    ("examples/kubernetes/cluster-install/api.yaml", "kubernetes"),
    (".devcontainer/devcontainer.json", "devcontainer"),
    ( ".eslintrc.json", "eslint"),
    (".github/ISSUE_TEMPLATE/bug_report.yml", "github"),
    (".github/actions/build-electron/action.yml", "github-actions"),
    (".github/dependabot.yml", "dependabot"),
    (".github/ISSUE_TEMPLATE/config.yml", "github"),
    ("apollo-adminservice/src/main/resources/application-zookeeper-discovery.properties", "spring"),
    (".mocharc.yml", "mocha"),
    ("packages/utils/src/utils/defaultSnapshots/chainConfig.json", "fuel"),
    ("deploy/charts/litellm-helm/templates/*.yaml", "helm"),
    ("charts/budibase/templates/*.yaml", "helm"),
    ("helm/minio/Chart.yaml", "helm"),
    ("helm/minio/values.yaml", "helm"),
    ("libs/automation/.verdaccio/config.yml", "verdaccio")
]

results = []

for filename, expected in tests:
    actual = get_technology(filename)
    success = (actual == expected)
    results.append((filename, expected, actual, success))

results

[('packages/vite/tsconfig.base.json', 'tsconfig', 'tsconfig', True),
 ('ci/k8s/jobset.yaml', 'kubernetes', 'kubernetes', True),
 ('docker-compose.server.example.yml',
  'docker-compose',
  'docker compose',
  False),
 ('js/tsconfig.eslint.json', 'eslint', 'eslint', True),
 ('.github/ISSUE_TEMPLATE/bug_report.yaml', 'github', 'github issues', False),
 ('docs/reference/docker_compose_run.yaml',
  'docker-compose',
  'docker compose',
  False),
 ('apps/api/pnpm-lock.yaml', 'pnpm', 'pnpm', True),
 ('apps/redis/fly.toml', 'flyio', 'flyio', True),
 ('apps/ui/ingestion-ui/package-lock.json', 'npm', 'npm', True),
 ('docker-compose.yaml', 'docker-compose', 'docker compose', False),
 ('examples/kubernetes/cluster-install/api.yaml',
  'kubernetes',
  'kubernetes',
  True),
 ('.devcontainer/devcontainer.json', 'devcontainer', 'devcontainer', True),
 ('.eslintrc.json', 'eslint', 'eslint', True),
 ('.github/ISSUE_TEMPLATE/bug_report.yml', 'github', 'github issues', False),
 ('.github/actions/build-e