In [3]:
import yaml
import requests
import os
from pathlib import Path

## Getting the data

We simply download the data from the repo and load the YAML into memory. There are two versions of the landscape: a light version with the Crunchbase link, and one with all the data extracted.

In [4]:
# Download the processed landscape file from the CNCF landscape repository.
# A timeout keeps the cell from hanging forever on a stalled connection.
landscape_raw = requests.get(
    "https://raw.githubusercontent.com/cncf/landscape/master/processed_landscape.yml",
    timeout=60,
)
# Fail here on an HTTP error (404, 5xx, ...) instead of handing an error page
# to the YAML parser in the next cell.
landscape_raw.raise_for_status()

In [5]:
# Parse the downloaded bytes as YAML, then keep only the top-level
# 'landscape' entry, which is what the rest of the notebook iterates over.
landscape_document = yaml.safe_load(landscape_raw.content)
landscape = landscape_document['landscape']

## Parsing the categories

The file is quite big: it takes 30s to load and loop over. To make future processing faster, I will break it down into pieces as small as possible: one folder per week, plus one file per subcategory.

At the same time it is nice to have some index to explore what is out there.

In [6]:
def make_path(c, s):
    """Build a folder-friendly name from a category and a subcategory name.

    The two names are joined with an underscore and lower-cased, then a fixed
    sequence of textual substitutions is applied, in order, to strip or
    replace characters that are awkward in file names.
    """
    name = "_".join((c, s)).lower()
    # Order matters: spaces must already be underscores for "_-" to match.
    substitutions = (
        (" & ", "_"),
        (" ", "_"),
        ("_-", ""),
        (",", ""),
        ("/", "_"),
    )
    for old, new in substitutions:
        name = name.replace(old, new)
    return name

In [7]:
# Index of the landscape: category name -> {subcategory name -> sanitized
# folder/file name produced by make_path}.
categories = {
    category['name']: {
        subcategory['name']: make_path(category['name'], subcategory['name'])
        for subcategory in category['subcategories']
    }
    for category in landscape
}

categories

{'Provisioning': {'Automation & Configuration': 'provisioning_automation_configuration',
  'Container Registry': 'provisioning_container_registry',
  'Security & Compliance': 'provisioning_security_compliance',
  'Key Management': 'provisioning_key_management'},
 'Runtime': {'Cloud Native Storage': 'runtime_cloud_native_storage',
  'Container Runtime': 'runtime_container_runtime',
  'Cloud Native Network': 'runtime_cloud_native_network'},
 'Orchestration & Management': {'Scheduling & Orchestration': 'orchestration_management_scheduling_orchestration',
  'Coordination & Service Discovery': 'orchestration_management_coordination_service_discovery',
  'Remote Procedure Call': 'orchestration_management_remote_procedure_call',
  'Service Proxy': 'orchestration_management_service_proxy',
  'API Gateway': 'orchestration_management_api_gateway',
  'Service Mesh': 'orchestration_management_service_mesh'},
 'App Definition and Development': {'Database': 'app_definition_and_development_database',

In [8]:
# Make sure the output folder exists: on a fresh checkout nothing has created
# ../data yet when this cell runs, and open() would raise FileNotFoundError.
Path("../data").mkdir(parents=True, exist_ok=True)

# Write-only mode is enough here; yaml.dump writes into the stream and
# returns None, so there is no result worth keeping.
with open("../data/category_index.yaml", 'w', encoding="utf-8") as file:
    yaml.dump(categories, file)

In [10]:
# Index of the landscape: category name -> {subcategory name -> list of the
# names of the items filed under that subcategory}.
items = {
    category['name']: {
        subcategory['name']: [entry['name'] for entry in subcategory['items']]
        for subcategory in category['subcategories']
    }
    for category in landscape
}

In [11]:
# Make sure the output folder exists: on a fresh checkout nothing has created
# ../data yet when this cell runs, and open() would raise FileNotFoundError.
Path("../data").mkdir(parents=True, exist_ok=True)

# Write-only mode is enough here; yaml.dump writes into the stream and
# returns None, so there is no result worth keeping.
with open("../data/category_item_index.yaml", 'w', encoding="utf-8") as file:
    yaml.dump(items, file)

## A-to-Z split of the landscape

In [9]:
def get_only_letter(x: str, landscape: list) -> dict:
    """Collect, per subcategory, the items whose name starts with ``x``.

    The comparison ignores case, so a name written in lower case (for example
    one spelled like "etcd") is filed under its letter ("E") instead of being
    left out of every bucket.

    Not the fastest approach (the whole landscape is scanned on every call),
    but it does the job.

    Args:
        x: Prefix to match against item names, typically a single letter.
        landscape: List of category dicts; each has a 'name' and a
            'subcategories' list whose entries have a 'name' and an 'items'
            list of item dicts (each with a 'name').

    Returns:
        A dict mapping ``make_path(category name, subcategory name)`` to the
        list of matching item dicts. Every subcategory gets a key, with an
        empty list when nothing matches.
    """
    prefix = x.lower()
    return {
        make_path(category['name'], sub['name']): [
            item
            for item in sub['items']
            if item['name'].lower().startswith(prefix)
        ]
        for category in landscape
        for sub in category['subcategories']
    }

In [42]:
# Split the landscape into one folder per letter ("week"), each holding one
# YAML file per subcategory with the items filed under that letter.
for index, letter in enumerate(map(chr, range(ord('A'), ord('Z') + 1))):
    partial = get_only_letter(letter, landscape)

    # The folder depends only on the letter, so create it once per letter
    # instead of once per subcategory file.
    week_dir = Path(f'../data/week_{index:02d}_{letter}')
    week_dir.mkdir(parents=True, exist_ok=True)

    for key, letter_items in partial.items():
        # Write-only mode is enough; yaml.dump writes into the stream and
        # returns None, so there is no result worth keeping.
        with open(week_dir / f"{key}.yaml", 'w', encoding="utf-8") as file:
            yaml.dump(letter_items, file)