In [22]:
import git
import os
import logging

from pathlib import Path


class KubeRank:
    """
    Class to clone a git repository and locate the kubernetes yaml files in it.

    Attributes:
        repo_url(str): the url of the repo to clone
        repo_file_path(str): the local path the repository is saved to
        yaml_files(dict): maps each directory that contains yaml files to the
            list of yaml file names found there (filled by get_yaml_files)

    Methods:
        save_repo
        get_yaml_files

    """

    # Initialise logger (shared by every instance of the class)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # Clear any existing handlers to avoid duplicate logging when this class
    # definition is executed more than once (e.g. re-running a notebook cell)
    if logger.hasHandlers():
        logger.handlers.clear()

    # Create console handler with formatting
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    def __init__(self, repo_url: str, repo_file_path: str = "./kube-rank-repo"):
        self.repo_url = repo_url
        self.repo_file_path = repo_file_path
        self.yaml_files = {}

    def save_repo(self) -> "git.Repo":
        """
        Method to save the repository.

        If nothing exists at repo_file_path the repository is cloned there from
        repo_url. Otherwise the existing path is opened as a repository.

        Return:
            git.Repo: repository object for the clone at repo_file_path
        """
        if not os.path.exists(self.repo_file_path):
            self.logger.info(f"Clone repo from {self.repo_url} to {self.repo_file_path}")
            return git.Repo.clone_from(self.repo_url, to_path=self.repo_file_path)

        self.logger.info(f"Repo already exists at {self.repo_file_path}")
        return git.Repo(self.repo_file_path)

    def get_yaml_files(self) -> dict:
        """
        Method to return a dictionary of yaml file locations from the repository,
        where the key is the root directory and the value is a list of yaml files in that directory.

        The mapping is rebuilt from scratch on every call, so the result (and
        self.yaml_files) always reflects the current contents of repo_file_path
        rather than accumulating entries from earlier scans.

        Returns:
            dict: Dictionary mapping directory paths to lists of yaml filenames
        """
        self.logger.info(f"Getting yaml files from {self.repo_file_path}")

        found = {}
        count = 0
        for root, _, files in os.walk(self.repo_file_path):
            matches = [name for name in files if name.endswith((".yaml", ".yml"))]
            if matches:  # Only add directories that contain yaml files
                found[root] = matches
                count += len(matches)

        # Replace the previous result so stale directories do not linger
        self.yaml_files = found
        self.logger.info(f"Found {count} yaml files")
        return self.yaml_files
    
    
    




2024-12-05 13:33:20,781 - __main__ - INFO - Getting yaml files from ./kube-rank-repo
2024-12-05 13:33:20,785 - __main__ - INFO - Found 55 yaml files


{'./kube-rank-repo\\config': ['configmap-plaintext.yaml',
  'pod-configmap.yaml',
  'pod-env-metada.yaml',
  'pod-environment-var.yaml'],
 './kube-rank-repo\\crd': ['monkeys.yaml'],
 './kube-rank-repo\\deployment': ['deployment-mysql.yaml',
  'deployment-nginx-serviceaccount.yaml',
  'deployment-nginx.yaml'],
 './kube-rank-repo\\hpa': ['hpa-nginx.yaml'],
 './kube-rank-repo\\job': ['job-affinity.yaml'],
 './kube-rank-repo\\namespace': ['ns-dummy.yaml'],
 './kube-rank-repo\\networksecurity': ['networksecurity-allowall-ingress.yaml',
  'networksecurity-complicated.yaml',
  'networksecurity-deny-othernamespaces.yaml',
  'networksecurity-denyall-ingress.yaml',
  'networksecurity-denyall.yaml',
  'networksecurity-denyegress-exceptdns.yaml',
  'networksecurity-pod.yaml',
  'networksecurity-port.yaml'],
 './kube-rank-repo\\pod': ['pod-dummy.yaml',
  'pod-gitclone.yaml',
  'pod-handlers.yaml',
  'pod-healthcheck-nginx.yaml',
  'pod-hostaliases.yaml',
  'pod-initcontainer-sysctl.yaml',
  'pod-ng