In [None]:
#!pip install kubernetes
#!pip install pandas

In [None]:
import getpass
import os
import re
import subprocess

import pandas as pd
import requests
import urllib3
from kubernetes import client, config
from kubernetes.stream import stream

# Suppress urllib3's InsecureRequestWarning; the HTTP calls in this notebook
# are made with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
# Silence the TLS warnings triggered by the verify=False requests in this notebook.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Load cluster credentials from the local kubeconfig file.
config.load_kube_config("kubeconfig.yaml")
core_v1_api = client.CoreV1Api()

# Connection settings. Each one can be overridden with an environment variable;
# the defaults are the values this notebook was originally written against.
cpd_instance_url = os.environ.get(
    "CPD_INSTANCE_URL",
    'https://cpd-cpd.apps.cp4d-education-install.cp.fyre.ibm.com',
)
username = os.environ.get("CPD_USERNAME", 'jlara')
# The API key is a secret and must not live in the notebook: read it from the
# CPD_API_KEY environment variable, or prompt for it when the variable is unset.
# NOTE(review): the key that used to be hard-coded here remains in the file
# history and should be rotated.
api_key = os.environ.get("CPD_API_KEY") or getpass.getpass("CPD API key: ")
namespace = os.environ.get("CPD_NAMESPACE", "cpd")
label_selector = "app=asset-files-api"
base_paths = [
    "/mnt/asset_file_api/projects",
    "/mnt/asset_file_api/spaces"
]
# Intermediate and final output files
output_file_intermediate = "file_sizes.csv"
output_file_with_names = "file_sizes_with_names.csv"
output_file_final = "data_proyectos.csv"
output_file_space_deployments = "space_deployments.csv"  # For spaces

In [None]:
def is_valid_numeric_id(value):
    """Return True when ``value`` is a non-empty string of ASCII digits (0-9).

    ``str.isdigit()`` alone also accepts characters such as superscript digits
    ("²"), which are not usable as numeric identifiers, and raises on
    non-string input. Both cases are reported as invalid here.

    Args:
        value: Candidate identifier, normally a path component.

    Returns:
        bool: True only for strings made exclusively of the characters 0-9.
    """
    return isinstance(value, str) and value.isascii() and value.isdigit()

In [None]:
def get_bearer_token(timeout=30):
    """Authenticate against the CPD instance and return a bearer token.

    Posts the module-level ``username`` and ``api_key`` to the
    ``/icp4d-api/v1/authorize`` endpoint of ``cpd_instance_url``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive endpoint would block the notebook forever.

    Returns:
        The value of the ``token`` field of the JSON response.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts
            or a non-2xx status (logged, then re-raised).
        ValueError: If the response carries no token.
    """
    auth_url = f"{cpd_instance_url}/icp4d-api/v1/authorize"
    auth_payload = {"username": username, "api_key": api_key}
    auth_headers = {'Content-Type': 'application/json'}

    print(f"Auth URL: {auth_url}")

    try:
        # verify=False: certificate verification is disabled for this call.
        response = requests.post(
            auth_url,
            headers=auth_headers,
            json=auth_payload,
            verify=False,
            timeout=timeout,
        )
        print(f"Status respuesta obtención token: {response.status_code}")
        response.raise_for_status()
        token = response.json().get('token')
    except requests.exceptions.RequestException as e:
        print(f"Error durante autenticación: {e}")
        raise

    if not token:
        raise ValueError("No se pudo obtener BearerToken")
    return token

In [None]:
def get_username(user_id, bearer_token, timeout=30):
    """Resolve a user id to its username through the CPD user-management API.

    Like the project/space name helpers in this notebook, this is a
    best-effort lookup: a failed request is logged and reported as
    ``"Unknown"`` rather than raised.

    Args:
        user_id: Identifier placed in the user-management URL.
        bearer_token: Token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``username`` field of the response, or ``"Unknown"`` when the
        field is missing or the request fails.
    """
    user_mgmt_url = f"{cpd_instance_url}/usermgmt/v1/usermgmt/user/{user_id}?any_status=true&include_session_info=true"
    headers = {'Authorization': f'Bearer {bearer_token}', 'Accept': 'application/json'}
    try:
        # verify=False: certificate verification is disabled for this call.
        response = requests.get(user_mgmt_url, headers=headers, verify=False, timeout=timeout)
        response.raise_for_status()
        user_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error fetching username for user_id={user_id}: {e}")
        return "Unknown"

    if not isinstance(user_data, dict):
        return "Unknown"
    return user_data.get('username', 'Unknown')

In [None]:
def get_project_name(project_id):
    """Look up a project's display name with the local ``./cpdctl`` binary.

    Runs ``./cpdctl project get --project-id <project_id>`` and extracts the
    text following the first ``Name:`` label in its standard output.

    Args:
        project_id: Identifier passed to ``--project-id``.

    Returns:
        The project name with surrounding whitespace removed, or ``"Unknown"``
        when the command fails, cannot be launched, or prints no ``Name:``.
    """
    try:
        result = subprocess.run(
            ["./cpdctl", "project", "get", "--project-id", project_id],
            capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable ./cpdctl, which would
        # otherwise escape the "Unknown" fallback.
        print(f"Error fetching project name for project_id={project_id}: {e}")
        return "Unknown"

    match = re.search(r"Name:\s+(.+)", result.stdout)
    if match:
        # Strip trailing padding, as get_space_name does.
        return match.group(1).strip()
    return "Unknown"

In [None]:
def get_space_name(space_id):
    """Look up a deployment space's display name with the local ``./cpdctl`` binary.

    Runs ``./cpdctl space get --space-id <space_id>`` and extracts the text
    following the first ``Name:`` label in its standard output.

    Args:
        space_id: Identifier passed to ``--space-id``.

    Returns:
        The space name with surrounding whitespace removed, or ``"Unknown"``
        when the command fails, cannot be launched, or prints no ``Name:``.
    """
    try:
        result = subprocess.run(
            ["./cpdctl", "space", "get", "--space-id", space_id],
            capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable ./cpdctl, which would
        # otherwise escape the "Unknown" fallback.
        print(f"Error fetching space name for space_id={space_id}: {e}")
        return "Unknown"

    match = re.search(r"Name:\s+(.+)", result.stdout)
    if match:
        return match.group(1).strip()
    return "Unknown"

In [None]:
# Main logic: measure every entry three levels below each base path inside the
# asset-files-api pod, then enrich the records with project/space names and
# usernames and write the result to CSV.
try:
    # Find the pod
    pods = core_v1_api.list_namespaced_pod(namespace, label_selector=label_selector)
    if not pods.items:
        raise RuntimeError("No asset-files-api pod found.")

    pod_name = pods.items[0].metadata.name
    print(f"Using pod: {pod_name}")

    # Fixed column set so that an empty result still yields well-formed CSVs.
    record_columns = ["type", "id", "user_id", "file_name", "file_size"]

    all_file_details = []
    for path in base_paths:
        print(f"Processing base path: {path}")

        # Command to list entries (including dot-files) and their sizes
        command = [
            "bash", "-c",
            f"shopt -s nullglob; for d in {path}/*/*/* {path}/*/*/.[!.]*; do if [ -e \"$d\" ]; then du -sh \"$d\"; fi; done"
        ]

        # Run the command inside the pod
        response = stream(
            core_v1_api.connect_get_namespaced_pod_exec,
            pod_name,
            namespace,
            command=command,
            stderr=True, stdin=False, stdout=True, tty=False,
        )

        # Parse response: each result line is "<size><whitespace><path>"
        for line in response.splitlines():
            match = re.match(r"^(\S+)\s+(.+)$", line)
            if not match:
                continue
            file_size, file_path = match.groups()

            # stderr is requested together with stdout, so error messages can
            # appear in the output. Only lines whose path lies under the base
            # path being processed are real du results.
            if not file_path.startswith(f"{path}/"):
                print(f"Skipping unexpected output line: {line}")
                continue

            parts = file_path.split("/")
            if "projects" in path and len(parts) >= 6:
                project_id = parts[4]
                user_id = parts[5]
                if is_valid_numeric_id(user_id):
                    print(f"Appending valid user_id for projects: {user_id}")
                    file_name = parts[-1]
                    all_file_details.append({
                        "type": "project",
                        "id": project_id,
                        "user_id": user_id,
                        "file_name": file_name,
                        "file_size": file_size,
                    })
                else:
                    print(f"Invalid user_id detected for projects: {user_id}, skipping.")
            # parts[5] is read below, so at least six components are required
            # (the previous ">= 5" check allowed an IndexError).
            elif "spaces" in path and len(parts) >= 6:
                space_id = parts[4]
                user_id = parts[5]
                print(f"appending this user id for spaces: {user_id}")
                file_name = parts[-1]
                all_file_details.append({
                    "type": "space",
                    "id": space_id,
                    "user_id": user_id,
                    "file_name": file_name,
                    "file_size": file_size,
                })

    df = pd.DataFrame(all_file_details, columns=record_columns)
    print(f"Collected {len(df)} file records.")
    df.to_csv(output_file_intermediate, index=False)
    print(f"Intermediate data saved to {output_file_intermediate}")

    # Add names for projects and spaces. Each distinct (type, id) pair is
    # resolved once instead of spawning cpdctl for every file row.
    names_by_target = {
        (kind, ident): get_project_name(ident) if kind == "project" else get_space_name(ident)
        for kind, ident in df[["type", "id"]].drop_duplicates().itertuples(index=False, name=None)
    }
    df["name"] = [names_by_target[key] for key in zip(df["type"], df["id"])]

    # Add usernames, again with one API call per distinct user id.
    bearer_token = get_bearer_token()
    usernames_by_id = {
        user_id: get_username(user_id, bearer_token)
        for user_id in df["user_id"].unique()
    }
    df["username"] = df["user_id"].map(usernames_by_id)

    # Save final output
    df.to_csv(output_file_final, index=False)
    print(f"Final data saved to {output_file_final}")

except Exception as e:
    # Report the failure without a traceback, as the notebook always has.
    print(f"An error occurred: {e}")