# Collecting Jupyter notebook pod metrics from Prometheus.

In [1]:
from prometheus_api_client import PrometheusConnect
import datetime
import pandas as pd

## Functions

In [2]:
def init_prometheus(prom_url):
    """
        Connect prometheus server.

        Parameter
            - prom_url -> URL of the Prometheus server to connect to.

        Return Type : PrometheusConnect (created with disable_ssl=True)
    """
    # Use the argument rather than the notebook-level `prometheus_url` global,
    # so the function honours the URL it is given and does not depend on a
    # variable that may not be defined yet when the cell runs.
    return PrometheusConnect(url=prom_url, disable_ssl=True)

In [3]:
def cpu_usage(prom, pod_name, pod_namespace, option=0):
    """
        Pod CPU usage as a percentage (5m rate).

        The selected pod series value is divided by the result of
        count(node_cpu_seconds_total) - count(node_cpu_seconds_total{mode="idle"})
        and multiplied by 100.

        Return Type : FLOAT .5

        Option (index of the series picked from the query result)
            - option=0 -> All Pod.
            - option=1 -> istio-proxy container.
            - option=2 -> container.
        NOTE(review): the mapping above relies on the order in which
        Prometheus returns the series - confirm for your cluster.
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    pod_rate_query = f'rate(container_cpu_usage_seconds_total{selector}[5m])'
    node_cpu_query = 'count(node_cpu_seconds_total)-count(node_cpu_seconds_total{mode="idle"})'

    pod_series = prom.custom_query(query=pod_rate_query)
    node_series = prom.custom_query(query=node_cpu_query)

    # Sample values arrive as strings in ['value'][1].
    pod_rate = float(pod_series[option]['value'][1])
    node_cpu_count = float(node_series[0]['value'][1])

    return round((pod_rate / node_cpu_count) * 100, 5)

In [4]:
def mem_usage(prom, pod_name, pod_namespace, option=0):
    """
        Pod Memory usage (per 5m), as a percentage of the summed
        node_memory_MemTotal_bytes of every series returned.

        Return Type : FLOAT .5

        Option (index of the series picked from the query result)
            - option=0 -> All Pod.
            - option=1 -> Pod Container.
            - option=2 -> istio-proxy container.
            - option=3 -> container.

        Raises
            - IndexError -> `option` is out of range, or the node memory
                            query returned no series.
    """
    # NOTE(review): rate() is intended for counters, and
    # container_memory_usage_bytes looks like a gauge - confirm whether the
    # raw value (or avg_over_time) was meant. Query left unchanged here.
    query = f'rate(container_memory_usage_bytes{{pod=\"{pod_name}\", namespace=\"{pod_namespace}\"}}[5m])'
    sub_query = 'node_memory_MemTotal_bytes'

    data = prom.custom_query(query=query)
    total_data = prom.custom_query(query=sub_query)

    if not total_data:
        # Same exception type the former positional lookup raised on an
        # empty result, but with an explanatory message.
        raise IndexError('node_memory_MemTotal_bytes returned no series')

    # Sum every returned node series instead of assuming exactly three nodes.
    total_memory = sum(float(node['value'][1]) for node in total_data)

    return round((float(data[option]['value'][1]) / total_memory) * 100, 5)

In [5]:
def pod_status_phase(prom, pod_name, pod_namespace):
    """
        Pod status phase. (Current)

        Returns the phase of the first kube_pod_status_phase series whose
        value is 1. The phase is read from the series' `phase` label; when a
        series carries no such label, the legacy positional mapping is used.

        Return Type : STRING

        Raises
            - ValueError -> no returned series has the value 1.
    """
    query = f'kube_pod_status_phase{{pod=\"{pod_name}\", namespace=\"{pod_namespace}\"}}'
    data = prom.custom_query(query=query)

    # Fallback only: relies on the order in which the series are returned.
    status_column = ['Pending', 'Succeeded', 'Failed', 'Unknown', 'Running']

    for position, series in enumerate(data):
        if int(series['value'][1]) != 1:
            continue
        # Prefer the label: it stays correct regardless of result ordering.
        phase = series.get('metric', {}).get('phase')
        return phase if phase is not None else status_column[position]

    raise ValueError(
        f'no active phase found for pod {pod_name!r} in namespace {pod_namespace!r}'
    )

In [6]:
def pod_network_transmit(prom, pod_name, pod_namespace):
    """
       Pod network transmit, 5m rate of
       container_network_transmit_bytes_total. (byte -> MB)

       Uses the first series of the query result.

       Return Type : FLOAT .2
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(
        query=f'rate(container_network_transmit_bytes_total{selector}[5m])'
    )

    transmitted_bytes = float(series[0]['value'][1])
    return round(transmitted_bytes / 1024 / 1024, 2)

In [7]:
def pod_fs_usage(prom, pod_name, pod_namespace):
    """
        Pod file system usage from container_fs_usage_bytes. (byte -> MB)

        Uses the third series (index 2) of the query result.

        Return Type : FLOAT .2
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(query=f'container_fs_usage_bytes{selector}')

    used_bytes = float(series[2]['value'][1])
    return round(used_bytes / 1024 / 1024, 2)

In [8]:
def pod_restart_count(prom, pod_name, pod_namespace):
    """
        Pod restart count from kube_pod_container_status_restarts_total.

        Uses the second series (index 1) of the query result.

        Return Type : int
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(
        query=f'kube_pod_container_status_restarts_total{selector}'
    )

    restarts = series[1]['value'][1]
    return int(restarts)

In [9]:
def pod_network_packets(prom, pod_name, pod_namespace):
    """
        Pod receive network packets, 5m rate of
        container_network_receive_packets_total.

        Uses the first series of the query result.

        Return Type : Float .4
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(
        query=f'rate(container_network_receive_packets_total{selector}[5m])'
    )

    packet_rate = float(series[0]['value'][1])
    return round(packet_rate, 4)

In [10]:
def pod_disk_read_io(prom, pod_name, pod_namespace):
    """
        Pod disk read, increase of container_fs_reads_bytes_total
        over 5m.

        Uses the first series of the query result.

        Return Type : Float .4
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(
        query=f'increase(container_fs_reads_bytes_total{selector}[5m])'
    )

    read_increase = float(series[0]['value'][1])
    return round(read_increase, 4)

In [11]:
def pod_disk_write_io(prom, pod_name, pod_namespace):
    """
        Pod disk write, increase of container_fs_writes_bytes_total
        over 5m.

        Uses the first series of the query result.

        Return Type : Float .4
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(
        query=f'increase(container_fs_writes_bytes_total{selector}[5m])'
    )

    write_increase = float(series[0]['value'][1])
    return round(write_increase, 4)

In [12]:
def pod_age(prom, pod_name, pod_namespace):
    """
        Pod age.

        Computed as the sample's evaluation timestamp minus the
        kube_pod_created value (the pod's creation time), using the first
        series of the query result. Never negative.

        Unit : minute.
        Return Type : INT
    """
    query = f'kube_pod_created{{pod=\"{pod_name}\", namespace=\"{pod_namespace}\"}}'
    data = prom.custom_query(query=query)

    # An instant-vector sample is [evaluation_timestamp, "value"]; the value
    # of kube_pod_created is the creation time, so the age is the difference
    # between the two (the raw value alone is minutes since the epoch).
    evaluated_at, created_at = data[0]['value']
    age_seconds = int(float(evaluated_at) - float(created_at))

    # Clamp so that small clock skew on a brand-new pod cannot yield -1.
    return max(age_seconds // 60, 0)

In [13]:
def pod_mem_utilization(prom, pod_name, pod_namespace):
    """
        Pod memory utilization: container_memory_usage_bytes divided by
        the pod's container_spec_memory_limit_bytes.

        Uses the first series of the query result.

        Return Type : FLOAT .3
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    # Only the right-hand side of the division carries the pod selector.
    ratio_query = (
        'container_memory_usage_bytes / '
        f'container_spec_memory_limit_bytes{selector}'
    )
    series = prom.custom_query(query=ratio_query)

    utilization = float(series[0]['value'][1])
    return round(utilization, 3)

In [14]:
def pod_memory_limit(prom, pod_name, pod_namespace):
    """
        Pod memory limit from container_spec_memory_limit_bytes.
        (byte -> MB)

        Uses the fourth series (index 3) of the query result.

        Return Type : FLOAT .2
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(
        query=f'container_spec_memory_limit_bytes{selector}'
    )

    limit_bytes = float(series[3]['value'][1])
    return round(limit_bytes / 1024 / 1024, 2)

In [15]:
def pod_cpu_limit(prom, pod_name, pod_namespace):
    """
        Pod CPU limit from container_spec_cpu_quota.
        (100,000 µs = 1 CPU)

        Uses the third series (index 2) of the query result.

        Return Type : FLOAT .2
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(query=f'container_spec_cpu_quota{selector}')

    quota = float(series[2]['value'][1])
    return round(quota / 100000, 2)

In [16]:
def pod_disk_usage(prom, pod_name, pod_namespace):
    """
        Pod disk usage.

        Takes the third series (index 2) of the query result and divides
        its value by 1024.

        NOTE(review): the query reads container_spec_cpu_quota, the same
        metric as pod_cpu_limit, which looks like a copy-paste slip for a
        disk metric - confirm the intended metric and unit.

        Return Type : FLOAT .2
    """
    selector = f'{{pod="{pod_name}", namespace="{pod_namespace}"}}'
    series = prom.custom_query(query=f'container_spec_cpu_quota{selector}')

    raw_value = float(series[2]['value'][1])
    return round(raw_value / 1024, 2)

## Main

In [None]:
# Prometheus connection used by every metric helper below.
prometheus_url = "URL"
prom = init_prometheus(prometheus_url)


# One row per pod; the row layout must match `column_name`.
jupyter_pod_info = []

column_name = [
    "timestamp",
    "pod_cpu_usage",
    "pod_mem_usage",
    "pod_name",
    "namespace",
    "status",
    "network_transmit",
    "fs_usage",
    "pod_restart_count",
    "container_cpu_load",
    "container_mem_load",
    "receive_network_packet",
    "disk_read_io",
    "disk_write_io",
    "pod_age",
    "pod_mem_utilization",
    "pod_mem_limit",
    "pod_cpu_limit",
    "pod_disk_usage",
]

# Test Jupyter pod name, pod namespace -> [pod_name, pod_namespace]
jupyter_pod_name = [
    [f'test-{number}-0', 'kubeflow-user-example-com'] for number in range(1, 11)
]

# RUN
# Iterate the pod list itself so the loop always matches its length, instead
# of a second hard-coded range(10) that must be kept in sync by hand.
for pod_name, pod_namespace in jupyter_pod_name:
    jupyter_pod_info.append(
        [
            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),  # Timestamp. -0
            cpu_usage(prom, pod_name, pod_namespace, 0),  # Pod CPU usage. -1
            mem_usage(prom, pod_name, pod_namespace, 0),  # Pod Memory usage. -2
            pod_name,  # Pod name. -3
            pod_namespace,  # Pod namespace. -4
            pod_status_phase(prom, pod_name, pod_namespace),  # Pod status. -5
            pod_network_transmit(prom, pod_name, pod_namespace),  # Network transmit. (MB) -6
            pod_fs_usage(prom, pod_name, pod_namespace),  # Pod file system usage. (MB) -7
            pod_restart_count(prom, pod_name, pod_namespace),  # Pod restart count. -8
            cpu_usage(prom, pod_name, pod_namespace, 2),  # Container CPU load. -9
            mem_usage(prom, pod_name, pod_namespace, 3),  # Container Memory Load. -10
            pod_network_packets(prom, pod_name, pod_namespace),  # Receive network packets per 5m. -11
            pod_disk_read_io(prom, pod_name, pod_namespace),  # Pod disk read I/O per 5m. -12
            pod_disk_write_io(prom, pod_name, pod_namespace),  # Pod disk write I/O per 5m. -13
            pod_age(prom, pod_name, pod_namespace),  # Pod Age. (Unit: minute) -14
            pod_mem_utilization(prom, pod_name, pod_namespace),  # Pod Memory utilization. -15
            pod_memory_limit(prom, pod_name, pod_namespace),  # Pod Memory limit. (MB) -16
            pod_cpu_limit(prom, pod_name, pod_namespace),  # Pod CPU limit. (Core) -17
            pod_disk_usage(prom, pod_name, pod_namespace),  # Pod disk usage. -18
        ]
    )

## Save

In [22]:
import os

# Build the result table; one row per pod, columns as listed in `column_name`.
df = pd.DataFrame(jupyter_pod_info, columns=column_name)

# Timestamped file name so repeated runs do not overwrite each other.
current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
save_path = f"data/prometheus_data_{current_time}.csv"

# to_csv does not create missing directories; make sure the target exists so
# the collected metrics are not lost to an OSError at the very last step.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
df.to_csv(save_path, index=False)