In [5]:
import requests
import pandas as pd
from datetime import datetime, timedelta, timezone

# Prometheus range-query endpoint that every query in this cell is sent to.
PROM_URL = "http://10.35.29.108:30900/api/v1/query_range"

# Percentage of non-idle CPU time, summed over the two worker nodes only
# (node exporters at 10.35.29.109 and 10.35.29.110, port 9100), 1-minute rate.
CPU_QUERY = r'''(sum(rate(node_cpu_seconds_total{mode!="idle", instance=~"10\\.35\\.29\\.(109|110):9100"}[1m])) /
sum(rate(node_cpu_seconds_total{instance=~"10\\.35\\.29\\.(109|110):9100"}[1m]))) * 100'''

# Total number of pods currently reported in the Pending phase.
PENDING_QUERY = r'''sum(kube_pod_status_phase{phase="Pending"})'''

# Query window, expressed in Thailand local time (fixed UTC+7 offset).
TH = timezone(timedelta(hours=7))
start, end = (datetime(2025, 12, day, tzinfo=TH) for day in (2, 6))

# .timestamp() on an aware datetime yields absolute epoch seconds, so the
# values can be handed to Prometheus without any manual UTC conversion.
start_ts, end_ts = start.timestamp(), end.timestamp()

# Echo the window in both representations so the run is self-describing.
print("Querying between:")
print("Local TH start:", start)
print("Local TH end:  ", end)
print("Unix start:", start_ts)
print("Unix end:  ", end_ts)

def query_prometheus(promql, timeout=30):
    """Run a PromQL range query and return the samples of the first series.

    The request goes to ``PROM_URL`` and covers the module-level
    ``start_ts`` .. ``end_ts`` window with a fixed 60-second step.

    Args:
        promql: PromQL expression to evaluate.
        timeout: Seconds to wait for Prometheus before giving up.

    Returns:
        The ``values`` list of the first series in the response, or ``[]``
        (after printing a warning) when the query matched no series.
        Only the first series is returned; any further series are ignored.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status whose body is not JSON.
        ValueError: If the body is not JSON even though the HTTP status
            indicated success.
        RuntimeError: If Prometheus reports a status other than "success".
    """
    params = {
        "query": promql,
        "start": start_ts,
        "end": end_ts,
        "step": "60s"
    }
    # Without a timeout, requests waits indefinitely on an unreachable server.
    response = requests.get(PROM_URL, params=params, timeout=timeout)

    try:
        payload = response.json()
    except ValueError:
        # Non-JSON body (e.g. a proxy error page): report the HTTP failure
        # if there was one, otherwise re-raise the decoding error.
        response.raise_for_status()
        raise

    # Check the API-level status before touching "data", so a rejected query
    # shows the server's message rather than an opaque KeyError.
    if payload.get("status") != "success":
        raise RuntimeError(
            "Prometheus query failed (HTTP {}): {}: {}".format(
                response.status_code,
                payload.get("errorType", "unknown"),
                payload.get("error", "no error message returned"),
            )
        )

    result = payload["data"]["result"]
    if not result:
        print("⚠️ No data returned for query:", promql)
        return []

    return result[0]["values"]

def _samples_to_frame(samples, value_col):
    """Convert [timestamp, value] sample pairs into a typed two-column frame.

    The epoch-second timestamps become timezone-aware Asia/Bangkok datetimes
    and the values in ``value_col`` are cast to float.
    """
    frame = pd.DataFrame(samples, columns=["timestamp", value_col])
    local_time = pd.to_datetime(frame["timestamp"], unit="s", utc=True).dt.tz_convert("Asia/Bangkok")
    return frame.assign(**{"timestamp": local_time, value_col: frame[value_col].astype(float)})


# Pull both raw series from Prometheus.
cpu_data = query_prometheus(CPU_QUERY)
pending_data = query_prometheus(PENDING_QUERY)

# One typed frame per metric.
df_cpu = _samples_to_frame(cpu_data, "cpu")
df_pending = _samples_to_frame(pending_data, "pending")

# Inner join: keep only the timestamps present in both series.
df = df_cpu.merge(df_pending, on="timestamp", how="inner")

# Persist the combined dataset next to the notebook.
df.to_csv("CPU_Usage_&_Pending_Pod_Dateset.csv", index=False)
print("Saved as CPU_Usage_&_Pending_Pod_Dateset.csv")

df.head()


Querying between:
Local TH start: 2025-12-02 00:00:00+07:00
Local TH end:   2025-12-06 00:00:00+07:00
Unix start: 1764608400.0
Unix end:   1764954000.0
Saved as CPU_Usage_&_Pending_Pod_Dateset.csv


Unnamed: 0,timestamp,cpu,pending
0,2025-12-02 01:01:00+07:00,27.523602,0.0
1,2025-12-02 01:02:00+07:00,26.59026,0.0
2,2025-12-02 01:03:00+07:00,26.521949,0.0
3,2025-12-02 01:04:00+07:00,27.172243,0.0
4,2025-12-02 01:05:00+07:00,34.733194,0.0
