In [75]:
import pandas as pd
import numpy as np
import torch
import re
from minio import Minio
from minio.error import S3Error
import os

# Log the versions of the core libraries so a run can be tied to its environment.
print(f"pandas: {pd.__version__}")
print(f"numpy: {np.__version__}")
print(f"torch: {torch.__version__}")

pandas: 2.2.3
numpy: 1.26.4
torch: 2.8.0+cpu


In [76]:
# Input (preprocessed) dataset and output (percentage-only) dataset share one folder.
data_dir = "/home/jovyan/kpf_sunrise/data"
dataset_path = f"{data_dir}/preprocessed-k8s-dataset.csv"
out_path = f"{data_dir}/prec-pct-k8s-dataset.csv"

In [77]:
# Load the preprocessed dataset, parse "timestamp" as datetimes and use it as the index.
df = pd.read_csv(dataset_path, parse_dates=["timestamp"]).set_index("timestamp")

In [78]:
# Keep only the percentage metrics: columns whose name ends in
# cpu_pct, mem_pct or memory_pct (matched case-insensitively).
pattern = re.compile(r"(?:cpu|mem(?:ory)?)_pct$", re.IGNORECASE)
cols_to_keep = [c for c in df.columns if pattern.search(c)]

# Round to whole numbers and store them as pandas' nullable Int64 dtype:
# a plain astype(int) raises as soon as one kept column contains a NaN,
# whereas Int64 keeps missing samples as <NA> (written as empty CSV fields).
df_pct = df[cols_to_keep].round().astype("Int64")


In [79]:
# Write the percentage-only dataset next to the source data (the index is written too).
df_pct.to_csv(path_or_buf=out_path)
print("Saved to " + out_path)

Saved to /home/jovyan/kpf_sunrise/data/prec-pct-k8s-dataset.csv


In [80]:
#upload and stored in Minio (kfp)
#install minio
!pip install minio




In [81]:
# Connect to MinIO
# Endpoint and credentials are read from the environment (MINIO_ENDPOINT,
# MINIO_ACCESS_KEY, MINIO_SECRET_KEY) so real secrets never have to be committed
# with the notebook; the fallbacks are the values that used to be hardcoded here.
minio_client = Minio(
    os.environ.get("MINIO_ENDPOINT", "minio-service.kubeflow.svc.cluster.local:9000"),
    access_key=os.environ.get("MINIO_ACCESS_KEY", "minio"),
    secret_key=os.environ.get("MINIO_SECRET_KEY", "minio123"),
    secure=False,
)


In [82]:
# Make sure the destination bucket is present, then list every bucket on the server

bucket_name = "k8s-resources-forecast"

if not minio_client.bucket_exists(bucket_name):
    minio_client.make_bucket(bucket_name)
    print(f"Bucket \033[1m{bucket_name}\033[0m created.\n")
else:
    print(f"Bucket \033[1m{bucket_name}\033[0m already exists.\n")

print("\033[1mAvailable buckets:\033[0m")
for bucket in minio_client.list_buckets():
    print(" * " + bucket.name)

Bucket [1mk8s-resources-forecast[0m created.

[1mAvailable buckets:[0m
 * k8s-resources-forecast
 * mlpipeline


In [83]:
# Push the files of the local data folder to MinIO


local_folder = "/home/jovyan/kpf_sunrise/data"
bucket_name = "k8s-resources-forecast"
remote_prefix = "data/k8s-preprocessed/"

# Create the bucket first when it is missing
bucket_missing = not minio_client.bucket_exists(bucket_name)
if bucket_missing:
    minio_client.make_bucket(bucket_name)

# Walk the folder's direct entries; anything os.path.isfile rejects
# (e.g. a sub-directory) is skipped
for entry_name in os.listdir(local_folder):
    source_file = os.path.join(local_folder, entry_name)
    if not os.path.isfile(source_file):
        continue
    object_key = remote_prefix + entry_name
    minio_client.fput_object(bucket_name, object_key, source_file)
    print(f"Uploaded: {entry_name} -> s3://{bucket_name}/{object_key}")

print("All files uploaded.")


Uploaded: preprocessed-k8s-dataset.csv -> s3://k8s-resources-forecast/data/k8s-preprocessed/preprocessed-k8s-dataset.csv
Uploaded: prec-pct-k8s-dataset.csv -> s3://k8s-resources-forecast/data/k8s-preprocessed/prec-pct-k8s-dataset.csv
All files uploaded.


# Do Not Run: Delete the Bucket

In [74]:
# WARNING: destructive. This empties and removes the whole bucket, including
# the dataset files uploaded earlier in this notebook.
bucket_name = "k8s-resources-forecast"

if minio_client.bucket_exists(bucket_name):
    # List all objects in the bucket (recursively) and remove them one by one
    for obj in minio_client.list_objects(bucket_name, recursive=True):
        minio_client.remove_object(bucket_name, obj.object_name)

    # Now remove the bucket
    minio_client.remove_bucket(bucket_name)
    print(f"Bucket '{bucket_name}' deleted.")
else:
    # Running the cell again after the bucket is gone must not fail
    print(f"Bucket '{bucket_name}' does not exist; nothing to delete.")

print("\033[1mAvailable buckets:\033[0m")
for b in minio_client.list_buckets():
    print(" * " + b.name)

Bucket 'k8s-resources-forecast' deleted.
[1mAvailable buckets:[0m
 * mlpipeline
