In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import random
import docker
import docker.errors
import pandas as pd
import time



print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce RTX 3060


In [None]:
# Generate Training Data (Images must be build first, see Makefile)

client = docker.from_env()

def kill_container(container_name):
    container = client.containers.get(container_name)
    container.kill()

# Function to run containers
def run_container(image, container_name, command=None, detach=True):
    try:
        container = client.containers.run(
            image,
            name=container_name,
            command=command,
            detach=detach,
            auto_remove=True,
            cpu_period=100000,
            cpu_quota=10000
        )
        return container
    except docker.errors.ContainerError as e:
        print(f"Error: {e}")
    except docker.errors.ImageNotFound as e:
        print(f"Error: Image not found: {e}")

def collect_stats(containers, duration=60, interval=1):
    stats = []
    for _ in range(int(duration / interval)):
        cpu_total = 0
        cpu_max = 0
        timestamp_total = 0
        for container_name in containers:
            container = client.containers.get(container_name)
            s = container.stats(stream=False)
            cpu_delta = s["cpu_stats"]["cpu_usage"]["total_usage"] - s["precpu_stats"]["cpu_usage"]["total_usage"]
            system_cpu_delta = s["cpu_stats"]["system_cpu_usage"] - s["precpu_stats"]["system_cpu_usage"]
            cpu_percent = (cpu_delta / system_cpu_delta) * len(s["cpu_stats"]["cpu_usage"]["percpu_usage"]) * 100 if system_cpu_delta else 0.0
            cpu_total += cpu_percent
            cpu_max = max(cpu_max, cpu_percent)
            timestamp_total += time.time()

        cpu_percent_mean = cpu_max / len(containers)
        timestamp_mean = timestamp_total / len(containers)
        print("Got stat")
        stats.append({
            "timestamp": timestamp_mean,
            "cpu_mean": cpu_percent_mean,
            "cpu_max": cpu_max
        })
        time.sleep(interval)
    return pd.DataFrame(stats)



def generate_cpu_data(num_containers, duration_s=60):
    image_names = ["cpu_max", "periodic_cpu_spikes", "random_cpu"]
    rand_image_set = [random.choice(image_names) for _ in range (num_containers)]

    # Spin up containers
    containers = []
    for i, image in enumerate(rand_image_set):
        container_name = f"container_{i}"
        print(f"Running container {container_name}")
        run_container(image, container_name)
        containers.append(container_name)
    
    # Collect stats from all the containers
    print("Collecting stats...")
    df = collect_stats(containers, duration_s)
    print("Collected stats")
    
    
    # Convert timestamp to datetime
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit='s')

    # Set timestamp as index (this helps with resampling)
    df.set_index("timestamp", inplace=True)

    # Resample to 1-second intervals (or whatever interval you prefer)
    df_resampled = df.resample('1S').mean()  # Resampling to 1 second

    # Interpolate missing data using linear interpolation
    df_resampled = df_resampled.interpolate(method='linear')

    # Save collected stats to CSV
    df.to_csv("container_stats.csv", index=False)

    # Stop containers
    for container_name in containers:
        print(f"Stopping container {container_name}")
        kill_container(container_name)

generate_cpu_data(num_containers=1, duration_s=60)



Running container container_0
Running container container_1
Running container container_2
Running container container_3
Running container container_4
Collecting stats...
Got stat


KeyboardInterrupt: 