# Cloud Workload Data Exploration
This notebook explores the synthetic workload traces used for training the RL agent.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import autocorrelation_plot

from workload_generator import WorkloadGenerator

# Render matplotlib figures inline, directly beneath the cell that creates them.
%matplotlib inline
# Make seaborn's "whitegrid" theme the default look for every plot that follows.
sns.set_theme(style="whitegrid")

## 1. Generate Synthetic Traces
We generate multiple days of data to observe patterns (daily seasonality, noise, random spikes).

In [None]:
# Seed NumPy's global RNG so the stochastic parts of the traces (noise, random
# spikes) come out the same on every Restart & Run All.
# NOTE(review): assumes WorkloadGenerator draws from np.random's global state —
# confirm, or hand the seed to the generator directly if it accepts one.
TRACE_SEED = 42
np.random.seed(TRACE_SEED)

gen = WorkloadGenerator()
trace_avg = gen.generate_full_trace(days=3)
trace_busy = gen.generate_sinusoidal(period=1440, amplitude=0.4, baseline=0.6)

# Draw both generated traces on one explicit Axes so the legend actually
# compares them; previously trace_busy was created but never visualised.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(trace_avg, label='Standard Mix', alpha=0.8)
ax.plot(trace_busy, label='Busy Sinusoid', alpha=0.8)
ax.set_title("Synthetic Workload Trace (3 Days)")
ax.set_xlabel("Time (minutes)")
ax.set_ylabel("Normalized Demand (0-1)")
ax.legend()
plt.show()

## 2. Kernel Density Estimation (KDE)
Analyze the distribution of load values to verify that the trace covers low-, medium-, and high-demand states.

In [None]:
# Kernel density estimate of the demand values in the standard-mix trace.
plt.figure(figsize=(10, 6))
sns.kdeplot(
    trace_avg,
    fill=True,
    label='Workload Density',
    ax=plt.gca(),  # draw on the figure opened just above
)
plt.gca().set(
    title="Demand Distribution (KDE)",
    xlabel="Normalized Demand",
)
plt.show()

## 3. Autocorrelation
Check for temporal dependencies in the trace, such as daily seasonality.

In [None]:
# Number of leading samples of the trace fed to the autocorrelation plot.
ACF_SAMPLES = 3000

# autocorrelation_plot is imported with the other dependencies in the first
# code cell, so this cell only does the plotting.
fig, ax = plt.subplots(figsize=(10, 5))
autocorrelation_plot(pd.Series(trace_avg).iloc[:ACF_SAMPLES], ax=ax)
ax.set_title("Autocorrelation of Workload")
plt.show()