In [None]:
!pip install --upgrade numpy prometheus-api-client scikit-learn torch urllib3

In [None]:
from copy                        import deepcopy
from datetime                    import timedelta
from os                          import environ

from numpy                       import flip
from prometheus_api_client       import MetricSnapshotDataFrame, PrometheusConnect
from prometheus_api_client.utils import parse_datetime
from sklearn.preprocessing       import MinMaxScaler
from torch                       import tensor
from torch.utils.data            import Dataset
from urllib3                     import disable_warnings

# Silence urllib3 warnings for the rest of the session.
# NOTE(review): presumably meant to hide the certificate warnings caused by disable_ssl = True
# on the Prometheus client created further down — confirm, and consider narrowing the category.
disable_warnings()

In [None]:
# Connection settings. Values are read from the environment when present so that a real bearer
# token never has to be saved inside the notebook; the original placeholders remain as fallbacks.
PROMETHEUS_URL   = environ.get('PROMETHEUS_URL',   '<PROMETHEUS_URL>')
PROMETHEUS_TOKEN = environ.get('PROMETHEUS_TOKEN', '<PROMETHEUS_TOKEN>')

In [None]:
# Client used for the Prometheus calls in this notebook; the token is sent in the
# Authorization header of each request.
# NOTE(review): disable_ssl = True presumably switches off TLS certificate verification in
# prometheus_api_client — confirm, and avoid it outside of trusted/test environments.
prometheus_connect = PrometheusConnect(
    url         = PROMETHEUS_URL,
    headers     = { 'Authorization' : f'bearer { PROMETHEUS_TOKEN }' },
    disable_ssl = True
)

In [None]:
# Show the first ten entries returned by all_metrics() (a quick check that the connection works).
prometheus_connect.all_metrics()[:10]

In [None]:
# Query parameters: the metric to fetch, the time window and the size of each request chunk.
# NOTE(review): '1h' presumably resolves to one hour before now — confirm against parse_datetime.
metric_name = 'pod:container_cpu_usage:sum'
start_time, end_time = parse_datetime('1h'), parse_datetime('now')
chunk_size = timedelta(minutes = 1)

# Label matchers that restrict the query.
label_config = dict(
    prometheus = 'openshift-monitoring/k8s',
    namespace  = '<namespace>'
)

# Fetch the range data and wrap the response in a data frame in a single step.
metric_data = MetricSnapshotDataFrame(
    prometheus_connect.get_metric_range_data(
        metric_name  = metric_name,
        label_config = label_config,
        start_time   = start_time,
        end_time     = end_time,
        chunk_size   = chunk_size
    )
)

metric_data

In [None]:
# Number of rows in the fetched frame.
len(metric_data)

In [None]:
def transform_and_normalize(metric_data, lookback):
    """Build a table of lagged values from a frame with 'timestamp' and 'value' columns.

    For every lag in 1..lookback a column named 't - <lag>' is added that holds
    the 'value' column shifted down by that many rows. The result is indexed by
    'timestamp', and every row containing a missing value (which includes the
    leading rows that have no complete history) is removed.

    Despite the name, no scaling is performed here.

    Parameters
    ----------
    metric_data : DataFrame
        Must contain the columns 'timestamp' and 'value'; any other columns
        are ignored. The argument itself is left unmodified.
    lookback : int
        Number of lag columns to generate.

    Returns
    -------
    DataFrame
        Columns 'value', 't - 1', ..., 't - <lookback>', indexed by 'timestamp'.
    """

    base = metric_data[['timestamp', 'value']]

    # Dict insertion order fixes the column order: 't - 1' first, 't - <lookback>' last.
    lag_columns = {
        f't - {lag}' : base['value'].shift(lag)
        for lag in range(1, lookback + 1)
    }

    # assign() returns a new frame, so neither `base` nor the caller's frame is touched.
    return base.assign(**lag_columns).set_index('timestamp').dropna()

In [None]:
# Number of lagged values ('t - 1' ... 't - lookback') generated for every row.
lookback = 4

# NOTE: this rebinds metric_data to the lag-feature frame, so the cell cannot simply be run twice:
# the result no longer has the 'timestamp' column that transform_and_normalize selects.
# Re-run the query cell first.
metric_data = transform_and_normalize(metric_data, lookback)
metric_data

In [None]:
# Drop the pandas wrapper: from here on metric_data is a plain 2-D array whose column 0 is
# 'value' and whose remaining columns are the lags 't - 1' ... 't - lookback'.
metric_data = metric_data.to_numpy()
metric_data

In [None]:
# Fit the scaler on the training portion only — the first 75 % of the rows, i.e. the same ratio
# that is used for the train/test split (at least one row) — so that the minimum and maximum of
# the held-out rows do not leak into the normalization. Held-out values may therefore fall
# slightly outside [-1, 1].
# The fitted scaler is kept in `scaler` so results can be mapped back with inverse_transform.
scaler = MinMaxScaler(feature_range = (-1, 1)).fit(
    metric_data[:max(1, int(len(metric_data) * 0.75))]
)

metric_data = scaler.transform(metric_data)
metric_data

In [None]:
# Inputs are the lag columns (everything after column 0), reversed along axis 1; the copy turns
# the reversed view into an independent array. Column 0 is the target.
X = flip(metric_data[:, 1:], axis = 1).copy()
y = metric_data[:, 0]

# The first 75 % of the rows are used for training, the remainder for testing.
split_index = int(len(X) * 0.75)

# Inputs are reshaped to (rows, lookback, 1) and targets to (rows, 1), then converted to
# float tensors.
X_train = tensor(X[:split_index].reshape((-1, lookback, 1))).float()
X_test  = tensor(X[split_index:].reshape((-1, lookback, 1))).float()

y_train = tensor(y[:split_index].reshape((-1, 1))).float()
y_test  = tensor(y[split_index:].reshape((-1, 1))).float()

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
class TimeSeriesDataset(Dataset):
    """Dataset that pairs each input sample with the target at the same position.

    Parameters
    ----------
    X : sized, indexable
        Input samples; ``X[i]`` is returned as the first element of item ``i``.
    y : sized, indexable
        Targets; ``y[i]`` is returned as the second element of item ``i``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, X, y):

        # Fail early: a mismatch would otherwise only surface as an IndexError while
        # iterating, or silently leave the surplus targets unused.
        if len(X) != len(y):

            raise ValueError(
                f'X and y must have the same number of samples, got {len(X)} and {len(y)}'
            )

        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""

        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(input, target)`` pair stored at ``index``."""

        return self.X[index], self.y[index]

In [None]:
# Wrap the train and test tensors so that item i yields the pair (X[i], y[i]).
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset  = TimeSeriesDataset(X_test, y_test)