__Loading the data into a panel format with a MultiIndexed DataFrame__

In [2]:
import sktime
import pandas as pd
import numpy as np

def make_datetime_index(timestamps:str) -> pd.DatetimeIndex:
    beginning = timestamps[0]
    end = timestamps[-1]
    beginning, end = pd.to_datetime((beginning,end), unit="s")
    index = pd.date_range(start=beginning, end=end, periods=len(timestamps))
    return index

def readcsv_modified(csv_loc:str):
    csv = pd.read_csv(csv_loc)
    metrics = csv["identifier"].to_list()
    timestamps = csv.columns[1:].to_flat_index()
    timestamps = timestamps.to_numpy().tolist()
    timestamps = make_datetime_index(timestamps)
   # index = pd.MultiIndex.from_product([[num], timestamps], names=['instances','timepoints'])
    vals = csv.drop(labels="identifier",axis=1).to_numpy().transpose()
    s = pd.DataFrame(vals, index=timestamps, columns=metrics)
    return s

def removeNaNs(df:pd.DataFrame) -> pd.DataFrame:
    for col in df.columns:
        if df[col].isna().any():
            df = df.drop(col, axis=1)
    return df

def removeUniqueColumns(first:pd.DataFrame, second:pd.DataFrame) -> tuple:
    common_columns = first.columns.intersection(second.columns)
    reduced_old = first.reindex(columns=common_columns)
    reduced_new = second.reindex(columns=common_columns)
    return reduced_old, reduced_new

def readcsvs(csv_loc_list:list):
    individual_dataframes = []
    for i in range(len(csv_loc_list)):
        individual_dataframes.append(readcsv_modified(csv_loc_list[i]))

    #Here: Go through the loop once again, start trimming. compare everything to element at 0, trim with it so it stays as the leanest version.

    removed_nans = []
    for frame in individual_dataframes:
        removed_nans.append(removeNaNs(frame))

    initial_df = removed_nans[0]
    removed_unique_cols = []
    for frame in removed_nans[1:]:
        reduced_frames = removeUniqueColumns(initial_df, frame)
        initial_df = reduced_frames[0]
        removed_unique_cols.append(reduced_frames[1])
    removed_unique_cols.append(initial_df)

    concated = pd.concat(removed_unique_cols, keys=[f'csv {i}' for i in range(1, len(removed_unique_cols)+1)])

    concated.index = pd.MultiIndex.from_tuples([(idx, date) for idx, date in zip(concated.index.get_level_values(0), concated.index.get_level_values(1))], names=['files','times'])

    return concated



__Resulting dataframe, with some info about it__

In [6]:
file_list = ["./catalog.csv",
             "./catalog2.csv",
             "./even_load.csv"]
complete_df = readcsvs(file_list)
print(complete_df.shape)
print(complete_df.values.size)
complete_df

(399, 586)
233814


Unnamed: 0_level_0,Unnamed: 1_level_0,go_gc_duration_seconds&catalogue:80&catalogue&0,go_gc_duration_seconds&catalogue:80&catalogue&0.25,go_gc_duration_seconds&catalogue:80&catalogue&0.5,go_gc_duration_seconds&catalogue:80&catalogue&0.75,go_gc_duration_seconds&catalogue:80&catalogue&1,go_gc_duration_seconds&payment:80&payment&0,go_gc_duration_seconds&payment:80&payment&0.25,go_gc_duration_seconds&payment:80&payment&0.5,go_gc_duration_seconds&payment:80&payment&0.75,go_gc_duration_seconds&payment:80&payment&1,...,up&edge-router:80&frontend,up&nodeexporter:9100&node-exporter,up&orders:80&orders,up&payment:80&payment,up&queue-master:80&queue-master,up&shipping:80&shipping,up&user:80&user,uptime&carts:80&cart,uptime&orders:80&orders,uptime&shipping:80&shipping
files,times,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
csv 1,2023-01-18 17:11:01,0.000316,0.000352,0.000373,0.000413,0.012802,0.00028,0.000329,0.000342,0.000359,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,24606819.0,24608476.0,24608582.0
csv 1,2023-01-18 17:11:06,0.000316,0.000352,0.000373,0.000413,0.012802,0.00028,0.000329,0.000341,0.000359,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,24611818.0,24613476.0,24613583.0
csv 1,2023-01-18 17:11:11,0.000316,0.000352,0.000373,0.000413,0.012802,0.00028,0.000329,0.000341,0.000359,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,24616819.0,24618477.0,24618583.0
csv 1,2023-01-18 17:11:16,0.000316,0.000352,0.000373,0.000413,0.012802,0.00028,0.000329,0.000341,0.000359,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,24621818.0,24623477.0,24623583.0
csv 1,2023-01-18 17:11:21,0.000316,0.000352,0.000373,0.000413,0.012802,0.00028,0.000329,0.000342,0.000359,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,24626819.0,24628477.0,24628583.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
csv 3,2023-01-18 17:31:59,0.000316,0.000352,0.000375,0.000413,0.012802,0.00028,0.000328,0.000342,0.000358,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,25861819.0,25863476.0,25868583.0
csv 3,2023-01-18 17:32:04,0.000316,0.000352,0.000375,0.000413,0.012802,0.00028,0.000328,0.000342,0.000358,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,25866819.0,25868477.0,25873583.0
csv 3,2023-01-18 17:32:09,0.000316,0.000352,0.000375,0.000413,0.012802,0.00028,0.000328,0.000342,0.000358,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,25871819.0,25873477.0,25878583.0
csv 3,2023-01-18 17:32:14,0.000316,0.000352,0.000375,0.000413,0.012802,0.00028,0.000328,0.000342,0.000358,0.000521,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,25876819.0,25878476.0,25883583.0


__Rocket fails__

In [4]:
from sktime.transformations.panel.rocket import Rocket

rocket = Rocket()
rocket.fit(complete_df)

ValueError: cannot reshape array of size 233814 into shape (3,399,586)