# Time Series Processing

In [None]:
import stumpy
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display, HTML

In [None]:
def get_subdirectories(directory=""):
    """Return the subdirectories found directly under an experiments-data folder.

    Args:
        directory: Path fragment appended verbatim to
            ``./../experiments_data/`` (resolved against the current working
            directory). Defaults to ``""``, i.e. the data root itself.

    Returns:
        A list of ``directory + "/" + name`` strings, one per subdirectory,
        in the order the filesystem yields them. Entries whose name ends in
        ``.csv`` or ``.zip`` are skipped. A missing folder yields ``[]``.
    """
    # Plain string concatenation is deliberate: the fragments this function
    # returns start with "/", and joining such a fragment with the Path "/"
    # operator would discard the base folder entirely.
    base = Path("./../experiments_data/" + directory)
    return [
        directory + "/" + entry.name
        for entry in base.glob('*/')
        # Before Python 3.13, glob('*/') also yields regular files, so the
        # directory check has to be explicit rather than suffix-based only.
        if entry.is_dir() and entry.suffix not in ('.csv', '.zip')
    ]

In [None]:
# Top-level entries of the experiments data folder; only those whose path
# contains "node" are treated as node directories.
all_directories = get_subdirectories()
nodes_directories = [name for name in all_directories if "node" in name]

# One list of data directories per node, in the same order as nodes_directories.
data_directories_groups = [
    get_subdirectories(node_directory) for node_directory in nodes_directories
]

# The same paths flattened into a single list.
data_directories = [
    path for group in data_directories_groups for path in group
]

# Function names are taken from the second node's directories with the first
# eight characters of each path dropped.
# NOTE(review): presumably those eight characters are the node prefix, which
# would leave a leading "/" on every name -- confirm against the data layout.
function_names = [path[8:] for path in data_directories_groups[1]]
function_names

### Aggregating labels by function names

In [None]:
# Load the function/label table.
labels = pd.read_csv("./../notebooks/functions.csv")

# Collapse it to one row per function_name: every label belonging to the same
# function is joined into a single comma-separated string.
labels_by_function = labels.groupby('function_name')['label']
labels = labels_by_function.apply(', '.join).reset_index()
labels

### Adding symbols (ts1, ts2, ...) to function names

In [None]:
# Map every function to a short symbol: ts1 for the first entry of
# function_names, ts2 for the second, and so on. The first character of each
# function name is dropped before it is stored in "f_name".
#
# The frame is built in one go from a list of records: DataFrame.append was
# removed in pandas 2.0, and appending row by row copied the whole frame on
# every iteration.
symbolic_data = pd.DataFrame(
    [
        {"sym": f"ts{position}", "f_name": function[1:]}
        for position, function in enumerate(function_names, start=1)
    ],
    # Explicit columns keep the expected schema even when there are no functions.
    columns=["sym", "f_name"],
)

### Merging labels with symbols

In [None]:
print("Legend:")

# Left join: every symbol is kept, whether or not a label row matches it.
merged_legend = symbolic_data.merge(
    labels,
    how='left',
    left_on='f_name',
    right_on='function_name',
)

# 'function_name' duplicates 'f_name' after the join, so it is dropped before
# the legend is ordered by label.
labeled_data = merged_legend.drop(columns='function_name')
labeled_data = labeled_data.sort_values("label")

display(labeled_data)
labeled_data.to_csv("./../experiments_data/preprocessed-data/corr_legend.csv")

### Filling missing values with '0'

In [None]:
# Collect the "CPU" column of every function's mean_data.csv as one named
# series (ts1, ts2, ... in function_names order).
cpu_series = []
ts_names = []
for ts_counter, function in enumerate(function_names, start=1):
    df_ts = pd.read_csv(f"./../experiments_data/preprocessed-data/workers-mean-data{function}/mean_data.csv")
    cpu_series.append(df_ts["CPU"])
    ts_names.append(f"ts{ts_counter}")

# Combine all series at once. Assigning them column by column into an empty
# frame aligns every series to the index of the first one, which silently
# truncates any series longer than the first. Concatenating along axis=1
# keeps the union of the indices instead, so nothing is cut off.
# pd.concat raises on an empty list, so fall back to an empty frame.
corr_df = pd.concat(cpu_series, axis=1, keys=ts_names) if cpu_series else pd.DataFrame()

# Positions a shorter series does not cover are padded with 0.
corr_df = corr_df.fillna(0)


### Time series correlation
* Pearson
* Kendall
* Spearman

In [None]:
# Correlation methods to compute. A tuple (not a set) is used so that the
# iteration order -- and therefore the order of measures_data -- is the same
# on every run; set iteration order is not guaranteed and can differ between
# interpreter runs.
measures = ("pearson", "kendall", "spearman")

# measures_data[i] holds the correlation matrix for measures[i].
measures_data = []

for measure in measures:
    df = corr_df.corr(method=measure)
    measures_data.append(df)
    # Persist each matrix next to the other preprocessed data.
    df.to_csv(f"./../experiments_data/preprocessed-data/corr_{measure}.csv")