This notebook extracts the optimized parameters and generates the commands used to execute the experiments (see ../00_used_for_setup/4-param-optimization).

In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Load the evaluation results table; its `hyper_params` column supplies the
# parameter settings that the command-generation cells append to each command.
param_ref = pd.read_csv("tsad-evaluation-results.csv")

# Fail early if a column the command-generation cells filter on is missing,
# instead of discovering it later through empty command files.
_required_param_cols = {
    "algorithm",
    "dataset_input_dimensionality",
    "status",
    "error_category",
    "hyper_params",
}
_missing_param_cols = _required_param_cols - set(param_ref.columns)
assert not _missing_param_cols, (
    f"tsad-evaluation-results.csv is missing columns: {sorted(_missing_param_cols)}"
)

param_ref

In [None]:
# Load the mapping from the algorithm name used in the results table
# (`ref_name`) to the algorithm's folder name (`folder`).
alg_map = pd.read_csv("algorithm_mapping.csv")

# Fail early if a column the command-generation cells rely on is missing.
_required_map_cols = {"ref_name", "folder"}
_missing_map_cols = _required_map_cols - set(alg_map.columns)
assert not _missing_map_cols, (
    f"algorithm_mapping.csv is missing columns: {sorted(_missing_map_cols)}"
)

alg_map

In [None]:
# Names ("adapad_" + folder) of the algorithms treated as unsupervised.
# Every other algorithm in the mapping is handled by the supervised
# train/test command cells.
unsupervised_algs = [
    "adapad_novelty_svr",
    "adapad_phasespace_svm",
    "adapad_ensemble_gi",
    "adapad_grammarviz3",
    "adapad_hotsax",
    "adapad_ts_bitmap",
    "adapad_norma",
    "adapad_sand",
    "adapad_series2graph",
    "adapad_stamp",
    "adapad_stomp",
    "adapad_valmod",
    "adapad_left_stampi",
    "adapad_ssa",
    "adapad_pst",
    "adapad_numenta_htm",
    "adapad_subsequence_lof",
    "adapad_subsequence_if",
    "adapad_dwt_mlead",
    "adapad_fft",
    "adapad_sr",
    "adapad_s_h_esd",
    "adapad_dspot",
    "adapad_arima",
    "adapad_median_method",
    "adapad_sarima",
    "adapad_triple_es",
    "adapad_pci",
]

# Subset of the above that gets `Rscript` commands instead of `docker run` commands.
r_based = [
    "adapad_valmod",
    "adapad_stamp",
    "adapad_stomp",
    "adapad_pst",
]

In [None]:
# Write one `docker run` command per (unsupervised, non-R algorithm, distinct
# successful hyper-parameter setting) to unsupervised_commands.txt.
#
# NOTE(review): the "<<data_name>>" placeholder is not substituted here and ends
# up verbatim in the output file -- presumably it is filled in by a later step;
# TODO confirm.
with open("unsupervised_commands.txt", "w") as f:
    # Command prefix: the JSON argument is left open so that the hyper-parameters
    # can be appended as additional top-level keys.
    template_unsupervised = "docker run --rm -v $(pwd)/1-data:/data:ro -v $(pwd)/2-results:/results:rw <<alg>>:latest execute-algorithm '{\"executionType\": \"execute\", \"dataInput\": \"/data/<<data_name>>.total.csv\", \"dataOutput\": \"/results/<<alg>>_<<data_name>>_<<i>>.ts\""
    for alg_name in alg_map.ref_name.values:
        try:
            # The folder (and therefore the image name) depends only on the
            # algorithm, so resolve it once instead of once per trial.
            alg_folder = alg_map[alg_map.ref_name == alg_name].folder.values[0]
            if not isinstance(alg_folder, str):
                # No usable folder (e.g. an empty CSV cell read as NaN): skip.
                continue
            image = "adapad_" + alg_folder
            if image not in unsupervised_algs or image in r_based:
                continue

            # Distinct hyper-parameter settings of successful univariate runs.
            param_trials = param_ref[
                (param_ref.algorithm == alg_name)
                & (param_ref.dataset_input_dimensionality == "UNIVARIATE")
                & (param_ref.status == "Status.OK")
                & (param_ref.error_category == "- OK -")
            ].hyper_params.unique()

            for i, params in enumerate(param_trials):
                command = template_unsupervised.replace("<<alg>>", image)
                command = command.replace("<<i>>", f'{i:05d}')
                # Drop the leading "{" so the parameters merge into the open
                # JSON object of the template.
                params_tail = str(params)[1:]
                if params_tail.strip() == "}":
                    # Empty parameter object: just close the JSON, otherwise the
                    # command would contain an invalid trailing comma (", }").
                    command += "}'"
                else:
                    command += ", " + params_tail + "'"
                f.write(command + "\n")
        except Exception as exc:
            # Best effort per algorithm, but report the problem instead of
            # hiding it; KeyboardInterrupt/SystemExit still propagate.
            print(f"Skipping {alg_name!r}: {type(exc).__name__}: {exc}")

In [None]:
# Write one training `docker run` command per (algorithm that is neither in
# `unsupervised_algs` nor in `r_based`, distinct successful hyper-parameter
# setting) to supervised_commands_train.txt.
#
# NOTE(review): the "<<data_name>>" placeholder is not substituted here and ends
# up verbatim in the output file -- presumably it is filled in by a later step;
# TODO confirm.
with open("supervised_commands_train.txt", "w") as f:
    # Command prefix: the JSON argument is left open so that the hyper-parameters
    # can be appended as additional top-level keys.
    template_supervised_train = "docker run --rm -v $(pwd)/1-data:/data:ro -v $(pwd)/2-results:/results:rw <<alg>>:latest execute-algorithm '{\"executionType\": \"train\", \"dataInput\": \"/data/<<data_name>>.train.csv\", \"modelOutput\": \"/results/<<alg>>_<<data_name>>_<<i>>.pkl\""
    for alg_name in alg_map.ref_name.values:
        try:
            # The folder (and therefore the image name) depends only on the
            # algorithm, so resolve it once instead of once per trial.
            alg_folder = alg_map[alg_map.ref_name == alg_name].folder.values[0]
            if not isinstance(alg_folder, str):
                # No usable folder (e.g. an empty CSV cell read as NaN): skip.
                continue
            image = "adapad_" + alg_folder
            if image in unsupervised_algs or image in r_based:
                continue

            # Distinct hyper-parameter settings of successful univariate runs.
            param_trials = param_ref[
                (param_ref.algorithm == alg_name)
                & (param_ref.dataset_input_dimensionality == "UNIVARIATE")
                & (param_ref.status == "Status.OK")
                & (param_ref.error_category == "- OK -")
            ].hyper_params.unique()

            for i, params in enumerate(param_trials):
                command = template_supervised_train.replace("<<alg>>", image)
                command = command.replace("<<i>>", f'{i:05d}')
                # Drop the leading "{" so the parameters merge into the open
                # JSON object of the template.
                params_tail = str(params)[1:]
                if params_tail.strip() == "}":
                    # Empty parameter object: just close the JSON, otherwise the
                    # command would contain an invalid trailing comma (", }").
                    command += "}'"
                else:
                    command += ", " + params_tail + "'"
                f.write(command + "\n")
        except Exception as exc:
            # Best effort per algorithm, but report the problem instead of
            # hiding it; KeyboardInterrupt/SystemExit still propagate.
            print(f"Skipping {alg_name!r}: {type(exc).__name__}: {exc}")

In [None]:
# Write one execute `docker run` command per (algorithm that is neither in
# `unsupervised_algs` nor in `r_based`, distinct successful hyper-parameter
# setting) to supervised_commands_test.txt.
#
# NOTE(review): the "<<data_name>>" placeholder is not substituted here and ends
# up verbatim in the output file -- presumably it is filled in by a later step;
# TODO confirm.
with open("supervised_commands_test.txt", "w") as f:
    template_supervised_execute = "docker run --rm -v $(pwd)/1-data:/data:ro -v $(pwd)/2-results:/results:rw <<alg>>:latest execute-algorithm '{\"executionType\": \"execute\", \"dataInput\": \"/data/<<data_name>>.test.csv\", \"modelInput\": \"/results/<<alg>>_<<data_name>>_<<i>>.pkl\", \"dataOutput\": \"/results/<<alg>>_<<data_name>>_<<i>>.ts\""
    for alg_name in alg_map.ref_name.values:
        try:
            # The folder (and therefore the image name) depends only on the
            # algorithm, so resolve it once instead of once per trial.
            alg_folder = alg_map[alg_map.ref_name == alg_name].folder.values[0]
            if not isinstance(alg_folder, str):
                # No usable folder (e.g. an empty CSV cell read as NaN): skip.
                continue
            image = "adapad_" + alg_folder
            if image in unsupervised_algs or image in r_based:
                continue

            # Distinct hyper-parameter settings of successful univariate runs.
            param_trials = param_ref[
                (param_ref.algorithm == alg_name)
                & (param_ref.dataset_input_dimensionality == "UNIVARIATE")
                & (param_ref.status == "Status.OK")
                & (param_ref.error_category == "- OK -")
            ].hyper_params.unique()

            # Only the trial index is used here: the hyper-parameters themselves
            # are not appended to the execute command, which points at the
            # "<<i>>"-numbered .pkl model file via "modelInput".
            for i in range(len(param_trials)):
                command = template_supervised_execute.replace("<<alg>>", image)
                command = command.replace("<<i>>", f'{i:05d}') + "}'"
                f.write(command + "\n")
        except Exception as exc:
            # Best effort per algorithm, but report the problem instead of
            # hiding it; KeyboardInterrupt/SystemExit still propagate.
            print(f"Skipping {alg_name!r}: {type(exc).__name__}: {exc}")

In [None]:
# Write one `Rscript` command per (R-based unsupervised algorithm, distinct
# successful hyper-parameter setting) to unsupervised_r_commands.txt.
#
# NOTE(review): the "<<data_name>>" placeholder is not substituted here and ends
# up verbatim in the output file -- presumably it is filled in by a later step;
# TODO confirm.
with open("unsupervised_r_commands.txt", "w") as f:
    # Command prefix: the JSON argument is left open so that the hyper-parameters
    # can be appended as additional top-level keys.
    template_unsupervised = "Rscript <<alg>>/algorithm.r '{\"executionType\": \"execute\", \"dataInput\": \"./1-data/<<data_name>>.total.csv\", \"dataOutput\": \"./2-results/<<alg>>_<<data_name>>_<<i>>.ts\""
    for alg_name in alg_map.ref_name.values:
        try:
            # The folder depends only on the algorithm, so resolve it once
            # instead of once per trial.
            alg_folder = alg_map[alg_map.ref_name == alg_name].folder.values[0]
            if not isinstance(alg_folder, str):
                # No usable folder (e.g. an empty CSV cell read as NaN): skip.
                continue
            alg_dir = "adapad_" + alg_folder
            # Keep only algorithms listed in both `unsupervised_algs` and `r_based`.
            if alg_dir not in unsupervised_algs or alg_dir not in r_based:
                continue

            # Distinct hyper-parameter settings of successful univariate runs.
            param_trials = param_ref[
                (param_ref.algorithm == alg_name)
                & (param_ref.dataset_input_dimensionality == "UNIVARIATE")
                & (param_ref.status == "Status.OK")
                & (param_ref.error_category == "- OK -")
            ].hyper_params.unique()

            for i, params in enumerate(param_trials):
                command = template_unsupervised.replace("<<alg>>", alg_dir)
                command = command.replace("<<i>>", f'{i:05d}')
                # Drop the leading "{" so the parameters merge into the open
                # JSON object of the template.
                params_tail = str(params)[1:]
                if params_tail.strip() == "}":
                    # Empty parameter object: just close the JSON, otherwise the
                    # command would contain an invalid trailing comma (", }").
                    command += "}'"
                else:
                    command += ", " + params_tail + "'"
                f.write(command + "\n")
        except Exception as exc:
            # Best effort per algorithm, but report the problem instead of
            # hiding it; KeyboardInterrupt/SystemExit still propagate.
            print(f"Skipping {alg_name!r}: {type(exc).__name__}: {exc}")