In [2]:
import os

import mlflow
# NOTE: a later cell defines its own compare_result, which shadows this import.
from utils.evaluate import compare_result

# Point MLflow at the tracking store. The MLFLOW_TRACKING_URI environment
# variable takes precedence so the notebook can run on another machine without
# editing this cell; the original author's local path remains the fallback.
mlflow.set_tracking_uri(
    os.environ.get(
        "MLFLOW_TRACKING_URI",
        "/Users/Pongpanod.Sa/Desktop/cu/thesis/coding/pun_master_thesis/mlruns",
    )
)

In [3]:
import numpy as np
import pandas as pd

from mlflow import MlflowClient
from utils.read import read_UCR_dataset_name

def query_runs(filter_string: str, order_by: str = "params.dataset ASC", experiment_id: str = "0"):
    """Return every run of one MLflow experiment that matches a filter.

    Args:
        filter_string: MLflow search filter expression.
        order_by: A single ordering clause; defaults to ascending dataset name.
        experiment_id: ID of the experiment to search. Defaults to "0", the
            experiment this notebook has always queried.

    Returns:
        A list of the matching run objects, in the requested order.
    """
    client = MlflowClient()
    runs = []
    page_token = None
    while True:
        # A single search_runs call returns only one page of results, so keep
        # following the page token; otherwise large result sets would be
        # silently truncated.
        page = client.search_runs(
            [experiment_id],
            filter_string=filter_string,
            order_by=[order_by],
            page_token=page_token,
        )
        runs.extend(page)
        page_token = getattr(page, "token", None)
        if not page_token:
            break
    return runs


def query_augmented_result(augment_name, augment_params):
    """Fetch the runs whose logged ``augmentation`` param matches a variation.

    The filter is a LIKE pattern containing ``'name': '<augment_name>'``
    followed by the opening of the ``'params'`` dict with each given key/value
    rendered as ``'key': value``. The pattern ends with a wildcard rather than
    a closing brace, so it is a prefix match on the parameter list.

    Args:
        augment_name: Name of the augmentation to look for.
        augment_params: Mapping of augmentation parameter names to values.

    Returns:
        Whatever ``query_runs`` returns for the constructed filter.
    """
    # Each parameter is rendered as 'key': value and the pieces comma-joined.
    rendered_params = ", ".join(
        f"'{param_name}': {param_value}"
        for param_name, param_value in augment_params.items()
    )
    like_body = f"'name': '{augment_name}', 'params': {{{rendered_params}"
    return query_runs('params.augmentation LIKE"%' + like_body + '%"')


def query_baseline(dataset: list, model: str = "minirocket"):
    """Collect the un-augmented accuracy for each requested dataset.

    Args:
        dataset: Dataset names to keep; runs on other datasets are ignored.
        model: Value of the ``model`` run parameter to match.

    Returns:
        Dict mapping dataset name to the run's ``accuracy`` metric. If several
        runs share a dataset, the last one returned by the query wins.
    """
    baseline_runs = query_runs(f'params.augmentation = "None" AND params.model = "{model}"')
    return {
        run.data.params["dataset"]: run.data.metrics["accuracy"]
        for run in baseline_runs
        if run.data.params["dataset"] in dataset
    }


def compare_result(query_list: list):
    """Build a styled accuracy table comparing augmentation variations to the baseline.

    Args:
        query_list: List of dicts, each with an ``"augment_name"`` and an
            ``"augment_params"`` entry describing one variation to look up.

    Returns:
        A pandas Styler with one row per variation (labelled by the string
        form of its params) plus a final ``'baseline'`` row, and one column per
        dataset. Columns are sorted by dataset name. Missing results are shown
        as 0.0, so a 0.0 cell means "no run found", not a measured accuracy.
        The best value in each column is highlighted.
    """
    datasets = read_UCR_dataset_name()  # presumably the dataset names to report; see utils.read

    rows = []
    row_labels = []
    for query in query_list:
        # Look up the runs logged for this augmentation variation.
        runs = query_augmented_result(query["augment_name"], query["augment_params"])
        # clean() yields accuracies as 3-decimal strings; convert them back to
        # floats so the table is numeric.
        rows.append({name: float(acc) for name, acc in clean(runs, datasets).items()})
        row_labels.append(f"{query['augment_params']}")

    rows.append(query_baseline(datasets))
    row_labels.append('baseline')

    # Without an explicit sort the column order follows the order in which
    # dataset names first appear across rows, so a dataset missing from the
    # first variation would be pushed to the end of the table.
    table = (
        pd.DataFrame(rows, index=row_labels)
        .sort_index(axis=1)
        .fillna(0.0)
    )

    def highlight_max(s, props=''):
        # Apply `props` to every cell equal to the column maximum (ties included).
        return np.where(s == np.nanmax(s.values), props, '')

    return table.style.apply(highlight_max, props='background-color:darkblue', axis=0)


def clean(query_result: list, datasets: "list[str] | None" = None):
    """Reduce a list of runs to a name-sorted ``{dataset: accuracy}`` mapping.

    Args:
        query_result: Run objects exposing ``run.data.params["dataset"]`` and
            ``run.data.metrics["accuracy"]``.
        datasets: Container of dataset names to keep. ``None`` (the default)
            keeps every run. The previous default was the integer ``3``, which
            cannot be used with ``in`` and raised ``TypeError`` for any
            non-empty ``query_result``.

    Returns:
        Dict mapping dataset name to the accuracy formatted as a string with
        three decimals, with keys in sorted order. Duplicate runs for the same
        dataset are not de-duplicated explicitly: the last one in
        ``query_result`` wins.
    """
    accuracy_by_dataset = {}
    for run in query_result:
        dataset_name = run.data.params["dataset"]
        if datasets is not None and dataset_name not in datasets:
            continue
        # TODO: decide how duplicate runs should be resolved; for now a later
        # run simply overwrites an earlier one.
        accuracy_by_dataset[dataset_name] = "{:.3f}".format(run.data.metrics["accuracy"])

    # Keys are unique, so sorting the items orders purely by dataset name.
    return dict(sorted(accuracy_by_dataset.items()))

In [6]:
# Sweep the window_warp_concat augmentation over window ratios 0.5, 0.7 and 0.9.
# (To sweep window_slice_concat instead, use that name with a "reduce_ratio" parameter.)
query_list = [
    {"augment_name": "window_warp_concat", "augment_params": {"window_ratio": tenths / 10}}
    for tenths in range(5, 10, 2)
]

# Other query sets used previously, kept for reference:
# query_list.append({"augment_name":"window_warp", "augment_params":{"window_ratio": 0.9}})
# query_list.append({"augment_name":"wdba", "augment_params":{"batch_size": 2}})
# query_list = [
#     {"augment_name":"window_warp_wdba", "augment_params":{'window_ratio': 0.1, 'batch_size': 6}},
#     {"augment_name":"window_warp", "augment_params":{"window_ratio": 0.1}},
#     {"augment_name":"wdba", "augment_params":{"batch_size": 6}},
# ]

df = compare_result(query_list)
df

Unnamed: 0,BeetleFly,ECG5000,FiftyWords,Ham,MoteStrain,OliveOil,PhalangesOutlinesCorrect,ScreenType,ShapesAll,CinCECGTorso
{'window_ratio': 0.5},0.825,0.942,0.736,0.686,0.93,0.667,0.792,0.471,0.855,0.0
{'window_ratio': 0.7},0.85,0.943,0.757,0.695,0.915,0.767,0.789,0.476,0.868,0.0
{'window_ratio': 0.9},0.9,0.94,0.819,0.71,0.925,0.883,0.804,0.436,0.907,0.0
baseline,0.9,0.944667,0.83956,0.714286,0.928914,0.933333,0.831002,0.48,0.926667,0.865217


In [54]:
# Sweep the discriminative_guided_warp augmentation over batch sizes 1 through 5.
query_list = [
    {"augment_name": "discriminative_guided_warp", "augment_params": {"batch_size": batch_size}}
    for batch_size in range(1, 6)
]

df = compare_result(query_list)
df


Unnamed: 0,BeetleFly,CinCECGTorso,ECG5000,FiftyWords,Ham,MoteStrain,OliveOil,PhalangesOutlinesCorrect,ScreenType,ShapesAll
{'batch_size': 1},0.75,0.681,0.939,0.721,0.79,0.908,0.567,0.746,0.493,0.872
{'batch_size': 2},1.0,0.722,0.942,0.725,0.762,0.899,0.567,0.744,0.432,0.89
{'batch_size': 3},0.8,0.687,0.941,0.684,0.771,0.903,0.733,0.747,0.472,0.882
{'batch_size': 4},0.9,0.724,0.942,0.701,0.8,0.904,0.6,0.804,0.448,0.883
{'batch_size': 5},0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
baseline,0.9,0.865217,0.944667,0.83956,0.714286,0.928914,0.933333,0.831002,0.48,0.926667
