In [1]:
import pandas as pd
import os
import json


In [2]:
def read_folder(folder_path):
    """
    Reads every JSON file stored in the sub-folders of "folder_path".

    Parameters
    ----------
    folder_path: str
        Directory whose immediate sub-folders contain the JSON files.

    Returns
    -------
    list with one parsed JSON object per file, ordered by sub-folder name and then
    by file name. Every object gets an extra "filePath" entry holding the path it
    was read from.

    Notes
    -----
    Entries directly inside "folder_path" that are not directories, and entries
    inside a sub-folder that are not regular files, are skipped.
    """

    data_list = []

    # os.listdir returns entries in arbitrary order; sorting makes the result reproducible.
    for folder in sorted(os.listdir(folder_path)):
        folderPath = folder_path+"/"+folder

        # A stray file next to the sub-folders must not abort the whole scan.
        if not os.path.isdir(folderPath):
            continue

        for file in sorted(os.listdir(folderPath)):
            filePath = f"{folderPath}/{file}"

            # Nested directories cannot be opened as JSON files.
            if not os.path.isfile(filePath):
                continue

            # Read as UTF-8 explicitly instead of relying on the platform default encoding.
            with open(filePath, encoding="utf-8") as json_file:
                data = json.load(json_file)

            data["filePath"] = filePath
            data_list.append(data)

    return data_list


In [3]:
def dict_to_df(_dict:dict) -> pd.DataFrame:
    """Builds a one-row DataFrame from a dictionary.

    Every key becomes a column. The matching value is wrapped in a one-element
    list, so it ends up as the single cell of that column.
    """
    single_row = {key: [_dict[key]] for key in _dict}
    return pd.DataFrame(single_row)

def split_col_D(df, col,  prefix=None, splitter="_", drop_OG_col=True):
    """
    Splits a dataframe column that only contains dicts into several columns, one per dict key.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame that contains "col"
    col: str
        The name of the column that is going to be split
    prefix: str or None, default None
        The prefix for naming the new columns, if None use "col" as prefix
    splitter: str, default "_"
        The string between prefix and the dict-key when making names for the new columns.
    drop_OG_col: bool, default True
        Whether or not to drop the original column when returning the df.
        If it is kept, it is placed directly after the new columns.

    Returns
    -------
    pd.DataFrame in which "col" has been expanded into multiple columns. The new
    columns are inserted at the position of "col".

    Notes
    -----
    The result may contain NaN-values, in cases where not all the dicts contain the same keys.
    A row holding an empty dict is kept and gets NaN in every new column.
    """

    if prefix is None:
        prefix = col

    # Build the expanded frame in one pass: one row per dict, labelled with df's own
    # index so the column-wise concat below lines the rows up one to one.
    new_df = pd.DataFrame(list(df[col]), index=df.index)

    # f-string formatting also copes with dict keys that are not strings.
    new_df.columns = [f"{prefix}{splitter}{key}" for key in new_df.columns]

    old_cols = list(df.columns)
    target_col_I = old_cols.index(col)

    # Dropping the original column means resuming right after it; keeping it means
    # resuming at it, so it follows the new columns.
    tail_start = target_col_I + 1 if drop_OG_col else target_col_I
    new_cols = old_cols[:target_col_I] + list(new_df.columns) + old_cols[tail_start:]

    return pd.concat([df, new_df], axis=1)[new_cols]

In [11]:
root = "../../"
# os.path.join keeps the location portable (a backslash-separated literal only resolves on Windows).
cifar_df = pd.concat( [dict_to_df(i) for i in read_folder(os.path.join(root, "Results", "HPO", "TripletAgain", "Cifar"))] )
cifar_df = split_col_D(cifar_df, "task")
cifar_df = split_col_D(cifar_df, "result")

# For every distinct "task_bits" value collect the row(s) with the highest "result_map".
best = []

for bit in cifar_df["task_bits"].unique():
    bit_df = cifar_df[cifar_df["task_bits"] == bit]

    # Series.max() skips NaN; the builtin max() can return NaN depending on row order,
    # in which case the equality filter would match nothing.
    best.append( bit_df[bit_df["result_map"] == bit_df["result_map"].max()] )

# Bound to a name so the table can be inspected; only the last expression of a cell is displayed.
best_runs = pd.concat(best)

# Cell output: mean over all runs of "time" divided by the number of entries in "result_loss".
(cifar_df["time"] / cifar_df["result_loss"].map(len)).mean()

9.256377903716343

In [12]:
# NOTE: despite its name, `cifar_df` holds the results read from the ImgNet folder in this cell.
# os.path.join keeps the location portable (a backslash-separated literal only resolves on Windows).
cifar_df = pd.concat( [dict_to_df(i) for i in read_folder(os.path.join(root, "Results", "HPO", "TripletAgain", "ImgNet"))] )
cifar_df = split_col_D(cifar_df, "task")
cifar_df = split_col_D(cifar_df, "result")

# For every distinct "task_bits" value collect the row(s) with the highest "result_map".
best = []

for bit in cifar_df["task_bits"].unique():
    bit_df = cifar_df[cifar_df["task_bits"] == bit]

    # Series.max() skips NaN; the builtin max() can return NaN depending on row order,
    # in which case the equality filter would match nothing.
    best.append( bit_df[bit_df["result_map"] == bit_df["result_map"].max()] )

# Bound to a name so the table can be inspected; only the last expression of a cell is displayed.
best_runs = pd.concat(best)

# Cell output: mean over all runs of "time" divided by the number of entries in "result_loss".
(cifar_df["time"] / cifar_df["result_loss"].map(len)).mean()

47.74878791064333

In [13]:
# NOTE: despite its name, `cifar_df` holds the results read from the Nus folder in this cell.
# os.path.join keeps the location portable (a backslash-separated literal only resolves on Windows).
cifar_df = pd.concat( [dict_to_df(i) for i in read_folder(os.path.join(root, "Results", "HPO", "TripletAgain", "Nus"))] )
cifar_df = split_col_D(cifar_df, "task")
cifar_df = split_col_D(cifar_df, "result")

# For every distinct "task_bits" value collect the row(s) with the highest "result_map".
best = []

for bit in cifar_df["task_bits"].unique():
    bit_df = cifar_df[cifar_df["task_bits"] == bit]

    # Series.max() skips NaN; the builtin max() can return NaN depending on row order,
    # in which case the equality filter would match nothing.
    best.append( bit_df[bit_df["result_map"] == bit_df["result_map"].max()] )

# Bound to a name so the table can be inspected; only the last expression of a cell is displayed.
best_runs = pd.concat(best)

# Cell output: mean over all runs of "time" divided by the number of entries in "result_loss".
(cifar_df["time"] / cifar_df["result_loss"].map(len)).mean()

20.253066688576634

In [14]:
# NOTE: despite its name, `cifar_df` holds the results read from the Nus folder in this cell.
# os.path.join keeps the location portable (a backslash-separated literal only resolves on Windows).
cifar_df = pd.concat( [dict_to_df(i) for i in read_folder(os.path.join(root, "Results", "HPO", "DTSH2", "Nus"))] )
cifar_df = split_col_D(cifar_df, "task")
cifar_df = split_col_D(cifar_df, "result")

# For every distinct "task_bits" value collect the row(s) with the highest "result_map".
best = []

for bit in cifar_df["task_bits"].unique():
    bit_df = cifar_df[cifar_df["task_bits"] == bit]

    # Series.max() skips NaN; the builtin max() can return NaN depending on row order,
    # in which case the equality filter would match nothing.
    best.append( bit_df[bit_df["result_map"] == bit_df["result_map"].max()] )

# Bound to a name so the table can be inspected; only the last expression of a cell is displayed.
best_runs = pd.concat(best)

# Cell output: mean over all runs of "time" divided by the number of entries in "result_loss".
(cifar_df["time"] / cifar_df["result_loss"].map(len)).mean()

2.115133437626592

In [15]:
# NOTE: despite its name, `cifar_df` holds the results read from the ImgNet folder in this cell.
# os.path.join keeps the location portable (a backslash-separated literal only resolves on Windows).
cifar_df = pd.concat( [dict_to_df(i) for i in read_folder(os.path.join(root, "Results", "HPO", "DTSH2", "ImgNet"))] )
cifar_df = split_col_D(cifar_df, "task")
cifar_df = split_col_D(cifar_df, "result")

# For every distinct "task_bits" value collect the row(s) with the highest "result_map".
best = []

for bit in cifar_df["task_bits"].unique():
    bit_df = cifar_df[cifar_df["task_bits"] == bit]

    # Series.max() skips NaN; the builtin max() can return NaN depending on row order,
    # in which case the equality filter would match nothing.
    best.append( bit_df[bit_df["result_map"] == bit_df["result_map"].max()] )

# Bound to a name so the table can be inspected; only the last expression of a cell is displayed.
best_runs = pd.concat(best)

# Cell output: mean over all runs of "time" divided by the number of entries in "result_loss".
(cifar_df["time"] / cifar_df["result_loss"].map(len)).mean()

40.963386398632835

In [10]:
# os.path.join keeps the location portable (a backslash-separated literal only resolves on Windows).
cifar_df = pd.concat( [dict_to_df(i) for i in read_folder(os.path.join(root, "Results", "HPO", "DTSH2", "Cifar"))] )
cifar_df = split_col_D(cifar_df, "task")
cifar_df = split_col_D(cifar_df, "result")

# For every distinct "task_bits" value collect the row(s) with the highest "result_map".
best = []

for bit in cifar_df["task_bits"].unique():
    bit_df = cifar_df[cifar_df["task_bits"] == bit]

    # Series.max() skips NaN; the builtin max() can return NaN depending on row order,
    # in which case the equality filter would match nothing.
    best.append( bit_df[bit_df["result_map"] == bit_df["result_map"].max()] )

# Bound to a name so the table can be inspected; only the last expression of a cell is displayed.
best_runs = pd.concat(best)

# Cell output: mean over all runs of "time" divided by the number of entries in "result_loss".
(cifar_df["time"] / cifar_df["result_loss"].map(len)).mean()

2.0375278177536487