In [1]:
import pandas as pd
import json
import os
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
out_dir = "./data/out"
in_dir = "./data/in"
heuristic_name = "simulated annealing"
heuristic_dir = os.path.join(out_dir, heuristic_name)

box_name = "black-box-22"
box_dir = os.path.join(heuristic_dir, box_name)

brute_force_name = "brute force"
set_name = "set-1"
brute_force_dir = os.path.join(out_dir, brute_force_name)
set_dir = os.path.join(brute_force_dir, set_name)

In [3]:
def get_experimnet_dirs(set_dir):
    experiment_dirs = []
    
    for filename in os.listdir(set_dir):
        path = os.path.join(set_dir, filename)

        if os.path.isdir(path):
            experiment_dirs.append(path)
    
    return experiment_dirs    

In [4]:
def read_text(path):
    file = open(path, mode='r')
    content = file.read()
    file.close()
    return content

In [5]:
def read_json(path):
    file = open(path, "r")
    doc = json.loads(file.read())
    file.close()
    return doc

In [6]:
def parse_duration(stamp):
    parts = stamp.split(':')
    seconds = float(parts[-1])
    parts.pop()
    
    for (i, dur) in enumerate(reversed(parts)):
        seconds += float(dur)*(60**(i+1))
    
    return seconds

In [7]:
def ftos(num):
    out = "{:,.1f}".format(num).replace(',', ' ')
    return out.replace(".", ",")

In [8]:
asset_col_name = "Měna"
optim_value_col_name = "Optimalizační hodnota"
duration_col_name = "Doba trvání (s)"

def read_test_set(set_dir):
    duration_name = "duration: "
    data = []

    for experiment_dir in get_experimnet_dirs(set_dir):
        log = read_text(os.path.join(experiment_dir, "log.txt"))
        duration_idx = log.rfind(duration_name)
        duration = parse_duration(log[duration_idx:-1].replace(duration_name, ""))

        best_state_doc = read_json(os.path.join(experiment_dir, "best-states.json"))[0]
        optim_value = best_state_doc["optimization value"]

        settings_doc = read_json(os.path.join(experiment_dir, "settings.json"))
        asset_name = settings_doc["candles"]["currency pair"]["base"]
        
        data.append({
            asset_col_name : asset_name,
            optim_value_col_name : optim_value,
            duration_col_name : duration
        })
        
    overview = pd.DataFrame(data)
    return overview.sort_values(asset_col_name)

In [9]:
optim_value_diff_col_name = "Rozdíl hodnot optimalizačního kritéria (%)"
acceleration_col_name = "Zrychlení"

def update_overview(set_overview, box_overview):
    box_overview[optim_value_diff_col_name] = np.nan
    box_overview[acceleration_col_name] = np.nan

    for asset_name in box_overview[asset_col_name]:
        box_row = box_overview.loc[box_overview[asset_col_name] == asset_name]
        set_row = set_overview.loc[set_overview[asset_col_name] == asset_name]

        box_idx = box_row.index[0]
        box_row = box_row.to_dict('r')[0]
        set_row = set_row.to_dict('r')[0]

        box_overview.at[box_idx, acceleration_col_name] = set_row[duration_col_name]/box_row[duration_col_name]
        box_overview.at[box_idx, optim_value_diff_col_name] = 100*((box_row[optim_value_col_name]
                                                                          -set_row[optim_value_col_name])
                                                                         /set_row[optim_value_col_name])

In [10]:
def convert_overview(box_overview):
    box_overview[optim_value_col_name] = box_overview[optim_value_col_name].apply(lambda x: ftos(x))
    box_overview[duration_col_name] = box_overview[duration_col_name].apply(lambda x: ftos(x))
    box_overview[optim_value_diff_col_name] = box_overview[optim_value_diff_col_name].apply(lambda x: ftos(x))
    box_overview[acceleration_col_name] = box_overview[acceleration_col_name].apply(lambda x: ftos(x))

In [11]:
display(read_test_set(box_dir).describe())
stop

Unnamed: 0,Optimalizační hodnota,Doba trvání (s)
count,7.0,7.0
mean,570.136477,153.584788
std,393.57416,34.433531
min,202.487307,86.857604
25%,246.772899,146.779317
50%,581.324902,161.189218
75%,702.149056,170.490415
max,1309.299217,192.507231


NameError: name 'stop' is not defined

In [None]:
set_overview = read_test_set(set_dir)
box_overview = read_test_set(box_dir)
update_overview(set_overview, box_overview)
convert_overview(box_overview)

In [None]:
# reorder columns
cols = [asset_col_name, optim_value_diff_col_name, duration_col_name, acceleration_col_name]
box_overview = box_overview[cols]

In [None]:
box_overview

In [None]:
report_dir = "./data/report"
optimizer_dir = os.path.join(report_dir, heuristic_name)

if not os.path.exists(optimizer_dir):
    os.makedirs(optimizer_dir)

box_overview.to_csv(os.path.join(optimizer_dir, box_name + "-overview.csv"), index=False)