In [90]:
import itertools
import os
import pandas as pd
from enum import Enum
import numpy as np

In [37]:
class TableFields(Enum):
    Rule = "Rule"
    EntropyMove = "EntropyMove"
    CurrentEntropy = "CurrentEntropy"
    EntropySize = "EntropySize"
    DataType = "DataType"
    DescriptionData = "DescriptionData"
    MessageSize = "MessageSize"
    AlphabetSize = "AlphabetSize"
    GrammaticSize = "GrammaticSize"
    CalcTime = "CalcTime"


In [38]:
datasets = ['dna', 'english', 'proteins', 'sources']
pick_size = [10000]
limit_steps = [15]
is_largest = [True, False]
optimized = ["optimized", "bruto"]
filename = "steps.csv"

In [39]:
loads = list(itertools.product(datasets, pick_size, limit_steps, is_largest, optimized))

In [40]:
loads

[('dna', 10000, 15, True),
 ('dna', 10000, 15, False),
 ('english', 10000, 15, True),
 ('english', 10000, 15, False),
 ('proteins', 10000, 15, True),
 ('proteins', 10000, 15, False),
 ('sources', 10000, 15, True),
 ('sources', 10000, 15, False)]

In [56]:
def load_experiment_results(loads):
    res = {}
    counter = 0
    for directory, pick_size, limit_steps, is_largest, optimized in loads:
        arguments = [directory, pick_size, limit_steps, is_largest, optimized, filename]
        arguments = [str(arg) for arg in arguments]
        path = os.path.sep.join(arguments)
        data = pd.read_csv(path, sep=',')
        res[path] = data
        counter += 1
    return res

In [57]:
res = load_experiment_results(loads)

In [59]:
current_keys = list(res.keys())
current_keys

['dna\\10000\\15\\True\\steps.csv',
 'dna\\10000\\15\\False\\steps.csv',
 'english\\10000\\15\\True\\steps.csv',
 'english\\10000\\15\\False\\steps.csv',
 'proteins\\10000\\15\\True\\steps.csv',
 'proteins\\10000\\15\\False\\steps.csv',
 'sources\\10000\\15\\True\\steps.csv',
 'sources\\10000\\15\\False\\steps.csv']

In [117]:
class NewTable(Enum):
    StartedEntropy =  "StartedEntropy"
    EndedEntropy = "EndedEntropy"
    DiffEntropy = "DiffEntropy"
    
    StartedSizeEntropy =  "StartedSizeEntropy"
    EndedSizeEntropy = "EndedSizeEntropy"
    DiffSizeEntropy = "DiffSizeEntropy"
    
    CalculationTime = "CalculationTime"

    StartedMessageSize = "StartedMessageSize"
    EndedMessageSize = "EndedMessageSize"
    DiffMessageSize = "DiffMessageSize"

    LimitSteps = "LimitSteps"
    PickMethod = "PickMethod"
    NormalizationValue = "NormalizationValue"

    Optimized = "Optimized"

In [130]:
def process_table(table, arguments=None):
    res = {}

    if arguments is not None:
        directory, pick_size, limit_steps, is_largest, optimized, filename = arguments
        res[NewTable.LimitSteps.value] = limit_steps 
        res[NewTable.PickMethod.value] = "Largest" if is_largest else "Random"
        res[NewTable.NormalizationValue.value] = pick_size 
        res[NewTable.Optimized.value] = optimized

    res[NewTable.StartedEntropy.value] = table.iloc[0][TableFields.CurrentEntropy.value]
    res[NewTable.EndedEntropy.value] = table.iloc[len(table) - 1][TableFields.CurrentEntropy.value]
    res[NewTable.DiffEntropy.value] = res[NewTable.EndedEntropy.value] - res[NewTable.StartedEntropy.value]

    res[NewTable.StartedSizeEntropy.value] = table.iloc[0][TableFields.EntropySize.value]
    res[NewTable.EndedSizeEntropy.value] = table.iloc[len(table) - 1][TableFields.EntropySize.value]
    res[NewTable.DiffSizeEntropy.value] = res[NewTable.EndedSizeEntropy.value] - res[NewTable.StartedSizeEntropy.value]

    res[NewTable.StartedMessageSize.value] = table.iloc[0][TableFields.MessageSize.value]
    res[NewTable.EndedMessageSize.value] = table.iloc[len(table) - 1][TableFields.MessageSize.value]
    res[NewTable.DiffMessageSize.value] = res[NewTable.StartedMessageSize.value] - res[NewTable.EndedMessageSize.value]

    res[NewTable.CalculationTime.value] = np.sum(table[TableFields.CalcTime.value].values)

    return res

In [139]:
def process_loads(res):
    res = {}
    for directory, pick_size, limit_steps, is_largest in loads:
        arguments_raw = [directory, pick_size, limit_steps, is_largest, filename]
        arguments_str = [str(arg) for arg in arguments_raw]
        path = os.path.sep.join(arguments_str)
        data = pd.read_csv(path, sep=',')
        res[path] = process_table(data ,arguments_raw)
    return res

In [140]:
results = pd.DataFrame.from_dict(process_loads(loads), orient="index")

In [141]:
results

Unnamed: 0,LimitSteps,PickMethod,NormalizationValue,StartedEntropy,EndedEntropy,DiffEntropy,StartedSizeEntropy,EndedSizeEntropy,DiffSizeEntropy,StartedMessageSize,EndedMessageSize,DiffMessageSize,CalculationTime
dna\10000\15\True\steps.csv,15,Largest,10000,2.264636,3.832112,1.567476,20653.477357,23820.407625,3166.930269,9120,6216,2904,43.561212
dna\10000\15\False\steps.csv,15,Random,10000,2.102757,2.355298,0.252541,20270.577101,20931.534578,660.957477,9640,8887,753,23.586428
english\10000\15\True\steps.csv,15,Largest,10000,4.647096,5.199803,0.552706,45318.483246,46247.045912,928.562666,9752,8894,858,620.3637
english\10000\15\False\steps.csv,15,Random,10000,4.513233,4.554015,0.040781,45109.76879,45203.148606,93.379816,9995,9926,69,511.453867
proteins\10000\15\True\steps.csv,15,Largest,10000,4.255582,4.63572,0.380138,42172.813312,42977.76026,804.946948,9910,9271,639,278.587336
proteins\10000\15\False\steps.csv,15,Random,10000,4.21427,4.386234,0.171964,42083.698811,42476.290891,392.592079,9986,9684,302,229.946999
sources\10000\15\True\steps.csv,15,Largest,10000,5.607772,5.930715,0.322942,55438.434898,55944.430047,505.995148,9886,9433,453,1421.401215
sources\10000\15\False\steps.csv,15,Random,10000,5.507507,5.526773,0.019266,55058.548195,55096.404543,37.856347,9997,9969,28,1185.260313


- Vizualizace času
- Srovnání random/largest
- Pozorování pohybu po určitém počtu kroků (size, entropy)
- Finální tabulka výsledků