# Loading experiments from folder

In [173]:
import os
import sys

def adding_module_path():
    module_path = os.path.abspath(os.path.sep.join([".."]*2))

    if module_path not in sys.path:
        sys.path.append(module_path)

adding_module_path()

In [220]:
from src.config.config import EXPERIMENT_RESULTS_DIRECTORY, FILENAME_CONFUSION_MATRIX, FILENAME_METRICS, FILENAME_DESCRIPTION, FILENAME_SUMMARIZATION
import pandas as pd
import os
from os.path import exists as file_exists
from src.config.config import BLANK_DESCRIPTION
from src.types.experiment_summarization_fields import ExperimentSummarizationFields
from src.types.experiment_description import ExperimentDescriptionType
from src.types.results import ResultType


In [221]:
start_directory = EXPERIMENT_RESULTS_DIRECTORY

In [222]:
filenames = [FILENAME_CONFUSION_MATRIX, FILENAME_METRICS, FILENAME_DESCRIPTION, FILENAME_SUMMARIZATION]

In [223]:
start_directory

'C:\\Users\\Vojta\\Desktop\\diploma\\experiment_results'

In [224]:
test_start = os.path.sep.join([start_directory, "ClassicAndVectorizer", "stamp%3A1646081182.0033693"])

In [225]:
test_start

'C:\\Users\\Vojta\\Desktop\\diploma\\experiment_results\\ClassicAndVectorizer\\stamp%3A1646081182.0033693'

In [226]:
def exists(directory, filename):
    current_path = os.path.sep.join([directory, filename])
    if file_exists(current_path):
        return current_path
    return None

In [227]:
def parse_confusion_matrix(directory):
    path = exists(directory, FILENAME_CONFUSION_MATRIX)

    if path is None:
        return None
    
    content = pd.read_csv(path, sep=';')
    return content

In [228]:
def prase_metrics(directory):
    path = exists(directory, FILENAME_METRICS)
    
    if path is None:
        return None
        
    content = pd.read_csv(path, sep=';')
    return content

In [229]:
def parse_description(directory):
    path = exists(directory, FILENAME_DESCRIPTION)
    
    if path is None:
        return None
        
    content = pd.read_csv(path, sep=';')
    return content

In [230]:
def parse_summarization(directory):
    path = exists(directory, FILENAME_SUMMARIZATION)
    
    if path is None:
        return None
        
    content = pd.read_csv(path,  sep=';')
    return content

In [241]:
def merge_content(confusion_matrix, metrics, description, summarization, directory):
    concat_df = pd.DataFrame()

    for df in [metrics, description, summarization]:
        if df is not None:
            concat_df = pd.concat([concat_df, df])

    keys = concat_df.iloc[:, 0].values
    values = concat_df.iloc[:, 1].values


    if metrics is None:
        print(f"No metrics in {directory}")
        append_keys = [x.value for x in list(ResultType) if x != ResultType.ConsfusionMatrix] 
        keys = keys + append_keys
        values = values + [BLANK_DESCRIPTION] * len(append_keys)

    if description is None:
        print(f"No description in {directory}")
        append_keys = [x.value for x in list(ExperimentDescriptionType)] 
        keys = keys + append_keys
        values = values + [BLANK_DESCRIPTION] * len(append_keys)

    if summarization is None:
        print(f"No summarization in {directory}")
        append_keys = [x.value for x in list(ExperimentSummarizationFields)] 
        keys = keys + append_keys
        values = values + [BLANK_DESCRIPTION] * len(append_keys)

    dic = {k:v for k, v in zip(keys, values)}

    dic['ConfusionMatrix'] = confusion_matrix.values if confusion_matrix is not None else BLANK_DESCRIPTION
    dic['Directory'] = directory
    

    record = pd.DataFrame.from_dict(dic, orient='index').T

    return record

In [242]:
def create_record(directory):
    try:
        confusion_matrix = parse_confusion_matrix(directory)
        metrics = prase_metrics(directory)
        description = parse_description(directory)
        summarization = parse_summarization(directory)
        record = merge_content(confusion_matrix, metrics, description, summarization, directory)
        return record
    except Exception as e:
        print(f"Exception in {directory}")
        print(f"Exception {e}")
        return None

In [243]:
def is_correct_file(path):
    for filename in filenames:
        current_path = os.path.sep.join([path, filename])
        if os.path.exists(current_path):
            return True
    return False

In [244]:
def process_directory(directory, storage=None):
    is_correct = is_correct_file(directory)
    record = None
    
    if is_correct:
        if storage is not None:
            record = create_record(directory)
            if record is not None:
                storage.append(record)

    for current_directory in os.listdir(directory):
        deeper_level = os.path.sep.join([directory, current_directory])
        if os.path.isdir(deeper_level):
            process_directory(deeper_level, storage)

In [245]:
def create_dataframe(start_directory, storage=None):
    return process_directory(start_directory, storage)

In [246]:
class Storage:
    def __init__(self):
        self.records = []

    def reset(self):
        self.records = []

    def run(self, directory=None):
        self.directory = directory

        if self.directory is None:
            return

        create_dataframe(self.directory, self.records)

    def get_dataframe(self):
        return pd.concat(self.records)

In [247]:
storage = Storage()

In [248]:
storage.run(start_directory)

No summarization in C:\Users\Vojta\Desktop\diploma\experiment_results\FirstExperiment
Exception in C:\Users\Vojta\Desktop\diploma\experiment_results\FirstExperiment
Exception operands could not be broadcast together with shapes (19,) (12,) 
No metrics in C:\Users\Vojta\Desktop\diploma\experiment_results\Trainable\Train_False
Exception in C:\Users\Vojta\Desktop\diploma\experiment_results\Trainable\Train_False
Exception operands could not be broadcast together with shapes (16,) (4,) 
No summarization in C:\Users\Vojta\Desktop\diploma\experiment_results\Trainable\Train_True
Exception in C:\Users\Vojta\Desktop\diploma\experiment_results\Trainable\Train_True
Exception operands could not be broadcast together with shapes (20,) (12,) 
No metrics in C:\Users\Vojta\Desktop\diploma\experiment_results\Trainable\Train_True_stamp%3A1644923067.0391238
Exception in C:\Users\Vojta\Desktop\diploma\experiment_results\Trainable\Train_True_stamp%3A1644923067.0391238
Exception operands could not be broadca

In [251]:
pd.set_option('display.max_columns', None)
df = storage.get_dataframe()
df.index = list(range(len(df)))

In [252]:
df

Unnamed: 0,Accuracy,F1,Precision,Recall,ExperimentType,ExperimentId,BatchSize,Epochs,LearningRate,TransformerName,TransformerPooling,PredictionModelType,NetType,EmbeddingType,IsTrainable,PreprocessingType,NumberOfAuthors,NumberOfSentences,LoadPath,SeqLen,IsTest,ClassicModelName,ExtraField,TransformerStartIndex,TransformerEndIndex,TransformerPoolingStrategy,NormalizationSize,VectorizationTime,LearningTime,PredictionTime,EvaluateTime,TrainRecords,TestRecords,ValidRecords,MissingRatioTrain,MissingRatioTest,EmbeddingSize,ConfusionMatrix,Directory
0,0.758356,0.758356,0.758356,0.758356,ClassicAndVectorizer/stamp:1646081182.0033693,,,,,Nada,Nada,Classic,Nada,BoW,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,RandomForestClassifier,"{""bootstrap"": true, ""ccp_alpha"": 0.0, ""class_w...",Nada,Nada,Nada,30000,37.92502689361572,4115.668527364731,12.029074430465698,0.0357723236083984,127500,22500,0,0,0,59957,"[[0, 3277, 259, 513, 132, 331], [1, 351, 3119,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
1,0.747289,0.747289,0.747289,0.747289,ClassicAndVectorizer/stamp:1646085348.3980286,,,,,Nada,Nada,Classic,Nada,BoW,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,NaiveBayesClassifier,"{""priors"": null, ""var_smoothing"": 1e-09}",Nada,Nada,Nada,30000,37.679288148880005,127.1037585735321,51.518155336380005,0.0349454879760742,127500,22500,0,0,0,59978,"[[0, 2294, 178, 998, 523, 379], [1, 54, 2969, ...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
2,0.882089,0.882089,0.882089,0.882089,ClassicAndVectorizer/stamp:1646085565.5369852,,,,,Nada,Nada,Classic,Nada,BoW,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,LinearClassifier,"{""alpha"": 0.0001, ""average"": false, ""class_wei...",Nada,Nada,Nada,30000,37.89919805526733,765.9207320213318,5.490895509719849,0.080115795135498,127500,22500,0,0,0,59944,"[[0, 3851, 192, 286, 103, 179], [1, 137, 3876,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
3,0.756933,0.756933,0.756933,0.756933,ClassicAndVectorizer/stamp:1646086375.7190003,,,,,Nada,Nada,Classic,Nada,TfIdf,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,RandomForestClassifier,"{""bootstrap"": true, ""ccp_alpha"": 0.0, ""class_w...",Nada,Nada,Nada,30000,44.53505182266235,4179.814553260803,11.3750319480896,0.0356054306030273,127500,22500,0,0,0,59716,"[[0, 3389, 276, 431, 151, 317], [1, 369, 3211,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
4,0.743289,0.743289,0.743289,0.743289,ClassicAndVectorizer/stamp:1646090612.272644,,,,,Nada,Nada,Classic,Nada,TfIdf,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,NaiveBayesClassifier,"{""priors"": null, ""var_smoothing"": 1e-09}",Nada,Nada,Nada,30000,44.85550928115845,117.47401261329652,48.42798638343811,0.0355470180511474,127500,22500,0,0,0,60223,"[[0, 2440, 180, 713, 666, 428], [1, 102, 3013,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
5,0.844933,0.844933,0.844933,0.844933,ClassicAndVectorizer/stamp:1646090823.8785756,,,,,Nada,Nada,Classic,Nada,TfIdf,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,LinearClassifier,"{""alpha"": 0.0001, ""average"": false, ""class_wei...",Nada,Nada,Nada,30000,45.47531342506409,536.2487215995789,2.885272979736328,0.0786454677581787,127500,22500,0,0,0,60111,"[[0, 3784, 280, 218, 170, 200], [1, 210, 3753,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
6,0.570489,0.570489,0.570489,0.570489,ClassicAndVectorizer/stamp:1646091458.448648,,,,,Nada,Nada,Classic,Nada,Glove,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,RandomForestClassifier,"{""bootstrap"": true, ""ccp_alpha"": 0.0, ""class_w...",Nada,Nada,Nada,30000,37.91206431388855,243.8152496814728,1.1184513568878174,0.0359320640563964,127500,22500,0,"(158286, 3267451, 4.844326663200152)","(27706, 572997, 4.83527836969478)",300,"[[0, 2592, 586, 557, 331, 468], [1, 518, 2414,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
7,0.418489,0.418489,0.418489,0.418489,ClassicAndVectorizer/stamp:1646091791.6211834,,,,,Nada,Nada,Classic,Nada,Glove,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,NaiveBayesClassifier,"{""priors"": null, ""var_smoothing"": 1e-09}",Nada,Nada,Nada,30000,37.497331619262695,0.7588906288146973,0.2956633567810058,0.0350520610809326,127500,22500,0,"(158146, 3263406, 4.846041221962576)","(27626, 573926, 4.813512543428944)",300,"[[0, 1467, 476, 481, 1188, 970], [1, 543, 994,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
8,0.582889,0.582889,0.582889,0.582889,ClassicAndVectorizer/stamp:1646091881.3473852,,,,,Nada,Nada,Classic,Nada,Glove,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,LinearClassifier,"{""alpha"": 0.0001, ""average"": false, ""class_wei...",Nada,Nada,Nada,30000,36.923553705215454,4.407043933868408,0.0431835651397705,0.0967781543731689,127500,22500,0,"(158372, 3261928, 4.85516541137634)","(27994, 581904, 4.810759163023454)",300,"[[0, 2285, 451, 656, 344, 747], [1, 391, 2288,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
9,0.592578,0.592578,0.592578,0.592578,ClassicAndVectorizer/stamp:1646091990.6369662,,,,,Nada,Nada,Classic,Nada,Word2Vec,True,Default,5,3,/home/usp/pro0255/diploma/data/gutenberg/5Auth...,Nada,False,RandomForestClassifier,"{""bootstrap"": true, ""ccp_alpha"": 0.0, ""class_w...",Nada,Nada,Nada,30000,36.17229771614075,252.58549737930295,1.1195757389068604,0.0360500812530517,127500,22500,0,"(64846, 3264908, 1.9861509114498785)","(11386, 575426, 1.9787079485459467)",300,"[[0, 2562, 532, 577, 342, 447], [1, 566, 2640,...",C:\Users\Vojta\Desktop\diploma\experiment_resu...
