In [2]:
from abc import abstractmethod
from typing import Any

class Stat:
    """Base class for a single named statistic read from a stats file.

    A stat has a fixed name and a value that starts out unset (``None``).
    Subclasses implement :meth:`set_value_from_stat_file` to populate the
    value from an open stats file.

    Note: ``set_value_from_stat_file`` is decorated with ``@abstractmethod``,
    but this class does not use an ABC metaclass, so instantiation of
    ``Stat`` itself is not blocked; calling the method on the base class
    raises ``NotImplementedError`` instead.
    """

    def __init__(self, name: str):
        """Create an unset stat called ``name``."""
        self._name = name
        self._value = None

    @abstractmethod
    def set_value_from_stat_file(self, stat_file):
        """Populate the value from ``stat_file``; must be overridden.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError

    def set_value(self, value: Any):
        """Store ``value`` as this stat's value.

        Raises:
            ValueError: if a value is already stored (call :meth:`reset`
                first).
        """
        if self._value is not None:
            raise ValueError("_value is already set.")
        self._value = value

    def get_value(self):
        """Return the stored value.

        Raises:
            ValueError: if no value has been stored yet.
        """
        # Identity check, not ``== None``: equality would invoke the stored
        # value's own ``__eq__``, which can report a false match or return a
        # non-boolean result (e.g. element-wise comparison on arrays).
        if self._value is None:
            raise ValueError("_value has not been set yet.")
        return self._value

    def reset(self):
        """Clear the stored value so that it can be set again.

        Raises:
            ValueError: if there is no stored value to clear.
        """
        if self._value is None:
            raise ValueError("_value has not been set yet.")
        self._value = None

    def __str__(self):
        """Return ``"<name>=<value>"``."""
        return f"{self._name}={self._value}"

class RootStat(Stat):
    """A stat whose line appears near the top of the stats file.

    Only the lines at indices 2 through 9 of the file are inspected, and the
    first whitespace-separated token of a line must equal the stat name
    exactly.
    NOTE(review): the fixed window presumably matches the header layout of a
    gem5 ``stats.txt`` dump -- confirm against the files actually parsed.
    """

    def __init__(self, name: str):
        super().__init__(name)

    def set_value_from_stat_file(self, stat_file):
        """Read this stat's value from the header window of ``stat_file``.

        Args:
            stat_file: a seekable text file object. It is rewound to the
                start before reading.

        Raises:
            ValueError: if no line in the window starts with the stat name,
                or if the value token cannot be converted to ``float``.
        """
        stat_file.seek(0)
        found = False
        for line in stat_file.readlines()[2:10]:
            fields = line.split()
            # Blank or value-less lines carry no stat; skip them instead of
            # failing on the index lookup.
            if len(fields) < 2:
                continue
            if fields[0] == self._name:
                # No early exit: if the name repeats, the last match wins.
                self._value = float(fields[1])
                found = True
        if not found:
            # Same failure mode as the other Stat subclasses, rather than
            # silently leaving the previous value in place.
            source = getattr(stat_file, "name", "<stat file>")
            raise ValueError(f"Could not find {self._name} in {source}")
    
class AggregateStat(Stat):
    """A stat whose value is the sum over every component that reports it.

    A line matches when the last dot-separated part of its first token equals
    the stat name, so ``a.b.name`` and ``c.name`` both contribute to ``name``.
    The first 10 and the last 3 lines of the file are not inspected.
    """

    def __init__(self, name: str):
        super().__init__(name)

    def set_value_from_stat_file(self, stat_file):
        """Sum the values of all matching lines in ``stat_file``.

        Args:
            stat_file: a seekable text file object. It is rewound to the
                start before reading.

        Raises:
            ValueError: if no line matches the stat name, or if a matching
                value token cannot be converted to ``float``.
        """
        total = 0
        instances = 0
        stat_file.seek(0)
        for line in stat_file.readlines()[10:-3]:
            fields = line.split()
            # Blank or value-less lines carry no stat; skip them instead of
            # failing on the index lookup.
            if len(fields) < 2:
                continue
            if fields[0].split(".")[-1] == self._name:
                total += float(fields[1])
                instances += 1
        if instances == 0:
            # Fall back to a placeholder for file-like objects without a
            # ``name`` attribute so the intended ValueError is still raised.
            source = getattr(stat_file, "name", "<stat file>")
            raise ValueError(f"Could not find {self._name} in {source}")
        self._value = total
    
class DictStat(Stat):
    """A stat kept separately for every component that reports it.

    The value is a dict that maps the full first token of each matching line
    (e.g. ``a.b.name``) to its float value. A line matches when the last
    dot-separated part of that token equals the stat name. The first 10 and
    the last 3 lines of the file are not inspected.
    """

    def __init__(self, name: str):
        super().__init__(name)

    def set_value_from_stat_file(self, stat_file):
        """Collect the values of all matching lines in ``stat_file``.

        Args:
            stat_file: a seekable text file object. It is rewound to the
                start before reading.

        Raises:
            ValueError: if no line matches the stat name, or if a matching
                value token cannot be converted to ``float``.
        """
        per_key = {}
        instances = 0
        stat_file.seek(0)
        for line in stat_file.readlines()[10:-3]:
            fields = line.split()
            # Blank or value-less lines carry no stat; skip them instead of
            # failing on the index lookup.
            if len(fields) < 2:
                continue
            stat_key = fields[0]
            if stat_key.split(".")[-1] == self._name:
                # A repeated full key overwrites the earlier entry.
                per_key[stat_key] = float(fields[1])
                instances += 1
        if instances == 0:
            # Fall back to a placeholder for file-like objects without a
            # ``name`` attribute so the intended ValueError is still raised.
            source = getattr(stat_file, "name", "<stat file>")
            raise ValueError(f"Could not find {self._name} in {source}")
        self._value = per_key

class HistStat(Stat):
    """A histogram stat merged across every component that reports it.

    Matching lines look like ``<path>.<name>::<bucket> <freq> ...`` where
    ``<bucket>`` is either ``start-end`` or a single number. Buckets are
    merged as they are read:

    * a bucket equal to, or strictly inside, an existing one adds its
      frequency to the existing one;
    * existing buckets strictly inside the new one are absorbed into it.

    The first 10 and the last 3 lines of the file are not inspected.
    """

    # Labels after "::" that are summary rows rather than buckets.
    _NON_BUCKET_LABELS = ("total", "samples", "mean", "gmean", "stdev")

    def __init__(self, name: str):
        super().__init__(name)

    @staticmethod
    def _strictly_inside(inner_start, inner_end, outer_start, outer_end):
        """Return True if [inner] lies within [outer] and is not equal to it."""
        return ((inner_start >= outer_start) and (inner_end < outer_end)) or \
            ((inner_start > outer_start) and (inner_end <= outer_end))

    def set_value_from_stat_file(self, stat_file):
        """Build the merged histogram from ``stat_file``.

        The stored value is the list produced by :meth:`_get_list_format`.

        Args:
            stat_file: a seekable text file object. It is rewound to the
                start before reading.

        Raises:
            ValueError: if no bucket line matches the stat name, or if a
                bucket label or frequency cannot be converted to ``float``
                (any label outside ``_NON_BUCKET_LABELS`` must be numeric).
        """
        buckets = []  # mutable [start, end, freq] rows
        instances = 0
        stat_file.seek(0)
        for line in stat_file.readlines()[10:-3]:
            parts = line.split("::")
            if parts[0].split(".")[-1] != self._name:
                continue
            fields = line.split()
            label_tokens = parts[1].split() if len(parts) > 1 else []
            # A matching name without a "::<bucket>" part or without a value
            # is not a bucket line; skip it instead of failing on an index.
            if not label_tokens or len(fields) < 2:
                continue
            bucket = label_tokens[0]
            if bucket in self._NON_BUCKET_LABELS:
                continue
            bucket_start = float(bucket.split("-")[0])
            bucket_end = float(bucket.split("-")[-1])
            bucket_freq = float(fields[1])

            merged = False
            # Rebuild the list rather than removing from it while iterating,
            # which would skip the row following each removal.
            survivors = []
            for row in buckets:
                start, end = row[0], row[1]
                if self._strictly_inside(start, end, bucket_start, bucket_end):
                    # The new bucket swallows this narrower row.
                    bucket_freq += row[2]
                    continue
                if ((bucket_start == start) and (bucket_end == end)) or \
                        self._strictly_inside(bucket_start, bucket_end, start, end):
                    # Rows are lists so this in-place update is possible.
                    row[2] += bucket_freq
                    merged = True
                survivors.append(row)
            if not merged:
                survivors.append([bucket_start, bucket_end, bucket_freq])
            buckets = survivors
            instances += 1
        if instances == 0:
            # Fall back to a placeholder for file-like objects without a
            # ``name`` attribute so the intended ValueError is still raised.
            source = getattr(stat_file, "name", "<stat file>")
            raise ValueError(f"Could not find {self._name} in {source}")
        self._value = self._get_list_format(buckets)

    def _get_list_format(self, value):
        """Flatten ``(start, end, freq)`` rows into ``(x, freq)`` points.

        Each row yields a point at its start and, when the bucket spans more
        than a single value, a second point at its end with the same
        frequency.
        """
        list_form = []
        for start, end, freq in value:
            list_form.append((start, freq))
            if start != end:
                list_form.append((end, freq))
        return list_form


In [15]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def plot_hist_stat(column_name, hue_name, data_frame, pdf=False):
    """Plot a histogram-valued column as one line per hue group.

    Every cell of ``data_frame[column_name]`` is iterated as ``(x, y)``
    pairs. The frame is expanded to long form: each pair becomes its own row
    that repeats the remaining columns of its source row and stores the pair
    in ``"<column_name>.x"`` and ``"<column_name>.y"``.

    Args:
        column_name: label of the column holding the ``(x, y)`` pairs.
        hue_name: label of the column passed to seaborn as ``hue``.
        data_frame: input frame; it is not modified.
        pdf: when true, each ``y`` is divided by the sum of all ``y`` values
            in its source cell and the y axis is labelled
            "Relative Frequency" instead of "Frequency".

    Returns:
        The long-form ``DataFrame`` that was plotted.
    """
    x_label = f"{column_name}.x"
    y_label = f"{column_name}.y"
    long_columns = [col for col in data_frame.columns if col != column_name]
    long_columns += [x_label, y_label]

    long_rows = []
    for _, record in data_frame.iterrows():
        # Values of every column other than the histogram one, in order.
        carried = [cell for label, cell in record.items() if label != column_name]
        scale = sum([point[1] for point in record[column_name]]) if pdf else 1
        for x_val, y_val in record[column_name]:
            long_rows.append(carried + [x_val, (y_val / scale)])

    new_df = pd.DataFrame(long_rows, columns=long_columns)

    ax = sns.lineplot(x=x_label, y=y_label, hue=hue_name, data=new_df)
    ax.set(
        xlabel=f"{column_name}",
        ylabel="Relative Frequency" if pdf else "Frequency",
    )
    return new_df
    