In [1]:
%matplotlib widget

In [2]:
import os
import pickle as pk
from itertools import tee
from math import ceil
import statistics

import matplotlib
import matplotlib.pyplot as plt
plt.ioff()
import yaml

In [3]:
ROOT_PATH = os.path.join(os.path.pardir, "logs", "GIN")
DEEP_CHEM = "DeepChem_20200707"
JAK123 = "JAK123_20200706"

## Rename results

In [4]:
class Renamer:
    def __init__(self):
        self.required_fields = ["data_ratio", "encoder_dataset", "classifier_dataset", "encoder_epochs"]

    def rename_result(self, path):
        conf_file = os.path.join(path, "configs.yml")
        try:
            configs = yaml.load(open(conf_file, "r"), Loader=yaml.FullLoader)
        except FileNotFoundError:
            return
        new_name = ""
        for key, value in configs.items():
            if not key in self.required_fields: continue
            if isinstance(value, list):
                value = "-".join(map(str,value))
            if new_name == "":
                new_name += str(value)
            else:
                new_name += "_" + str(value)
        counter = 1
        while 1:
            try:
                os.rename(path, os.path.join(os.path.dirname(path), new_name))
                break
            except FileExistsError:
                counter += 1
                new_name += "_" + str(value) + "_" + str(counter)
    
    def rename_results(self, path):
        results = os.scandir(path)
        for res in results:
            if not res.is_dir():
                continue
            self.rename_result(res.path)

In [5]:
rn = Renamer()
for folder in [DEEP_CHEM, JAK123]:
    for date in os.scandir(os.path.join(ROOT_PATH, folder)):
        for task in os.scandir(date.path):
            rn.rename_results(task.path)

NotADirectoryError: [WinError 267] The directory name is invalid: '..\\logs\\GIN\\DeepChem_20200707\\20200707_011659\\bace_roc.png'

## Dude vs ZINC 

In [37]:
class Comparer:
    
    def __init__(self,
                 path1=None,
                 path2=None,
                 metric="validating_AP_AUC",
                 metric_fname="classifier_metrics.pk",
                 config_fname="configs.yml"):
        self._path1 = path1
        self._path2 = path2
        self.metric = metric
        self.metric_fname = metric_fname
        self.config_fname = config_fname
    
    @property
    def path1(self):
        return self._path1
    
    @path1.setter
    def path1(self, value):
        assert os.path.exists(value)
        self._path1 = value
        
    @property
    def path2(self):
        return self._path2
    
    @path2.setter
    def path2(self, value):
        assert os.path.exists(value)
        self._path2 = value
    
    def _get_pairs(self):
        dirs1 = os.scandir(self.path1)
        dirs2 = os.scandir(self.path2)
        marks1 = dict()
        marks2 = dict()
        for d in dirs1:
            if d.is_dir():
                tokens = d.name.split("_")
                ratio = tokens[1]
                encoder_epochs = tokens[-1]
                marks1[f"{ratio}_{encoder_epochs}"] = d.path
        for d in dirs2:
            if d.is_dir():
                tokens = d.name.split("_")
                ratio = tokens[1]
                encoder_epochs = tokens[-1]
                marks2[f"{ratio}_{encoder_epochs}"] = d.path
        pairs = list()
        for m, v in marks1.items():
            pairs.append((v, marks2[m]))
        return pairs
    
    def _number_of_rows(self, nplots, ncols):
        return int(ceil(nplots / ncols))
        
    def compare(self, ncols=2, figsize=[16, 12], ylim=[0, 1]):
        pairs = self._get_pairs()
        nr = self._number_of_rows(len(pairs), ncols)
        fig, axes = plt.subplots(ncols=ncols, nrows=nr, figsize=figsize)
        fig.tight_layout(pad=3.0)
        for pair, axe in zip(pairs, axes.flatten()):
            met1 = pk.load(open(os.path.join(pair[0], self.metric_fname), "rb"))[self.metric]
            met2 = pk.load(open(os.path.join(pair[1], self.metric_fname), "rb"))[self.metric]
            conf1 = yaml.load(open(os.path.join(pair[0], self.config_fname), "r"), Loader=yaml.FullLoader)
            conf2 = yaml.load(open(os.path.join(pair[1], self.config_fname), "r"), Loader=yaml.FullLoader)
            axe.plot(list(range(len(met1))), met1, label=f"{'_'.join(conf1['encoder_dataset'])}_{self.metric}")
            axe.plot(list(range(len(met2))), met2, label=f"{'_'.join(conf2['encoder_dataset'])}_{self.metric}")
            axe.set_ylim(ylim)
            axe.legend()
            axe.set_title(f"Ratio: {conf1['data_ratio'][0]}, Encoder: {bool(conf1['encoder_epochs'])}, {'_'.join(conf1['encoder_dataset'])} vs {'_'.join(conf2['encoder_dataset'])}")
            axe.grid(axis="y")
        return fig, axes

In [83]:
c = Comparer(metric="validating_AP_AUC")
c.path1 = os.path.join(ROOT_PATH, JAK123, "20200706_181605", "JAK1Dude")
c.path2 = os.path.join(ROOT_PATH, JAK123, "20200706_181605", "ZINC1k")
fig, _ = c.compare(ylim=[0.8, 1])
fig.savefig(os.path.join(ROOT_PATH, JAK123, os.path.basename(c.path1)+"_"+os.path.basename(c.path2)+"_"+c.metric))
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [79]:
for met in ["validating_F1", "validating_AP_AUC"]:
    c = Comparer(metric=met)
    c.path1 = os.path.join(ROOT_PATH, JAK123, "20200706_154014", "JAK3Dude")
    c.path2 = os.path.join(ROOT_PATH, JAK123, "20200706_154014", "ZINC1k")
    fig, _ = c.compare(ylim=[0.4, 1])
    fig.savefig(os.path.join(ROOT_PATH, JAK123, os.path.basename(c.path1)+"_"+os.path.basename(c.path2)+"_"+c.metric))
    fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [86]:

for met in ["validating_F1", "validating_AP_AUC"]:
    c = Comparer(metric=met)
    c.path1 = os.path.join(ROOT_PATH, JAK123, "20200706_154006", "JAK2")
    c.path2 = os.path.join(ROOT_PATH, JAK123, "20200706_154006", "ZINC1k")
    fig, _ = c.compare(ylim=[0.4, 1])
    fig.savefig(os.path.join(ROOT_PATH, JAK123, os.path.basename(c.path1)+"_"+os.path.basename(c.path2)+"_"+c.metric))
    fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [76]:
plt.close("all")

In [26]:
class Analyzer:
    
    def __init__(self, root, metric="validating_AP_AUC", metric_fname="classifier_metrics.pk"):
        self.root = root
        self.metric = metric
        self.metric_fname = metric_fname
        self._header_written = False
        
    @property
    def groups(self):
        try:
            return self._groups
        except AttributeError:
            self._groups = self.find_groups()
            return self._groups
        
    def find_groups(self):
        dirs = os.scandir(self.root)
        groups = set()
        for dir_ in dirs:
            name = os.path.basename(dir_.path)
            base_name = "_".join(name.split("_")[:-1])
            groups.add(base_name)
        return sorted(list(groups))
    
    def _number_of_rows(self, nplots, ncols):
        return int(ceil(nplots / ncols))

    def plot_results(self, ncols=4, figsize=[16, 12], ylim=[0, 1]):
        groups = self.groups
        modes = ["origin", "additive", "scaled"]
        nr = self._number_of_rows(len(groups), ncols)
        fig, axes = plt.subplots(ncols=ncols, nrows=nr, figsize=figsize)
        fig.tight_layout(pad=3.0)
        for grp, axe in zip(groups, axes.flatten()[:len(groups)]):
            for mod in modes:
                with open(os.path.join(self.root, f"{grp}_{mod}", self.metric_fname), "rb") as f:
                    metric = pk.load(f)[self.metric]
                label_name = f"{self.metric}_{mod}"
                axe.plot(list(range(len(metric))), metric, label=label_name)
            axe.set_ylim(ylim)
            axe.legend()
            axe.set_title(grp)
            axe.grid(axis="y")
        return fig, axes
    
    def _write_header(self, outf, metrics):
        if self._header_written:
            return
        outf.write("group_mode,")
        outf.write(",".join([key for key in metrics.keys() if "loss" not in key]))
        outf.write("\n")
        self._header_written = True
        
    def _find_best(self, metric):
        return statistics.mean(sorted(metric)[-5:])
    
    def _analysis_metrics(self, metrics, group, mode):
        outf = open(os.path.join(self.root, "statistics.csv"), "a")
        self._write_header(outf, metrics)
        outf.write(f"{group}_{mode}")
        for key, metric in metrics.items():
            if "loss" in key:
                continue
            best_avg = self._find_best(metric)
            outf.write(f",{best_avg}")
        outf.write("\n")
        outf.close()
    
    def results_statistics(self, mode="scaled"):
        groups = self.groups
        for grp in groups:
            with open(os.path.join(self.root, f"{grp}_{mode}", self.metric_fname), "rb") as f:
                metrics = pk.load(f)
            self._analysis_metrics(metrics, grp, mode)