In [1]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pathlib

In [2]:
def list_exp_csv(csv_dir, exp_name, pre_gan, metrics):
    if isinstance(csv_dir, str):
        csv_dir = pathlib.Path(csv_dir)
    csv_list = []
    for metric in metrics:
        csv_list.append(list(csv_dir.glob('**/' + exp_name + '/*' + pre_gan + '*' + metric + '*.csv')))
    csv_list = [item for sublist in csv_list for item in sublist]
    return csv_list

def csv_list_to_dfs(csv_list):
    dfs = {}
    for csv_file in csv_list:
        dfs[csv_file.stem] = pd.read_csv(csv_file)
    return dfs

def merge_dfs_by_epoch(dfs, metric, metric_rename, wide_long, smooth=True, smooth_par=0.9):
    new_df = pd.DataFrame()
    smooth_df = pd.DataFrame()
    exp_variations = []
    for name, df in dfs.items():
        if metric not in name:
            continue
        try:
            new_df['epoch']
        except KeyError:
            new_df['epoch'] = df.index + 1
            smooth_df['epoch'] = df.index + 1
        idx1 = name.find('_e') + 1
        idx2 = name.find('_2')
        exp_variation = name[idx1:idx2]
        exp_variations.append(exp_variation)
        col_name = name
        new_df[col_name] = df['Value']
        if smooth:
            smooth_df[col_name] = df['Value'].ewm(alpha=(1 - smooth_par)).mean()            
    if wide_long == 'long':
        #return new_df
        new_df = pd.melt(new_df, id_vars=['epoch'], value_name=metric_rename, var_name='exp_variation_id')
        if smooth:
            smooth_df = pd.melt(smooth_df, id_vars='epoch', value_name=metric, var_name='exp_variation_id')
            new_df[metric_rename + '-smooth'] = smooth_df[metric]
            
        new_df.loc[new_df['exp_variation_id'].str.contains('train'), 'train_val'] = 'train'
        new_df.loc[new_df['exp_variation_id'].str.contains('val'), 'train_val'] = 'val'

        new_df.loc[new_df['exp_variation_id'].str.contains('WV02'), 'sensor'] = 'WV02'
        new_df.loc[new_df['exp_variation_id'].str.contains('GE01'), 'sensor'] = 'GE01'
        new_df['sensor'].fillna('WV02', inplace=True)

        for exp_variation in exp_variations:
            label = exp_variation[exp_variation.find('-') + 1] + ' bands'
            new_df.loc[new_df['exp_variation_id'].str.contains(exp_variation), 'exp_variation'] = exp_variation
            new_df.loc[new_df['exp_variation_id'].str.contains(exp_variation), 'label'] = label
        # new_df['legend'] = new_df['exp_variation'] + '-' + new_df['sensor']
        #new_df.set_index(['exp_variation', 'train_val', 'sensor'], inplace=True)
        new_df['metric'] = metric_rename
    return new_df

## e01 Pretrain metrics plot

In [3]:
CSV_DIR = 'logs/csv'
METRICS_NAME = ['PSNR', 'SSIM', 'loss'] # The term used in tensorboard logs
METRICS = ['PSNR', 'SSIM', 'MAE'] # Change to this term instead
SMOOTH = True
SMOOTH_PAR = 0.9

In [4]:
csv_list = list_exp_csv(CSV_DIR, 'e01', 
                        pre_gan='pre',
                        metrics=METRICS_NAME)
dfs = []
for i in range(len(METRICS)):
    dfs.append(merge_dfs_by_epoch(csv_list_to_dfs(csv_list), 
                                   metric=METRICS_NAME[i], 
                                   metric_rename=METRICS[i],
                                   wide_long='long', 
                                   smooth=SMOOTH, 
                                   smooth_par=SMOOTH_PAR))
metric_df = dfs[0]
for i in range(len(METRICS) - 1):
    metric_df = metric_df.merge(dfs[i+1], how='outer')
metric_df

KeyError: "The following 'id_vars' are not present in the DataFrame: ['epoch']"

In [5]:
if SMOOTH:
    y = [metric + '-smooth' for metric in METRICS]
else:
    y = METRICS
fig = px.line(metric_df, 
              x='epoch', 
              y=y, 
              color='label', 
              #range_y=(29,44), 
              title='Experiment 01 Pretraining - Metrics plots', 
              facet_col='train_val',
              facet_row='metric',
              facet_col_spacing=0.01,
              line_dash='sensor', 
              line_dash_sequence=['solid', 'dot'], 
              render_mode='svg',
              width=1000,
              height=1000
             )
fig.update_layout(legend_title_text='')
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_yaxes(matches=None)
fig.layout.yaxis1.range = [0.005, 0.035]
fig.layout.yaxis2.range = [0.005, 0.035]
fig.layout.yaxis3.range = [0.7, 1]
fig.layout.yaxis4.range = [0.7, 1]
fig.layout.yaxis5.range = [30, 44]
fig.layout.yaxis6.range = [30, 44]
fig.layout.yaxis1.title=''
fig.layout.yaxis3.title=''
fig.layout.yaxis5.title=''
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="top",
    y=-0.05,
    xanchor="left",
    x=0
))

fig.show()

NameError: name 'metric_df' is not defined

In [6]:
fig.write_image("results-public/loss-plots/loss-e01-pre.pdf")

NameError: name 'fig' is not defined

## e01 GAN training metrics plot

In [7]:
CSV_DIR = 'logs/csv'
METRICS_NAME = ['PSNR', 'SSIM', 'D_loss', 'G_generator', 'G_perceptual', 'G_pixel', 'G_loss_total', 
                'Ma', 'NIQE', 'PI'] # The term used in tensorboard logs
METRICS = ['PSNR', 'SSIM', 'D_loss', 'G_generator', 'G_perceptual', 'G_pixel', 'G_loss_total', 
           'Ma', 'NIQE', 'PI'] # Change to this term instead
SMOOTH = True
SMOOTH_PAR = 0.9

In [8]:
csv_list = list_exp_csv(CSV_DIR, 'e01', 
                        pre_gan='gan',
                        metrics=METRICS_NAME)
dfs = []
for i in range(len(METRICS)):
    dfs.append(merge_dfs_by_epoch(csv_list_to_dfs(csv_list), 
                                   metric=METRICS_NAME[i], 
                                   metric_rename=METRICS[i],
                                   wide_long='long', 
                                   smooth=SMOOTH, 
                                   smooth_par=SMOOTH_PAR))
metric_df = dfs[0]
for i in range(len(METRICS) - 1):
    metric_df = metric_df.merge(dfs[i+1], how='outer')
metric_df

KeyError: "The following 'id_vars' are not present in the DataFrame: ['epoch']"

In [9]:
if SMOOTH:
    y = [metric + '-smooth' for metric in METRICS]
else:
    y = METRICS

fig = px.line(metric_df, 
              x='epoch', 
              y=y, 
              color='label', 
              #range_y=(29,44), 
              title='Experiment 01 GAN training - Metrics plots', 
              facet_col='train_val',
              facet_row='metric',
              facet_col_spacing=0.01,
              line_dash='sensor', 
              line_dash_sequence=['solid', 'dot'], 
              render_mode='svg',
              width=1000,
              height=1000
             )
fig.update_layout(legend_title_text='')
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_yaxes(matches=None)
#fig.layout.yaxis1.range = [0.005, 0.035]
#fig.layout.yaxis2.range = [0.005, 0.035]
#fig.layout.yaxis3.range = [0.7, 1]
#fig.layout.yaxis4.range = [0.7, 1]
#fig.layout.yaxis5.range = [30, 44]
#fig.layout.yaxis6.range = [30, 44]
fig.layout.yaxis1.title=''
fig.layout.yaxis3.title=''
fig.layout.yaxis5.title=''
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="top",
    y=-0.05,
    xanchor="left",
    x=0
))

fig.show()

NameError: name 'metric_df' is not defined

## New form of logging

In [5]:
def hex_to_rgb(hex_color):
    h = hex_color.lstrip('#')
    return tuple(int(h[i:i+2], 16) for i in (0, 2, 4))


def get_plotly_standard_colors(hex_rgb_rgba, alpha=0.5):
    hex_colors = px.colors.qualitative.Plotly
    if hex_rgb_rgba == 'hex':
        return hex_colors
    colors = []
    for h in hex_colors:
        if hex_rgb_rgba in ('rgb', 'rgba'):
            rgb = hex_to_rgb(h)
            if hex_rgb_rgba == 'rgb':
                colors.append('rgb' + str(rgb))
            elif hex_rgb_rgba == 'rgba':
                colors.append('rgba' + str((rgb[0], rgb[1], rgb[2], alpha)))
    return colors


def rgb_to_rgba(rgb_string, alpha):
    rgba = rgb_string.replace('rgb', 'rgba')
    rgba = rgba.replace(')', ', ' + str(alpha) + ')')
    return rgba

In [25]:
CSV_DIRS = {'e01-8-WV02': 'logs/e01-8/csv/WV02', 
            'e01-6-WV02': 'logs/e01-6/csv/WV02', 
            'e01-4-WV02': 'logs/e01-4/csv/WV02', 
            'e01-4-GE01': 'logs/e01-4/csv/GE01', 
            'e01-3-WV02': 'logs/e01-3/csv/WV02', 
            'e01-3-GE01': 'logs/e01-3/csv/GE01', 
           }
CSV_DIRS = {'e01-8-WV02': 'logs/e01-8/csv/val-WV02', 
            'e01-6-WV02': 'logs/e01-6/csv/val-WV02'
           }
COLORS = get_plotly_standard_colors('rgb')
STATISTICS = ['mean', 'median', 'std', 'quantile-0.40', 'quantile-0.60']
QUANTILES = ['quantile-0.40', 'quantile-0.60']
METRICS = ['PSNR', 'SSIM']

exp_stat_dict = {}

def extract_epoch_number(pathlib_path, pre_or_gan):
    #print(pathlib_path)
    if pre_or_gan == 'pre':
        start_epoch = 1
    elif pre_or_gan == 'gan':
        start_epoch = 0
    else:
        raise ValueError
    epoch = int(pathlib_path.stem[-3:].replace('-', '').replace('G', ''))
    epoch += 1 - start_epoch
    #print(epoch)
    return epoch

def list_csv(csv_dir, epoch_start=1):
    if isinstance(csv_dir, str):
        csv_dir = pathlib.Path(csv_dir)
    csv_list = list(csv_dir.glob('**/*.csv'))
    csv_dict = {}
    for csv in csv_list:
        if 'pre' in csv.as_posix():
            pre_or_gan = 'pre'
        elif 'gan' in csv.as_posix():
            pre_or_gan = 'gan'
        else:
            raise ValueError
            
        epoch = extract_epoch_number(csv, pre_or_gan=pre_or_gan)
        
        if epoch_start != 1:
            epoch = epoch - epoch_start + 1
        csv_dict[pre_or_gan + str(epoch)] = csv
    return csv_dict

def csvs_to_dfs(csv_dict):
    n = len(csv_dict)
    print(n)
    df_dict = {}
    for i in range(1, n + 1):
        df_dict[i] = pd.read_csv(csv_dict[i], index_col=0)
    return df_dict

def epoch_df(dfs_dict, metrics_filter, statistic='mean'):
    n = len(dfs_dict)
    epoch_dict = {}
    for i in range(1, n + 1):
        if statistic == 'mean':
            epoch_dict[i] = dfs_dict[i].loc[:,metrics_filter].mean()
        elif statistic == 'median':
            epoch_dict[i] = dfs_dict[i].loc[:,metrics_filter].median()
        elif statistic == 'std':
            epoch_dict[i] = dfs_dict[i].loc[:,metrics_filter].std()
        elif 'quantile' in statistic:
            level = float(statistic[-4:])
            epoch_dict[i] = dfs_dict[i].loc[:,metrics_filter].quantile(level)
        else:
            raise ValueError()
    epoch_df = pd.DataFrame.from_dict(epoch_dict).transpose()
    return epoch_df

def collect_all_exp_dfs(csv_dirs, statistics, metrics, epoch_start=1):
    df_dict_exp = {}
    for exp_variation in csv_dirs.keys():
        csv_dir = csv_dirs[exp_variation]
        df_dict_stat = {}
        for statistic in STATISTICS:
            df_dict_stat[statistic] = epoch_df(csvs_to_dfs(list_csv(csv_dir, epoch_start=epoch_start)), 
                                               metrics_filter=metrics, 
                                               statistic=statistic)
        df_dict_exp[exp_variation] = df_dict_stat
    return df_dict_exp

def merge_all_exp_dfs(dfs, metrics):
    #new_df = pd.DataFrame()
    for epoch in dfs.keys():
        n = len(dfs[epoch])
        #dfs[epoch]['epoch'] = epoch
    exp_df = pd.concat(dfs)
    exp_df = exp_df.loc[:,metrics]
    return exp_df
        
def compute_statistics(exp_df, statistics):
    n_epochs = int(len(exp_df) / len(exp_df.loc[1]))
    dfs = []
    for statistic in statistics:
        exp_df_stat = {}
        for i in range(1, n_epochs + 1):
            if statistic == 'mean':
                exp_df_stat[i] = exp_df.loc[i,:].mean()
            elif statistic == 'median':
                exp_df_stat[i] = exp_df.loc[i,:].median()
            elif statistic == 'std':
                exp_df_stat[i] = exp_df.loc[i,:].std()
            elif 'quantile' in statistic:
                level = float(statistic[-4:])
                exp_df_stat[i] = exp_df.loc[i,:].quantile(level)
            else:
                raise ValueError()
        exp_df_stat = pd.DataFrame.from_dict(exp_df_stat).transpose()
        new_colnames = []
        for colname in exp_df_stat.columns:
            new_colnames.append(colname + '_' + statistic)
        exp_df_stat.columns = new_colnames
        dfs.append(exp_df_stat)
    exp_df_stat = pd.concat(dfs, axis=1)
    return exp_df_stat


for exp in CSV_DIRS:
    print(exp)
    dfs = csvs_to_dfs(list_csv(csv_dir=CSV_DIRS[exp], epoch_start=1))
    exp_df = merge_all_exp_dfs(dfs, metrics=METRICS)
    exp_stat_dict[exp] = compute_statistics(exp_df, statistics=STATISTICS)

e01-8-WV02
34


KeyError: 1

In [8]:
def plot_trace(fig, df, metric, statistic, quantile, row, col, name, showlegend, color, alpha):
    if quantile:
        fig.add_trace(go.Scatter(
            x=df.index,
            y=df[metric + '_' + QUANTILES[1]],
            showlegend=False, 
            legendgroup=name, 
            mode='lines',
            line=dict(width=0.0),
            fillcolor=rgb_to_rgba(color, alpha=alpha)
        ), row=row, col=col)

        fig.add_trace(go.Scatter(
            x=df.index,
            y=df[metric + '_' + QUANTILES[0]],
            fill='tonexty',
            showlegend=False,
            legendgroup=name, 
            mode='lines',
            line=dict(width=0.0),
            fillcolor=rgb_to_rgba(color, alpha=alpha)
        ), row=row, col=col)

    fig.add_trace(go.Scatter(
        x=df.index,
        y=df[metric + '_' + statistic],
        mode='lines',
        line=dict(color=color),
        name=name, 
        legendgroup=name, 
        showlegend=showlegend
    ), row=row, col=col)

    
def plot_pretrain(exp_stat_dict, colors, x_type='linear', quantile=True, alpha=0.5):
    fig = make_subplots(rows=2, cols=2, 
                        subplot_titles=("Training set","Validation set"), 
                        horizontal_spacing=0.05, vertical_spacing=0.05)
    for i, exp_variation in enumerate(exp_stat_dict.keys()):
        df_dict_stat = exp_stat_dict[exp_variation]
        plot_trace(fig, df_dict_stat, metric='PSNR', statistic='median', 
                   quantile=quantile, row=1, col=2, name=exp_variation, showlegend=True, 
                   color=colors[i], alpha=alpha)
        plot_trace(fig, df_dict_stat, metric='SSIM', statistic='median', 
                   quantile=quantile, row=2, col=2, name=exp_variation, showlegend=False, 
                   color=colors[i], alpha=alpha)
    
    if x_type == 'log':
        fig.update_xaxes(type=x_type, row=1, col=2)
        fig.update_xaxes(type=x_type, row=2, col=2)
        fig.update_yaxes(range=[30, 40], row=1, col=2)
        fig.update_yaxes(range=[0.7, 1.0], row=2, col=2)
    elif x_type == 'linear':
        fig.update_yaxes(range=[30, 40], row=1, col=2)
        fig.update_yaxes(range=[0.7, 1.0], row=2, col=2)
    else:
        raise ValueError


    fig.update_layout(title='Pretrain', height=800, width=900)
    fig.update_layout(legend=dict(orientation="h",yanchor="top",y=-0.05,xanchor="left",x=0))
    fig.show()
    
plot_pretrain(exp_stat_dict, colors=COLORS, x_type='linear', quantile=False, alpha=0.3)
plot_pretrain(exp_stat_dict, colors=COLORS, x_type='log', quantile=False, alpha=0.3)