In [None]:
import os

In [None]:
import tensorflow as tf

In [None]:
import sys

In [None]:
import numpy as np

In [None]:
import matplotlib.pyplot as plt

In [None]:
import matplotlib

In [None]:
matplotlib.rc('font', size=15)

In [None]:
from scipy.interpolate import make_interp_spline, BSpline
from scipy.ndimage.filters import gaussian_filter1d

In [None]:
sys.version

In [None]:
!pip --version

In [None]:
def read_tf_data(fs):
    """Collect scalar series from one or more TensorBoard event files.

    Parameters
    ----------
    fs : path or list/tuple of paths
        Each path is handed to ``tf.compat.v1.train.summary_iterator``.
        Anything that is not a list or tuple is treated as a single path.

    Returns
    -------
    dict
        NumPy arrays under the keys ``'steps'``, ``'precs'``, ``'recalls'``,
        ``'losses'`` and ``'shds'``, concatenated across files in the order
        given. ``'steps'`` records the step of every ``'loss'`` value, so it
        is only guaranteed to line up element-wise with ``'losses'``.

    Raises
    ------
    AssertionError
        If the first event of a file is not an empty record at step 0.
    """
    steps = []
    losses = []
    # Tags that map straight onto one output series.
    series_by_tag = {
        'graph/prec': [],
        'graph/recall': [],
        'graph/v/shd': [],
    }

    if not isinstance(fs, (list, tuple)):
        fs = [fs]

    for f in fs:
        it = tf.compat.v1.train.summary_iterator(f)

        # the first seems to be a placeholder
        first = next(it, None)
        if first is None:
            # File holds no events at all: nothing to read.
            continue
        assert first.step == 0
        assert len(first.summary.value) == 0

        for e in it:
            # Walk every value of the event. Indexing value[0] would raise
            # IndexError on events without values and would drop all but the
            # first value of a multi-value event.
            for v in e.summary.value:
                if v.tag == 'loss':
                    steps.append(e.step)
                    losses.append(v.simple_value)
                elif v.tag in series_by_tag:
                    series_by_tag[v.tag].append(v.simple_value)
                # any other tag is ignored

    res = {}
    res['steps'] = np.array(steps)
    res['precs'] = np.array(series_by_tag['graph/prec'])
    res['recalls'] = np.array(series_by_tag['graph/recall'])
    res['losses'] = np.array(losses)
    res['shds'] = np.array(series_by_tag['graph/v/shd'])
    return res

In [None]:
type([1,2]) == list

# Compare EQ, CNN, and FC

In [None]:
def eqcnnfc_tf_name(model, d):
    """Return the sorted paths of every entry in the ERSF-k1 test log folder.

    The folder is ``tensorboard_logs/test-<model>-ERSF-k1-d=<d>-ensemble``.
    """
    log_dir = 'tensorboard_logs/test-{}-ERSF-k1-d={}-ensemble'.format(model, d)
    paths = [os.path.join(log_dir, entry) for entry in os.listdir(log_dir)]
    paths.sort()
    return paths

In [None]:
thedata = {}

In [None]:
# Load the test logs for every (model, d) pair into thedata[model][d].
for model in ['EQ2', 'CNN', 'FC']:
    per_dim = thedata[model] = {}
    for d in [10, 20, 50, 100]:
        log_files = eqcnnfc_tf_name(model, d)
        per_dim[d] = read_tf_data(log_files)

In [None]:
# the numbers
# FIXME this is just a place holder. Use the real test.
# NOTE(review): the header line below mentions 'ng=' although the rows are
# keyed by model and d.
print('ng=, precs, recalls, shd= ')
for model in ['EQ2', 'CNN', 'FC']:
    for d in [10, 20, 50, 100]:
        run = thedata[model][d]
        # Average of the last 10 logged values of each metric.
        prec, recall, shd = (run[key][-10:].mean() for key in ('precs', 'recalls', 'shds'))
        print('{} & {} & {:.1f} & {:.1f} & {:.1f} \\\\'
              .format(model, d, prec*100, recall*100, shd*1))

# ERSF 1,2,4

In [None]:
def ersf124_tf_name(d):
    """Return the sorted entry paths of ``tensorboard_logs/test-EQ2-ERSF124-d=<d>-ensemble``."""
    root = 'tensorboard_logs/test-EQ2-ERSF124-d={}-ensemble'.format(d)
    return sorted(os.path.join(root, name) for name in os.listdir(root))

In [None]:
thedata = {}

In [None]:
thedata[10] = read_tf_data(ersf124_tf_name(10))

In [None]:
thedata[20] = read_tf_data(ersf124_tf_name(20))

In [None]:
# the numbers
# FIXME this is just a place holder. Use the real test.
print('ng=, precs, recalls, shd= ')
for d in [10,20]:
    run = thedata[d]
    # Average of the last 10 logged values of each metric.
    prec, recall, shd = (run[key][-10:].mean() for key in ('precs', 'recalls', 'shds'))
    print('{} & {:.1f} & {:.1f} & {:.1f} \\\\'
          .format(d, prec*100, recall*100, shd*1))

# Test different number of graphs

In [None]:
def ngraph_tf_name(ng, d, which):
    """Return the sorted entry paths of the ngraph log folder.

    The folder is ``tensorboard_logs/<which>-ngraph-ng=<ng>-d=<d>-ensemble``;
    callers in this notebook pass ``'train'`` or ``'test'`` as ``which``.
    """
    template = 'tensorboard_logs/{}-ngraph-ng={}-d={}-ensemble'
    folder = template.format(which, ng, d)
    joined = [os.path.join(folder, entry) for entry in os.listdir(folder)]
    return sorted(joined)

In [None]:
thedata = {}

In [None]:
# d=20: load the train and test logs for every graph count.
for ng in (200,500,1000,2000,5000,10000):
    per_split = thedata[ng] = {}
    for which in ('train', 'test'):
        per_split[which] = read_tf_data(ngraph_tf_name(ng, 20, which))

In [None]:
thedata10 = {}

In [None]:
# d=10: load the train and test logs for every graph count.
for ng in (200,500,1000,2000,5000,10000):
    per_split = thedata10[ng] = {}
    for which in ('train', 'test'):
        per_split[which] = read_tf_data(ngraph_tf_name(ng, 10, which))

In [None]:
def smoothed_plot(ax, x, y, label=''):
    """Plot a cubic interpolating spline of (x, y) on ``ax``.

    The spline is evaluated on ``round(len(x) / 2)`` evenly spaced points
    between ``x.min()`` and ``x.max()``.

    NOTE(review): a later cell in this notebook redefines ``smoothed_plot``,
    so this version is only in effect until that cell runs.
    """
    n_samples = int(round(len(x) / 2))
    grid = np.linspace(x.min(), x.max(), n_samples)
    curve = make_interp_spline(x, y, k=3)(grid)  # make_interp_spline returns a BSpline
    ax.plot(grid, curve, label=label)

In [None]:
def smoothed_plot(ax, x, y, label=''):
    """Plot y against x on ``ax`` unchanged; despite the name, no smoothing is applied.

    NOTE(review): the next cell redefines ``smoothed_plot`` again, so this
    version is only in effect until that cell runs.
    """
    line_options = dict(label=label)
    ax.plot(x, y, **line_options)

In [None]:
def smoothed_plot(ax, x, y, label='', npoints=None):
    """Plot y against x on ``ax``, optionally thinned to about ``npoints`` samples.

    Parameters
    ----------
    ax : object with a ``plot(x, y, label=...)`` method (e.g. a matplotlib Axes)
    x, y : 1-D sequences supporting ``len`` and slicing
    label : str
        Legend label passed to ``ax.plot``.
    npoints : int or None
        When truthy, every ``round(len(x) / npoints)``-th sample is kept,
        starting from the first. When falsy, all samples are plotted.
    """
    if npoints:
        # round() gives 0 once x has at most npoints/2 samples, and a zero
        # stride is invalid, so clamp it to 1 (i.e. keep every sample).
        stride = max(1, int(round(len(x) / npoints)))
        x, y = x[::stride], y[::stride]
    ax.plot(x, y, label=label)

In [None]:
thedata[200]['train']

In [None]:
# Left panel: training loss, right panel: testing loss; one curve per graph count.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
for ng in thedata:
    curve_label = 'ng={}'.format(ng)
    for ax, split in zip(axs, ('train', 'test')):
        run = thedata[ng][split]
        smoothed_plot(ax, run['steps'], run['losses'], label=curve_label, npoints=100)

for ax, title in zip(axs, ('Training Loss', 'Testing Loss')):
    ax.set_xlabel('steps')
    ax.set_ylabel('loss')
    ax.legend()
    ax.set_title(title)
plt.savefig('results/ngraph.pdf')

In [None]:
# the numbers
# FIXME this is just a place holder. Use the real test.
print('ng=, precs, recalls, shd= ')
for ng in thedata:
    run = thedata[ng]['test']
    # Average of the last 10 logged values of each metric.
    prec, recall, shd = (run[key][-10:].mean() for key in ('precs', 'recalls', 'shds'))
    print('20 & {} & {:.1f} & {:.1f} & {:.1f} \\\\'
          .format(ng, prec*100, recall*100, shd*1))

In [None]:
# the numbers
# FIXME this is just a place holder. Use the real test.
print('ng=, precs, recalls, shd= ')
for ng in thedata10:
    # First half of the row: d=10 results.
    run10 = thedata10[ng]['test']
    prec, recall, shd = (run10[key][-10:].mean() for key in ('precs', 'recalls', 'shds'))
    print('{} & 10 & {:.1f} & {:.1f} & {:.1f} &'
          .format(ng, prec*100, recall*100, shd*1))
    # Second half of the row: d=20 results for the same ng.
    run20 = thedata[ng]['test']
    prec, recall, shd = (run20[key][-10:].mean() for key in ('precs', 'recalls', 'shds'))
    print('   20 & {:.1f} & {:.1f} & {:.1f} \\\\'
          .format(prec*100, recall*100, shd*1))

# Ensemble on different d

In [None]:
def ensD_tf_name(model, ds):
    """Return the sorted entry paths of the CH3 ensemble log folder for ``model``.

    ``ds`` is rendered as a comma-separated list inside brackets, giving
    ``tensorboard_logs/test-<model>-CH3-d=[<d1>,<d2>,...]-ensemble``.
    """
    d_list = ','.join(map(str, ds))
    folder = 'tensorboard_logs/test-{}-CH3-d=[{}]-ensemble'.format(model, d_list)
    # The folder may hold several files, so all of them are returned.
    # FIXME the sort is lexicographic on the path; whether that is also time
    # order has not been verified.
    entries = os.listdir(folder)
    return sorted(os.path.join(folder, entry) for entry in entries)
#     f = os.path.join(f, [0])
#     return f

In [None]:
ensD_tf_name('EQ2', [10,15,20])

In [None]:
ensD_tf_name('EQ2', [20,30,40])

In [None]:
d1 = read_tf_data(ensD_tf_name('EQ2', [10,15,20]))

In [None]:
d2 = read_tf_data(ensD_tf_name('EQ2', [20,30,40]))

In [None]:
d1cnn = read_tf_data(ensD_tf_name('CNN', [10,15,20]))

In [None]:
d2cnn = read_tf_data(ensD_tf_name('CNN', [20,30,40]))

In [None]:
d2['steps'].size

In [None]:
d1['steps'].size

In [None]:
idx1 = range(1, d1['steps'].size)

In [None]:
# Evenly subsample d2 down to roughly d1's size.
# TODO abstract this and apply in place of smoothing
# The stride is clamped to 1: round() yields 0 when d2 has at most half as
# many steps as d1, and range() rejects a zero step with ValueError.
idx2 = range(1, d2['steps'].size, max(1, round(d2['steps'].size / d1['steps'].size)))

In [None]:
d2['steps'][idx2]

In [None]:
def smoothed_plot(ax, x, y, label=''):
    """Plot a cubic interpolating spline of (x, y) on ``ax``.

    The spline is sampled at ``round(len(x) / 2)`` evenly spaced points
    spanning ``[x.min(), x.max()]``.

    NOTE(review): the following cell redefines ``smoothed_plot``, so this
    version is only in effect until that cell runs.
    """
    half_count = int(round(len(x) / 2))
    lo, hi = x.min(), x.max()
    sample_x = np.linspace(lo, hi, half_count)
    interpolant = make_interp_spline(x, y, k=3)  # type: BSpline
    ax.plot(sample_x, interpolant(sample_x), label=label)

In [None]:
def smoothed_plot(ax, x, y, label='', npoints=None):
    """Plot y against x on ``ax``, optionally thinned to about ``npoints`` samples.

    This redefinition keeps the optional ``npoints`` parameter that the
    earlier definition in this notebook accepts, so cells that pass
    ``npoints=...`` still work after this cell has run.

    Parameters
    ----------
    ax : object with a ``plot(x, y, label=...)`` method (e.g. a matplotlib Axes)
    x, y : 1-D sequences
    label : str
        Legend label passed to ``ax.plot``.
    npoints : int or None
        When truthy, every ``round(len(x) / npoints)``-th sample is kept
        (stride of at least 1). When falsy, x and y are plotted as given.
    """
    if npoints:
        # Clamp to 1 so that short inputs do not produce a zero stride.
        stride = max(1, int(round(len(x) / npoints)))
        x, y = x[::stride], y[::stride]
    ax.plot(x, y, label=label)

In [None]:
def plot_subfig(ax, label, which, thedata, idx):
    """Plot one metric of one run on ``ax`` and label the axes.

    ``thedata['steps']`` and ``thedata[which]`` are both indexed by ``idx``
    before being passed to ``smoothed_plot``. The x label is ``'steps'``, the
    y label is ``which``, and the legend is refreshed after the curve is added.
    """
    xs = thedata['steps'][idx]
    ys = thedata[which][idx]
    smoothed_plot(ax, xs, ys, label=label)
    ax.set_xlabel('steps')
    ax.set_ylabel(which)
    ax.legend()

In [None]:
# Precision (left) and recall (right) for the four ensemble runs.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
# (legend label, run, sample indices); the CNN curves start at index 3.
curves = [
    ('EQ-[10,15,20]', d1, idx1),
    ('EQ-[20,30,40]', d2, idx2),
    ('CNN-[10,15,20]', d1cnn, range(3, d1cnn['steps'].size)),
    ('CNN-[20,30,40]', d2cnn, range(3, d2cnn['steps'].size)),
]
for ax, which, title in zip(axs, ('precs', 'recalls'), ('Precision', 'Recall')):
    for label, run, idx in curves:
        plot_subfig(ax, label, which, run, idx)
    ax.set_title(title)
plt.savefig('results/ensemble-d.pdf')

In [None]:
# the numbers
# FIXME this is just a place holder. Use the real test.
for data in [d1, d2, d1cnn, d2cnn]:
    # Average of the last 10 logged values of each metric.
    prec, recall, shd = (data[key][-10:].mean() for key in ('precs', 'recalls', 'shds'))
    print('d=, mat=, precs, recalls, shd=   {:.1f} & {:.1f} & {:.1f}'
          .format(prec*100, recall*100, shd*1))

# Ensemble on different k, and compare MAT

In [None]:
def mat_tf_name(d, mat):
    """Return the path of the single entry in ``tensorboard_logs/test-EQ-d=<d>-mat=<mat>-ensemble``.

    Raises AssertionError when the folder does not hold exactly one entry.
    """
    folder = 'tensorboard_logs/test-EQ-d={}-mat={}-ensemble'.format(d, mat)
    entries = os.listdir(folder)
    # assume only one file in each folder
    assert len(entries) == 1
    return os.path.join(folder, entries[0])

In [None]:
# data[d][mat] holds the parsed test log for each dimension / matrix type.
data = {}
for d in [20,50]:
    per_mat = data[d] = {}
    for mat in ['CH3', 'COV', 'COR']:
        log_file = mat_tf_name(d, mat)
        per_mat[mat] = read_tf_data(log_file)

In [None]:
def smoothed_plot(ax, x, y, label=''):
    """Plot a quadratic (k=2) interpolating spline of (x, y) on ``ax``.

    The spline is sampled at ``round(len(x) / 2)`` evenly spaced points
    between ``x.min()`` and ``x.max()``.

    NOTE(review): the following cell redefines ``smoothed_plot``, so this
    version is only in effect until that cell runs.
    """
    n_samples = int(round(len(x) / 2))
    grid = np.linspace(x.min(), x.max(), n_samples)
    quadratic = make_interp_spline(x, y, k=2)  # type: BSpline
    smoothed = quadratic(grid)
    ax.plot(grid, smoothed, label=label)

In [None]:
def smoothed_plot(ax, x, y, label='', npoints=None):
    """Plot y against x on ``ax``, optionally thinned to about ``npoints`` samples.

    This redefinition keeps the optional ``npoints`` parameter that an
    earlier definition in this notebook accepts, so cells that pass
    ``npoints=...`` still work after this cell has run.

    Parameters
    ----------
    ax : object with a ``plot(x, y, label=...)`` method (e.g. a matplotlib Axes)
    x, y : 1-D sequences
    label : str
        Legend label passed to ``ax.plot``.
    npoints : int or None
        When truthy, every ``round(len(x) / npoints)``-th sample is kept
        (stride of at least 1). When falsy, x and y are plotted as given.
    """
    if npoints:
        # Clamp to 1 so that short inputs do not produce a zero stride.
        stride = max(1, int(round(len(x) / npoints)))
        x, y = x[::stride], y[::stride]
    ax.plot(x, y, label=label)

In [None]:
def plot_subfig(ax, d, which, data):
    """Plot metric ``which`` for the three matrix types of dimension ``d`` on ``ax``.

    Curves are drawn in the order CH3, COR, COV, each from
    ``data[d][<mat>]['steps']`` against ``data[d][<mat>][which]``. The axes
    are then labelled and the panel is titled ``'<which> for d=<d>'``.
    """
    # (key in data[d], legend label) in drawing order.
    legend_by_mat = (('CH3', 'COR+norm(COV)'), ('COR', 'COR'), ('COV', 'COV'))
    for mat, legend in legend_by_mat:
        run = data[d][mat]
        smoothed_plot(ax, run['steps'], run[which], label=legend)
    ax.set_xlabel('steps')
    ax.set_ylabel(which)
    ax.legend()
    ax.set_title('{} for d={}'.format(which, d))

In [None]:
# Rows: d=20 and d=50; columns: precision and recall.
fig, axs = plt.subplots(2, 2, figsize=(10, 10))
for row, d in enumerate((20, 50)):
    for col, which in enumerate(('precs', 'recalls')):
        plot_subfig(axs[row, col], d, which, data)
plt.savefig('results/mat.pdf')

In [None]:
# get the values. Use the average of last 10 values
data[20]['CH3']['precs'][-10:].mean()

In [None]:
# FIXME this is just a place holder. Use the real test.
for d in [20,50]:
    for mat in ['CH3', 'COR', 'COV']:
        run = data[d][mat]
        # Average of the last 10 logged values of each metric.
        prec, recall, shd = (run[key][-10:].mean() for key in ('precs', 'recalls', 'shds'))
        print('d={}, mat={}, precs, recalls, shd=   {:.1f} & {:.1f} & {:.1f}'
              .format(d, mat, prec*100, recall*100, shd*1))

# Training process

In [None]:
os.listdir("tensorboard_logs")

In [None]:
def train_process_tf_name(model, d):
    """Return the path of the single entry in ``tensorboard_logs/test-<model>-ERSF-k1-d=<d>-ensemble``.

    Raises AssertionError when the folder does not hold exactly one entry.
    """
    log_dir = 'tensorboard_logs/test-{}-ERSF-k1-d={}-ensemble'.format(model, d)
    contents = os.listdir(log_dir)
    # assume only one file in each folder
    assert len(contents) == 1
    only_entry = contents[0]
    return os.path.join(log_dir, only_entry)

In [None]:
fc10 = read_tf_data(train_process_tf_name('FC', 10))

In [None]:
fc10['steps']

In [None]:
len(fc10['steps'])

In [None]:
# Parsed test logs for each model at d=20 and d=50.
fc20 = read_tf_data(train_process_tf_name(model='FC', d=20))
fc50 = read_tf_data(train_process_tf_name(model='FC', d=50))
cnn20 = read_tf_data(train_process_tf_name(model='CNN', d=20))
cnn50 = read_tf_data(train_process_tf_name(model='CNN', d=50))
eq20 = read_tf_data(train_process_tf_name(model='EQ2', d=20))
eq50 = read_tf_data(train_process_tf_name(model='EQ2', d=50))

In [None]:
# plot it
# `steps` and `precs` were never bound at notebook scope (they only exist as
# locals inside read_tf_data), so this cell raised NameError on a fresh kernel.
# NOTE(review): fc10 is assumed to be the run this scratch plot was meant
# for, since it is the run inspected in the cells just above — confirm.
steps = fc10['steps']
precs = fc10['precs']

# Evaluate a cubic interpolating spline on 20 evenly spaced steps.
x = np.linspace(steps.min(), steps.max(), 20)
spl = make_interp_spline(steps, precs, k=3)  # type: BSpline
y = spl(x)

plt.plot(x, y)

In [None]:
def smoothed_plot(ax, x, y, label=''):
    """Plot a quadratic (k=2) interpolating spline of (x, y) on ``ax``.

    The spline is evaluated at ``round(len(x) / 2)`` evenly spaced points
    spanning ``[x.min(), x.max()]``.

    NOTE(review): the following cell redefines ``smoothed_plot``, so this
    version is only in effect until that cell runs.
    """
    half_count = int(round(len(x) / 2))
    sample_x = np.linspace(x.min(), x.max(), half_count)
    sample_y = make_interp_spline(x, y, k=2)(sample_x)  # make_interp_spline returns a BSpline
    ax.plot(sample_x, sample_y, label=label)

In [None]:
def smoothed_plot(ax, x, y, label='', npoints=None):
    """Plot y against x on ``ax``, optionally thinned to about ``npoints`` samples.

    This redefinition keeps the optional ``npoints`` parameter that an
    earlier definition in this notebook accepts, so cells that pass
    ``npoints=...`` still work after this cell has run.

    Parameters
    ----------
    ax : object with a ``plot(x, y, label=...)`` method (e.g. a matplotlib Axes)
    x, y : 1-D sequences
    label : str
        Legend label passed to ``ax.plot``.
    npoints : int or None
        When truthy, every ``round(len(x) / npoints)``-th sample is kept
        (stride of at least 1). When falsy, x and y are plotted as given.
    """
    if npoints:
        # Clamp to 1 so that short inputs do not produce a zero stride.
        stride = max(1, int(round(len(x) / npoints)))
        x, y = x[::stride], y[::stride]
    ax.plot(x, y, label=label)

In [None]:
def plot_subfig(ax, d, which, eq, fc, cnn):
    """Plot metric ``which`` of the EQ, FC and CNN runs on ``ax``.

    Each run supplies ``run['steps']`` as x and ``run[which]`` as y; curves
    are drawn in the order EQ, FC, CNN. The axes are then labelled and the
    panel is titled ``'<which> for d=<d>'``.
    """
    for run, legend in ((eq, 'EQ'), (fc, 'FC'), (cnn, 'CNN')):
        smoothed_plot(ax, run['steps'], run[which], label=legend)
    ax.set_xlabel('steps')
    ax.set_ylabel(which)
    ax.legend()
    ax.set_title('{} for d={}'.format(which, d))

In [None]:
# Rows: d=20 and d=50; columns: precision and recall; EQ vs FC vs CNN in each panel.
fig, axs = plt.subplots(2, 2, figsize=(10, 10))
runs_by_row = (
    (20, (eq20, fc20, cnn20)),
    (50, (eq50, fc50, cnn50)),
)
for row, (d, (eq, fc, cnn)) in enumerate(runs_by_row):
    for col, which in enumerate(('precs', 'recalls')):
        plot_subfig(axs[row, col], d, which, eq, fc, cnn)
plt.savefig('results/training_process.pdf')