# Graph settings

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme to every figure drawn below.
sns.set()


def _rgb255(r, g, b):
    """Scale 0-255 RGB channel values to the 0-1 float tuple matplotlib accepts."""
    return (r/255, g/255, b/255)


# Named colors shared by the plots in this notebook.
red = _rgb255(255, 75, 0)
yellow = _rgb255(255, 241, 0)
green = _rgb255(3, 175, 122)
blue = _rgb255(0, 90, 255)
lightblue = _rgb255(77, 196, 255)
pink = _rgb255(255, 128, 130)
orange = _rgb255(246, 170, 0)
purple = _rgb255(153, 0, 153)
brown = _rgb255(128, 64, 0)

# Render text with a serif font, trying Times New Roman before the
# serif fallbacks that were already configured.
serif_fallbacks = plt.rcParams["font.serif"]
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = ["Times New Roman"] + serif_fallbacks

In [None]:
class OutputFontColor:
    """ANSI escape sequences used to color text written with ``print``.

    Prefix a string with one of the color codes and append ``END`` to
    restore the default rendering afterwards.
    """
    # Foreground colors (ANSI SGR codes 30-34).
    BLACK          = '\033[30m'
    RED            = '\033[31m'
    GREEN          = '\033[32m'
    YELLOW         = '\033[33m'
    BLUE           = '\033[34m'
    # SGR 39: switch back to the default foreground color only.
    COLOR_DEFAULT  = '\033[39m'
    # SGR 0: reset every text attribute.
    END            = '\033[0m'

# Data Preparation

In [None]:
x_test, y_test = np.load("../datasets/test/dataset_a.npz").values()
x_train_all, y_train_all = np.load("../datasets/training/dataset_a.npz").values()
x_train, y_train = np.load("../datasets/training/dataset_b.npz").values()
x_train_bright, y_train_bright = np.load("../datasets/training/dataset_c.npz").values()
x_train_con, y_train_con = np.load("../datasets/training/dataset_d.npz").values()
x_train_sat, y_train_sat = np.load("../datasets/training/dataset_e.npz").values()
label_name={
    0:"airplane",
    1:"automobile",
    2:"bird",
    3:"cat",
    4:"deer",
    5:"dog",
    6:"frog",
    7:"horse",
    8:"ship",
    9:"truck"
}
label_lack_pred_res = np.load("./pred_res/b-a.npy")
normal_pred_res = np.load("./pred_res/a-a.npy")

In [None]:
import glob
import os

all_recon_loss = {}
all_base_weight = {}

loss_files = glob.glob("./results/*-*.npz")
for loss_file in loss_files:
    train_name, test_name = os.path.splitext(os.path.basename(loss_file))[0].split('-')
    if train_name not in all_recon_loss.keys():
        all_recon_loss[train_name] = {}
    all_recon_loss[train_name][test_name] = {k:v for k,v in np.load(loss_file).items()}

loss_files = glob.glob("./results/?.npz")
for loss_file in loss_files:
    train_name = os.path.splitext(os.path.basename(loss_file))[0]
    all_recon_loss[train_name]["closed"] = {k:v for k,v in np.load(loss_file).items()}

weight_files = glob.glob("./nmf_info/base_weight/*-*.npz")
for weight_file in weight_files:
    train_name, test_name = os.path.splitext(os.path.basename(weight_file))[0].split('-')
    if train_name not in all_base_weight.keys():
        all_base_weight[train_name] = {}
    all_base_weight[train_name][test_name] = {k:v for k,v in np.load(weight_file).items()}

weight_files = glob.glob("./nmf_info/base_weight/?.npz")
for weight_file in weight_files:
    train_name = os.path.splitext(os.path.basename(weight_file))[0]
    all_base_weight[train_name]["closed"] = {k:v for k,v in np.load(weight_file).items()}

In [None]:
# Restore the pickled object stored for dataset (a); judging by the path it is
# presumably a fitted NMF model -- confirm against the script that wrote it.
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so only open files that come from a trusted source.
with open("./nmf_info/model/nmf_for_dataset_a.pkl", mode="rb") as f:
    nmf = pickle.load(f)

## RQ1

In [None]:
# Layer whose base weights are visualized in the cells that follow.
target_layer = "activation_1"
# "closed" is the entry loaded from the single-name file
# ./nmf_info/base_weight/a.npz (see the loading cell above).
base_weights = all_base_weight["a"]["closed"][target_layer]


In [None]:
import random

# For bases 0-49, show the ten training images with the largest weight on each
# base: one 10x10 figure per chunk of ten bases (one row per base).
# A fresh figure is created for every chunk; drawing all chunks onto a single
# set of axes would stack the images so that only the last chunk is visible.
for chunk in range(5):
    fig = plt.figure(figsize=(10, 10))
    axes = fig.subplots(nrows=10, ncols=10, subplot_kw=dict(xticks=[], yticks=[]))
    target_base_id = range(chunk*10, (chunk+1)*10)
    for ax, base_id in zip(axes, target_base_id):
        # Sample indices sorted by descending weight on this base.
        order = np.argsort(-base_weights[:, base_id])
        ax[0].set_ylabel(base_id)
        for i in range(10):
            ax[i].imshow(x_train_all[order[i]])
    fig.tight_layout()
    # plt.savefig("figure/Base_maxim_example_actv_1_{}.pdf".format(chunk))

In [None]:
# One row per selected base, showing the ten training images that carry the
# largest weight on that base.
target_base_id = [4,25,26,37]
fig = plt.figure(figsize=(10, 4.5))
axes = fig.subplots(nrows=4, ncols=10, subplot_kw=dict(xticks=[], yticks=[]))
for row_axes, base_id in zip(axes, target_base_id):
    # Negating the weights turns the ascending argsort into a descending one.
    order = np.argsort(-base_weights[:, base_id])
    row_axes[0].set_ylabel("# {}".format(base_id), size=20)
    for col, cell in enumerate(row_axes):
        cell.imshow(x_train_all[order[col]])
fig.suptitle("Examples with Large Weights of Certain Features [activation_1]", size=24)
fig.tight_layout()
# plt.savefig("figure/Base_maxim_example_actv_1_rep.pdf")

In [None]:
# Switch the visualized layer; the next cell reads `base_weights`.
target_layer = "activation_6"
# "closed" is the entry loaded from the single-name file
# ./nmf_info/base_weight/a.npz (see the loading cell above).
base_weights = all_base_weight["a"]["closed"][target_layer]

In [None]:
# One row per selected base, showing the ten training images that carry the
# largest weight on that base.
target_base_id = [19,21,46]
fig = plt.figure(figsize=(10, 3.5))
axes = fig.subplots(nrows=3, ncols=10, subplot_kw=dict(xticks=[], yticks=[]))
for row_axes, base_id in zip(axes, target_base_id):
    # Negating the weights turns the ascending argsort into a descending one.
    order = np.argsort(-base_weights[:, base_id])
    row_axes[0].set_ylabel("# {}".format(base_id), size=20)
    for col, cell in enumerate(row_axes):
        cell.imshow(x_train_all[order[col]])
fig.suptitle("Examples with Large Weights of Certain Features [activation_6]", size=24)
fig.tight_layout()
# plt.savefig("figure/Base_maxim_example_actv_6_Horse.pdf")

## RQ2

In [None]:
# Show, for every class, the training images with the largest reconstruction
# loss: one column per class, row k holds the (k+1)-th largest loss.
target_layer="activation_6"

fig = plt.figure(figsize=(15, 15))
# class label -> 0-based row indices whose cell gets a thick red outline.
# NOTE(review): presumably hand-picked "unnatural" samples; the selection
# criteria are not visible in this notebook.
unnatural = {
    0: [8, 9],
    1: [1, 4],
    2: [],
    3: [0],
    4: [1, 4, 8],
    5: [],
    6: [0],
    7: [],
    8: [2],
    9: [4, 5, 7, 9],
}

axes = fig.subplots(nrows=10, ncols=10, subplot_kw=dict(xticks=[], yticks=[]))
train_recon_loss = all_recon_loss["a"]["closed"][target_layer]
test_recon_loss = all_recon_loss["a"]["a"][target_layer]  # not used by this cell

# Rank every class once up front. Doing the boolean-mask selection and the
# argsort inside the drawing loop would repeat them for each of the 100 cells.
top_loss_idx = {}
for i in range(10):
    class_idx = np.flatnonzero(y_train_all == i)
    by_desc_loss = np.argsort(-train_recon_loss[class_idx])
    # Positions in x_train_all of this class's highest-loss samples.
    top_loss_idx[i] = class_idx[by_desc_loss[:len(axes)]]

for rank, ax in enumerate(axes):
    ax[0].set_ylabel("# {}".format(rank+1), size=24)
    for i in range(10):
        if rank==0:
            ax[i].set_title(label_name[i], size=24)
        ax[i].imshow(x_train_all[top_loss_idx[i][rank]])
        if rank in unnatural[i]:
            for prop in ax[i].spines.values():
                prop.set(ec="red", lw=8)
plt.suptitle("Example of Train Data with High Complexities for Each Label", size=36)
plt.tight_layout()

In [None]:
# Draw 50 randomly picked training images per class (one column per class).
# NOTE(review): no random seed is set, so the selection differs on every run.

fig = plt.figure(figsize=(15, 75))

axes = fig.subplots(nrows=50, ncols=10, subplot_kw=dict(xticks=[], yticks=[]))

# Positions of each class inside x_train_all, computed once instead of
# re-applying a boolean mask to the whole image array for every cell.
class_indices = [np.flatnonzero(y_train_all == i) for i in range(10)]
# The same within-class position is used for every column, so positions are
# drawn from the smallest class rather than from a hard-coded pool of 5000.
pool_size = min(len(indices) for indices in class_indices)
rand_idx = np.random.permutation(pool_size)

for rank, ax in enumerate(axes):
    ax[0].set_ylabel("# {}".format(rank+1))
    for i in range(10):
        if rank==0:
            ax[i].set_title(label_name[i], size=24)
        ax[i].imshow(x_train_all[class_indices[i][rand_idx[rank]]])
plt.tight_layout()
# plt.savefig("figure/Example_Rand_Recon_Train.pdf")

## RQ3

In [None]:
target_layer="activation_6"

# Order the test inputs by ascending reconstruction loss.
sort_order = np.argsort(all_recon_loss["a"]["a"][target_layer])
sorted_res = normal_pred_res[sort_order].astype(np.int64)

# Running sums of the prediction results when the inputs are visited in
# ascending, descending and random order.
# NOTE(review): the random baseline is not seeded, so it changes between runs.
res_cnt = np.cumsum(sorted_res, axis=0)
res_cnt_rev = np.cumsum(sorted_res[::-1], axis=0)
res_cnt_rand = np.cumsum(np.random.permutation(normal_pred_res).astype(np.int64), axis=0)

# Number of inputs seen so far: 1, 2, ..., N.
num_inputs = np.arange(res_cnt.shape[0]) + 1
# The first 100 points are left out of the plot.
first_shown = 100

plt.figure(figsize=(5,2.5))
for counts, color, label in (
    (res_cnt, lightblue, "Ascending Complexities"),
    (res_cnt_rev, pink, "Descending Complexities"),
    (res_cnt_rand, "gray", "Random"),
):
    plt.plot(num_inputs[first_shown:], 100*counts[first_shown:]/num_inputs[first_shown:], color=color, label=label)
plt.legend()
plt.title("Prediction Accuracy Curve")
plt.xlim(0,10000)
plt.ylim(20,100)
plt.xlabel("# Test Inputs")
plt.ylabel("Prediction Accuracy [%]")
plt.tight_layout()

In [None]:
target_layer = "activation_6"

plt.figure(figsize=(5,3))
plt.title("Proportion of Labels in Test Inputs with High Complexities")

# One bar series per training set, shifted left/right so the pairs sit side by side.
for train_name, shift, bar_color, bar_label in (
    ("a", -0.2, green, "Trained on dataset (a)"),
    ("b", 0.2, orange, "Trained on dataset (b)"),
):
    sort_order = np.argsort(all_recon_loss[train_name]["a"][target_layer])
    # Labels of the 1000 test inputs with the largest loss; dividing the
    # counts by 10 turns them into percentages of those 1000 inputs.
    hist, tick = np.histogram(y_test[sort_order][-1000:], range=(0,10))
    plt.bar(tick[:-1]+shift, hist/10, color=bar_color, label=bar_label, width=0.4)

_, xtc = plt.xticks(np.arange(10), list(label_name.values()), rotation=30, size=8)
# Print the tick labels of classes 3 and 9 in red.
for highlighted in (3, 9):
    xtc[highlighted].set_color(red)
plt.ylabel("Proportion [%]")
# plt.xlabel("Label")
plt.legend()

plt.tight_layout()

## RQ4

In [None]:
def bhatta(hist1, hist2):
    """Distance between two histograms based on the Bhattacharyya coefficient.

    Computes ``sqrt(1 - BC)`` where
    ``BC = sum(sqrt(hist1 * hist2)) / sqrt(sum(hist1) * sum(hist2))``.
    This quantity is also known as the Hellinger distance: ``0`` for
    histograms that are identical up to scale, ``1`` for histograms that do
    not overlap at all.

    Parameters
    ----------
    hist1, hist2 : sequence of non-negative numbers
        1-D bin values over the same bin edges. If the lengths differ, only
        the common leading bins contribute to the overlap term while the
        normalization still uses every bin.

    Returns
    -------
    numpy.float64
        The distance. ``1.0`` is returned when either histogram has no mass,
        since the coefficient is undefined (0/0) in that case.
    """
    h1 = np.asarray(hist1, dtype=float)
    h2 = np.asarray(hist2, dtype=float)

    # The normalizer does not depend on the bin, so compute it once.
    norm = np.sqrt(h1.sum() * h2.sum())
    if norm == 0:
        return np.float64(1.0)

    shared = min(len(h1), len(h2))
    coefficient = np.sum(np.sqrt(h1[:shared] * h2[:shared])) / norm
    # Rounding can push the coefficient a hair above 1 for identical
    # histograms; clamp so the square root never sees a negative number.
    # np.maximum (unlike the builtin max) still propagates NaN inputs.
    return np.sqrt(np.maximum(0.0, 1.0 - coefficient))


In [None]:
from scipy import stats

# (section heading, ((row label, train dataset, test dataset), ...)).
# Every row compares all_recon_loss[train]["closed"] with all_recon_loss[train][test].
_STATS_COMPARISONS = (
    ("Train=Test", (
        ("a", "a", "a"),
        ("Label lack", "b", "b"),
        ("Brightness", "c", "c"),
        ("Contrast", "d", "d"),
        ("Saturation", "e", "e"),
    )),
    ("Train<Test", (
        ("Label", "b", "a"),
        ("Brightness", "a", "c"),
        ("Contrast", "a", "d"),
        ("Saturation", "a", "e"),
    )),
    ("Train>Test", (
        ("Label", "a", "b"),
        ("Brightness", "c", "a"),
        ("Contrast", "d", "a"),
        ("Saturation", "e", "a"),
    )),
)


def _print_welch_ttest(train_loss, test_loss, alpha):
    """Run Welch's t-test on two samples and print the p-value, then the t-value.

    The p-value is printed in red when it is below ``alpha`` (blue otherwise);
    the t-value is printed in red when it is negative (blue otherwise).
    """
    tval, pval = stats.ttest_ind(train_loss, test_loss, equal_var=False)
    print("\t"+(OutputFontColor.RED if pval < alpha else OutputFontColor.BLUE) + str(pval) + OutputFontColor.END)
    print("\t"+(OutputFontColor.RED if tval < 0 else OutputFontColor.BLUE) + str(tval) + OutputFontColor.END)


def show_stats_res(target_layer, alpha=0.05):
    """Print Welch t-test results comparing train and test losses for one layer.

    For each train/test dataset pair listed in ``_STATS_COMPARISONS`` the
    values stored under ``"closed"`` for the training dataset are tested
    against the values stored for the test dataset, both taken from the
    module-level ``all_recon_loss`` for ``target_layer``.

    Parameters
    ----------
    target_layer : str
        Key of the layer inside each ``all_recon_loss[train][test]`` dict.
    alpha : float, optional
        Significance level below which a p-value is highlighted in red.

    Returns
    -------
    None
        The report is written to standard output.
    """
    print(target_layer)
    print()
    for heading, comparisons in _STATS_COMPARISONS:
        print(heading)
        for label, train_name, test_name in comparisons:
            print("\t" + label)
            # Looked up lazily, after the label is printed, so a missing
            # entry fails at the comparison it belongs to.
            losses = all_recon_loss[train_name]
            _print_welch_ttest(losses["closed"][target_layer], losses[test_name][target_layer], alpha)
    print()

# Print the t-test report for every layer stored under dataset (a)'s "closed" entry.
for layer in all_recon_loss["a"]["closed"]:
    show_stats_res(layer)


In [None]:
from functools import reduce

def show_hist_paper(loss_closed, *losses, fig_title="", file_tag="", colors=("blue", "yellow", "red", "firebrick"), rels=("few test", "similar test", "few train", "different"), num_bins=100, target_layers=None):
    """Plot train-vs-test loss histograms as a grid and save it as a PDF.

    The grid has one row per layer and one column per entry of ``losses``.
    Every panel overlays the density histogram of ``loss_closed`` (gray,
    "Train") with that of one test loss ("Test") on a shared value range, and
    annotates the panel with the ``bhatta`` distance between the two
    histograms (drawn in red when it exceeds 0.04).

    Parameters
    ----------
    loss_closed : mapping
        ``{layer: array}`` of losses shown as the "Train" histogram.
    *losses : mapping
        One ``{layer: array}`` mapping per column.
    fig_title : str
        Text placed inside the brackets of the figure title.
    file_tag : str
        Suffix of the output file ``figure/Histogram_dist_<file_tag>.pdf``.
    colors : sequence
        Bar color of the "Test" histogram for each column; must have as many
        entries as ``losses``.
    rels : sequence of str
        Column titles; must have as many entries as ``losses``.
    num_bins : int
        Number of histogram bins.
    target_layers : iterable of str, optional
        Layers to plot, in row order. Defaults to every key of ``loss_closed``.

    Returns
    -------
    None
        Prints ``"inadequate call"`` and returns without plotting when
        ``losses`` is empty or its length differs from ``colors`` or ``rels``.
    """
    import os  # local import: only needed to create the output folder

    # Validate before touching matplotlib so a bad call leaves no figure behind.
    if not losses or len(losses) != len(colors) or len(losses) != len(rels):
        print("inadequate call")
        return

    if target_layers is None:
        target_layers = loss_closed.keys()
    target_layers = list(target_layers)

    plt.clf()
    # squeeze=False keeps `axes` two-dimensional even for a single row/column.
    fig, axes = plt.subplots(
        nrows=len(target_layers),
        ncols=len(losses),
        figsize=(4*len(losses), 1.5*len(target_layers)),
        subplot_kw=dict(xticks=[], yticks=[]),
        squeeze=False,
    )

    for row, (row_axes, layer) in enumerate(zip(axes, target_layers)):
        # Common range so every histogram in this row uses the same bins.
        upper = max(loss_closed[layer].max(), max(loss[layer].max() for loss in losses))
        vrange = (0, upper)
        closed_hist, edges = np.histogram(loss_closed[layer], bins=num_bins, density=True, range=vrange)
        centers = (edges[1:] + edges[:-1])/2
        # Taken from the edges so it is also defined when num_bins == 1.
        bar_width = edges[1] - edges[0]

        row_axes[0].set_ylabel(layer, fontsize=14)
        for mono_ax, loss, color, rel in zip(row_axes, losses, colors, rels):
            if row == 0:
                mono_ax.set_title(rel, fontsize=20)
            mono_ax.bar(centers, closed_hist, color="gray", alpha=0.3, width=bar_width, label="Train", lw=0)
            hist, _ = np.histogram(loss[layer], bins=num_bins, density=True, range=vrange)
            mono_ax.bar(centers, hist, color=color, alpha=0.3, width=bar_width, label="Test", lw=0)
            dist = bhatta(closed_hist, hist)
            mono_ax.text(centers[-1], 0, "dist: {:.3f}".format(dist), ha="right", va="bottom", size=20, color=(red if dist > 0.04 else None))
            mono_ax.set_xlim(vrange)
            mono_ax.legend(fontsize=14)

    fig.suptitle("Complexity Histogram [{}]".format(fig_title), size=20)
    fig.tight_layout()
    # savefig does not create missing folders.
    os.makedirs("figure", exist_ok=True)
    fig.savefig("figure/Histogram_dist_{}.pdf".format(file_tag))
    plt.show()


In [None]:
# Model trained on dataset (a), evaluated on test sets (a')-(e'), all layers.
# Backslashes in the mathtext labels are doubled: "\s" is not a valid string
# escape, and the strings cannot be raw because they also contain "\n".
show_hist_paper(
    all_recon_loss["a"]["closed"],
    all_recon_loss["a"]["a"],
    all_recon_loss["a"]["b"],
    all_recon_loss["a"]["c"],
    all_recon_loss["a"]["d"],
    all_recon_loss["a"]["e"],
    fig_title="Trained on Dataset (a)",
    file_tag="cmp_normal",
    colors=[green, blue, yellow, yellow, yellow],
    rels=[
        "Train = Test\n[Test on Dataset (a')]",
        "Train $\\supset$ Test\n[Test on Dataset (b')]",
        "Train $\\subset$ Test\n[Test on Dataset(c')]",
        "Train $\\subset$ Test\n[Test on Dataset(d')]",
        "Train $\\subset$ Test\n[Test on Dataset(e')]",
    ],
)

In [None]:
# Model trained on dataset (a), test sets (a')-(c'), four selected layers.
# Backslashes in the mathtext labels are doubled: "\s" is not a valid string
# escape, and the strings cannot be raw because they also contain "\n".
show_hist_paper(
    all_recon_loss["a"]["closed"],
    all_recon_loss["a"]["a"],
    all_recon_loss["a"]["b"],
    all_recon_loss["a"]["c"],
    fig_title="Trained on Dataset (a)",
    file_tag="cmp_a_for_paper",
    colors=[green, blue, yellow],
    rels=[
        "Train = Test\n[Test on Dataset (a')]",
        "Train $\\supset$ Test\n[Test on Dataset (b')]",
        "Train $\\subset$ Test\n[Test on Dataset(c')]",
    ],
    target_layers=["activation_1", "activation_3", "activation_5", "activation_7"],
)

In [None]:
# Model trained on dataset (b), evaluated on test sets (b') and (a'), all layers.
# file_tag is "label" so this PDF is not overwritten by the four-layer
# variant, which is saved with the tag "label_for_paper".
# Backslashes in the mathtext labels are doubled: "\s" is not a valid string
# escape, and the strings cannot be raw because they also contain "\n".
show_hist_paper(
    all_recon_loss["b"]["closed"],
    all_recon_loss["b"]["b"],
    all_recon_loss["b"]["a"],
    fig_title="Trained on Dataset (b)",
    file_tag="label",
    colors=[green, yellow],
    rels=[
        "Train = Test\n[Test on dataset (b')]",
        "Train $\\subset$ Test\n[Test on dataset (a')]",
    ],
)

In [None]:
# Model trained on dataset (b), test sets (b') and (a'), four selected layers.
# Backslashes in the mathtext labels are doubled: "\s" is not a valid string
# escape, and the strings cannot be raw because they also contain "\n".
show_hist_paper(
    all_recon_loss["b"]["closed"],
    all_recon_loss["b"]["b"],
    all_recon_loss["b"]["a"],
    fig_title="Trained on Dataset (b)",
    file_tag="label_for_paper",
    colors=[green, yellow],
    rels=[
        "Train = Test\n[Test on dataset (b')]",
        "Train $\\subset$ Test\n[Test on dataset (a')]",
    ],
    target_layers=["activation_1", "activation_3", "activation_5", "activation_7"],
)

In [None]:
# Model trained on dataset (c), evaluated on test sets (c') and (a'), all layers.
# Backslashes in the mathtext labels are doubled: "\s" is not a valid string
# escape, and the strings cannot be raw because they also contain "\n".
show_hist_paper(
    all_recon_loss["c"]["closed"],
    all_recon_loss["c"]["c"],
    all_recon_loss["c"]["a"],
    fig_title="Trained on Dataset (c)",
    file_tag="brightness",
    colors=[green, blue],
    rels=[
        "Train = Test\n[Test on dataset (c')]",
        "Train $\\supset$ Test\n[Test on dataset (a')]",
    ],
)

In [None]:
# Model trained on dataset (d), evaluated on test sets (d') and (a'), all layers.
# Backslashes in the mathtext labels are doubled: "\s" is not a valid string
# escape, and the strings cannot be raw because they also contain "\n".
show_hist_paper(
    all_recon_loss["d"]["closed"],
    all_recon_loss["d"]["d"],
    all_recon_loss["d"]["a"],
    fig_title="Trained on Dataset (d)",
    file_tag="contrast",
    colors=[green, blue],
    rels=[
        "Train = Test\n[Test on dataset (d')]",
        "Train $\\supset$ Test\n[Test on dataset (a')]",
    ],
)

In [None]:
# Model trained on dataset (e), evaluated on test sets (e') and (a'), all layers.
# Backslashes in the mathtext labels are doubled: "\s" is not a valid string
# escape, and the strings cannot be raw because they also contain "\n".
show_hist_paper(
    all_recon_loss["e"]["closed"],
    all_recon_loss["e"]["e"],
    all_recon_loss["e"]["a"],
    fig_title="Trained on Dataset (e)",
    file_tag="saturation",
    colors=[green, blue],
    rels=[
        "Train = Test\n[Test on dataset (e')]",
        "Train $\\supset$ Test\n[Test on dataset (a')]",
    ],
)