In [1]:
from fynesse import access, assess, address

import torch
import torch.nn as nn

import numpy as np

## Mapping

In [68]:
# Mapping from model name to its list of layer tokens, as stored in the memo pickle.
model2layers = assess.eat_pickle('./data/pickle/memo.pickle')

max_seq_len = 300

# Wrap every layer list in <bos>/<eos> markers and right-pad with <pad>.
# If a list is longer than max_seq_len - 2, pad_len is negative, no padding
# is added, and the resulting sequence exceeds max_seq_len.
for model in model2layers:
    pad_len = max_seq_len - len(model2layers[model]) - 2
    padding = ['<pad>'] * pad_len
    model2layers[model] = ['<bos>'] + model2layers[model] + ['<eos>'] + padding

# Vocabulary: every distinct token that occurs in any padded sequence.
all_tokens = set()
for layers in model2layers.values():
    all_tokens.update(layers)

all_tokens = np.array(list(all_tokens))

In [70]:
from sklearn.preprocessing import LabelEncoder

# Fit the token <-> integer id encoder on the full vocabulary.
# LabelEncoder.fit returns the fitted encoder itself, so the call can be chained.
le = LabelEncoder().fit(all_tokens)

def model2seq(model):
    """Encode the layer-token list stored under ``model`` into integer ids.

    Looks ``model`` up in the module-level ``model2layers`` mapping and
    passes the resulting token list through the fitted encoder ``le``.
    """
    return le.transform(model2layers[model])

def seq2token(seq):
    """Decode a sequence of integer ids back into tokens using ``le``."""
    tokens = le.inverse_transform(seq)
    return tokens

def token2model(token):
    """Return the name of the model whose layer list equals ``token``.

    Parameters
    ----------
    token : sequence of tokens
        Layer tokens to look up. May be a list, tuple or NumPy array
        (e.g. the output of ``seq2token``).

    Returns
    -------
    The first model name whose stored layer list matches element-wise,
    or ``None`` when no model matches.
    """
    # NOTE(review): the mapping is re-read from disk on every call; if this
    # is called in a loop, consider loading it once outside the function.
    # A distinct local name is used so the module-level ``model2layers``
    # (which other cells modify) is not shadowed.
    stored_layers = assess.eat_pickle('./data/pickle/memo.pickle')

    # Normalise to a plain list: comparing a NumPy array to a list with ``==``
    # yields an element-wise array whose truth value is ambiguous, and a
    # tuple never compares equal to a list.
    wanted = list(token)

    for model, layers in stored_layers.items():
        if wanted == list(layers):
            return model

    return None

In [80]:
# Reload the mapping from disk, then invert it: layer tuple -> model name.
# Tuples are used because lists are not hashable. If two models share the
# same layer sequence, the one iterated last wins.
model2layers = assess.eat_pickle('./data/pickle/memo.pickle')
layers2models = {tuple(layers): model for model, layers in model2layers.items()}

In [81]:
# Load the two pickled collections; they are paired up positionally
# (zip) further down in the notebook.
seqs, tgts = map(
    assess.eat_pickle,
    ('./data/pickle/seqs.pickle', './data/pickle/tgts.pickle'),
)

In [83]:
def trim_seq(seq):
    """Strip marker tokens from ``seq`` and cut it at the first ``<eos>``.

    ``<bos>`` and ``<pad>`` tokens are dropped wherever they occur; everything
    from the first ``<eos>`` onwards is discarded. Returns a new list.
    """
    ignored = ('<bos>', '<pad>')
    kept = []

    for tok in seq:
        if tok == '<eos>':
            break
        if tok in ignored:
            continue
        kept.append(tok)

    return kept

In [84]:
# Strip marker tokens from every sequence before scoring.
seqs = [trim_seq(s) for s in seqs]
tgts = [trim_seq(t) for t in tgts]

In [27]:
# Dynamic programming implementation of LCS problem

# Returns one LCS (as a list of elements) of X[0..m-1], Y[0..n-1]
def lcs(X, Y):
    """Return one longest common subsequence of ``X`` and ``Y`` as a list.

    Uses the classic bottom-up dynamic-programming table followed by a
    backtrace from the bottom-right corner. When several subsequences share
    the maximal length, the backtrace moves up only if the cell above is
    strictly larger than the cell to the left; otherwise it moves left.
    """
    rows, cols = len(X), len(Y)

    # table[r][c] holds the LCS length of X[:r] and Y[:c]; row 0 and
    # column 0 stay at zero (empty prefix).
    table = [[0] * (cols + 1) for _ in range(rows + 1)]
    for r in range(1, rows + 1):
        for c in range(1, cols + 1):
            if X[r - 1] == Y[c - 1]:
                table[r][c] = table[r - 1][c - 1] + 1
            else:
                table[r][c] = max(table[r - 1][c], table[r][c - 1])

    # Walk back from the bottom-right corner, collecting matched elements
    # in reverse order.
    picked = []
    r, c = rows, cols
    while r > 0 and c > 0:
        if X[r - 1] == Y[c - 1]:
            picked.append(X[r - 1])
            r -= 1
            c -= 1
        elif table[r - 1][c] > table[r][c - 1]:
            r -= 1
        else:
            c -= 1

    picked.reverse()
    return picked


In [87]:
# proportion of target sequence predicted: len(LCS(seq, tgt)) / len(seq)

def ptsp(seq, tgt):
    """Return the LCS length of ``seq`` and ``tgt`` divided by ``len(seq)``.

    Returns 0 when ``seq`` is empty. Note that the denominator is the length
    of ``seq``, not of ``tgt``.
    """
    seq_len = len(seq)
    if seq_len == 0:
        return 0

    common = lcs(seq, tgt)
    return len(common) / seq_len

# edit distance normalised by target length: editdistance(seq, tgt) / len(tgt)

import editdistance
def edsl(seq, tgt):
    """Return the edit distance between ``seq`` and ``tgt`` divided by ``len(tgt)``.

    Raises ``ZeroDivisionError`` when ``tgt`` is empty.
    """
    distance = editdistance.eval(seq, tgt)
    return distance / len(tgt)

In [104]:
from collections import defaultdict

# Per-model accumulators, keyed by the model name looked up from the target:
#   p_by_model  - number of pairs whose ptsp score is exactly 1
#   pp_by_model - list of ptsp scores
#   pe_by_model - list of edsl scores
p_by_model = defaultdict(int)
pp_by_model = defaultdict(list)
pe_by_model = defaultdict(list)

for seq, tgt in zip(seqs, tgts):
    # Raises KeyError if the trimmed target is not a known layer sequence.
    model = layers2models[tuple(tgt)]

    # ptsp runs an O(len(seq) * len(tgt)) dynamic program, so compute it
    # once per pair and reuse the value for both the counter and the list.
    proportion = ptsp(seq, tgt)

    if proportion == 1:
        p_by_model[model] += 1

    pp_by_model[model].append(proportion)
    pe_by_model[model].append(edsl(seq, tgt))

In [105]:
# Per model: number of (seq, tgt) pairs with ptsp == 1. Iterating over
# pp_by_model lists every scored model, including those with a count of 0.
for model in pp_by_model:
    print(f'{model}: {p_by_model[model]}')

vgg13: 6
resnet50: 9
vitl32: 4
vith14: 4
vgg11: 9
swinb: 7
swint: 4
vitb32: 3
mobilenet: 5
unet: 11
vgg19: 8
vgg16: 18
retinanet: 9


In [106]:
# Per model: mean ptsp score over all of its (seq, tgt) pairs.
for model in pp_by_model:
    print(f'{model}: {np.mean(pp_by_model[model]):.3f}')

vgg13: 0.410
resnet50: 0.490
vitl32: 0.493
vith14: 0.462
vgg11: 0.469
swinb: 0.424
swint: 0.433
vitb32: 0.435
mobilenet: 0.341
unet: 0.525
vgg19: 0.437
vgg16: 0.550
retinanet: 0.454


In [107]:
# Per model: mean edsl score (edit distance / target length) over all pairs.
for model in pe_by_model:
    print(f'{model}: {np.mean(pe_by_model[model]):.3f}')

vgg13: 1.731
resnet50: 0.951
vitl32: 0.855
vith14: 0.885
vgg11: 1.909
swinb: 0.920
swint: 0.931
vitb32: 0.925
mobilenet: 0.949
unet: 0.986
vgg19: 1.453
vgg16: 1.335
retinanet: 0.919
