In [1]:
import sys
import numpy as np
import scipy.linalg, scipy.spatial.distance

import time
import argparse
import pickle
from collections import OrderedDict

from class_hierarchy import ClassHierarchy

In [2]:
def unitsphere_embedding(class_sim):
    """
    Finds an embedding of `n` classes on a unit sphere in `n`-dimensional space, so that their dot products correspond
    to pre-defined similarities.
    
    Classes are placed one after another: the first class is put on the first coordinate axis; for every further
    class `c`, its first `c` coordinates are obtained by solving the linear system that fixes its dot products with
    all previously placed classes, and coordinate `c` is then chosen so that the vector has unit length.
    
    class_sim - `n-by-n` matrix (NumPy array or any array-like such as nested lists) specifying the desired
                similarity between each pair of classes. Only the entries below the diagonal are read.
    
    Returns: `n-by-n` matrix with rows being the locations of the corresponding classes in the embedding space.
    
    Raises: `ValueError` if `class_sim` is not a non-empty square matrix, or if the similarities requested for
            some class cannot be realized by a vector of unit length.
    """
    
    # Accept array-likes (e.g. nested lists) in addition to NumPy arrays
    class_sim = np.asarray(class_sim, dtype=float)
    
    # Check arguments
    if (class_sim.ndim != 2) or (class_sim.shape[0] != class_sim.shape[1]):
        raise ValueError('Given class_sim has invalid shape. Expected: (n, n). Got: {}'.format(class_sim.shape))
    if (class_sim.shape[0] == 0):
        raise ValueError('Empty class_sim given.')
    
    # Slack for a squared norm that exceeds 1 only because of floating-point rounding
    tolerance = 1e-8
    
    # Place first class
    nc = class_sim.shape[0]
    embeddings = np.zeros((nc, nc))
    embeddings[0,0] = 1.
    
    # Iteratively place all remaining classes
    for c in range(1, nc):
        # The first c coordinates are determined by the dot products with the classes placed so far
        embeddings[c, :c] = np.linalg.solve(embeddings[:c, :c], class_sim[c, :c])
        
        # Whatever is left of the unit length goes into the new dimension
        residual = 1. - np.sum(embeddings[c, :c] ** 2)
        if residual < -tolerance:
            # Taking the square root would silently produce NaN and poison all following rows
            raise ValueError(
                'Similarities of class {} cannot be realized on the unit sphere '
                '(squared norm of the determined coordinates is {}).'.format(c, 1. - residual)
            )
        # Clamp tiny negative values caused by rounding so that the square root stays real
        embeddings[c, c] = np.sqrt(max(residual, 0.))
    
    return embeddings

In [3]:
    # Load the class hierarchy from its parent-child listing
    id_type = str
    hierarchy = ClassHierarchy.from_file(
        'Cifar-Hierarchy/variability.parent-child.txt',
        is_a_relations=False,
        id_type=id_type
    )
    # Optional path to a file listing the target classes (e.g. 'Cifar-Hierarchy/leaf_class_names.txt');
    # `None` means that all nodes of the hierarchy are used.
    class_list = None
    
    # Select the target classes
    if class_list is None:
        # Every node of the hierarchy in sorted order (an earlier variant restricted this to nodes without children)
        unique_labels = sorted(hierarchy.nodes)
    else:
        # First token of each non-blank line, de-duplicated while keeping the order of first appearance
        with open(class_list) as class_file:
            unique_labels = list(OrderedDict.fromkeys(
                id_type(row.split()[0]) for row in map(str.strip, class_file) if row
            ))
    # Position of every label, i.e. its row index in the matrices below
    linear_labels = dict(zip(unique_labels, range(len(unique_labels))))
    
    # Fill the symmetric matrix of pairwise distances reported by `hierarchy.metric2`
    # (an earlier variant used `lcs_height` here); the diagonal stays zero
    num_classes = len(unique_labels)
    sem_class_dist = np.zeros((num_classes, num_classes))
    for i in range(num_classes):
        for j in range(i + 1, num_classes):
            dist = hierarchy.metric2(unique_labels[i], unique_labels[j])
            sem_class_dist[i, j] = dist
            sem_class_dist[j, i] = dist
    
    # Turn distances into similarities and embed the classes accordingly
    embedding = unitsphere_embedding(1. - sem_class_dist)

In [4]:
# Build a lookup from class name to the identifier listed next to it in the class-name file.
# Every non-blank line is expected to consist of exactly two whitespace-separated fields:
# the identifier followed by the class name.
convert = {}
with open('Cifar-Hierarchy/class_names.txt') as conversion:
    for conv in conversion:
        fields = conv.split()
        if not fields:
            # Skip blank lines (such as a trailing empty line) instead of failing to unpack them
            continue
        num, classname = fields
        convert[classname] = num

In [7]:
# Similarity of two classes, computed as the product of their embedding rows
i = 'FU'
j = 'eruptive'
# With an explicit class list the names index `linear_labels` directly;
# otherwise they are first translated through `convert`.
if class_list is None:
    key_i, key_j = convert[i], convert[j]
else:
    key_i, key_j = i, j
# Both operands are single rows of `embedding`, so no transpose is needed
sim = np.matmul(embedding[linear_labels[key_i]], embedding[linear_labels[key_j]])
print("Similarity: %f" % sim)

Similarity: 0.800000


In [None]:
# Reference note only: this string literal is not assigned to any name. It sketches a part of the
# class hierarchy, with the number of leading dashes indicating the nesting depth (for example,
# `FU` is listed beneath `eruptive`, the two classes compared in the similarity cell above).
'''Hierarchy snippet for testing purposes: 
variable
-- intrinsic
---- AGN
---- in_stars
------ eruptive
-------- UV_ceti
-------- RS_CVn 
-------- RCB
-------- FU
-------- LBV
-------- WR
-------- GCAS'''