In [2]:
import numpy as np

# 1

In [32]:
# Column order used by every hypothesis row below.
animals = ['Cow', 'Dolphin', 'Chicken', 'Seal', 'Penguin', 'Bat']

# Each row is one hypothesis expressed as a membership vector over `animals`:
# 1 means the animal at that position belongs to the hypothesis, 0 means it does not.
hypotheses = [
    [1,1,0,1,0,1], # Viviparous (definitionally all mammals in set)
    [0,0,1,0,1,0], # Oviparous
    [0,0,1,0,1,1], # Winged
    [0,0,1,0,1,0], # Winged non-flying
    [1,0,1,0,1,1], # Separate 2 hind limbs
    [0,1,0,1,1,0], # Marine habitat
    [1,0,1,0,0,0], # Commonly found on farms
    [0,0,0,1,1,0], # Commonly found in cold areas
    [0,1,0,1,1,0], # Carnivorous
    [1,0,0,0,0,0], # Herbivorous
    [0,0,1,0,0,1], # Omnivorous
    [1,1,1,1,1,1], # Animals
]

In [33]:
# Stack the membership vectors into a 2-D array (one row per hypothesis).
hypotheses = np.array(hypotheses)
display(hypotheses.shape)

# Row totals: the number of animals each hypothesis contains.
hypotheses_size = np.sum(hypotheses, axis=1)
display(hypotheses_size)

# Human-readable labels, listed in the same order as the rows of `hypotheses`.
hypotheses_names = [
    'Viviparous',
    'Oviparous',
    'Winged',
    'Winged non-flying',
    'Separate 2 hind limbs',
    'Marine habitat',
    'Commonly found on farms',
    'Commonly found in cold areas',
    'Carnivorous',
    'Herbivorous',
    'Omnivorous',
    'Animals',
]
# Label -> membership-vector lookup.
hypotheses_named = dict(zip(hypotheses_names, hypotheses))

# display(hypotheses_named)

(12, 6)

array([4, 2, 3, 2, 4, 3, 2, 2, 3, 1, 2, 6])

# 2

## Prior distribution

A prior distribution that I think may heuristically address the concern described below is to make the prior probability of each hypothesis $h_i$ proportional to the inverse of the sum of its normalized L0 similarities to all hypotheses (including $h_i$ itself), where the normalized L0 similarity of two hypotheses is one minus their L0-norm distance divided by the number of animals.

## Motivation and Justification

It is plausible to expect that, when coming up with hypotheses, I may have created more hypotheses involving certain subsets of animals simply because they came to mind more readily. In the extreme, I could include many copies of nearly-identical hypotheses despite the fact that they may not be representative of all plausible hypotheses.

The motivation for this approach is that a hypothesis begins with a prior likelihood of 1 divided by a denominator. Because a hypothesis has a normalized L0 similarity of 1 to itself, that denominator has at the very least a value of 1. The normalized similarity of each other hypothesis is added to the denominator. Hence, the more similarities which exist relative to other hypotheses, the smaller the prior likelihood will be.

Consider example hypotheses $h_1, h_2, h_3$, where $h_1 = h_2$ and $h_3 = \neg h_1$. Under this prior distribution, since $h_1 = h_2$, both have an unnormalized prior weight of $1/(1+1+0) = 1/2$, while $h_3$ has an unnormalized weight of $1/(1+0+0) = 1$. After normalizing so that the weights sum to 1, the priors are $1/4$, $1/4$, and $1/2$ respectively.

Note: Some hypotheses are defined by membership, others functionally by non-membership. The L0 norm does not capture that some hypotheses may be quite similar, just with their bits flipped. One way to address this, such that our distribution still achieves similar results, would be to compute L0 scores not only with all other hypotheses but also with the negations of those hypotheses. However, we leave that for future work.

In [28]:
# Pairwise agreement counts: entry (i, j) is the number of animals on which
# hypotheses i and j agree, i.e. len(animals) minus the L0 norm of their difference.
n_animals = len(animals)
mismatches = (hypotheses[:, np.newaxis, :] != hypotheses[np.newaxis, :, :]).sum(axis=2)
similarities = (n_animals - mismatches).astype(float)

# Rescale so that the max similarity is 1 and the least similarity is 0.
similarities_norm = similarities / n_animals

# Total similarity of each hypothesis to every hypothesis (itself included).
similarities_norm_sum = similarities_norm.sum(axis=1)

# Unnormalized prior weight: the inverse of the total similarity.
h_likelihoods = 1 / similarities_norm_sum

# Normalize the weights so that they sum to 1.
h_probabilities_prior = h_likelihoods / h_likelihoods.sum()
display(h_probabilities_prior.round(decimals=2))

array([0.11, 0.07, 0.07, 0.07, 0.08, 0.09, 0.08, 0.08, 0.09, 0.09, 0.08,
       0.09])

# 3

In [34]:
def prob_posterior_h_given_x(h, x, f_prob_x_given_h):
    """Return the posterior probability P(h | x) using Bayes' rule.

    Parameters
    ----------
    h : int
        Index of a particular hypothesis (used to index `h_probabilities_prior`).
    x : int
        Index of a particular animal.
    f_prob_x_given_h : callable
        Likelihood function, called as ``f_prob_x_given_h(x, h)``; which one
        is passed depends on weak/strong sampling.

    Returns
    -------
    float
        ``prior[h] * P(x | h)`` divided by the sum of ``prior[h'] * P(x | h')``
        over every hypothesis index ``h'``. The result is undefined (division
        by zero) if no hypothesis gives `x` a non-zero likelihood.
    """
    numerator = h_probabilities_prior[h] * f_prob_x_given_h(x, h)
    # The evidence P(x) marginalizes over all hypotheses, so the per-hypothesis
    # terms must be summed into a single scalar before dividing.
    denominator = sum(
        h_probabilities_prior[h_prime] * f_prob_x_given_h(x, h_prime)
        for h_prime in range(len(hypotheses))
    )
    return numerator / denominator

In [35]:
# Weak sampling

def prob_x_given_h_weak(x, h):
    """Weak-sampling likelihood P(x | h).

    Parameters
    ----------
    x : int
        Index of an animal (column of `hypotheses`).
    h : int
        Index of a hypothesis (row of `hypotheses`).

    Returns
    -------
    int
        1 if animal `x` is a member of hypothesis `h`, otherwise 0.
    """
    # `h` is an index, so the membership bit must be looked up in `hypotheses`;
    # subscripting the integer itself would raise a TypeError.
    return 1 if hypotheses[h][x] == 1 else 0

def prob_posterior_h_given_x_weak(h, x):
    """Posterior of hypothesis index `h` given animal index `x`, using the weak-sampling likelihood."""
    return prob_posterior_h_given_x(h, x, f_prob_x_given_h=prob_x_given_h_weak)

In [36]:
# Strong sampling

def prob_x_given_h_strong(x, h):
    """Strong-sampling likelihood P(x | h).

    Parameters
    ----------
    x : int
        Index of an animal (column of `hypotheses`).
    h : int
        Index of a hypothesis (row of `hypotheses`, entry of `hypotheses_size`).

    Returns
    -------
    float or int
        ``1 / hypotheses_size[h]`` if animal `x` is a member of hypothesis `h`,
        otherwise 0.
    """
    # `h` is an index (it already indexes `hypotheses_size`), so the membership
    # bit must be looked up in `hypotheses` rather than by subscripting `h`.
    return 1/hypotheses_size[h] if hypotheses[h][x] == 1 else 0

def prob_posterior_h_given_x_strong(h, x):
    """Posterior of hypothesis index `h` given animal index `x`, using the strong-sampling likelihood."""
    return prob_posterior_h_given_x(h, x, f_prob_x_given_h=prob_x_given_h_strong)

# 4

# 5