In [13]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter, defaultdict
from egg.core.interaction import Interaction
from egg.core.language_analysis import calc_entropy, TopographicSimilarity
from sklearn.metrics import mutual_info_score
from torch import nn
from typing import Dict, Tuple, List

In [2]:
import os
os.chdir("..")
print("CWD is now:", os.getcwd()) 

CWD is now: /Users/nabilasiregar/code/emergent-communication


In [12]:
from pathlib import Path

candidates = list(Path("logs/msgs/experiment_bee_with_leakage_10nodes").rglob("interactions/**/run*"))
print("Found interaction files:")
for p in candidates:
    print(" →", p)

Found interaction files:
 → logs/msgs/experiment_bee_with_leakage_10nodes/interactions/validation/epoch_10/run2
 → logs/msgs/experiment_bee_with_leakage_10nodes/interactions/validation/epoch_10/run3
 → logs/msgs/experiment_bee_with_leakage_10nodes/interactions/validation/epoch_10/run1


In [50]:
def load_sequences(interaction_path):
    interaction = torch.load(interaction_path, map_location="cpu")
    msgs  = interaction.message.cpu().numpy()

    tok0 = msgs[:, 0].astype(int)
    tok1 = msgs[:, 1] 
    return tok0, tok1, interaction

In [43]:
bee_path   = "logs/msgs/experiment_bee_with_leakage_10nodes/interactions/validation/epoch_10/run1"

In [53]:
direction_token,  distance_token,  bee_int  = load_sequences(bee_path)

1. Mutual Information measures the statistical dependence between two random variables. I use it to check that the direction token carries no information about the distance concept (and vice versa), meaning they are independent. (Shanon et al, 1948)
2. Even if each token slot is independent overall, I also want each symbol in a slot to consistently map to the same concept, regardless of what the other slot emits. CI formalizes this: for each concept c, find the symbol s that most often signals c, and measure how exclusively s points back to c. High CI means that symbol meanings dont drift depending on the other token. This alignment-based metric was introduced for emergent communication by Bogin et al. (2018) under the name context-independence
3. A compositional protocol should at least allow the reconstruction of a joint message embedding by directly composing the embeddings of its parts (Andreas, J. Measuring Compositionality in Representation Learning. 2019) and (Korbak, T., Zubek, J., & Rączaszek-Leonardi, J. Measuring Non-Trivial Compositionality in Emergent Communication)

In [59]:
tok0 = np.array(direction_token, dtype=int) 
tok1 = np.array(distance_token, dtype=float)

# MI
# discretize distance for MI
num_bins = 10
bins = np.quantile(tok1, np.linspace(0,1,num_bins+1))
tok1_disc = np.digitize(tok1, bins[1:-1])
mi_0_1 = mutual_info_score(tok0, tok1_disc)
mi_1_0 = mutual_info_score(tok1_disc, tok0)

# CI
def ci_score(sym_arr, other_arr):
    N = len(sym_arr)
    p_sc = defaultdict(float)
    p_cs = defaultdict(float)
    for s,o in zip(sym_arr, other_arr):
        p_sc[(s,o)] += 1
        p_cs[(o,s)] += 1
    for k in p_sc: p_sc[k] /= N
    for k in p_cs: p_cs[k] /= N

    total = 0.0
    for o in np.unique(other_arr):
        best_symbol = max((s for (oo,s) in p_cs if oo==o), key=lambda s: p_cs[(o,s)])
        total += p_cs[(o,best_symbol)] * p_sc.get((best_symbol,o), 0)
    return total / len(np.unique(other_arr))

ci_dir  = ci_score(tok0, tok1_disc)
ci_dist = ci_score(tok1_disc, tok0)

def tre_probe(tok0, tok1, emb_dim=16, steps=300, lr=5e-2):
    V = int(max(tok0.max(), tok1_disc.max())) + 1
    E = nn.Embedding(V, emb_dim)
    g = nn.Sequential(nn.Linear(2*emb_dim, emb_dim), nn.Tanh())
    opt = torch.optim.Adam(list(E.parameters())+list(g.parameters()), lr=lr)

    x0 = torch.tensor(tok0)
    x1 = torch.tensor(tok1_disc)
    for _ in range(steps):
        h_msg = E(x0) + E(x1)
        h_hat = g(torch.cat([E(x0), E(x1)], dim=-1))
        loss = ((h_msg - h_hat)**2).mean()
        opt.zero_grad(); loss.backward(); opt.step()
    return float(loss)

tre = tre_probe(tok0, tok1)

print(f"MI(direction;distance)   = {mi_0_1:.4f}")
print(f"MI(distance;direction)   = {mi_1_0:.4f}")
print(f"CI_dir→dist (slot1|slot0)= {ci_dir:.4f}")
print(f"CI_dist→dir (slot0|slot1)= {ci_dist:.4f}")
print(f"TRE                       = {tre:.4f}")

MI(direction;distance)   = 0.0000
MI(distance;direction)   = 0.0000
CI_dir→dist (slot1|slot0)= 0.4616
CI_dist→dir (slot0|slot1)= 0.9216
TRE                       = 0.0000


from MI, the two tokens are independent from each other.A CI of 0.46 means that direction symbols are not consistent signallers of any one distance.A CI of 0.92 means that each distance symbol almost always appears with the same direction token.