In [18]:
import numpy as np
import gym
import gym_minigrid
from gym_minigrid.minigrid import *
from gym_minigrid.wrappers import *
from gym_minigrid.register import register
import geomloss
from geomloss import SamplesLoss
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import ot
from scipy.stats import spearmanr, pearsonr

from envs import MiniGridRewardWrapper
import ml_metrics as mlm

import gym

from stable_baselines.common.policies import FeedForwardPolicy, CnnPolicy, register_policy
from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines import PPO2, ACKTR, SAC, DQN
import imageio
from mutual_info import *

from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw

from scipy.stats import rankdata

import pickle


In [76]:
from generalization_unint import NoiseWrapper, GridExample, MiniGridBinaryRewardWrapper
from generalization_unint import MiniGridForwardBackwardUpDownActionsOnly, MiniGridNoisyTransitions
from generalization_unint import metric_V_distance, metric_edit_distance, metric_edit_distance_scored
from generalization_unint import metric_sinkhorn_distance, metric_wasserstein_distance, make_matrix_blocks

In [20]:
# Interpretable env settings
isettings = [(0, 2, None, None),
                (0, 5, None, None),
                (1, 4, None, None),
                (2, 0, None, None),
                (5, 0, None, None),
                (4, 1, None, None),
                (0, 2, 1, None),
                (0, 5, 1, None),
                (0, 5, 2, None),
                (0, 5, 4, None),
                (0, 2, None, 1),
                (0, 5, None, 1),
                (0, 5, None, 2),
                (0, 5, None, 4),
                (0, 5, 2, 3),
                (0, 5, 3, 2),
                (0, None, 2, None),
                (2, None, 0, None)]

iblocks = ['move agent & goal',
              'add lava',
              'add wall',
              'add lava & wall',
              'remove goal']

iblock_sizes = [6, 4, 4, 2, 2]


In [21]:
# Uninterpretable env settings
usettings = [((0, 5, None, None), (0., 0.5)),
                ((0, 2, None, None), (0., 0.5)),
                ((0, 5, 1, None), (0., 0.5)),
                ((0, 5, 2, 3), (0., 0.5)),
                ((0, 5, None, None), (0.5, 0.5)),
                ((0, 2, None, None), (0.5, 0.5)),
                ((0, 5, 1, None), (0.5, 0.5)),
                ((0, 5, 2, 3), (0.5, 0.5))]

# 2, 1, 8, 15

ublocks = ['noise mean 0.', 'noise mean 0.5']

ublock_sizes = [4, 4]

In [22]:
blocks = iblocks + ublocks
block_sizes = iblock_sizes + ublock_sizes

In [23]:
algorithm = [PPO2, DQN, SAC, ACKTR][0]
postfix = ['ppo', 'dqn', 'sac', 'acktr'][0]

In [27]:
noise_level = 0.

In [28]:
# Load up all the models

imodels = []
# Go over each of the environments
for i, setting in enumerate(isettings):
    imodels.append([])
    for j in range(5):
        print (f"fat/exp1/{'.'.join([str(e) for e in setting]) + '.' + str(noise_level) + '.' + str(j)}.{postfix}")
        model = algorithm.load(f"fat/exp1/{'.'.join([str(e) for e in setting]) + '.' + str(noise_level) + '.' + str(j)}.{postfix}")
        imodels[-1].append(model)

umodels = []
# Go over each of the uninterpretable environments
for i, (setting, noise_setting) in enumerate(usettings):
    umodels.append([])
    for j in range(5):
        model = algorithm.load(f"fat/exp2/{'.'.join([str(e) for e in setting]) + '.' + str(noise_level) + '.' + str(noise_setting[0]) + '.' + str(j)}.{postfix}")
        umodels[-1].append(model)


fat/exp1/0.2.None.None.0.0.0.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.2.None.None.0.0.1.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.2.None.None.0.0.2.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.2.None.None.0.0.3.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.2.None.None.0.0.4.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.None.0.0.0.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.None.0.0.1.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.None.0.0.2.ppo
Loading a model

fat/exp1/0.5.None.2.0.0.1.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.2.0.0.2.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.2.0.0.3.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.2.0.0.4.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.4.0.0.0.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.4.0.0.1.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.4.0.0.2.ppo
Loading a model without an environment, this model cannot be trained until it has a valid environment.
fat/exp1/0.5.None.4.0.0.3.ppo
Loading a model without an environment,

In [29]:
print (len(imodels), len(umodels))

18 8


In [37]:
print (len(isettings), len(usettings))

18 8


In [30]:
def iwrap(env, noise):
    return MiniGridNoisyTransitions(MiniGridForwardBackwardUpDownActionsOnly(FullyObsWrapper(env)), noise)
    

def uwrap(env, noise, noise_setting):
    return NoiseWrapper(MiniGridNoisyTransitions(MiniGridForwardBackwardUpDownActionsOnly(FullyObsWrapper(env)),
                                                 noise), *noise_setting)

In [31]:
def collect_trajectories(model, env, n_trajs=10):
    trajectories = []
    for rep in range(n_trajs):
        trajectories.append([])
        obs = env.reset()
        for step in range(1000):
            action = model.predict(obs)[0]
            next_obs, rew, done, info = env.step(action)
            trajectories[-1].append((obs, action, rew))
            obs = next_obs
            if done:
                break
        trajectories[-1].append((obs, None, None))
    return trajectories

In [34]:
def make_gif(model, env, path):
    images = []
    obs = env.reset()
    img = env.render(mode='rgb_array',
                     highlight=False)
    for i in range(1000):
        images.append(img)
        action, _ = model.predict(obs)
        obs, _, done, _ = env.step(action)
        img = env.render(mode='rgb_array',
                         highlight=False)
        if done:
            break
    images.append(img)

    imageio.mimsave(path, images, fps=1)

In [49]:
def test_policy(model, env, n_trajs=10):
    returns = []
    for rep in range(n_trajs):
        ret = 0
        obs = env.reset()
        for step in range(1000):
            obs, rew, done, info = env.step(model.predict(obs)[0])
            ret += rew
            if done:
                break
        returns.append(ret)
    return np.mean(returns), np.std(returns)

In [175]:
def metric_wasserstein_distance(source_trajectories, target_trajectories):
    source_data = np.array([e[0] for traj in source_trajectories for e in traj])
    target_data = np.array([e[0] for traj in target_trajectories for e in traj])

    # loss, x, y = sinkhorn_optimization(source_data, target_data, blur=0.001)
    # return loss
    return ot_wasserstein(source_data, target_data)


def ot_wasserstein(source_data, target_data):
    print (source_data.shape)
    source_data = source_data.reshape((source_data.shape[0], -1), order='F')
    target_data = target_data.reshape((target_data.shape[0], -1), order='F')

    source_data, source_counts = np.unique(source_data, axis=0, return_counts=True)
    target_data, target_counts = np.unique(target_data, axis=0, return_counts=True)
    
    M = ot.dist(source_data[:, :int(2 * source_data.shape[1]/3)], target_data[:, :int(2 * source_data.shape[1]/3)], 'hamming')
#     M += ot.dist(source_data[:, int(2 * source_data.shape[1]/3):], target_data[:, int(2 * source_data.shape[1]/3):], 'euclidean')
    M /= M.max()

    source_dist, target_dist = source_counts / np.sum(source_counts), target_counts / np.sum(target_counts)
    distance = ot.emd2(source_dist, target_dist, M)
    return distance

In [35]:
n_trajs = 50

trajectories_across_isettings = []
scenarios_across_isettings = []
for i, setting in enumerate(isettings):
    # Collect some trajectories from the task by running the policy trained on the task
    env = MiniGridBinaryRewardWrapper(iwrap(GridExample(*setting, max_steps=20), noise_level))
    scenarios_across_isettings.append(env.reset())
    trajectories_across_isettings.append(collect_trajectories(imodels[i][0], env, n_trajs)) # all the models trained to be optimal
    # Make a gif of what this policy has learned to do
    make_gif(imodels[i][0], env, f"fat/exp1/gif_{'.'.join([str(e) for e in setting])}.gif")


In [36]:
trajectories_across_usettings = []
scenarios_across_usettings = []
for i, (setting, noise_setting) in enumerate(usettings):
    # Collect some trajectories from the task by running the policy trained on the task
    env = MiniGridBinaryRewardWrapper(uwrap(GridExample(*setting, max_steps=20), noise_level, noise_setting))
    scenarios_across_usettings.append(env.reset())
    trajectories_across_usettings.append(collect_trajectories(umodels[i][0], env, n_trajs)) # all the models trained to be optimal
    # Make a gif of what this policy has learned to do
    make_gif(umodels[i][0], env, f"fat/exp2/gif_{'.'.join([str(e) for e in setting])}.gif")

In [44]:
print (len(trajectories_across_isettings), len(trajectories_across_usettings))
print (len(scenarios_across_isettings), len(scenarios_across_usettings))
print (scenarios_across_usettings[0].astype(int).transpose(2,0,1))

18 8
18 8
[[[ 2  2  2  2]
  [ 2 10  1  2]
  [ 2  1  1  2]
  [ 2  1  8  2]
  [ 2  2  2  2]]

 [[ 5  5  5  5]
  [ 5  0  0  5]
  [ 5  0  0  5]
  [ 5  0  1  5]
  [ 5  5  5  5]]

 [[ 0  0  0  0]
  [ 0  0  0  0]
  [ 0  0  0  0]
  [ 0  0  0  0]
  [ 0  0  0  0]]]


In [47]:
weighting = np.array([[[1, 1, 1, 1], 
                      [1, 3, 3, 1], 
                      [1, 4, 4, 1], 
                      [1, 3, 3, 1],
                      [1, 1, 1, 1]],
                     [[1, 1, 1, 1], 
                      [1, 1, 1, 1], 
                      [1, 1, 1, 1], 
                      [1, 1, 1, 1],
                      [1, 1, 1, 1]],
                     [[0, 0, 0, 0], 
                      [0, 0, 0, 0], 
                      [0, 0, 0, 0], 
                      [0, 0, 0, 0],
                      [0, 0, 0, 0]]]).transpose(1, 2, 0)
total_settings = len(isettings) + len(usettings)

In [None]:
evaluation_matrix = []
distances = []


In [176]:
dists = []
for i in range(total_settings):
    dists.append([])
    if i < len(isettings):
        source_trajectories, source_scenario = trajectories_across_isettings[i], scenarios_across_isettings[i]
    else:
        source_trajectories, source_scenario = trajectories_across_usettings[i - len(isettings)], scenarios_across_usettings[i - len(isettings)]
        
    # Go over the target environments
    for j in range(total_settings):
        if j < len(isettings):
            target_trajectories, target_scenario = trajectories_across_isettings[j], scenarios_across_isettings[j]
        else:
            target_trajectories, target_scenario = trajectories_across_usettings[j - len(isettings)], scenarios_across_usettings[j - len(isettings)]
        
        metrics = [metric_V_distance(source_trajectories, target_trajectories),
                   metric_edit_distance(source_scenario, target_scenario),
                   metric_edit_distance_scored(source_scenario, target_scenario, weighting),
                   metric_sinkhorn_distance(source_trajectories, target_trajectories),
                   metric_wasserstein_distance(source_trajectories, target_trajectories)]
        dists[-1].append(metrics)

(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(200, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4, 3)
(100, 5, 4

(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(214, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(202, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4, 3)
(150, 5, 4

In [54]:

for i in range(total_settings):
    evaluation_matrix.append([])
    distances.append([])
    
    if i < len(isettings):
        setting = isettings[i]
        source_trajectories, source_scenario = trajectories_across_isettings[i], scenarios_across_isettings[i]
    else:
        setting, noise_setting = usettings[i - len(isettings)]
        source_trajectories, source_scenario = trajectories_across_usettings[i - len(isettings)], scenarios_across_usettings[i - len(isettings)]
    

    # Go over the target environments
    for j in range(total_settings):
        if j < len(isettings):
            other_setting = isettings[j]
            other_env = MiniGridBinaryRewardWrapper(iwrap(GridExample(*other_setting, max_steps=20), noise_level))
            target_trajectories, target_scenario = trajectories_across_isettings[j], scenarios_across_isettings[j]
        else:
            other_setting, other_noise_setting = usettings[j - len(isettings)]
            other_env = MiniGridBinaryRewardWrapper(uwrap(GridExample(*other_setting, max_steps=20), noise_level, other_noise_setting))
            target_trajectories, target_scenario = trajectories_across_usettings[j - len(isettings)], scenarios_across_usettings[j - len(isettings)]
            
        # Evaluate the return when you run the source task's policy on the target task
        return_means = []
        for k in range(5):
            print (i, j, k)
            if i < len(isettings):
                model = imodels[i][k]
            else:
                model = umodels[i - len(isettings)][k]
            return_mean, return_std = test_policy(model, other_env, n_trajs=n_trajs)
            return_means.append(return_mean)

        return_mean = np.mean(return_means)

        evaluation_matrix[-1].append(return_mean)

        metrics = [metric_V_distance(source_trajectories, target_trajectories),
                   metric_edit_distance(source_scenario, target_scenario),
                   metric_edit_distance_scored(source_scenario, target_scenario, weighting),
                   metric_sinkhorn_distance(source_trajectories, target_trajectories),
                   metric_wasserstein_distance(source_trajectories, target_trajectories)]
        distances[-1].append(metrics)

evaluation_matrix = np.array(evaluation_matrix)


18 0 0
18 0 1
18 0 2
18 0 3
18 0 4
18 1 0
18 1 1
18 1 2
18 1 3
18 1 4
18 2 0
18 2 1
18 2 2
18 2 3
18 2 4
18 3 0
18 3 1
18 3 2
18 3 3
18 3 4
18 4 0
18 4 1
18 4 2
18 4 3
18 4 4
18 5 0
18 5 1
18 5 2
18 5 3
18 5 4
18 6 0
18 6 1
18 6 2
18 6 3
18 6 4
18 7 0
18 7 1
18 7 2
18 7 3
18 7 4
18 8 0
18 8 1
18 8 2
18 8 3
18 8 4
18 9 0
18 9 1
18 9 2
18 9 3
18 9 4
18 10 0
18 10 1
18 10 2
18 10 3
18 10 4
18 11 0
18 11 1
18 11 2
18 11 3
18 11 4
18 12 0
18 12 1
18 12 2
18 12 3
18 12 4
18 13 0
18 13 1
18 13 2
18 13 3
18 13 4
18 14 0
18 14 1
18 14 2
18 14 3
18 14 4
18 15 0
18 15 1
18 15 2
18 15 3
18 15 4
18 16 0
18 16 1
18 16 2
18 16 3
18 16 4
18 17 0
18 17 1
18 17 2
18 17 3
18 17 4


  check_result(result_code)


18 18 0
18 18 1
18 18 2
18 18 3
18 18 4
18 19 0
18 19 1
18 19 2
18 19 3
18 19 4
18 20 0
18 20 1
18 20 2
18 20 3
18 20 4
18 21 0
18 21 1
18 21 2
18 21 3
18 21 4
18 22 0
18 22 1
18 22 2
18 22 3
18 22 4
18 23 0
18 23 1
18 23 2
18 23 3
18 23 4
18 24 0
18 24 1
18 24 2
18 24 3
18 24 4
18 25 0
18 25 1
18 25 2
18 25 3
18 25 4
19 0 0
19 0 1
19 0 2
19 0 3
19 0 4
19 1 0
19 1 1
19 1 2
19 1 3
19 1 4
19 2 0
19 2 1
19 2 2
19 2 3
19 2 4
19 3 0
19 3 1
19 3 2
19 3 3
19 3 4
19 4 0
19 4 1
19 4 2
19 4 3
19 4 4
19 5 0
19 5 1
19 5 2
19 5 3
19 5 4
19 6 0
19 6 1
19 6 2
19 6 3
19 6 4
19 7 0
19 7 1
19 7 2
19 7 3
19 7 4
19 8 0
19 8 1
19 8 2
19 8 3
19 8 4
19 9 0
19 9 1
19 9 2
19 9 3
19 9 4
19 10 0
19 10 1
19 10 2
19 10 3
19 10 4
19 11 0
19 11 1
19 11 2
19 11 3
19 11 4
19 12 0
19 12 1
19 12 2
19 12 3
19 12 4
19 13 0
19 13 1
19 13 2
19 13 3
19 13 4
19 14 0
19 14 1
19 14 2
19 14 3
19 14 4
19 15 0
19 15 1
19 15 2
19 15 3
19 15 4
19 16 0
19 16 1
19 16 2
19 16 3
19 16 4
19 17 0
19 17 1
19 17 2
19 17 3
19 17 4
19 18 0
19

In [52]:
evaluation_matrix_archive = evaluation_matrix
distances_archive = distances

In [61]:
evaluation_matrix_fixed = np.concatenate((evaluation_matrix[:18], evaluation_matrix[19:]), axis=0)
distances_fixed = distances[:18] + distances[19:]

In [117]:
pickle.dump((evaluation_matrix, distances), open('fat/combined_eval_data_updated', 'wb'))

In [67]:
evaluation_matrix = np.array(evaluation_matrix_fixed)
distances = distances_fixed

In [74]:
print (evaluation_matrix.shape)

(26, 26)


In [150]:
def plot_matrix(matrix, figsize, xticklabels, yticklabels, xlabel, ylabel, save_path, vmin=None, vmax=None, mask=None, square=True):
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    ax.plot([1],[1])
    ax.tick_params(axis=u'both', which=u'both',length=0)
    plt.rc('font', size=18)
    p = sns.heatmap(matrix, vmin=vmin, vmax=vmax,
                    mask=mask,
                    xticklabels=xticklabels,
                    yticklabels=yticklabels,
                    linewidths=0, linecolor='white',
                    annot=np.round(matrix.astype(np.float), 1), square=square, cbar=False, cmap="Blues_r")

    plt.xlabel(xlabel, fontsize=20)
    plt.ylabel(ylabel, fontsize=20)
    plt.xticks(fontsize=20, rotation=0)
    plt.yticks(fontsize=20, rotation=0)
    plt.tight_layout()
    plt.savefig(save_path, bbox_inches='tight')
    plt.close()

In [151]:
evaluation_matrix_mod, mask = make_matrix_blocks(evaluation_matrix, block_sizes)

locs = np.insert(np.array(block_sizes).cumsum(), 0, 0)
locs = (locs[1:] - locs[:-1]) / 2. + locs[:-1] + np.arange(len(block_sizes))
locs[-1] = min(locs[-1], evaluation_matrix_mod.shape[0] - 1)
print(locs)

labels = ['' for _ in range(len(evaluation_matrix_mod))]
for e, i in zip(blocks, locs):
    labels[int(i)] = e

print (labels)

plot_matrix(evaluation_matrix_mod,
            (18, 18),
            labels,
            labels,
            'Target Task', 'Source Task',
            f'fat/evaluation_matrix_{postfix}_mod.png', 0, 1, mask)

[ 3.  9. 14. 18. 21. 25. 30.]
['', '', '', 'move agent & goal', '', '', '', '', '', 'add lava', '', '', '', '', 'add wall', '', '', '', 'add lava & wall', '', '', 'remove goal', '', '', '', 'noise mean 0.', '', '', '', '', 'noise mean 0.5', '']


In [152]:
labels = ['' for _ in range(len(evaluation_matrix_mod))]
counter = 1
for i, e in enumerate(evaluation_matrix_mod[:, 0]):
    if not np.isnan(e):
        labels[i] = str(counter)
        counter += 1

In [156]:
#2, 1, 8, 15
slice_positions = np.where(labels == np.array([['2', '1', '8', '15']]).transpose(1, 0))[1]

In [181]:
plot_matrix(evaluation_matrix_mod,
                (18, 18),
                labels,
                labels,
                'Target Task', 'Source Task',
                f'fat/evaluation_matrix_{postfix}_mod.png', 0, 1, mask)

plot_matrix(evaluation_matrix_mod[:22, :22],
           (13, 13),
           labels[:22],
           labels[:22],
           'Target Task', 'Source Task',
            f'fat/evaluation_matrix_18.18_{postfix}_mod.png', 0, 1, mask[:22, :22])

plot_matrix(evaluation_matrix_mod[23:, 23:],
           (8, 8),
           labels[23:],
           labels[23:],
           'Target Task', 'Source Task',
            f'fat/evaluation_matrix_8.8_{postfix}_mod.png', 0, 1, mask[23:, 23:])

plot_matrix(evaluation_matrix_mod[:, 23:][slice_positions],
           (8, 8),
           labels[23:],
            ['2', '1', '8', '15'],
           'Target Task', 'Source Task',
           f'fat/evaluation_matrix_4.8_{postfix}_mod.png', 0, 1, mask[:, 23:][slice_positions])

plot_matrix(evaluation_matrix_mod[23:, :][:, slice_positions],
           (8, 8),
            ['2', '1', '8', '15'],
           labels[23:],
           'Target Task', 'Source Task',
           f'fat/evaluation_matrix_8.4_{postfix}_mod.png', 0, 1, mask[23:, :][:, slice_positions])

In [177]:
dists = np.array(dists)
dists = dists.transpose(2, 0, 1)


In [92]:
distances = np.array(distances)
print(distances.shape)
print(distances)
distances = distances.transpose(2, 0, 1)
print (distances.shape)

(26, 26, 5)
[[[0.00000000e+00 0.00000000e+00 0.00000000e+00 4.95281403e-30
   0.00000000e+00]
  [9.90000000e-03 4.00000000e+00 8.00000000e+00 5.58888893e-01
   5.58888889e-01]
  [1.00000000e-02 6.00000000e+00 1.60000000e+01 6.11111124e-01
   6.11111111e-01]
  ...
  [0.00000000e+00 2.00000000e+01 0.00000000e+00 8.69575291e-01
   8.69565217e-01]
  [9.90000000e-03 2.50000000e+01 1.20000000e+01 9.26013965e-01
   9.25925926e-01]
  [1.04880600e-02 2.60000000e+01 1.20000000e+01 9.22503105e-01
   9.22355149e-01]]

 [[9.90000000e-03 4.00000000e+00 8.00000000e+00 5.58890411e-01
   5.58888889e-01]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00 2.22876632e-29
   0.00000000e+00]
  [1.99000000e-02 6.00000000e+00 1.60000000e+01 6.66667048e-01
   6.66666667e-01]
  ...
  [9.90000000e-03 2.40000000e+01 8.00000000e+00 8.98571659e-01
   8.98205128e-01]
  [0.00000000e+00 2.10000000e+01 4.00000000e+00 9.06084250e-01
   9.05625000e-01]
  [5.88060000e-04 2.30000000e+01 7.00000000e+00 8.95174473e-01
   8.9465

In [178]:
distance_types = ['Value', 'Edit', 'Scored Edit', 'Sinkhorn', 'Wasserstein']
for i, dist_mat in enumerate(dists):
    print('--------')
    print (distance_types[i])
    print('--------')
    dist_mat_mod, mask = make_matrix_blocks(dist_mat, block_sizes)

    plot_matrix(dist_mat_mod,
                (18, 18),
                labels,
                labels,
                'Target Task', 'Source Task',
                f'fat/distance_matrix_{distance_types[i].lower()}_mod.png', mask=mask)
    
    plot_matrix(dist_mat_mod[:22, :22],
           (13, 13),
           labels[:22],
           labels[:22],
           'Target Task', 'Source Task',
            f'fat/distance_matrix_18.18_{distance_types[i].lower()}_mod.png', mask=mask[:22, :22])

    plot_matrix(dist_mat_mod[23:, 23:],
               (8, 8),
               labels[23:],
               labels[23:],
               'Target Task', 'Source Task',
                f'fat/distance_matrix_8.8_{distance_types[i].lower()}_mod.png', mask=mask[23:, 23:])

    plot_matrix(dist_mat_mod[:, 23:][slice_positions],
               (8, 8),
               labels[23:],
                ['2', '1', '8', '15'],
               'Target Task', 'Source Task',
               f'fat/distance_matrix_4.8_{distance_types[i].lower()}_mod.png', mask=mask[:, 23:][slice_positions])

    plot_matrix(dist_mat_mod[23:, :][:, slice_positions],
               (8, 8),
                ['2', '1', '8', '15'],
               labels[23:],
               'Target Task', 'Source Task',
               f'fat/distance_matrix_8.4_{distance_types[i].lower()}_mod.png', mask=mask[23:, :][:, slice_positions])
    
    
    print ("Full")
    print(spearmanr(1 - evaluation_matrix.flatten(), dist_mat.flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix.flatten(), dist_mat.flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))

    print ("18 x 18")
    print(spearmanr(1 - evaluation_matrix[:18, :18].flatten(), dist_mat[:18, :18].flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix[:18, :18].flatten(), dist_mat[:18, :18].flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))

    print ("Source Int --> Target Non Int")
    print(spearmanr(1 - evaluation_matrix[:18, 18:].flatten(), dist_mat[:18, 18:].flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix[:18, 18:].flatten(), dist_mat[:18, 18:].flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))

    print ("Source Non Int --> Target Int")
    print(spearmanr(1 - evaluation_matrix[18:, :18].flatten(), dist_mat[18:, :18].flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix[18:, :18].flatten(), dist_mat[18:, :18].flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))

    print ("Source Non Int --> Target Non Int")
    print(spearmanr(1 - evaluation_matrix[18:, 18:].flatten(), dist_mat[18:, 18:].flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix[18:, 18:].flatten(), dist_mat[18:, 18:].flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))

    print ("Source Non Int --> Target Non Int: Set 1")
    print(spearmanr(1 - evaluation_matrix[18:22, 18:22].flatten(), dist_mat[18:22, 18:22].flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix[18:22, 18:22].flatten(), dist_mat[18:22, 18:22].flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))

    print ("Source Non Int --> Target Non Int: Set 2")
    print(spearmanr(1 - evaluation_matrix[18:22, 22:26].flatten(), dist_mat[18:22, 22:26].flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix[18:22, 22:26].flatten(), dist_mat[18:22, 22:26].flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))

    print ("Source Non Int --> Target Non Int: Set 3")
    print(spearmanr(1 - evaluation_matrix[22:26, 18:22].flatten(), dist_mat[22:26, 18:22].flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix[22:26, 18:22].flatten(), dist_mat[22:26, 18:22].flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))

    print ("Source Non Int --> Target Non Int: Set 4")
    print(spearmanr(1 - evaluation_matrix[22:26, 22:26].flatten(), dist_mat[22:26, 22:26].flatten()))
#     print(spearmanr(1 - success_matrix.flatten(), dist_mat.flatten()))
    print(pearsonr(1 - evaluation_matrix[22:26, 22:26].flatten(), dist_mat[22:26, 22:26].flatten()))
#     print(pearsonr(1 - success_matrix.flatten(), dist_mat.flatten()))


--------
Value
--------
Full
SpearmanrResult(correlation=0.24133456157066507, pvalue=2.0534804341341578e-10)
(0.2091733745502527, 4.035781756265126e-08)
18 x 18
SpearmanrResult(correlation=0.19250874964465997, pvalue=0.00049321912417934)
(0.2049267184290817, 0.0002040774839079054)
Source Int --> Target Non Int
SpearmanrResult(correlation=0.23364698954063406, pvalue=0.004825655122245124)
(0.18838038542750812, 0.02375017020909589)
Source Non Int --> Target Int
SpearmanrResult(correlation=0.38165076424605876, pvalue=2.3607695295351167e-06)
(0.21112397582249898, 0.011081877337951889)
Source Non Int --> Target Non Int
SpearmanrResult(correlation=0.4715203248870842, pvalue=8.393102359837231e-05)
(0.4197766767522568, 0.0005543679536356501)
Source Non Int --> Target Non Int: Set 1
SpearmanrResult(correlation=0.5321523814578613, pvalue=0.033848339672767264)
(0.6170144734264986, 0.010892242660149234)
Source Non Int --> Target Non Int: Set 2
SpearmanrResult(correlation=0.22737714847156218, pvalue

In [180]:

correlations = []
f1_scores = {e: [] for e in range(1, 11)}
relevant_k = []
for i, dist_mat_i in enumerate(dists):
    correlations.append([])
    for j, dist_mat_j in enumerate(dists):
        print (distance_types[i], distance_types[j])
        print (spearmanr(dist_mat_i.flatten(), dist_mat_j.flatten()),
               pearsonr(dist_mat_i.flatten(), dist_mat_j.flatten()))
        correlations[-1].append(spearmanr(dist_mat_i.flatten(), dist_mat_j.flatten())[0])

#     for e in range(1, 11):
#         f1_scores[e].append([])

#     for task in range(len(env_settings))[:-2]:
#         print (i, task)

#         generalizes = np.where(success_matrix[task])[0]
#         if i == 0:
#             relevant_k.append(len(generalizes))
#         ranks = np.argsort(dist_mat_i[task])#rankdata(dist_mat_i[task], method='ordinal')
#         print (generalizes, ranks)
#         for k in range(1, 11):
#             pre = precision(generalizes, ranks, k=k)
#             rec = recall(generalizes, ranks, k=k)
#             try:
#                 f1 = 2 * pre * rec/ (pre + rec)
#             except ZeroDivisionError:
#                 f1 = 0.
#             f1_scores[k][-1].append(f1)

# for k in range(1, 11):
#     print (np.array(f1_scores[k]))
# print (relevant_k)




# plt.figure(figsize=(10, 8))
# sns.heatmap(np.array(correlations),
#             xticklabels=distance_types,
#             yticklabels=distance_types,
#             linewidths=3, linecolor='black',
#             annot=True, square=True, cbar=False)
# plt.ylabel('Distance Type')
# plt.xlabel('Distance Type')
# plt.title(f'Spearman correlation between distances')
# plt.tight_layout()
# plt.savefig(f'fat/exp1/dist_correlation_matrix.png')
# plt.close()

Value Value
SpearmanrResult(correlation=1.0, pvalue=0.0) (0.9999999999999998, 0.0)
Value Edit
SpearmanrResult(correlation=0.2640705670266258, pvalue=3.0079368358234766e-12) (-0.14082389980904877, 0.00023975331523942296)
Value Scored Edit
SpearmanrResult(correlation=0.4103374181227754, pvalue=7.699352168475916e-29) (0.022854115513796482, 0.5530574906846264)
Value Sinkhorn
SpearmanrResult(correlation=0.3679579899101786, pvalue=4.222254871014484e-23) (0.0719735245176582, 0.061445710511177)
Value Wasserstein
SpearmanrResult(correlation=0.5702208689910094, pvalue=1.487742083008249e-59) (0.21031579800301062, 3.391043809658621e-08)
Edit Value
SpearmanrResult(correlation=0.26407056702662585, pvalue=3.0079368358234434e-12) (-0.14082389980904877, 0.00023975331523942296)
Edit Edit
SpearmanrResult(correlation=1.0, pvalue=0.0) (1.0, 0.0)
Edit Scored Edit
SpearmanrResult(correlation=0.3314814809216976, pvalue=8.435316283557734e-19) (0.06912828127465441, 0.07246937557208814)
Edit Sinkhorn
SpearmanrRe