In [29]:
import numpy as np
import pickle
from bounds import *

### compute bounds for real-world datasets

In [35]:
# Load pre-computed results if there are any; otherwise start from an empty
# cache so the "not in results" guards in the cells below work on a first run.
# NOTE: pickle.load can execute arbitrary code -- only load a results file
# that was produced by this notebook / a trusted source.

try:
    with open('all_results.pickle', 'rb') as f:
        results = pickle.load(f)
except FileNotFoundError:
    # No cache on disk yet: every dataset will be computed from scratch.
    results = dict()

In [23]:
# Compute bounds separately for each of the 3 modality pairs of the sentiment,
# humor, and sarcasm datasets (which have 3 modalities each).
# Each pair is cached in `results` under the key '<dataset>_<modality1>_<modality2>'
# and the whole cache is re-written to disk after every newly computed pair.

modality_pairs = [('vision', 'text'), ('vision', 'audio'), ('audio', 'text')]

for dataset in ['humor', 'sarcasm', 'mosei', 'mosi']:
    # Only pairs without a cached entry need any work.
    pending_pairs = [
        (m1, m2) for m1, m2 in modality_pairs
        if f'{dataset}_{m1}_{m2}' not in results
    ]
    if not pending_pairs:
        # Everything for this dataset is cached: skip loading its pickle.
        continue

    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(f'{dataset}_cluster.pickle', 'rb') as f:
        data = pickle.load(f)

    for m1, m2 in pending_pairs:
        key = f'{dataset}_{m1}_{m2}'
        x1 = np.array(data['test'][m1])
        x2 = np.array(data['test'][m2])
        y = np.array(data['test']['labels'])

        # Empirical class frequencies of the test labels and their entropy (base 2).
        _, counts = np.unique(y, return_counts=True)
        pk = counts / np.sum(counts)
        ent = entropy(pk, base=2)

        # Helpers from `bounds`; presumably P is the joint distribution over
        # (x1, x2, y) built from the samples -- confirm against bounds.py.
        P, maps = convert_data_to_distribution(x1, x2, y)
        all_quantities, all_bounds = get_bounds(P)

        # Store the label statistics alongside the quantities returned by get_bounds.
        all_quantities['entropy'] = ent
        all_quantities['n_classes'] = len(counts)
        results[key] = {'all_quantities': all_quantities, 'all_bounds': all_bounds}
        print(dataset, f'{m1}+{m2}', all_bounds)

        # Checkpoint after every pair so an interrupted run keeps finished work.
        with open('all_results.pickle', 'wb') as f:
            pickle.dump(results, f)

In [20]:
# avmnist: bounds for the image + audio modality pair, cached under 'avmnist'.

if 'avmnist' not in results:
    # Load the dataset only when it is actually needed, so a cached run does
    # not pay for (or fail on) reading the cluster pickle.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open('avmnist_cluster.pickle', 'rb') as f:
        data = pickle.load(f)
    x1 = np.array(data['test']['image'])
    x2 = np.array(data['test']['audio'])
    y = np.array(data['test']['labels'])

    # Empirical class frequencies of the test labels and their entropy (base 2).
    _, counts = np.unique(y, return_counts=True)
    pk = counts / np.sum(counts)
    ent = entropy(pk, base=2)

    # Helpers from `bounds`; presumably P is the joint distribution over
    # (x1, x2, y) built from the samples -- confirm against bounds.py.
    P, maps = convert_data_to_distribution(x1, x2, y)
    all_quantities, all_bounds = get_bounds(P)

    # Store the label statistics alongside the quantities returned by get_bounds.
    all_quantities['entropy'] = ent
    all_quantities['n_classes'] = len(counts)
    results['avmnist'] = {'all_quantities': all_quantities, 'all_bounds': all_bounds}
    print('avmnist', all_bounds)

    # Persist the updated cache immediately.
    with open('all_results.pickle', 'wb') as f:
        pickle.dump(results, f)

In [21]:
# mimic: bounds for the modal1 + modal2 modality pair, cached under 'mimic'.

if 'mimic' not in results:
    # Load the dataset only when it is actually needed, so a cached run does
    # not pay for (or fail on) reading the cluster pickle.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open('mimic_cluster.pickle', 'rb') as f:
        data = pickle.load(f)
    x1 = np.array(data['test']['modal1'])
    x2 = np.array(data['test']['modal2'])
    y = np.array(data['test']['labels'])

    # Empirical class frequencies of the test labels and their entropy (base 2).
    _, counts = np.unique(y, return_counts=True)
    pk = counts / np.sum(counts)
    ent = entropy(pk, base=2)

    # Helpers from `bounds`; presumably P is the joint distribution over
    # (x1, x2, y) built from the samples -- confirm against bounds.py.
    P, maps = convert_data_to_distribution(x1, x2, y)
    all_quantities, all_bounds = get_bounds(P)

    # Store the label statistics alongside the quantities returned by get_bounds.
    all_quantities['entropy'] = ent
    all_quantities['n_classes'] = len(counts)
    results['mimic'] = {'all_quantities': all_quantities, 'all_bounds': all_bounds}
    print('mimic', all_bounds)

    # Persist the updated cache immediately.
    with open('all_results.pickle', 'wb') as f:
        pickle.dump(results, f)

In [36]:
# enrico: bounds for the image + wireframe modality pair, cached under 'enrico'.

if 'enrico' not in results:
    # Load the dataset only when it is actually needed, so a cached run does
    # not pay for (or fail on) reading the cluster pickle.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open('enrico_cluster.pickle', 'rb') as f:
        data = pickle.load(f)
    x1 = np.array(data['test']['image'])
    x2 = np.array(data['test']['wireframe'])
    y = np.array(data['test']['labels'])

    # Empirical class frequencies of the test labels and their entropy (base 2).
    _, counts = np.unique(y, return_counts=True)
    pk = counts / np.sum(counts)
    ent = entropy(pk, base=2)

    # Helpers from `bounds`; presumably P is the joint distribution over
    # (x1, x2, y) built from the samples -- confirm against bounds.py.
    P, maps = convert_data_to_distribution(x1, x2, y)
    all_quantities, all_bounds = get_bounds(P)

    # Store the label statistics alongside the quantities returned by get_bounds.
    all_quantities['entropy'] = ent
    all_quantities['n_classes'] = len(counts)
    results['enrico'] = {'all_quantities': all_quantities, 'all_bounds': all_bounds}
    print('enrico', all_bounds)

    # Persist the updated cache immediately.
    with open('all_results.pickle', 'wb') as f:
        pickle.dump(results, f)

In [37]:
# Re-read the cached results from disk and print, for every stored entry,
# its name, the R / U2 / U1 / S quantities (one per line, in that order),
# its bounds dict, and a blank separator line.
with open('all_results.pickle', 'rb') as f:
    data = pickle.load(f)

for dataset, entry in data.items():
    print(dataset)
    quantities = entry['all_quantities']
    for q in ('R', 'U2', 'U1', 'S'):
        print(quantities[q])
    print(entry['all_bounds'])
    print()

sarcasm_vision_text
0.16099189806501513
0.012885638908304407
0.018920194901199654
0.49403082275072646
{'lower_R': 0.035792193369361236, 'lower_U1': 0.029758818482420252, 'lower_U2': 0.018572587075325386, 'lower_diff': 0.0727645267110582, 'upper': 0.7919976015823501}

sarcasm_vision_audio
0.16227887205457028
0.011598910990317037
0.02860549232864884
0.31061942091086114
{'lower_R': 0.005208499062894667, 'lower_U1': -0.011797147241049966, 'lower_U2': 0.038803897539506554, 'lower_diff': 0.05787696275263708, 'upper': 0.7823120580833305}

sarcasm_audio_text
0.1771434108991574
0.01374067109713172
0.0027702216436247583
0.5083906674354036
{'lower_R': 0.05735795841094382, 'lower_U1': 0.06833116294873684, 'lower_U2': 0.04814128370518645, 'lower_diff': 0.11104172930663579, 'upper': 0.7911410298169537}

humor_vision_text
0.012326482789573602
0.029332138602113696
6.345698359307539e-16
0.20928762498147135
{'lower_R': -0.008161148287795461, 'lower_U1': 0.021170990311625535, 'lower_U2': -0.0241817811425