# 1.1 Data

In [1]:
# Imports
import numpy as np
from glob import glob
from os import path
from scipy.special import softmax
from scipy.optimize import minimize, LinearConstraint, Bounds
from scipy.stats import binom

data_dir = "data"
file_paths = glob(path.join(data_dir, "*.txt"))

# N x M x K
# 5 FILES x 7 ROWS  x 5 COLUMNS
# ROW 1: Audio 
# ROW 2: Visual
# ROW 3-7: Visual going from 'b' (row 3) to 'd' (row 7)
# Columns: Audio from 'b' (col 1) to 'd' (col 5)
data = np.array([np.loadtxt(fname) for fname in file_paths])
N, M, K = data.shape

AUDIO_DATA       = data[:, 0, :]
VISUAL_DATA      = data[:, 1, :]
AUDIOVISUAL_DATA = data[:, 2:, :]

In [18]:
def objective_function(theta, data):
    likelihood = 0
    theta_a = theta[0:K]
    theta_v = theta[K: ]

    p_a = softmax(theta_a).reshape(-1,1)
    p_v = softmax(theta_v).reshape(-1,1)
    
    # Outer product for all combinations
    p_av = (p_a @ p_v.T) / (p_a @ p_v.T + (1 - p_a) @ (1 - p_v).T)

    likelihood += binom.pmf(data[0 ], 24, p_a ).sum()
    likelihood += binom.pmf(data[1 ], 24, p_v ).sum()
    likelihood += binom.pmf(data[2:], 24, p_av).sum()

    return -np.log(likelihood)

theta = np.concatenate([AUDIO_DATA[0], VISUAL_DATA[0]])
opt_result = minimize(objective_function, theta, args=(data[0]))
opt_result.fun, np.round(softmax(opt_result.x[0:K]), 3), np.round(softmax(opt_result.x[K:]), 3), opt_result.success

(-1.0986122899482371,
 array([0. , 0. , 0. , 0.5, 0.5]),
 array([0.   , 0.   , 0.333, 0.333, 0.333]),
 True)