# 1.1 Data

In [111]:
# Imports
import numpy as np
from glob import glob
from os import path
from scipy.special import comb
from scipy.optimize import minimize
from scipy.stats import binom

data_dir = "data"
# glob() does not guarantee any ordering, so sort the paths to keep the
# subject index -> file mapping stable across runs and machines.
file_paths = sorted(glob(path.join(data_dir, "*.txt")))
if not file_paths:
    # Fail here with a clear message instead of on the shape unpacking below.
    raise FileNotFoundError(f"No .txt data files found in {data_dir!r}")

# `data` has shape N x M x K: one M x K table per file.
# Layout noted for this dataset: 5 files x 7 rows x 5 columns
#   Row 1:    audio
#   Row 2:    visual
#   Rows 3-7: visual going from 'b' (row 3) to 'd' (row 7)
#   Columns:  audio going from 'b' (col 1) to 'd' (col 5)
data = np.array([np.loadtxt(fname) for fname in file_paths], dtype=np.int64)
N, M, K = data.shape

# Views into `data`, split by the row layout described above.
AUDIO_DATA       = data[:, 0, :]
VISUAL_DATA      = data[:, 1, :]
AUDIOVISUAL_DATA = data[:, 2:, :]

In [112]:
def baseline_softmax(theta):
    """Map real-valued logit(s) to probabilities in [0, 1].

    Computes exp(theta) / (exp(theta) + 1), i.e. a two-class softmax against
    a baseline class whose logit is fixed at 0 (the logistic sigmoid).

    Parameters
    ----------
    theta : float or array_like
        Logit(s); arrays are transformed element-wise.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        One probability per input value.
    """
    # exp(theta) / (exp(theta) + 1) == exp(-log(1 + exp(-theta))).
    # logaddexp evaluates log(exp(0) + exp(-theta)) without overflowing, so a
    # large |theta| saturates to 0 or 1 instead of yielding inf / inf = nan.
    return np.exp(-np.logaddexp(0, np.negative(theta)))

In [113]:
def binomial_pmf(k, n, p):
    """Binomial probability of exactly `k` successes in `n` trials.

    Evaluates C(n, k) * p**k * (1 - p)**(n - k). The arguments are passed
    straight to `comb` and `np.power`, so array inputs are handled
    element-wise.
    """
    n_arrangements = comb(n, k)
    success_part = np.power(p, k)
    failure_part = np.power(1 - p, n - k)
    return n_arrangements * success_part * failure_part

In [117]:
# Sanity check: the hand-written binomial_pmf must agree with scipy's binom
# on the log scale for a probability table shaped like one subject's data.
# The probabilities are built here from fixed logits so that this cell does
# not depend on p_a / p_v / p_av, which in the visible notebook are only
# assigned by the fitting loop in a later cell (undefined on a fresh kernel).
check_logits = np.linspace(-2.0, 2.0, K)
check_p_a = baseline_softmax(check_logits).reshape(-1, 1)
check_p_v = baseline_softmax(check_logits).reshape(-1, 1)
check_p_av = (check_p_a @ check_p_v.T) / (
    check_p_a @ check_p_v.T + (1 - check_p_a) @ (1 - check_p_v).T
)
# 24 is the binomial trial count, the same value objective_function uses.
args = (data[0], 24, np.vstack([check_p_a.T, check_p_v.T, check_p_av]))
np.isclose(binom.logpmf(*args), np.log(binomial_pmf(*args)))

array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

In [153]:
def objective_function(theta, subject_data, n_trials=24):
    """Negative binomial log-likelihood of one subject's count table.

    The first K entries of `theta` are treated as audio logits and the
    remaining entries as visual logits (K is the module-level column count).
    Each logit is mapped to a probability with `baseline_softmax`, and every
    audio/visual pair is combined as

        p_av = p_a * p_v / (p_a * p_v + (1 - p_a) * (1 - p_v))

    The prediction table stacks the audio row, the visual row and the
    combined block, and is compared cell by cell with `subject_data`.

    Parameters
    ----------
    theta : array_like
        Audio logits followed by visual logits.
    subject_data : array_like
        Count table for one subject; must broadcast against the stacked
        prediction table.
    n_trials : int, optional
        Number of binomial trials behind every count. Defaults to 24, the
        value that was previously hard-coded here.

    Returns
    -------
    float
        Sum over all cells of -log P(count | n_trials, p).
    """
    theta = np.asarray(theta, dtype=float)
    theta_a = theta[0:K]
    theta_v = theta[K:]

    # baseline_softmax works element-wise, so no per-element loop is needed.
    p_a = baseline_softmax(theta_a).reshape(-1, 1)
    p_v = baseline_softmax(theta_v).reshape(-1, 1)

    # Outer product for all audio/visual combinations: rows follow p_a,
    # columns follow p_v.
    # NOTE(review): the data-loading comments describe the audiovisual rows as
    # indexed by the visual level and the columns by the audio level, which
    # would be the transpose of this table. Confirm the orientation against
    # the data files.
    joint = p_a @ p_v.T
    p_av = joint / (joint + (1 - p_a) @ (1 - p_v).T)

    # Evaluate the log-pmf directly: np.log(binomial_pmf(...)) underflows to
    # log(0) = -inf once a probability gets close enough to 0 or 1, which
    # makes the objective infinite for the optimizer.
    probabilities = np.vstack([p_a.T, p_v.T, p_av])
    log_likelihoods = binom.logpmf(subject_data, n_trials, probabilities)

    return -(log_likelihoods.sum())

# Fit the 2*K logits independently for each subject and print the fitted
# parameters next to the probabilities they imply.
# A seeded generator makes the random starting points, and therefore the
# reported fits, repeatable across Restart & Run All.
rng = np.random.default_rng(0)

theta_set = []
for i in range(N):  # one fit per loaded file rather than a hard-coded 5
    theta = rng.standard_normal(K * 2)
    # `args` is meant to be a tuple of extra arguments; `(data[i])` without
    # the trailing comma is just the array itself.
    opt_result = minimize(objective_function, theta, args=(data[i],))
    objective, theta_a, theta_v, success = (
        opt_result.fun,
        opt_result.x[0:K],
        opt_result.x[K:],
        opt_result.success,
    )

    theta_set.append(np.concatenate([theta_a, theta_v]))
    print("Subject ", i)
    print("Converged:",success, "NLL:",objective)
    print("theta_a", theta_a)
    print("theta_v", theta_v)
    # Rebuild the fitted probabilities with the same formulas that
    # objective_function uses (baseline_softmax is element-wise).
    # NOTE(review): p_av has audio along rows and visual along columns, while
    # the data-loading comments describe the audiovisual rows as visual and
    # the columns as audio. Confirm the orientation.
    p_a = baseline_softmax(theta_a).reshape(-1, 1)
    p_v = baseline_softmax(theta_v).reshape(-1, 1)
    p_av = (p_a @ p_v.T) / (p_a @ p_v.T + (1 - p_a) @ (1 - p_v).T)
    print("===")
    print("p_a", p_a.flatten())
    print("p_v", p_v.flatten())
    print("p_av\n", np.round(p_av, 3))
    print("===")
    print("data")
    # Despite the "data" label, this prints the binomial probability of each
    # observed audiovisual count under the fitted p_av, not the raw counts.
    print(np.round(binom.pmf(data[i][2:], 24, p_av), 3))
    print("\n\n")

Subject  0
Converged: True NLL: 70.25765786050867
theta_a [-4.78771456 -1.95605347 -0.25823521  1.43526758  1.67202756]
theta_v [-1.11834348  0.51165125  1.54670807  4.28161025  5.03519658]
===
p_a [0.00826264 0.12389479 0.43579758 0.80772074 0.84184596]
p_v [0.24631868 0.62519349 0.82443777 0.98636801 0.99353712]
p_av
 [[0.003 0.014 0.038 0.376 0.562]
 [0.044 0.191 0.399 0.911 0.956]
 [0.202 0.563 0.784 0.982 0.992]
 [0.579 0.875 0.952 0.997 0.998]
 [0.635 0.899 0.962 0.997 0.999]]
===
data
[[0.937 0.718 0.398 0.15  0.16 ]
 [0.016 0.044 0.097 0.103 0.198]
 [0.025 0.138 0.196 0.653 0.818]
 [0.163 0.017 0.216 0.924 0.964]
 [0.143 0.059 0.375 0.94  0.971]]



Subject  1
Converged: True NLL: 41.79765647023454
theta_a [-6.5547331  -1.91458432  0.34489198  3.09946669  3.8780471 ]
theta_v [-1.63230406  1.07303258  2.55000152  4.14860534  6.12230231]
===
p_a [0.00142134 0.1284667  0.58537835 0.95687074 0.97972825]
p_v [0.16351497 0.7451732  0.92757362 0.98445892 0.9978114 ]
p_av
 [[0.    0.00

In [161]:
# Emit one LaTeX table row per entry of p_set: "Subject <n> & v1 & v2 ...",
# with consecutive rows separated by a LaTeX line break (\\) and a newline.
# NOTE(review): p_set is not defined in the visible cells; confirm it is
# created before this cell on a fresh kernel.
latex_rows = (
    f"Subject {subject_no} & " + " & ".join(np.char.mod('%.2f', values))
    for subject_no, values in enumerate(p_set, start=1)
)
print(*latex_rows, sep="\\\\\n")

Subject 1 & -4.79 & -1.96 & -0.26 & 1.44 & 1.67 & -1.12 & 0.51 & 1.55 & 4.28 & 5.04\\
Subject 2 & -6.55 & -1.91 & 0.34 & 3.10 & 3.88 & -1.63 & 1.07 & 2.55 & 4.15 & 6.12\\
Subject 3 & -2.96 & -1.13 & -0.11 & 1.30 & 1.61 & -1.53 & -0.37 & 0.87 & 3.58 & 5.65\\
Subject 4 & -2.66 & -1.97 & -1.27 & 0.83 & 1.69 & -3.70 & -2.17 & -1.18 & 2.63 & 4.25\\
Subject 5 & -4.43 & -2.16 & -0.98 & 0.65 & 1.05 & -3.57 & -1.19 & 1.07 & 4.38 & 7.72
