In [1]:
import numpy as np
from scipy.special import expit as sigmoid
Phi = sigmoid(np.load('train.90.npy'))

In [2]:
N, D = Phi.shape

In [3]:
import pickle
with open('cache/df_train.90.pickle', mode='rb') as h:
    df = pickle.load(h)

In [4]:
df.head()

Unnamed: 0,img,label,size_ratio,pseudo_label,edges
0,2009_001061.jpg,10,0.995339,"(22, 23)","(9, 14)"
1,2008_006600.jpg,8,0.007645,"(22, 23)","(9, 12)"
2,2011_002107.jpg,10,0.947467,"(22, 23)","(9, 14)"
3,2008_005436.jpg,16,0.606795,"(24, 26)","(17, 21)"
4,2009_002712.jpg,6,0.468864,"(22, 23)","(9, 10)"


In [5]:
np.count_nonzero(np.argmax(Phi[:, :20], axis=1) == df['label'])

1649

In [6]:
E = 24
theta = np.ones(D + E, dtype=float)

In [7]:
with open('cache/hex.pickle', mode='rb') as h:
    hex_data = pickle.load(h)
state_space = hex_data['state_space']  # full state space
state_space = np.array(filter(lambda x: x[:20].any(), state_space))  # let S=20 for debug
S = len(state_space)

In [8]:
H_e = hex_data['H_e']
state_edges = hex_data['state_edges']

In [22]:
# N * S * D
# N: number of images
# S: state space size
# D: dimension of phi
X = np.tile(Phi[:, None, :], (1, S, 1))
Y = np.tile(state_space, (N, 1, 1))
unary = X * Y + (1 - X) * np.logical_not(Y)

In [23]:
# N * S * E
def pairwise_step(phi):
    pw = np.zeros((S, E), dtype=float)
    for i in range(0, S):
        for j in state_edges[i]:
            pw[i, j] = phi[H_e[j]].prod()  # * t[j]
    return pw
pairwise = np.array(map(pairwise_step, Phi), dtype=float)

In [43]:
W = np.tile(theta[None, None, :D], (N, S, 1))
T = np.tile(theta[None, None, D:], (N, S, 1))
log_P_tilde = (W * unary).sum(axis=2) + (T * pairwise).sum(axis=2)
Z = np.exp(log_P_tilde).sum(axis=1)
# P_norm = P_tilde / P_tilde.sum(axis=1)

In [47]:
log_P_norm = log_P_tilde - np.tile(np.log(Z)[:, None], (1, S))

In [49]:
log_P_norm[0]

array([-4.99927762, -2.03498703, -6.01084823, -5.98001463, -4.97665503,
       -2.00590618, -1.99631843, -6.01802779, -3.99078483, -4.00389729,
       -6.01704073, -5.99997567, -2.03077958, -5.99449136, -2.04092165,
       -6.0177941 , -6.01195582, -5.99519743, -2.02183859, -2.01873598])

In [42]:
P_tilde.sum(axis=1)

array([ 470.3519977 ,  473.28946369,  471.61457744, ...,  475.3842826 ,
        473.13354113,  472.99636686])

In [41]:
P_tilde.sum(axis=1)

array([ 470.3519977 ,  473.28946369,  471.61457744, ...,  475.3842826 ,
        473.13354113,  472.99636686])

In [32]:
# nabla_w
C = 1.0  # must be float. 1 for debug
from_data = unary[np.arange(N), df['label'], :]
from_Z = (np.tile(P_norm[:, :, None], (1, 1, D)) * unary).sum(axis=1)
nabla_w = (from_data - from_Z).sum(axis=0) * (-C/N)

In [33]:
nabla_w

array([-0.97153319, -0.9757649 , -0.8979649 , -0.97476427, -0.97277348,
       -0.91973563, -0.95542345, -0.96922843, -0.9301918 , -0.94603137,
       -0.91776871, -0.88804788, -0.96216467, -0.95573428, -0.94928916,
       -0.95633447, -0.89541982, -0.96874516, -0.91567007, -0.9511269 ,
       -0.69576526, -0.76992989, -0.47067425, -0.59412211, -0.45409491,
       -0.7959779 , -0.72634569])

In [14]:
# nabla_t
def nabla_t_step(e):
    state_mask = np.zeros((N, S, E), dtype=bool)
    state_mask[:, filter(lambda i: e in state_edges[i], range(0, S)), :] = 1
    from_data = (pairwise * state_mask)[np.arange(N), df['label'], :].sum()
    from_Z = (np.tile(P_norm[:, :, None], (1, 1, E)) * pairwise * state_mask).sum()
    return from_data - from_Z

In [34]:
nabla_t = np.array(map(nabla_t_step, range(0, E)), dtype=float) * (-C/N)

In [35]:
nabla_t

array([ -3.48785026e-06,  -1.87488974e-04,  -1.25813926e-05,
        -2.41897860e-03,  -0.00000000e+00,  -2.37321180e-03,
        -0.00000000e+00,  -0.00000000e+00,  -2.12864539e-05,
        -1.08380258e-01,  -0.00000000e+00,  -7.12300095e-02,
        -2.79085163e-02,  -8.65440937e-03,  -0.00000000e+00,
        -2.40948398e-04,  -1.20501818e-03,  -6.88651929e-02,
        -1.70641045e-05,  -8.83476403e-05,  -1.06504823e-03,
        -0.00000000e+00,  -0.00000000e+00,  -6.51296220e-02])

In [38]:
nabla = np.concatenate((nabla_w, nabla_t)) + theta

In [39]:
nabla

array([ 0.02846681,  0.0242351 ,  0.1020351 ,  0.02523573,  0.02722652,
        0.08026437,  0.04457655,  0.03077157,  0.0698082 ,  0.05396863,
        0.08223129,  0.11195212,  0.03783533,  0.04426572,  0.05071084,
        0.04366553,  0.10458018,  0.03125484,  0.08432993,  0.0488731 ,
        0.30423474,  0.23007011,  0.52932575,  0.40587789,  0.54590509,
        0.2040221 ,  0.27365431,  0.99999651,  0.99981251,  0.99998742,
        0.99758102,  1.        ,  0.99762679,  1.        ,  1.        ,
        0.99997871,  0.89161974,  1.        ,  0.92876999,  0.97209148,
        0.99134559,  1.        ,  0.99975905,  0.99879498,  0.93113481,
        0.99998294,  0.99991165,  0.99893495,  1.        ,  1.        ,
        0.93487038])

In [53]:
objective = log_P_norm[np.arange(N), df['label']].sum() * (-C/N) + np.dot(theta, theta) / 2

In [54]:
objective

29.373343829517967