In [1]:
import csv
import numpy as np
import re
import collections
import sys

# Module-level store that load_Q2_data() fills with letter_id -> record entries.
train_data = dict()

# Input files, given relative to the directory the notebook is run from.
Q2_TRAIN_PATH = "data/train.txt"
Q2_MODEL_PATH = "data/model.txt"

In [2]:
def load_Q2_model(path=None, n_labels=26, n_features=128):
    """Read the CRF parameters from a text file holding one number per line.

    The first ``n_labels * n_features`` values are the node weights, laid out
    row by row (one row per label). The following ``n_labels * n_labels``
    values are the transition weights, laid out column by column
    (column-major order).

    Args:
        path: file to read; defaults to the module-level ``Q2_MODEL_PATH``.
        n_labels: number of labels (26 by default).
        n_features: number of features per letter (128 by default).

    Returns:
        ``(Wj, Tij)`` as float32 arrays of shape ``(n_labels, n_features)``
        and ``(n_labels, n_labels)``.

    Raises:
        ValueError: if the file does not contain exactly the expected number
            of values, or a line is not a number.
    """
    if path is None:
        path = Q2_MODEL_PATH
    with open(path, 'r') as f:
        # Skip blank lines (typically a trailing newline at the end of the
        # file) so they do not break the float conversion below.
        values = [line.strip() for line in f if line.strip()]

    n_node = n_labels * n_features
    n_expected = n_node + n_labels * n_labels
    if len(values) != n_expected:
        raise ValueError(
            "expected " + str(n_expected) + " values in " + str(path)
            + ", found " + str(len(values)))

    Wj = np.array(values[:n_node], dtype=np.float32).reshape((n_labels, n_features))
    # order='F': the transition block is stored column by column.
    Tij = np.array(values[n_node:], dtype=np.float32).reshape((n_labels, n_labels), order='F')
    return Wj, Tij
    
def load_Q2_data(path=None):
    """Parse the training file into an ordered mapping of letter records.

    Each non-blank line must contain one lowercase letter and at least four
    integers. The first lowercase character on the line is taken as the
    label; the integers, in order of appearance, are read as
    ``letter_id, next_id, word_id, position`` followed by the pixel values.
    Integers may carry a leading minus sign (e.g. ``-1``).

    Args:
        path: file to read; defaults to the module-level ``Q2_TRAIN_PATH``.

    Returns:
        ``collections.OrderedDict`` mapping
        ``letter_id -> [letter, next_id, word_id, position, pixel_array]``
        in file order. The same object is also bound to the module-level
        ``train_data``; each call replaces it, so entries from an earlier
        load of a different file do not linger.

    Raises:
        ValueError: if a non-blank line has no lowercase letter or fewer
            than four integers.
    """
    global train_data
    if path is None:
        path = Q2_TRAIN_PATH

    records = collections.OrderedDict()
    with open(path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue  # tolerate blank / trailing lines
            letters = re.findall(r'[a-z]', line)
            # '-?' keeps the sign; a bare r'\d+' would turn -1 into 1.
            fields = list(map(int, re.findall(r'-?\d+', line)))
            if not letters or len(fields) < 4:
                raise ValueError(
                    "malformed record on line " + str(line_no) + " of " + str(path))
            letter_id, next_id, word_id, pos = fields[:4]
            # store letter in dictionary as letter_id -> letter, next_id, word_id, position, pixel_values
            records[letter_id] = [letters[0], next_id, word_id, pos, np.array(fields[4:])]

    train_data = records
    return train_data

In [3]:
# for reference
# def calc_partition_fn(X_train, Wj, Tij):
#     weights = np.zeros([X_train.shape[0], Wj.shape[0]])
#     alpha = np.zeros([X_train.shape[0], Wj.shape[0]]) # init alpha to zeros
#     alpha[0,] = 1 # init first Xj to 1
#     print(alpha[0,])
#     # max_i = np.amax(np.exp(np.dot(X_train, Wj.T)), axis=0) # get M = max(<Ws, Xj>)
#     # calculating max
#     values = np.zeros([X_train.shape[0], Wj.shape[0]])
#     for j in range(Wj.shape[0]):
#         values[0, j] = np.exp(np.dot(X_train[0], Wj[j]))
#     for j in range(1, X_train.shape[0]):
#         for s in range(1, Wj.shape[0] - 1):
#             values[j, s] = np.exp(np.dot(X_train[j], Wj[s]))
#     max_i = np.amax(values, axis=0)
#     print("max_i: " + str(max_i) + " shape: " + str(max_i.shape))

#     for j in range(1, X_train.shape[0]):
#         for s in range(1, Wj.shape[0] - 1):
#             # phi = max_i[s] + np.dot(X_train[j], Wj[s]) - max_i[s]
#             phi = np.exp(np.dot(X_train[j], Wj[s]) - max_i[s])
#             alpha[j, s] = alpha[j-1, s] * phi
            
#             # print("alpha[" + str(j) + ', ' + str(s) + ']: ' + str(alpha[j, s]))
#             # print("phi [" + str(j) + ", " + str(s) + "]: " + str(phi))
#     alpha[j,] += max_i
#     for j in range(1, X_train.shape[0]):
#         print("a[" + str(j) + "]: " + str(alpha[j,]))

In [4]:
# # write intermediate forward pass
# def forward_pass(X_train, Wj, Tij, alpha, j): # j=1 to m, maybe reuse alpha calculated in partition_fn?
#     for s in range(1, Wj.shape[0]):
#         phi = np.exp(np.dot(X_train[j][1:], Wj[s]))
#         alpha[j, s] = alpha[j-1, s] * phi
#     return alpha

# # write intermediate backward pass
# def backward_pass(X_train, Wj, Tij, beta): # j=0 to m-1
#     for s in range(1, Wj.shape[0]):
#         phi = np.exp(np.dot(X_train[j+1][1:], Wj[s]))
#         beta[j, s] = beta[j+1, s] * phi
#     return beta

# write results to .txt
# result = open(r'z.txt', 'w+')
# result.write('alpha[' + str(j) + ']: ' + str([alpha[j, s] for s in range(1, Wj.shape[0])]) + '\n')
# result.close()

# write entire alpha calc
def forward_pass(X_train, Wj, Tij, log_space=False):
    """Compute the forward messages of a linear-chain CRF, word by word.

    Row ``j`` of ``X_train`` is one letter: column 0 is its word id and the
    remaining columns are its features. Consecutive rows with the same word
    id belong to the same word.

    For the first letter of a word ``alpha[j, s] = 1``. Otherwise

        alpha[j, s] = sum_a alpha[j-1, a] * exp(<Wj[a], x_{j-1}> + Tij[a, s])

    so the message sums over every label ``a`` of the previous letter and
    does not include the node score of letter ``j`` itself. ``Tij[a, b]`` is
    read as the score of label ``a`` being followed by label ``b``.

    Args:
        X_train: array of shape ``(n, 1 + n_features)``.
        Wj: node weights of shape ``(n_labels, n_features)``.
        Tij: transition weights of shape ``(n_labels, n_labels)``.
        log_space: if True, return ``log(alpha)`` instead of ``alpha``; use
            this when the plain values would overflow.

    Returns:
        Array of shape ``(n, n_labels)``.
    """
    X_train = np.asarray(X_train)
    n_rows, n_labels = X_train.shape[0], Wj.shape[0]
    word_ids = X_train[:, 0]
    # node[j, s] = <Wj[s], x_j> for every letter and label at once.
    node = np.dot(X_train[:, 1:].astype(np.float64),
                  np.asarray(Wj, dtype=np.float64).T)
    trans = np.asarray(Tij, dtype=np.float64)

    # Accumulate in the log domain so long words do not overflow midway.
    log_alpha = np.zeros([n_rows, n_labels])  # log(1) == 0 at word starts
    for j in range(1, n_rows):
        if word_ids[j] != word_ids[j - 1]:
            continue  # first letter of a new word keeps alpha == 1
        # terms[a, s] = log alpha[j-1, a] + node[j-1, a] + Tij[a, s]
        terms = (log_alpha[j - 1] + node[j - 1])[:, np.newaxis] + trans
        peak = terms.max(axis=0)
        peak = np.where(np.isfinite(peak), peak, 0.0)  # avoid inf - inf
        log_alpha[j] = peak + np.log(np.exp(terms - peak).sum(axis=0))

    return log_alpha if log_space else np.exp(log_alpha)

# write entire beta calc
def backward_pass(X_train, Wj, Tij, log_space=False):
    """Compute the backward messages of a linear-chain CRF, word by word.

    Row ``j`` of ``X_train`` is one letter: column 0 is its word id and the
    remaining columns are its features. Consecutive rows with the same word
    id belong to the same word.

    For the last letter of a word ``beta[j, s] = 1``. Otherwise

        beta[j, s] = sum_b exp(Tij[s, b] + <Wj[b], x_{j+1}>) * beta[j+1, b]

    so the message sums over every label ``b`` of the next letter and does
    not include the node score of letter ``j`` itself. ``Tij[a, b]`` is read
    as the score of label ``a`` being followed by label ``b``.

    Args:
        X_train: array of shape ``(n, 1 + n_features)``.
        Wj: node weights of shape ``(n_labels, n_features)``.
        Tij: transition weights of shape ``(n_labels, n_labels)``.
        log_space: if True, return ``log(beta)`` instead of ``beta``; use
            this when the plain values would overflow.

    Returns:
        Array of shape ``(n, n_labels)``.
    """
    X_train = np.asarray(X_train)
    n_rows, n_labels = X_train.shape[0], Wj.shape[0]
    word_ids = X_train[:, 0]
    # node[j, s] = <Wj[s], x_j> for every letter and label at once.
    node = np.dot(X_train[:, 1:].astype(np.float64),
                  np.asarray(Wj, dtype=np.float64).T)
    trans = np.asarray(Tij, dtype=np.float64)

    # Accumulate in the log domain so long words do not overflow midway.
    log_beta = np.zeros([n_rows, n_labels])  # log(1) == 0 at word ends
    for j in range(n_rows - 2, -1, -1):
        if word_ids[j] != word_ids[j + 1]:
            continue  # last letter of a word keeps beta == 1
        # terms[s, b] = Tij[s, b] + node[j+1, b] + log beta[j+1, b]
        terms = trans + (node[j + 1] + log_beta[j + 1])[np.newaxis, :]
        peak = terms.max(axis=1)
        peak = np.where(np.isfinite(peak), peak, 0.0)  # avoid inf - inf
        log_beta[j] = peak + np.log(np.exp(terms - peak[:, np.newaxis]).sum(axis=1))

    return log_beta if log_space else np.exp(log_beta)
    
# write fn to calculate p(y|X)
def conditional_prob(X_train, Wj, Tij, out_path='dist.txt'):
    """Compute a per-letter label distribution and optionally write it out.

    For every row ``j`` and label ``s`` the unnormalised weight is

        alpha[j, s] * exp(<Wj[s], x_j>) * beta[j, s]

    with ``alpha`` / ``beta`` taken from ``forward_pass`` / ``backward_pass``.
    Each row is then normalised to sum to 1. The formula treats both
    messages as excluding the letter's own node score, which is what their
    initial value of 1 at word boundaries implies; under that reading
    ``dist[j, s]`` is the posterior marginal p(y_j = s | word).

    Args:
        X_train: array of shape ``(n, 1 + n_features)``; column 0 is the
            word id, the remaining columns are the letter's features.
        Wj: node weights of shape ``(n_labels, n_features)``.
        Tij: transition weights of shape ``(n_labels, n_labels)``.
        out_path: file that receives one ``dist[j]: [...]`` line per row;
            pass ``None`` to skip writing. Defaults to ``'dist.txt'``.

    Returns:
        Array ``dist`` of shape ``(n, n_labels)`` whose rows sum to 1.
    """
    X_train = np.asarray(X_train)
    alpha = forward_pass(X_train, Wj, Tij)
    beta = backward_pass(X_train, Wj, Tij)
    # node[j, s] = <Wj[s], x_j>
    node = np.dot(X_train[:, 1:].astype(np.float64),
                  np.asarray(Wj, dtype=np.float64).T)

    # Combine in the log domain and subtract the row maximum before
    # exponentiating, so the three-way product itself cannot overflow.
    with np.errstate(divide='ignore'):
        log_weight = np.log(alpha) + node + np.log(beta)
    log_weight = log_weight - log_weight.max(axis=1, keepdims=True)
    dist = np.exp(log_weight)
    dist = dist / dist.sum(axis=1, keepdims=True)

    if out_path is not None:
        # 'with' closes the file even if a write fails part-way through.
        with open(out_path, 'w') as result:
            for j in range(dist.shape[0]):
                result.write('dist[' + str(j) + ']: ' + str(dist[j].tolist()) + '\n')
    return dist

# calculate log p(y|X) without exploding weights
# calculate grad(log p(y|X)) wrt Wj
# calculate grad(log p(y|X)) wrt Tij

In [6]:
# Load the model parameters and the parsed training letters.
Wj, Tij = load_Q2_model()
train_data = load_Q2_data()
t_list = list(train_data.items())

n_letters = len(t_list)
C_train = np.empty([26], dtype=np.float64) # allocated here, not filled in this cell
X_train = np.empty([n_letters, 129], dtype=np.int16) # column 0: word id, columns 1..128: pixels
y_train = np.empty([n_letters, 26], dtype=np.int8) # label index, repeated across all 26 columns

# Each item is (letter_id, [letter, next_id, word_id, position, pixels]).
for row, (_letter_id, (letter, _next_id, word_id, _pos, pixels)) in enumerate(t_list):
    y_train[row, :] = ord(letter[0]) - 97 # 97 == ord('a'), so 'a' -> 0 ... 'z' -> 25
    X_train[row, 0] = word_id
    X_train[row, 1:] = pixels

for caption, array in (("y_train shape: ", y_train),
                       ("X_train.shape: ", X_train),
                       ("Wj.shape: ", Wj),
                       ("Tij.shape: ", Tij)):
    print(caption + str(array.shape))

conditional_prob(X_train, Wj, Tij)
# print(forward_pass(X_train, Wj, Tij))

# partition_fn(X_train, Wj, Tij)
# calc_partition_fn(X_train, Wj, Tij)

y_train shape: (25953, 26)
X_train.shape: (25953, 129)
Wj.shape: (26, 128)
Tij.shape: (26, 26)
