In [216]:
import numpy as np
import pandas as pd
import csv
import itertools
from scipy.special import logsumexp
import scipy.optimize as sciop
import time

In [2]:
PATH = './'

In [3]:
transition_gradient = pd.read_csv(PATH + 'model/transition-gradient.txt', delimiter=' ', header=None)
transition_params = pd.read_csv(PATH + 'model/transition-params.txt', delimiter=' ', header=None)
feature_gradient = pd.read_csv(PATH + 'model/feature-gradient.txt', delimiter=' ', header=None)
feature_params = pd.read_csv(PATH + 'model/feature-params.txt', delimiter=' ', header=None)

In [4]:
train_labels = open(PATH + 'data/train_words.txt').read().splitlines()
test_labels = open(PATH + 'data/test_words.txt').read().splitlines()

In [5]:
AllChars = "etainoshrd"
CharMapping = \
{'e': 0,
 't': 1,
 'a': 2,
 'i': 3,
 'n': 4,
 'o': 5,
 's': 6,
 'h': 7,
 'r': 8,
 'd': 9
}

In [6]:
pd.set_option('display.float_format', '{:.4e}'.format)
# pd.reset_option('display.float_format')

In [9]:
def computePotentials(wordFeats, featureParams):
    phis = []
    for letter_id in range(len(wordFeats)):
        phis.append(np.sum(np.multiply(featureParams, wordFeats[letter_id, :]), axis=1))
    return np.array(phis).T

In [11]:
def computeEnergy(wordFeats, wordLabel, Potentials, transitionParams):
    wordLabel_CharIdx = [CharMapping[x] for x in wordLabel]
    sum_phis = np.sum(Potentials[wordLabel_CharIdx, range(0, len(wordFeats))])
    
    arr = [(x, y) for (x, y) in zip(wordLabel_CharIdx[:-1], wordLabel_CharIdx[1:])]
    transition_param_sum = 0
    for elem in arr:
        transition_param_sum += transitionParams.iloc[elem]
    return transition_param_sum + sum_phis
    

In [84]:
####### Nodes are 1 indexed
def messageDict(word, featParams, transParams, wordPhis=None):
    """ Returns Messages from one node to another Dictionaries in log-space """
    ########### Compute Word Potentials #########
    if wordPhis is None:
        wordPhis = computePotentials(word, featParams)
        
    logM = dict()
    logM[len(word) + 1, len(word)] = np.zeros(len(AllChars))
    logM[0, 1] = np.zeros(len(AllChars))
    
    #### Backward Messages ####
    for fromNode in range(len(word), 1, -1):
        logM[fromNode, fromNode - 1] = logsumexp(wordPhis[:, fromNode-1] + \
                              logM[fromNode + 1, fromNode] + \
                              transParams.values, axis = 1)    
        
    #### Forward Messages ####
    for fromNode in range(1, len(word)):
        logM[fromNode, fromNode + 1] = logsumexp(wordPhis[:, fromNode-1] + \
                              logM[fromNode - 1, fromNode] + \
                                transParams.values, axis=1)    
    return logM

def singleVariableMarginal(node, word, featParams, transParams, wordPhis=None, logPartition=None):
    if wordPhis is None:
        wordPhis = computePotentials(word, featParams)
    
    _messageDict = messageDict(word, featParams, transParams, wordPhis)
    if (node > 0):
        m_left = _messageDict[node-1, node]
    if (node < len(word)+1):
        m_right = _messageDict[node+1, node]
    
    total_incoming_message = m_left + m_right
    
    ############
    if (logPartition is None):
        logPartition = logsumexp(wordPhis[:, node-1] + total_incoming_message)
        
    logMarginalProbs = wordPhis[:, node-1] + total_incoming_message - logPartition
    return np.exp(logMarginalProbs)

def pairwiseMarginal(nodeL, nodeR, word, featParams, transParams, logPartition=None):
    """ PairWise Marginals """
    wordPhis = computePotentials(word, featParams)
    _messageDict = messageDict(word, featParams, transParams, wordPhis)
    
    if (logPartition is None):
        logPartition = logsumexp(wordPhis[:, 0] + _messageDict[2, 1])
        
    chars_to_consider = AllChars
    pairwise_marginals = np.zeros( (len(chars_to_consider), len(chars_to_consider)) )
    
    mLeft = 0
    mRight = 0
    if (nodeL >= 1):
        mLeft = _messageDict[nodeL - 1, nodeL]
    if (nodeR <= len(word)):
        mRight = _messageDict[nodeR + 1, nodeR]
        
    logSingleMarginalsL = wordPhis[:, nodeL-1] + mLeft
    logSingleMarginalsL = np.stack( [logSingleMarginalsL]*len(AllChars) ).T
    logSingleMarginalsR = wordPhis[:, nodeR-1] + mRight
    logSingleMarginalsR = np.stack( [logSingleMarginalsR]*len(AllChars) )
    
    SingleMarginals = logSingleMarginalsL + logSingleMarginalsR
    jointMarginal = SingleMarginals + transParams - logPartition

    return np.exp(jointMarginal)


In [26]:
preds = []
for testWordIdx in range(1, 201):
    testWord = pd.read_csv(PATH + 'data/test_img{}.txt'.format(testWordIdx), header=None, delimiter=" ")
    testWord = testWord.values
    
    testWordPhis = computePotentials(testWord, feature_params)
    singleVarMarginals = []
    for node in range(1, len(testWord)+1):
        singleVarMarginals.append(singleVariableMarginal(node, testWord, feature_params, transition_params, \
                                                         testWordPhis))
    singleVarMarginals = np.array(singleVarMarginals)
#     print([AllChars[x] for x in np.argmax(singleVarMarginals, axis=1)])
    preds.append(''.join([AllChars[x] for x in np.argmax(singleVarMarginals, axis=1)]))

print("Predictions:")
print(preds)

Predictions:
['trat', 'hire', 'riser', 'edison', 'shore', 'tenth', 'hot', 'tests', 'trains', 'order', 'taare', 'rose', 'roton', 'ihtention', 'shots', 'starts', 'andee', 'rhinr', 'retained', 'the', 'hehderson', 'rise', 'either', 'read', 'insisted', 'theatoe', 'tao', 'shot', 'eitstein', 'tne', 'roe', 'tension', 'restored', 'donated', 'don', 'entries', 'attesied', 'iron', 'session', 'heirs', 'need', 'hair', 'rather', 'tree', 'initiate', 'stressed', 'dish', 'dore', 'oath', 'estate', 'adoitton', 'norrneastern', 'arid', 'sins', 'area', 'and', 'nad', 'ann', 'nose', 'shnre', 'threateneo', 'sead', 'edtth', 'antonio', 'thiro', 'tender', 'rosd', 'sent', 'trehd', 'traded', 'rated', 'iit', 'resadent', 'resisthni', 'hero', 'anne', 'adoeroids', 'ten', 'raoio', 'strerts', 'serena', 'statd', 'thtoat', 'ordeas', 'noted', 'ardse', 'reridted', 'deied', 'serds', 'oas', 'sri', 'thnt', 'ratio', 'threat', 'noise', 'tet', 'sin', 'riots', 'sanitation', 'are', 'net', 'neao', 'oset', 'haa', 'assertton', 'theories

In [27]:
#### Write predictions to a file ############
predictionsOutfile23 = "predictionsOutfile23.txt"
with open(predictionsOutfile23, 'w') as f:
    for pred in preds:
        predString = pred
        f.write(predString)
        f.write("\n")

In [28]:
#### Character Level accuracy ########
tp = 0
total = 0
for pred_id, pred in enumerate(preds):
    testWordLabel = np.array([x for x in test_labels[pred_id]])
    pred = np.array([x for x in pred])
    tp += len(np.where(testWordLabel == pred)[0])
    total += len(testWordLabel)

print("Character Level Test-Accuracy:", tp/total*100, "%")

Character Level Test-Accuracy: 89.91674375578168 %


# Question 1:


In [213]:
####### Compute Log Likelihood ######
def loglikelihood(W, dataSize):
    featParams, transParams = W[:len(AllChars)*featSize], W[len(AllChars)*featSize:]
    featParams = pd.DataFrame(np.reshape(featParams, (len(AllChars), featSize)))
    transParams = pd.DataFrame(np.reshape(transParams, (len(AllChars), len(AllChars))))
    loglikelihood = 0
    for trainWordIdx in range(0, dataSize):
        trainWord = pd.read_csv(PATH + 'data/train_img{}.txt'.format(trainWordIdx+1), header=None, delimiter=" ")
        trainWord = trainWord.values
        trainWord_label = train_labels[trainWordIdx]
        trainWord_labelIdxs = [CharMapping[x] for x in trainWord_label]

        trainWordPhis = computePotentials(trainWord, featParams)
        trainWordlogM = messageDict(trainWord, featParams, transParams)
        marginal = trainWordPhis[:, 0] + trainWordlogM[(2, 1)]
        logZ = logsumexp(marginal)
    #     print(logZ)

        trainWordEnergy = computeEnergy(trainWord, trainWord_label, trainWordPhis, transParams)
    #     print(trainWordEnergy)
        loglikelihood += trainWordEnergy - logZ
    return loglikelihood/dataSize

    print("Log likelihood for first {} train data points:".format(dataSize), loglikelihood/dataSize )

In [167]:
avglll = loglikelihood((feature_params, transition_params), 50)

Log likelihood for first 50 train data points: -4.583959036355723


In [193]:
def _computeGradWF(word, wordLabel, singleMarginals):
    wordLabelList = np.array([x for x in wordLabel])
    mask = [np.where(wordLabelList == c)[0] for c in CharMapping] # 10 x L(word)
    selectedX = [np.sum(word[m, :], axis=0) for m in mask] # 10 x _ x 321

    secondTerm = []
    for c_id, c in enumerate(AllChars):
        probbyc = np.expand_dims(singleMarginals[:, c_id], axis=1)
        secondTerm.append(np.sum(word*probbyc, axis=0))
    secondTerm = np.array(secondTerm)
    grad = selectedX - secondTerm
    return grad

def computeGradF(W, dataSize):
    wF, wT = W[:len(AllChars)*featSize], W[len(AllChars)*featSize:]
    wF = pd.DataFrame(np.reshape(wF, (len(AllChars), featSize)))
    wT = pd.DataFrame(np.reshape(wT, (len(AllChars), len(AllChars))))
    grad = 0
    for trainWordIdx in range(dataSize):
        trainWord = pd.read_csv(PATH + 'data/train_img{}.txt'.format(trainWordIdx+1), header=None, delimiter=" ")
        trainWord = trainWord.values
        trainWord_label = train_labels[trainWordIdx]
        trainWord_labelIdxs = [CharMapping[x] for x in trainWord_label]
        trainWordPhis = computePotentials(trainWord, wF)

        singleVarMargs = []
        for node in range(1, len(trainWord)+1):
            sVM = singleVariableMarginal(node, trainWord, wF, wT, trainWordPhis)
            singleVarMargs.append(sVM)
        grad += _computeGradWF(trainWord, trainWord_label, np.array(singleVarMargs))
    return grad/dataSize

def _computeGradWT(word, wordLabel, _pairwiseMarginals):
    gradwt = np.zeros((len(AllChars), len(AllChars)))
    for c in CharMapping:
        for cprime in CharMapping:
            firstTerm = 0
            secondTerm = 0
            for letter_id in range(0, len(word)-1):
                if ((wordLabel[letter_id] == c) and (wordLabel[letter_id + 1] == cprime)):
                    firstTerm += 1
                secondTerm += _pairwiseMarginals[letter_id][CharMapping[c]][CharMapping[cprime]]
            gradwt[CharMapping[c], CharMapping[cprime]] = (firstTerm - secondTerm)
    return gradwt

def computeGradT(W, dataSize):
    wF, wT = W[:len(AllChars)*featSize], W[len(AllChars)*featSize:]
    wF = pd.DataFrame(np.reshape(wF, (len(AllChars), featSize)))
    wT = pd.DataFrame(np.reshape(wT, (len(AllChars), len(AllChars))))
    gradWT = 0
    for trainWordIdx in range(dataSize):
        trainWord = pd.read_csv(PATH + 'data/train_img{}.txt'.format(trainWordIdx+1), header=None, delimiter=" ")
        trainWord = trainWord.values
        trainWord_label = train_labels[trainWordIdx]
        trainWord_labelIdxs = [CharMapping[x] for x in trainWord_label]
        trainWordPhis = computePotentials(trainWord, wF)

        pairwiseMargs = []
        for node in range(1, len(trainWord)):
            pM = pairwiseMarginal(node, node+1, trainWord, wF, wT)
            pairwiseMargs.append(pM.values)
        gradWT += _computeGradWT(trainWord, trainWord_label, np.array(pairwiseMargs))
    return gradWT/dataSize

In [205]:
def gradlll(W, dataSize):
    """ Compute the gradient of log likelihood at wF and wT """
    gradWF = computeGradF(W, dataSize)
    gradWT = computeGradT(W, dataSize)

    grads = np.concatenate( (gradWF.flatten(), gradWT.flatten()) )
    return grads


In [211]:
def nll(W, dataSize):
    return -1*loglikelihood(W, dataSize)

def gradnll(W, dataSize):
    return -1*gradlll(W, dataSize)

In [None]:
training_times = {}
accuracies = {}

In [230]:
weights = {}

In [229]:
featSize = 321
dataSize = 100
### train with given data ###
x0 = np.concatenate( (np.ones((len(AllChars)*featSize)), np.ones((len(AllChars)*len(AllChars)))) )
# x0 = (np.ones((len(AllChars), featSize)), np.ones(len(AllChars), len(AllChars)))
startTime = time.time()
res = sciop.minimize(fun=nll, x0=x0, args=dataSize, jac=gradnll, method='BFGS', options={'disp': True, 'maxiter': 40})
endTime = time.time()
training_time = endTime - startTime
print("Training Time:", training_time)
training_times[dataSize] = training_time

         Current function value: 0.000001
         Iterations: 19
         Function evaluations: 56
         Gradient evaluations: 45
Training Time: 445.5863754749298


In [231]:
res.x
weights[dataSize] = res.x

In [236]:
def getAccuracy(W):
    wF, wT = W[:len(AllChars)*featSize], W[len(AllChars)*featSize:]
    wF = pd.DataFrame(np.reshape(wF, (len(AllChars), featSize)))
    wT = pd.DataFrame(np.reshape(wT, (len(AllChars), len(AllChars))))
    
    #### Predictions on test set
    preds = []
    for testWordIdx in range(1, 201):
        testWord = pd.read_csv(PATH + 'data/test_img{}.txt'.format(testWordIdx), header=None, delimiter=" ")
        testWord = testWord.values

        testWordPhis = computePotentials(testWord, wF)
        singleVarMarginals = []
        for node in range(1, len(testWord)+1):
            singleVarMarginals.append(singleVariableMarginal(node, testWord, wF, wT, \
                                                             testWordPhis))
        singleVarMarginals = np.array(singleVarMarginals)
    #     print([AllChars[x] for x in np.argmax(singleVarMarginals, axis=1)])
        preds.append(''.join([AllChars[x] for x in np.argmax(singleVarMarginals, axis=1)]))

#     print("Predictions:")
#     print(preds)
    
    #### Character Level accuracy ########
    tp = 0
    total = 0
    for pred_id, pred in enumerate(preds):
        testWordLabel = np.array([x for x in test_labels[pred_id]])
        pred = np.array([x for x in pred])
        tp += len(np.where(testWordLabel == pred)[0])
        total += len(testWordLabel)
    
    print("Character Level Test-Accuracy:", tp/total*100, "%")
    return tp/total*100

In [237]:
acc = getAccuracy(res.x)
accuracies[dataSize] = acc

Character Level Test-Accuracy: 91.0268270120259 %


In [238]:
training_times, accuracies, len(weights)

({50: 272.1810564994812, 100: 445.5863754749298},
 {50: None, 100: 91.0268270120259},
 1)

### Baad mein

In [168]:
def _computeGradWcf(word, wordLabel, singleMarginals, c, f):
    wordLabelList = np.array([x for x in wordLabel])
    mask = np.where(wordLabelList == c)
    selectedX = word[mask, f]
    singleMar = singleMarginals[:, CharMapping[c]]
    grad = np.sum(selectedX) - np.sum(singleMar*word[:, f])
    return grad
    
def computeFeatureGrad(c, f):
    grad = 0
    for trainWordIdx in range(0, 50):
        trainWord = pd.read_csv(PATH + 'data/train_img{}.txt'.format(trainWordIdx+1), header=None, delimiter=" ")
        trainWord = trainWord.values
        trainWord_label = train_labels[trainWordIdx]
        trainWord_labelIdxs = [CharMapping[x] for x in trainWord_label]
        trainWordPhis = computePotentials(trainWord, feature_params)
        
        singleVarMargs = []
        for node in range(1, len(trainWord)+1):
            sVM = singleVariableMarginal(node, trainWord, feature_params, transition_params, trainWordPhis)
            singleVarMargs.append(sVM)
        thisGrad = _computeGradWcf(trainWord, trainWord_label, np.array(singleVarMargs), c, f)
        grad += thisGrad
    return grad

def _computeGradWccp(word, wordLabel, _pairwiseMarginals, c, cprime):
    firstTerm = 0
    secondTerm = 0
    for letter_id in range(0, len(word)-1):
        if ((wordLabel[letter_id] == c) and (wordLabel[letter_id + 1] == cprime)):
            firstTerm += 1
        secondTerm += _pairwiseMarginals[letter_id][CharMapping[c]][CharMapping[cprime]]
    return (firstTerm - secondTerm)
    
def computeTransitionGrad(c, cprime):
    grad = 0
    for trainWordIdx in range(0, 50):
        trainWord = pd.read_csv(PATH + 'data/train_img{}.txt'.format(trainWordIdx+1), header=None, delimiter=" ")
        trainWord = trainWord.values
        trainWord_label = train_labels[trainWordIdx]
        trainWordPhis = computePotentials( trainWord, feature_params )
        
        pairwiseMargs = []
        for node in range(1, len(trainWord)):
            pM = pairwiseMarginal(node, node+1, trainWord, feature_params, transition_params)
            pairwiseMargs.append(pM.values)
        thisGrad = _computeGradWccp(trainWord, trainWord_label, np.array(pairwiseMargs), c, cprime)
        grad += thisGrad
    return grad

In [None]:
print(firstWordPhisDF.to_latex())
print(df_margProbDist.to_latex())
print(allMessages_21.to_latex())
print(probs_pd.to_latex())
print(jointFirstWord12.to_latex())
print(jointFirstWord23.to_latex())
print(jointFirstWord34.to_latex())

In [195]:
def f(Z):
    x, y = Z
    return (((1-x)*(1-x)) + 100*((y-x*x)*(y-x*x)))

def gradf(Z):
    x, y = Z
    return np.array([-2*(1-x) - 400*x*(y-x*x), 200*(y-x*x)])

x0 = [0, 0]
res = sciop.minimize(fun=f, x0=x0, jac=gradf, method='BFGS', options={'disp': True})
res.x

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 19
         Function evaluations: 24
         Gradient evaluations: 24


array([1., 1.])