# Theano version of positive/negative samples

In [1]:
import theano.tensor as T
import theano
import numpy as np
from collections import OrderedDict
import sys
sys.path.append('../')
import negsamp
import matplotlib.pylab as plt
from progressbar import ProgressBar as progressbar 
%matplotlib inline

Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 4007)


## Parameter specification

In [2]:
# Model dimensions
d = 100         # rows of Wi / columns of Wc in the random-init branch
f = 4096        # columns of Wh (input feature length)
hidden = 4096   # rows of Wh / columns of Wi (hidden layer width)
V = 291         # rows of Wc -- presumably the label vocabulary size; confirm against the dictionary
m = 25          # second argument to ns.negsampv -- presumably negatives drawn per sample; confirm

# Training setup
numlayers = 2       # 1 -> scores are Wc.Wi.x; anything else inserts the Wh layer first
lr = 0.01           # SGD step size
epochs = 100        # number of passes over the training set
batchsize = 256     # rows per SGD update
weightfile = None   # 'params-2layer.npz'
pretrain = False    # with a weightfile: load Wi/Wc from it but draw Wh at random

## Data ingestion. Currently this just reads NumPy (`.npz`) and plain-text list files from disk.

In [3]:
def _read_lines(path):
    """Return the lines of the text file at `path`, without line terminators.

    The file is opened in a `with` block so the handle is closed as soon as
    the contents have been read.
    """
    with open(path) as fh:
        return fh.read().splitlines()


# Feature/label arrays
data = np.load('../data/iaprtc_alexfc7.npz')

# Dictionary entries, one per line
D = _read_lines('../data/iaprtc_dictionary.txt')

# Image lists: keep only the last path component of every entry
train_ims = [im.split('/')[-1] for im in _read_lines('../data/iaprtc_trainlist.txt')]
test_ims = [im.split('/')[-1] for im in _read_lines('../data/iaprtc_testlist.txt')]

# The stored arrays are transposed on load
# (presumably so that rows index samples -- confirm against the archive)
xTr = data['xTr'].T
yTr = data['yTr'].T
xTe = data['xTe'].T
yTe = data['yTe'].T

# Column sums of the training labels. Multiplying by 1.0 promotes integer
# sums to float (so the later `wc / wc.sum()` is a true division under
# Python 2) without perturbing the values.
wc = yTr.sum(axis=0) * 1.0

## Load in parameters/weights

In [4]:
def _uniform_centered(shape):
    """Draw an array of the given shape uniformly from [-0.5, 0.5)."""
    return np.random.ranf(shape) - 0.5


# Build the three weight matrices as Theano shared variables.
#   weightfile + pretrain : Wi and Wc from the file, Wh freshly drawn
#   weightfile only       : Wi, Wh and Wc all from the file
#   no weightfile         : all three freshly drawn
if weightfile:
    # Read the archive once and reuse it for every array that is needed.
    saved = np.load(weightfile)
    Wi = theano.shared(saved['Wi'])
    Wc = theano.shared(saved['Wc'])
    if pretrain:
        # Same zero-centred draw as the from-scratch branch below, so a newly
        # added hidden layer starts from the same distribution either way.
        Wh = theano.shared(_uniform_centered((hidden, f)))
    else:
        Wh = theano.shared(saved['Wh'])
else:
    # Need to change these to normal distributed
    Wh = theano.shared(_uniform_centered((hidden, f)))
    Wi = theano.shared(_uniform_centered((d, hidden)))
    Wc = theano.shared(_uniform_centered((V, d)))

    print("Initialized Wh, Wi, and Wc shapes: {},{},{}".format(Wh.get_value().shape,Wi.get_value().shape,Wc.get_value().shape))

Initialized Wh, Wi, and Wc shapes: 
(4096, 4096)
(100, 4096)
(291, 100)


## Helper and utility functions

In [5]:
def showdata( Wcn, minblk=True, thetitle=None, colorbar=False, blackwhite=False ):
    """Render a 2-D array (or its top-left corner) as an image.

    Parameters
    ----------
    Wcn : array with a `.shape` and 2-D slicing
        The data to display.
    minblk : bool
        If true, crop to the top-left square whose side is the smallest
        entry of `Wcn.shape`; otherwise show the full first two dimensions.
    thetitle : str or None
        Figure title; omitted when falsy.
    colorbar : bool
        If true, attach a colour bar.
    blackwhite : bool
        If true, print the crop size and draw with the 'Greys_r' colormap and
        nearest-neighbour interpolation; otherwise use imshow's defaults.
    """
    if minblk:
        nrows = ncols = min(Wcn.shape)
    else:
        nrows, ncols = Wcn.shape[0], Wcn.shape[1]

    # Open a new figure so the image is never drawn over one that is still open.
    plt.figure()

    crop = Wcn[:nrows, :ncols]
    if blackwhite:
        print('%d, %d' % (nrows, ncols))
        plt.imshow(crop, cmap='Greys_r', interpolation='nearest')
    else:
        plt.imshow(crop)

    if thetitle:
        plt.title(thetitle)
    if colorbar:
        plt.colorbar()
    plt.show()

    
# Logistic sigmoid
def sigmoid(x):
    """Return 1 / (1 + exp(-x)), evaluated elementwise with NumPy."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

# Rectified linear unit
def relu(x):
    """Return the elementwise maximum of `x` and 0 (via np.maximum)."""
    floor = 0
    return np.maximum(x, floor)

## Mathy Part

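### Cost Function:
1. One Layer: $$ y_p = \sigma(W_c W_i x^T) $$
2. Two Layers: $$ y_p = \sigma( W_c W_i \,\mathrm{relu}( W_h x^T )) $$

Writing $s$ for the pre-sigmoid scores ($s = W_c W_i x^T$ with one layer, $s = W_c W_i \,\mathrm{relu}(W_h x^T)$ with two), $p$ for the positive labels and $n$ for the sampled negatives, the cost that is minimized is
$$ L = -\operatorname{mean}\big( p \odot \log \sigma(s) + n \odot \log \sigma(-s) \big) $$

We are doing SGD only (no momentum)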

In [8]:
# Negative sampler, constructed from the normalised per-label counts `wc`
ns = negsamp.NegativeSampler(wc / wc.sum())

# Symbolic inputs, one row per sample:
#   x : input features
#   p : positive labels (the training loop passes rows of yTr)
#   n : sampled negatives (the training loop passes the output of ns.negsampv)
x = T.matrix()
p = T.matrix()
n = T.matrix()

# Cross correlation: raw (pre-sigmoid) scores. The products are formed on
# x.T and transposed back, so `xcorr` again has one row per sample.
if numlayers==1:
    xcorr = Wc.dot(Wi.dot(x.T)).T
else:
    # xcorr = Wc.dot(Wi.dot(T.nnet.sigmoid(Wh.dot(x.T)))).T
    xcorr = Wc.dot(Wi.dot(T.nnet.relu(Wh.dot(x.T)))).T

# LOSS FUNCTION
# Mean over all entries of -(p*log(sigmoid(s)) + n*log(sigmoid(-s))):
# entries selected by p are pushed towards high scores, entries selected
# by n towards low scores.
#
# Earlier variants, kept for reference:
# Because p and n are {-1,0,1}, these two are the same
# loss = -(T.log(T.nnet.sigmoid(p*xcorr)) + T.log(T.nnet.sigmoid(-n*xcorr))).mean()
# loss = -T.log(T.nnet.sigmoid( (p-n).dot(xcorr)  )).mean()
# loss = (-p * T.log(T.nnet.sigmoid(xcorr)) + n * T.log(T.nnet.sigmoid(xcorr))).mean()
loss = -(p*T.log(T.nnet.sigmoid(xcorr)) + n*T.log(T.nnet.sigmoid(-xcorr))).mean()
#
# Cross-entropy
# loss = (n-p)*( T.log( T.nnet.sigmoid(xcorr) ) ).mean()
#
# Binary cross-entropy
# loss = -(p*(T.log( T.nnet.sigmoid(xcorr))) + (1-p)*(T.log( 1-T.nnet.sigmoid(xcorr) ))).mean()

# Define the gradient updates. Use positive for maximization
# The update map is built from a list of (variable, new value) pairs so the
# OrderedDict really has a fixed order; building it from a dict literal would
# inherit the literal's arbitrary ordering.
if numlayers==1:
    params = [Wi, Wc]
    gWi, gWc = T.grad(loss, params)
    sgd = OrderedDict([
        (Wi, Wi - lr*gWi),
        (Wc, Wc - lr*gWc),
    ])
else:
    params = [Wi, Wc, Wh]
    gWi, gWc, gWh = T.grad(loss, params)
    sgd = OrderedDict([
        (Wi, Wi - lr*gWi),
        (Wc, Wc - lr*gWc),
        (Wh, Wh - lr*gWh),
    ])

# Compile to theano functionality
#   train   : one SGD step on (x, p, n); returns the loss for that batch
#   predict : raw scores `xcorr` for x (no sigmoid applied)
train = theano.function( [x,p,n], outputs=loss, updates=sgd, allow_input_downcast=True )
predict= theano.function( [x], outputs=xcorr, allow_input_downcast=True )

## Do SGD on the cost function above

In [None]:
progbar = progressbar(len(yTr))
# Total loss of every finished epoch, in order
progloss = []

# Iterate through the data size
for j in range(epochs):
    print("Epoch "+str(j))
    k = 0            # mini-batches processed in this epoch
    totloss = 0.0    # loss summed over the whole epoch
    batloss = 0.0    # loss summed since the last progress report
    # Fresh random visiting order for this epoch
    randorder = np.random.permutation(len(yTr))
    for i in range(0, len(randorder), batchsize):

        # Select the rows through the permutation so the batches are shuffled
        idx = randorder[i:i+batchsize]
        indata = xTr[idx]
        outdata = yTr[idx]

        nsv = ns.negsampv(outdata, m)
        lossval = train( indata, outdata, nsv )
        totloss += lossval
        batloss += lossval

        k += 1
        if k % 16 == 0:
            # Progress and loss
            progbar.animate(k*batchsize)
            print('\nlosses (inst, bat, tot)=({},{},{})'.format(lossval, batloss, totloss))
            # Start a new reporting window so `bat` differs from `tot`
            batloss = 0.0

    print("")
    print("Total loss on epoch "+str(j)+" = "+str(totloss)+"\n")
    progloss.append(totloss)
    print("Progress: {}".format( progloss ))

    # Checkpoint after every epoch; Wh is only stored for the multi-layer model
    if numlayers==1:
        np.savez('params-ix.npz', Wi=Wi.get_value(), Wc=Wc.get_value(), Epoch=j)
    else:
        np.savez('params-ix.npz', Wi=Wi.get_value(), Wh=Wh.get_value(), Wc=Wc.get_value(), Epoch=j)


[                  0%                  ] 
losses (inst, bat, tot)=(2.39360935084,41.193562883,41.193562883)


### Save the arrays to parameter files

In [None]:
# Write the current weights and the last epoch index `j` to params-ix.npz.
# Wh is included only when the model is not the one-layer variant.
snapshot = dict(Wi=Wi.get_value(), Wc=Wc.get_value(), Epoch=j)
if numlayers != 1:
    snapshot['Wh'] = Wh.get_value()
np.savez('params-ix.npz', **snapshot)

# Numpy verification and debugging

### Get arrays from GPU, and make sample inputs

In [None]:
# Copy the current weight values out of the Theano shared variables
W0, W1, W2 = Wh.get_value(), Wi.get_value(), Wc.get_value()
# plt.stem(W0.dot(xTe[40]))

# Sample batch: the first 256 test rows and their labels
xD, yD = xTe[:256], yTe[:256]

# Positives are the labels themselves; negatives come from the sampler
npp = yD
npn = ns.negsampv(yD, m)

### Define functionality to check the shapes of current arrays

In [None]:
# NumPy re-computation of the network scores on the sample batch.
# `numlayers` is taken from the parameter cell, so this check follows the
# architecture that was actually compiled and trained.
# The hidden activation is relu, matching T.nnet.relu in the Theano graph.
# Note: unlike the Theano `xcorr`, no final transpose is applied here, so
# the columns of `npxcorr` index samples.
if numlayers==1:
    npxcorr = W2.dot(W1.dot(xD.T))
else:
    npxcorr = W2.dot(W1.dot(relu(W0.dot(xD.T))))

print('Shapes, p:'+str(npp.shape)+', n:'+str(npn.shape)+', xcorr:'+str(npxcorr.shape))

In [None]:
# Product of W1 with the transposed sample batch
h1 = W1.dot(xD.T)

# Stem plots of rows 1, 2 and 3; a new figure is opened after each one so
# the next plot lands on its own canvas
for row in (1, 2, 3):
    plt.stem(h1[row])
    plt.figure()

# Image of the first 300 columns of W1, with a colour scale
plt.imshow(W1[:,:300])
plt.colorbar()

In [None]:
# Display the shape of the NumPy-computed score matrix
npxcorr.shape