In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from theano import *
from lasagne.layers import EmbeddingLayer, InputLayer, get_output
import lasagne
import lasagne.layers
import theano.tensor as T
import theano
import numpy as np
from helpers import SimpleMaxingLayer

Couldn't import dot_parser, loading of dot files will not be possible.


In [3]:
from wordvecs import WordVectors, EmbeddingLayer

wordvectors = WordVectors(fname="/data/matthew/GoogleNews-vectors-negative300.bin", negvectors=False)

In [4]:
#from sentiment_sents import Sentiment

# just load the sentences from the CNN system
#sentiment = Sentiment("prevwork/CNN_sentence/")

In [5]:
from stanfordSentimentReader import StanfordSentimentReader

sentiment = StanfordSentimentReader('/data/matthew/stanfordSentimentTreebank')

In [6]:
len(sentiment.test_X), len(sentiment.train_X), len(sentiment.dev_X)

(2210, 8544, 1101)

In [7]:
len(wordvectors.vectors)

3000000

In [8]:
# make the labels binary since that is what we are currently targetting in the training
sentiment.train_Y = (np.array(sentiment.train_Y) > .6).astype('int32')
sentiment.test_Y = (np.array(sentiment.test_Y) > .6).astype('int32')
sentiment.dev_Y = (np.array(sentiment.dev_Y) > .6).astype('int32')

In [9]:
len(sentiment.test_Y), len(sentiment.train_Y), len(sentiment.dev_Y)

(2210, 8544, 1101)

In [10]:
for w in sentiment.test_X:
    wordvectors.tokenize(w)
for w in sentiment.train_X:
    wordvectors.tokenize(w)
for w in sentiment.dev_X:
    wordvectors.tokenize(w)

In [11]:
len(wordvectors.vectors)

3003779

In [12]:
len(wordvectors.word_location)

19537

In [24]:
class SentimentExp(object):
    
    def __init__(self, train_X, train_Y, wordvecs=wordvectors):
        self.train_X = train_X
        self.train_Y = train_Y
        self.wordvecs = wordvecs
        
        self.input_size = 10  # not used
        self.batch_size = 50
        
        self.learning_rate = .01
        self.momentum = .9
        
        #self.train_X_rep = np.array([[self.getRep(x)] for x in self.train_X])
        self.train_X_rep = np.array([wordvecs.tokenize(x) for x in self.train_X])
        
        self._setup()

    def getRep(self, sent):
        ret = []
        for i in xrange(self.input_size):
            if i < len(sent):
                ret.append(self.wordvecs[sent[i]])
            else:
                ret.append(np.zeros(self.wordvecs.vector_size))
        return np.matrix(ret).reshape((1, self.input_size, self.wordvecs.vector_size))

    def _setup(self):
        self.x_batch = T.imatrix('x')
        self.y_batch = T.ivector('y')
        
        self.input_l = lasagne.layers.InputLayer((None, 50))
        
        self.embedding_l = EmbeddingLayer(
            self.input_l,
            W=self.wordvecs.get_numpy_matrix(),
            add_word_params=True,
        ) 
        
        self.first_l = lasagne.layers.Conv2DLayer(
            self.embedding_l,
            num_filters=80,
            filter_size=(2, self.wordvecs.vector_size),
            name='conv1',
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.first_l_max = lasagne.layers.Pool2DLayer(
            self.first_l,
            name='maxing1',
            pool_size=(49,1),  # the number 9 should be 50-1 since that would mean it maxes over the whole input....
            mode='max',
        )
        
        self.first_l_max_simple = SimpleMaxingLayer(
            self.first_l,
            name='maxing1',
        )
        
        self.hidden1_l = lasagne.layers.DenseLayer(
            self.first_l_max,
            num_units=40,
            name='dens1',
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.hidden1_l_drop = lasagne.layers.DropoutLayer(
            self.hidden1_l,
            name='drop1',
            p=.5,
        )
        
        self.hidden2_l = lasagne.layers.DenseLayer(
            self.hidden1_l_drop,
            num_units=15,
            name='dens2',
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.hidden2_l_drop = lasagne.layers.DropoutLayer(
            self.hidden2_l,
            name='drop2',
            p=.25,
        )
        
        self.out_l = lasagne.layers.DenseLayer(
            self.hidden1_l_drop,
            num_units=1,
            name='dens3',
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.output = lasagne.layers.get_output(self.out_l, self.x_batch)
        
        self.loss_vec_old = (self.output.reshape((self.output.size,)) - self.y_batch) ** 2
        self.output_diff = T.neq((self.output.flatten() > .5),(self.y_batch > .5))
        self.loss_vec = lasagne.objectives.binary_crossentropy(T.clip(self.output.reshape((self.output.size,)), .01, .99), self.y_batch)
        
        self.loss_val = T.dot(self.loss_vec, self.output_diff)
        
        self.all_params = lasagne.layers.get_all_params(self.out_l)
        
        self.updates = lasagne.updates.adagrad(self.loss_val, self.all_params, .001)
        #self.updates = lasagne.updates.apply_momentum(self.updates_adagrad)
        
        self.train_func = theano.function(
            [self.x_batch, self.y_batch],
            [self.loss_vec.mean(), self.loss_vec],
            updates=self.updates,
        )
        
        self.loss_func = theano.function(
            [self.x_batch, self.y_batch],
            [self.loss_vec.sum(), self.loss_vec, self.output_diff.sum()],
        )
        
    def _make_zero(self):
        self.embedding_l.W.get_value(borrow=True)[0,:] = 0
        
    def train(self):
        for s in xrange(0, len(self.train_X_rep), self.batch_size):
            end = s + self.batch_size
            if end > len(self.train_X_rep):
                end = len(self.train_X_rep)
            X_vals = np.array(self.train_X_rep[s:end]).astype('int32')
            y_vals = np.array(self.train_Y[s:end]).astype('int32')
            loss, _ = self.train_func(X_vals, y_vals)
            self._make_zero()
            
    def test_loss(self, test_X, test_Y):
        test_X_rep = np.array([self.wordvecs.tokenize(x) for x in test_X])
        loss_sum = 0.0
        wrong = 0.0
        for s in xrange(0, len(test_X_rep), self.batch_size):
            end = s + self.batch_size
            if end > len(test_X_rep):
                end = len(test_X_rep)
            X_vals = np.array(test_X_rep[s:end]).astype('int32')
            y_vals = np.array(test_Y[s:end]).astype('int32')
            loss, _, output_diff = self.loss_func(X_vals, y_vals)
            wrong += output_diff
            loss_sum += loss
        return loss_sum / len(test_X_rep), wrong / len(test_X_rep)
    
experiment = SentimentExp(sentiment.train_X, sentiment.train_Y)

In [28]:
experiment.all_params

[Embeddings, conv1.W, conv1.b, dens1.W, dens1.b, dens3.W, dens3.b]

In [29]:
print experiment.test_loss(sentiment.train_X, sentiment.train_Y), experiment.test_loss(sentiment.dev_X, sentiment.dev_Y), experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(0.94516480938609315, 0.22975187265917604) (1.1210415627551737, 0.27520435967302453) (0.98793939293940025, 0.24253393665158371)


In [30]:
p_res = []

for i in xrange(50):
    if i % 10 == 9:
        r = i, experiment.test_loss(sentiment.train_X, sentiment.train_Y), experiment.test_loss(sentiment.dev_X, sentiment.dev_Y)
        p_res.append(r)
        print r
    experiment.train()

(9, (0.61890593660937065, 0.2731741573033708), (0.66302997196318336, 0.38510445049954589))
(19, (0.59905316823446575, 0.2247191011235955), (0.65877878242446353, 0.36784741144414168))
(29, (0.58898645997067434, 0.18761704119850187), (0.65519232288619911, 0.37148047229791098))
(39, (0.57921442715922844, 0.16982677902621723), (0.66763269934807135, 0.37420526793823794))
(49, (0.56708638114222465, 0.1408005617977528), (0.65835189129929028, 0.38601271571298817))


In [31]:
experiment.test_loss(sentiment.train_X, sentiment.train_Y)

(0.56215729250109614, 0.14173689138576778)

In [32]:
experiment.test_loss(sentiment.dev_X, sentiment.dev_Y)

(0.65719801928959309, 0.35240690281562215)

In [33]:
experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(0.64638981333090362, 0.3357466063348416)

In [34]:
p_res

[(9,
  (0.61890593660937065, 0.2731741573033708),
  (0.66302997196318336, 0.38510445049954589)),
 (19,
  (0.59905316823446575, 0.2247191011235955),
  (0.65877878242446353, 0.36784741144414168)),
 (29,
  (0.58898645997067434, 0.18761704119850187),
  (0.65519232288619911, 0.37148047229791098)),
 (39,
  (0.57921442715922844, 0.16982677902621723),
  (0.66763269934807135, 0.37420526793823794)),
 (49,
  (0.56708638114222465, 0.1408005617977528),
  (0.65835189129929028, 0.38601271571298817))]

In [None]:
experiment.first_l.b.get_value(borrow=True)

In [None]:
experiment.

In [26]:
test_func2 = theano.function(
    [experiment.x_batch],
    [
        lasagne.layers.get_output(experiment.first_l, experiment.x_batch),
        lasagne.layers.get_output(experiment.first_l_max, experiment.x_batch),
        T.max(lasagne.layers.get_output(experiment.first_l, experiment.x_batch), axis=2)
    ]
)

In [27]:
test_func2(np.array(experiment.train_X_rep[0:1]).astype('int32'))[1].shape

(1, 80, 1, 1)

In [None]:
test_func = theano.function(
    [experiment.x_batch, experiment.y_batch],
    [experiment.loss_vec.mean(), experiment.loss_vec, experiment.output, 
     T.grad(experiment.loss_vec.mean(), experiment.out_l.get_params()[0]),
     experiment.out_l.get_params()[0], experiment.y_batch, 
     #lasagne.layers.get_output(experiment.first_l, experiment.x_batch)
    ]
)

In [None]:
test_func(np.array(experiment.train_X_rep[0:50]).astype('int32'),np.array(experiment.train_Y[0:50]).astype('int32'))

In [None]:
gg_k = theano.shared(0.)
gg_i = T.iscalar('x')

gg_res, gg_update = theano.scan(lambda: {gg_k:(gg_k + 1)}, sequences=[range(gg_i)])

In [None]:
gg_update

In [None]:
a1 = T.matrix()
f1 = theano.function([a1], [a1.shape])

In [None]:
t1 = np.array([[1,2,3,0,0],[4,5,6,7,0],[2,2,2,2,2]])

In [None]:
t1.dtype

In [None]:
f1(t1)

In [None]:
%reload_ext wordvecs

In [None]:
np.zeros(10)