In [1]:
from theano import *
from lasagne.layers import EmbeddingLayer, InputLayer, get_output
import lasagne
import lasagne.layers
import theano.tensor as T
import theano
import numpy as np

Couldn't import dot_parser, loading of dot files will not be possible.


In [2]:
from wordvecs import WordVectors

wordvectors = WordVectors(fname="../GoogleNews-vectors-negative300.bin", negvectors=False)

from sentiment_sents import Sentiment

# just load the sentences from the CNN system
sentiment = Sentiment("prevwork/CNN_sentence/")

In [3]:
len(wordvectors.vectors)

3000000

In [4]:
class SentimentExp(object):
    
    def __init__(self, train_X, train_Y, wordvecs=wordvectors):
        self.train_X = train_X
        self.train_Y = train_Y
        self.wordvecs = wordvecs
        
        self.input_size = 10
        self.batch_size = 10
        
        self.learning_rate = .01
        self.momentum = .9
        
        self.train_X_rep = np.array([[self.getRep(x)] for x in self.train_X])
        
        self._setup()
        
    def getRep(self, sent):
        ret = []
        for i in xrange(self.input_size):
            if i < len(sent):
                ret.append(self.wordvecs[sent[i]])
            else:
                ret.append(np.zeros(self.wordvecs.vector_size))
        return np.matrix(ret).reshape((1, self.input_size, self.wordvecs.vector_size))

    def _setup(self):
        self.x_batch = T.tensor4('x')
        self.y_batch = T.ivector('y')
        
        self.input_l = lasagne.layers.InputLayer((self.batch_size, 1, self.input_size, self.wordvecs.vector_size))
        
        self.first_l = lasagne.layers.Conv2DLayer(
            self.input_l,
            num_filters=100,
            filter_size=(2, self.wordvecs.vector_size),
            name='conv1',
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.first_l_max = lasagne.layers.MaxPool2DLayer(
            self.first_l,
            pool_size=(1,9)
        )
        
        self.hidden1_l = lasagne.layers.DenseLayer(
            self.first_l_max,
            num_units=50,
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.hidden1_l_drop = lasagne.layers.DropoutLayer(
            self.hidden1_l,
            p=.25,
        )
        
        
        self.out_l = lasagne.layers.DenseLayer(
            self.hidden1_l_drop,
            num_units=1,
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.output = lasagne.layers.get_output(self.out_l, self.x_batch)
        
        self.loss_vec_old = (self.output.reshape((self.output.size,)) - self.y_batch) ** 2
        self.output_diff = T.neq((self.output.flatten() > .5),(self.y_batch > .5)).sum()
        self.loss_vec = lasagne.objectives.binary_crossentropy(T.clip(self.output.reshape((self.output.size,)), .01, .99), self.y_batch)
        
        self.all_params = lasagne.layers.get_all_params(self.out_l)
        
        self.updates = lasagne.updates.adagrad(self.loss_vec.mean(), self.all_params, .01)
        #self.updates = lasagne.updates.apply_momentum(self.updates_adagrad)
        
        self.train_func = theano.function(
            [self.x_batch, self.y_batch],
            [self.loss_vec.mean(), self.loss_vec],
            updates=self.updates,
        )
        
        self.loss_func = theano.function(
            [self.x_batch, self.y_batch],
            [self.loss_vec.sum(), self.loss_vec, self.output_diff],
        )
        
    def train(self):
        for s in xrange(0, len(self.train_X_rep), self.batch_size):
            X_vals = np.array(self.train_X_rep[s:(s + self.batch_size)])
            y_vals = np.array(self.train_Y[s:(s + self.batch_size)]).astype('int32')
            loss, _ = self.train_func(X_vals, y_vals)
            
    def test_loss(self, test_X, test_Y):
        test_X_rep = np.array([[self.getRep(x)] for x in test_X])
        loss_sum = 0.0
        wrong = 0.0
        for s in xrange(0, len(test_X_rep), self.batch_size):
            X_vals = np.array(self.train_X_rep[s:(s + self.batch_size)])
            y_vals = np.array(self.train_Y[s:(s + self.batch_size)]).astype('int32')
            loss, _, output_diff = self.loss_func(X_vals, y_vals)
            wrong += output_diff
            loss_sum += loss
        return loss_sum / len(test_X_rep), wrong / len(test_X_rep)
    
experiment = SentimentExp(sentiment.train_X, sentiment.train_Y)



In [5]:
experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(1.9271358860780028, 0.49742086752637749)

In [19]:
for i in xrange(30):
    experiment.train()

In [20]:
experiment.test_loss(sentiment.train_X, sentiment.train_Y)

(0.66375977811338371, 0.41031652989449002)

In [21]:
experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(0.66549026150221469, 0.40726846424384527)

In [23]:
for a in xrange(10):
    for i in xrange(30):
        experiment.train()
    print experiment.test_loss(sentiment.train_X, sentiment.train_Y), experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(0.65781617586048158, 0.4033997655334115) (0.65864224859370946, 0.4033997655334115)
(0.65284003624535103, 0.39214536928487692) (0.65007462346657308, 0.38065650644783117)
(0.64309654065091437, 0.37409144196951932) (0.64461577900407996, 0.37268464243845251)
(0.63415913546176195, 0.36354044548651815) (0.63584843480132358, 0.36318874560375147)
(0.62133706366877117, 0.35252051582649474) (0.61819202019044595, 0.35076201641266119)
(0.61186462088244442, 0.34138335287221572) (0.61136543486472883, 0.3399765533411489)
(0.60211213987674261, 0.33024618991793669) (0.60446660743327818, 0.33259085580304809)
(0.58853938584680199, 0.31254396248534583) (0.59016738084616105, 0.32051582649472449)
(0.57889038805932092, 0.30996483001172331) (0.58027425585488246, 0.31359906213364597)
(0.56627723388382178, 0.29683470105509963) (0.56742439470765882, 0.30386869871043376)


In [None]:
for a in xrange(10):
    for i in xrange(30):
        experiment.train()
    print experiment.test_loss(sentiment.train_X, sentiment.train_Y), experiment.test_loss(sentiment.test_X, sentiment.test_Y)

In [14]:
test_func = theano.function(
    [experiment.x_batch, experiment.y_batch],
    [experiment.loss_vec.mean(), experiment.loss_vec, experiment.output, 
     T.grad(experiment.loss_vec.mean(), experiment.out_l.get_params()[0]),
     experiment.out_l.get_params()[0], experiment.y_batch, 
     #lasagne.layers.get_output(experiment.first_l, experiment.x_batch)
    ]
)

In [24]:
test_func(np.array(experiment.train_X_rep[0:10]),np.array(experiment.train_Y[0:10]).astype('int32'))

[array(0.512237808911631),
 array([ 0.58180769,  0.2986674 ,  0.01005034,  0.30346001,  0.97740184,
         0.92071332,  0.36261844,  0.52049098,  0.31382833,  0.83333974]),
 array([[ 0.55888716],
        [ 0.74180609],
        [-0.02160361],
        [ 0.73825941],
        [ 0.62371252],
        [ 0.60176513],
        [ 0.69585189],
        [ 0.59422872],
        [ 0.73064444],
        [ 0.43459542]]),
 array([[-0.1001953 ],
        [-0.05368159],
        [-0.13112429],
        [-0.29577643],
        [ 0.11312037],
        [-0.16495259],
        [-0.09471503],
        [-0.31290631],
        [-0.02202011],
        [-0.04184033],
        [ 0.33743156],
        [ 0.0953935 ],
        [ 0.37693405],
        [-0.1261617 ],
        [-0.31001282],
        [-0.09407441],
        [-0.01603207],
        [ 0.03523483],
        [-0.28481946],
        [ 0.28543564],
        [ 0.03463661],
        [ 0.09321152],
        [ 0.53869045],
        [ 0.54057898],
        [ 0.19591136],
        [ 0.106086