In [1]:
from theano import *
from lasagne.layers import EmbeddingLayer, InputLayer, get_output
import lasagne
import lasagne.layers
import theano.tensor as T
import theano
import numpy as np

In [2]:
from wordvecs import WordVectors

wordvectors = WordVectors(fname="/data/matthew/GoogleNews-vectors-negative300.bin", negvectors=False)

from sentiment_sents import Sentiment

# just load the sentences from the CNN system
sentiment = Sentiment("prevwork/CNN_sentence/")

In [3]:
len(wordvectors.vectors)

3000000

In [4]:
class SentimentExp(object):
    
    def __init__(self, train_X, train_Y, wordvecs=wordvectors):
        self.train_X = train_X
        self.train_Y = train_Y
        self.wordvecs = wordvecs
        
        self.input_size = 10
        self.batch_size = 10
        
        self.learning_rate = .01
        self.momentum = .9
        
        self.train_X_rep = np.array([[self.getRep(x)] for x in self.train_X])
        
        self._setup()
        
    def getRep(self, sent):
        ret = []
        for i in xrange(self.input_size):
            if i < len(sent):
                ret.append(self.wordvecs[sent[i]])
            else:
                ret.append(np.zeros(self.wordvecs.vector_size))
        return np.matrix(ret).reshape((1, self.input_size, self.wordvecs.vector_size))

    def _setup(self):
        self.x_batch = T.tensor4('x')
        self.y_batch = T.ivector('y')
        
        self.input_l = lasagne.layers.InputLayer((self.batch_size, 1, self.input_size, self.wordvecs.vector_size))
        
        self.first_l = lasagne.layers.Conv2DLayer(
            self.input_l,
            num_filters=100,
            filter_size=(2, self.wordvecs.vector_size),
            name='conv1',
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.first_l_max = lasagne.layers.MaxPool2DLayer(
            self.first_l,
            pool_size=(1,9)
        )
        
        self.hidden1_l = lasagne.layers.DenseLayer(
            self.first_l_max,
            num_units=50,
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.hidden1_l_drop = lasagne.layers.DropoutLayer(
            self.hidden1_l,
            p=.25,
        )
        
        
        self.out_l = lasagne.layers.DenseLayer(
            self.hidden1_l_drop,
            num_units=1,
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.output = lasagne.layers.get_output(self.out_l, self.x_batch)
        
        self.loss_vec_old = (self.output.reshape((self.output.size,)) - self.y_batch) ** 2
        self.output_diff = T.neq((self.output.flatten() > .5),(self.y_batch > .5)).sum()
        self.loss_vec = lasagne.objectives.binary_crossentropy(T.clip(self.output.reshape((self.output.size,)), .01, .99), self.y_batch)
        
        self.all_params = lasagne.layers.get_all_params(self.out_l)
        
        self.updates = lasagne.updates.adagrad(self.loss_vec.mean(), self.all_params, .01)
        #self.updates = lasagne.updates.apply_momentum(self.updates_adagrad)
        
        self.train_func = theano.function(
            [self.x_batch, self.y_batch],
            [self.loss_vec.mean(), self.loss_vec],
            updates=self.updates,
        )
        
        self.loss_func = theano.function(
            [self.x_batch, self.y_batch],
            [self.loss_vec.sum(), self.loss_vec, self.output_diff],
        )
        
    def train(self):
        for s in xrange(0, len(self.train_X_rep), self.batch_size):
            X_vals = np.array(self.train_X_rep[s:(s + self.batch_size)])
            y_vals = np.array(self.train_Y[s:(s + self.batch_size)]).astype('int32')
            loss, _ = self.train_func(X_vals, y_vals)
            
    def test_loss(self, test_X, test_Y):
        test_X_rep = np.array([[self.getRep(x)] for x in test_X])
        loss_sum = 0.0
        wrong = 0.0
        for s in xrange(0, len(test_X_rep), self.batch_size):
            X_vals = np.array(self.train_X_rep[s:(s + self.batch_size)])
            y_vals = np.array(self.train_Y[s:(s + self.batch_size)]).astype('int32')
            loss, _, output_diff = self.loss_func(X_vals, y_vals)
            wrong += output_diff
            loss_sum += loss
        return loss_sum / len(test_X_rep), wrong / len(test_X_rep)
    
experiment = SentimentExp(sentiment.train_X, sentiment.train_Y)



In [5]:
experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(1.96447922654103, 0.49976553341148888)

In [6]:
for i in xrange(30):
    experiment.train()

In [7]:
experiment.test_loss(sentiment.train_X, sentiment.train_Y)

(0.66851129851808089, 0.4112543962485346)

In [8]:
experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(0.67122119794100388, 0.41652989449003519)

In [9]:
for a in xrange(30):
    for i in xrange(30):
        experiment.train()
    print experiment.test_loss(sentiment.train_X, sentiment.train_Y), experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(0.66388316478248988, 0.40633059788980069) (0.66474253603649258, 0.40808909730363424)
(0.66274806755047166, 0.40574443141852284) (0.66225249328675462, 0.40515826494724499)
(0.65785578142608914, 0.40082063305978899) (0.65884283285573486, 0.40175849941383351)
(0.65727619583062391, 0.39882766705744432) (0.65849965178438508, 0.40410316529894491)
(0.65296838451322214, 0.39179366940211019) (0.65437862553757198, 0.39859320046893315)
(0.65016349795351103, 0.38980070339976552) (0.65284976424894681, 0.39495896834701055)
(0.64628572937323314, 0.38604923798358731) (0.64672449388498576, 0.3853458382180539)
(0.63919562783281514, 0.37151230949589681) (0.64194379848230554, 0.38089097303634234)
(0.63297061203097715, 0.36576787807737399) (0.63310950977570113, 0.37116060961313013)
(0.6299980158280104, 0.35650644783118407) (0.62551188427002935, 0.35885111371629541)
(0.61672973409990395, 0.34947245017584994) (0.61590539488710672, 0.3459554513481829)
(0.60679452054998229, 0.34161781946072683) (0.61103623039

In [15]:
for a in xrange(30):
    for i in xrange(30):
        experiment.train()
    print experiment.test_loss(sentiment.train_X, sentiment.train_Y), experiment.test_loss(sentiment.test_X, sentiment.test_Y)

(0.38952828786908505, 0.17667057444314185) (0.39206873468067305, 0.17866354044548652)
(0.38043483175432624, 0.170926143024619) (0.38512070176808499, 0.17878077373974208)
(0.38610296200502925, 0.17678780773739741) (0.39308066984619927, 0.18030480656506448)
(0.39554778139391478, 0.17456037514654163) (0.38243363466646668, 0.17631887456037515)
(0.386200265016311, 0.18089097303634233) (0.3857410003310367, 0.18030480656506448)
(0.37595950630907993, 0.17139507620164127) (0.38190551848933874, 0.17209847596717467)
(0.38000809741969149, 0.17069167643610786) (0.38623100050767972, 0.17936694021101993)
(0.38585288481447888, 0.17139507620164127) (0.37866630664976825, 0.16682297772567409)
(0.38320463002406918, 0.17491207502930833) (0.38471597699086041, 0.17549824150058616)
(0.38115490652845974, 0.17409144196951934) (0.37929606335055688, 0.17456037514654163)
(0.37938493620545888, 0.17139507620164127) (0.37956517720338717, 0.17502930832356389)
(0.37197219163554751, 0.17116060961313012) (0.3799358858574

In [10]:
test_func = theano.function(
    [experiment.x_batch, experiment.y_batch],
    [experiment.loss_vec.mean(), experiment.loss_vec, experiment.output, 
     T.grad(experiment.loss_vec.mean(), experiment.out_l.get_params()[0]),
     experiment.out_l.get_params()[0], experiment.y_batch, 
     #lasagne.layers.get_output(experiment.first_l, experiment.x_batch)
    ]
)

In [14]:
test_func(np.array(experiment.train_X_rep[0:10]),np.array(experiment.train_Y[0:10]).astype('int32'))

[array(0.4591309667630588),
 array([ 0.07750704,  0.61353829,  0.05907035,  0.97992656,  0.34614112,
         0.01005034,  0.49995063,  0.0756924 ,  0.48670261,  1.44273033]),
 array([[ 0.92542051],
        [ 0.54143173],
        [ 0.05735955],
        [ 0.62466134],
        [ 0.70741265],
        [-0.29911896],
        [ 0.6065606 ],
        [ 0.92710133],
        [ 0.6146498 ],
        [ 0.76371825]]),
 array([[ -8.66662033e-02],
        [ -3.51529348e-01],
        [  3.11204463e-01],
        [  2.20292754e-01],
        [  1.03859721e-01],
        [  3.85837135e-01],
        [ -1.68557975e-01],
        [ -2.24012511e-01],
        [ -4.20686046e-01],
        [  5.02028522e-02],
        [  8.16099461e-02],
        [  5.05576201e-02],
        [ -2.11320017e-01],
        [ -1.07135243e-01],
        [  1.08823628e-01],
        [ -1.11018641e-01],
        [ -5.49380369e-01],
        [ -8.30568215e-02],
        [ -2.37320920e-01],
        [ -1.28241658e-02],
        [  8.92973157e-02],
    