In [1]:
from theano import *
from lasagne.layers import EmbeddingLayer, InputLayer, get_output
import lasagne
import lasagne.layers
import theano.tensor as T
import theano
import numpy as np

Couldn't import dot_parser, loading of dot files will not be possible.


In [2]:
from wordvecs import WordVectors

wordvectors = WordVectors(fname="../enwiki-20141208-pages-articles-multistream-links-output4.bin", negvectors=True)

from sentiment_sents import Sentiment

# just load the sentences from the CNN system
sentiment = Sentiment("prevwork/CNN_sentence/")

In [3]:
len(wordvectors.vectors)

6091842

In [98]:
class SentimentExp(object):
    
    def __init__(self, train_X, train_Y, wordvecs=wordvectors):
        self.train_X = train_X
        self.train_Y = train_Y
        self.wordvecs = wordvecs
        
        self.input_size = 10
        self.batch_size = 10
        
        self.learning_rate = .01
        self.momentum = .9
        
        self.train_X_rep = np.array([[self.getRep(x)] for x in self.train_X])
        
        self._setup()
        
    def getRep(self, sent):
        ret = []
        for i in xrange(self.input_size):
            if i < len(sent):
                ret.append(self.wordvecs[sent[i]])
            else:
                ret.append(np.zeros(self.wordvecs.vector_size))
        return np.matrix(ret).reshape((1, self.input_size, self.wordvecs.vector_size))

    def _setup(self):
        self.x_batch = T.tensor4('x')
        self.y_batch = T.ivector('y')
        
        self.input_l = lasagne.layers.InputLayer((self.batch_size, 1, self.input_size, self.wordvecs.vector_size))
        
        self.first_l = lasagne.layers.Conv2DLayer(
            self.input_l,
            num_filters=100,
            filter_size=(2, 100),
            name='conv1',
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.hidden1_l = lasagne.layers.DenseLayer(
            self.first_l,
            num_units=20,
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.out_l = lasagne.layers.DenseLayer(
            self.hidden1_l,
            num_units=1,
            nonlinearity=lasagne.nonlinearities.tanh,
        )
        
        self.output = lasagne.layers.get_output(self.out_l, self.x_batch)
        
        self.loss_vec_old = (self.output.reshape((self.output.size,)) - self.y_batch) ** 2
        self.loss_vec = lasagne.objectives.binary_crossentropy(T.clip(self.output.reshape((self.output.size,)), .01, .99), self.y_batch)
        
        self.all_params = lasagne.layers.get_all_params(self.out_l)
                
        self.updates_adagrad = lasagne.updates.sgd(self.loss_vec.mean(), self.all_params, .01)
        self.updates = lasagne.updates.apply_momentum(self.updates_adagrad)
        
        self.train_func = theano.function(
            [self.x_batch, self.y_batch],
            [self.loss_vec.mean(), self.loss_vec],
            updates=self.updates,
        )
        
        self.loss_func = theano.function(
            [self.x_batch, self.y_batch],
            [self.loss_vec.sum(), self.loss_vec],
        )
        
    def train(self):
        for s in xrange(0, len(self.train_X_rep), self.batch_size):
            X_vals = np.array(self.train_X_rep[s:(s + self.batch_size)])
            y_vals = np.array(self.train_Y[s:(s + self.batch_size)]).astype('int32')
            loss, _ = self.train_func(X_vals, y_vals)
            
    def test_loss(self, test_X, test_Y):
        test_X_rep = np.array([[self.getRep(x)] for x in test_X])
        loss_sum = 0.0
        for s in xrange(0, len(test_X_rep), self.batch_size):
            X_vals = np.array(self.train_X_rep[s:(s + self.batch_size)])
            y_vals = np.array(self.train_Y[s:(s + self.batch_size)]).astype('int32')
            loss, _ = self.loss_func(X_vals, y_vals)
            loss_sum += loss
        return loss_sum / len(test_X_rep)
    
experiment = SentimentExp(sentiment.train_X, sentiment.train_Y)

In [105]:
experiment.test_loss(sentiment.test_X, sentiment.test_Y)

2.2957588380951175

In [103]:
for i in xrange(100): 
    experiment.train()

In [104]:
experiment.test_loss(sentiment.train_X, sentiment.train_Y)

2.2957588380951175

In [116]:
test_func = theano.function(
    [experiment.x_batch, experiment.y_batch],
    [experiment.loss_vec.mean(), experiment.loss_vec, experiment.output, 
     T.grad(experiment.loss_vec.mean(), experiment.out_l.get_params()[0]),
     experiment.out_l.get_params()[0], experiment.y_batch, 
     lasagne.layers.get_output(experiment.first_l, experiment.x_batch)
    ]
)

In [120]:
test_vals = test_func(np.array(experiment.train_X_rep[0:10]),np.array(experiment.train_Y[0:10]).astype('int32'))

In [127]:
test_vals[-1].mean(axis=2).shape

(10, 100, 1)

In [108]:
experiment.first_l.get_params()[0].get_value()

array([[[[-0.01349302, -0.00608859,  0.0208124 , ..., -0.02355321,
           0.02836251, -0.03281177],
         [-0.06802425,  0.01071418, -0.01613487, ...,  0.07777614,
          -0.00097107, -0.07921554]]],


       [[[ 0.00791473,  0.01187771,  0.00932595, ...,  0.00439272,
          -0.01906564, -0.0132316 ],
         [ 0.0147234 ,  0.00565511, -0.00957525, ..., -0.0163597 ,
           0.02380438, -0.00538994]]],


       [[[-0.00546171, -0.01469458,  0.03451102, ..., -0.0143753 ,
           0.0149421 ,  0.03860468],
         [-0.01975556, -0.00938707, -0.01511112, ...,  0.01245079,
          -0.00920737,  0.00821898]]],


       ..., 
       [[[-0.02139801,  0.02059551, -0.03883676, ...,  0.03206572,
           0.04447567,  0.00384297],
         [ 0.00414955, -0.0157647 , -0.00565899, ...,  0.00136444,
          -0.04871137, -0.02776733]]],


       [[[-0.02685343, -0.00425098,  0.05734088, ..., -0.02492608,
          -0.06397128, -0.05122684],
         [ 0.004655  , -0.02941601,

In [113]:
np.array(experiment.train_Y).sum() / float(len(experiment.train_Y))

0.50257913247362251

In [119]:
Out[117]

TypeError: 'type' object has no attribute '__getitem__'