In [1]:
from google.colab import drive
drive.mount('/content/drive/')

import sys

# Project root on the mounted Drive. Other cells build paths by plain string
# concatenation (CUR_PATH + 'SentEval/', CUR_PATH + 'glove...'), so the
# trailing slash must stay.
CUR_PATH = '/content/drive/My Drive/journal_club/randsent/'

# Make the project-local modules (utils, models) importable. The guard keeps
# re-runs of this cell from appending the same entry again and again.
if CUR_PATH not in sys.path:
    sys.path.append(CUR_PATH)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
import torch
import torch.nn as nn
import numpy as np

import utils
from models import RandLSTM
import os

In [0]:
# The SentEval checkout is expected under the project root; put it at the
# front of the import path. The guard keeps re-runs of this cell from stacking
# duplicate entries in sys.path.
SENTEVAL_PATH = CUR_PATH + 'SentEval/'
if SENTEVAL_PATH not in sys.path:
    sys.path.insert(0, SENTEVAL_PATH)
import senteval

In [0]:
def prepare(params, samples):
    """Build the task vocabulary and attach the lookups to ``params``.

    Args:
        params: Parameter object supporting attribute assignment; it gains
            ``word2id`` (token -> integer id) and ``lut`` (the value returned
            by ``utils.load_vecs``).
        samples: Iterable of tokenised sentences, each an iterable of
            hashable tokens.

    Returns:
        The same ``params`` object, after the two attributes were set.
    """
    pad_token = '<p>'

    # dict.fromkeys de-duplicates while keeping first-seen order (guaranteed
    # on Python 3.7+), so ids are reproducible from one kernel to the next.
    # Iterating a set of strings gives an order that can change per process.
    vocab = dict.fromkeys(tok for sent in samples for tok in sent)

    # If the data itself contains a literal '<p>', drop it here so the pad
    # token keeps id 0 and the ids stay contiguous.
    vocab.pop(pad_token, None)

    word2id = {tok: idx for idx, tok in enumerate([pad_token] + list(vocab))}
    params.word2id = word2id
    params.lut = utils.load_vecs(params, word2id)
    return params


def batcher(params, batch):
    """Encode one batch of tokenised sentences with the network in ``params``.

    Args:
        params: Mapping that holds the encoder under the ``'network'`` key;
            the same object is forwarded to ``network.encode``.
        batch: Sequence of token sequences. Empty sentences are encoded as
            the single pad token ``['<p>']``.

    Returns:
        Whatever ``network.encode`` returns for the padded batch.
    """
    network = params['network']

    # Build a fresh list rather than assigning into ``batch``: the caller's
    # data is left untouched and immutable sequences (tuples) are accepted.
    padded = [sent if len(sent) > 0 else ['<p>'] for sent in batch]

    # Encoding only; no gradients are needed.
    with torch.no_grad():
        return network.encode(padded, params)


In [0]:
class Params():
    """Hyper-parameters for the random encoder and the SentEval run.

    Each attribute has the default set below. Any of them can be overridden
    with a keyword argument, e.g. ``Params(bidirectional=False)``;
    ``Params()`` with no arguments gives the defaults.

    Raises:
        TypeError: if an override names a field that does not exist.
    """

    def __init__(self, **overrides):
        # Passed to SentEval as 'kfold' and 'batch_size' when the engine is built.
        self.n_folds = 5
        self.se_batch_size = 64

        # Read by code outside this notebook -- presumably selects CUDA; TODO confirm.
        self.gpu = True

        # Word embeddings.
        self.word_emb_file = CUR_PATH + 'glove.840B.300d.txt'
        self.word_emb_dim = 300

        # Encoder settings, consumed by code outside this notebook
        # (presumably models.RandLSTM / utils -- TODO confirm).
        self.input_dim = 300
        self.output_dim = 2048
        self.max_seq_len = 96
        self.bidirectional = True
        self.init = 'uniform'
        self.activation = None
        self.pooling = 'mean'
        self.num_layers = 1

        # Reject misspelled field names instead of silently adding attributes.
        unknown = sorted(set(overrides) - set(vars(self)) - {'senteval_feat_dim'})
        if unknown:
            raise TypeError('unknown Params field(s): ' + ', '.join(unknown))
        vars(self).update(overrides)

        # Derived after the overrides so it follows output_dim / bidirectional:
        # a bidirectional encoder doubles the feature size.
        if 'senteval_feat_dim' not in overrides:
            directions = 2 if self.bidirectional else 1
            self.senteval_feat_dim = directions * self.output_dim


params = Params()

In [0]:
# Seed the global RNGs before building the encoder so that Restart & Run All
# reproduces the same weights. Presumably RandLSTM draws its initial weights
# from the torch and/or numpy RNG -- TODO confirm against models.py.
# 111 matches the 'seed' value passed to SentEval.
ENCODER_SEED = 111
torch.manual_seed(ENCODER_SEED)
np.random.seed(ENCODER_SEED)

network = RandLSTM(params)

In [0]:
# Settings handed to the SentEval engine.
senteval_config = {
    # CUR_PATH already ends in '/', so join the components rather than
    # concatenating '/SentEval/' (which produced '...randsent//SentEval/data').
    'task_path': os.path.join(CUR_PATH, 'SentEval', 'data'),
    'word_emb_file': params.word_emb_file,
    'word_emb_dim': params.word_emb_dim,
    'usepytorch': True,
    'kfold': params.n_folds,
    'feat_dim': params.senteval_feat_dim,
    'seed': 111,
    'batch_size': params.se_batch_size,
    # batcher() looks the encoder up under this key in the params it receives.
    'network': network,
    # Per the SentEval README, nhid=0 selects a classifier with no hidden layer.
    'classifier': {'nhid': 0},
}
se = senteval.engine.SE(senteval_config, batcher, prepare)

In [13]:
%%time

import warnings

# Silence warnings only while the evaluation runs. A bare
# warnings.filterwarnings('ignore') would keep hiding every warning for the
# rest of the kernel session.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    results = se.eval(['MR'])

CPU times: user 6min 24s, sys: 34.6 s, total: 6min 59s
Wall time: 7min 37s


In [14]:
mr_scores = results['MR']
print('MR results:')
# The value printed is 'devacc', so label it as dev accuracy, not train.
print('Accuracy dev: {}  Accuracy test: {}'.format(mr_scores['devacc'], mr_scores['acc']))

MR results:
Accuracy train: 76.57  Accuracy test: 75.88
