In [1]:
import numpy as np
import lasagne
import theano
import theano.tensor as T
import random
import sys
import batch_char as batch
import time
import pickle as pkl
import io
import os
import evaluate_t2v as evaluate

from collections import OrderedDict
from t2v import tweet2vec, init_params, load_params
from settings_char import N_BATCH, MAX_LENGTH, MAX_CLASSES

# Theano setup: extra flag handed to the C++ compiler Theano invokes.
theano.config.gcc.cxxflags = "-Wno-c++11-narrowing"
# NOTE(review): `theano` is already imported at this point, and Theano normally
# reads THEANO_FLAGS when it is first imported, so this assignment probably
# does not affect the current process -- confirm, and move it above
# `import theano` if device=cpu / floatX=float32 are actually required.
os.environ['THEANO_FLAGS'] = "device=cpu,floatX=float32"

# Test file read by main(): one "<comma-separated labels>\t<tweet text>" record per line.
data_path = "/users/kumaraprasannajayaraju/Downloads/NLP_Final_Project/Method 2/data/test_DS.txt"
# Directory main() loads the model parameters and dictionaries from, and
# writes its result files into; also passed to evaluate.main().
save_path = "/users/kumaraprasannajayaraju/Downloads/NLP_Final_Project/Method 2/data"

def classify(tweet, t_mask, params, n_classes, n_chars):
    """Build the classification graph on top of the tweet2vec encoder.

    The encoder layer produced by ``tweet2vec`` feeds a softmax dense layer
    with ``n_classes`` units whose weights and bias come from
    ``params['W_cl']`` and ``params['b_cl']``.

    Returns:
        A pair ``(class_output, embedding_output)``: the Lasagne output
        expression of the softmax layer and that of the encoder layer.
    """
    # Tweet encoder
    encoder = tweet2vec(tweet, t_mask, params, n_chars)

    # Softmax classifier over the encoder output
    softmax_layer = lasagne.layers.DenseLayer(
        encoder,
        n_classes,
        W=params['W_cl'],
        b=params['b_cl'],
        nonlinearity=lasagne.nonlinearities.softmax,
    )

    class_output = lasagne.layers.get_output(softmax_layer)
    embedding_output = lasagne.layers.get_output(encoder)
    return class_output, embedding_output

def main(args):
    """Run the trained tweet2vec classifier over the test set and save the results.

    Reads the tab-separated test file at ``data_path``, loads the model
    parameters and the character/label dictionaries from ``save_path``,
    computes a label ranking and an embedding for every tweet, and writes
    four files into ``save_path``:

    * ``data-nlp-t2v-p2.pkl``        -- the tweets, in processing order
    * ``predictions-nlp-t2v-p2.npy`` -- per-tweet class indices, best first
    * ``embeddings-nlp-t2v-p2.npy``  -- per-tweet embedding vectors
    * ``targets-nlp-t2v-p2.pkl``     -- per-tweet targets as yielded by the
      batch iterator

    Args:
        args: Command-line arguments without the program name. When more
            than three are supplied, ``args[3]`` is parsed as an integer
            model number and ``model-nlp-t2v-p2_<n>.npz`` is loaded instead
            of ``best_model-nlp-t2v-p2.npz``. The first three arguments are
            not used by this function.

    Raises:
        ValueError: if ``args[3]`` is not an integer, or if a non-empty line
            of the test file contains no tab separator.
    """
    # None means "use the best model"; otherwise the numbered checkpoint.
    m_num = int(args[3]) if len(args) > 3 else None

    print("Preparing Data...")
    # Test data
    Xt = []
    yt = []
    with io.open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.rstrip('\n')
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # Split on the first tab only so a tab inside the tweet text
            # does not break the two-field unpacking.
            yc, Xc = line.split('\t', 1)
            Xt.append(Xc[:MAX_LENGTH])
            yt.append(yc.split(','))

    # Model
    print("Loading model params...")
    if m_num is not None:
        print("model")
        params = load_params('%s/model-nlp-t2v-p2_%d.npz' % (save_path, m_num))
    else:
        print("Loading best_model")
        params = load_params('%s/best_model-nlp-t2v-p2.npz' % save_path)

    print("Loading dictionaries...")
    # SECURITY NOTE: pickle.load executes arbitrary code from the file; only
    # point save_path at dictionaries produced by a trusted training run.
    with open('%s/dict-nlp-t2v-p2.pkl' % save_path, 'rb') as f:
        chardict = pkl.load(f)
    with open('%s/label_dict-nlp-t2v-p2.pkl' % save_path, 'rb') as f:
        labeldict = pkl.load(f)
    n_char = len(chardict) + 1
    n_classes = min(len(labeldict) + 1, MAX_CLASSES)

    # iterators
    test_iter = batch.BatchTweets(Xt, yt, labeldict, batch_size=N_BATCH, max_classes=MAX_CLASSES, test=True)

    print("Building network...")
    # Symbolic inputs: the tweet tensor and its mask
    tweet = T.itensor3()
    t_mask = T.fmatrix()

    # network for prediction
    predictions, embeddings = classify(tweet, t_mask, params, n_classes, n_char)

    # Theano function
    print("Compiling theano functions...")
    # One compiled function returning both outputs: the class output is
    # computed from the embedding, so compiling them separately would run
    # the encoder twice for every batch.
    predict_and_encode = theano.function([tweet, t_mask], [predictions, embeddings])

    # Test
    print("Testing...")
    out_data = []
    out_pred = []
    out_emb = []
    out_target = []
    for xr, y in test_iter:
        x, x_m = batch.prepare_data(xr, chardict, n_chars=n_char)
        p, e = predict_and_encode(x, x_m)
        # argsort is ascending; reverse each row so the best class comes first.
        ranks = np.argsort(p)[:, ::-1]

        for idx, item in enumerate(xr):
            out_data.append(item)
            out_pred.append(ranks[idx, :])
            out_emb.append(e[idx, :])
            out_target.append(y[idx])

    # Save
    print("Saving...")
    with open('%s/data-nlp-t2v-p2.pkl' % save_path, 'wb') as f:
        pkl.dump(out_data, f)
    with open('%s/predictions-nlp-t2v-p2.npy' % save_path, 'wb') as f:
        np.save(f, np.asarray(out_pred))
    with open('%s/embeddings-nlp-t2v-p2.npy' % save_path, 'wb') as f:
        np.save(f, np.asarray(out_emb))
    with open('%s/targets-nlp-t2v-p2.pkl' % save_path, 'wb') as f:
        pkl.dump(out_target, f)
        

# Script entry point: run the test-set prediction pass with the command-line
# arguments (program name stripped), then call the evaluation module.
if __name__ == '__main__':
    main(sys.argv[1:])
    # evaluate.main is given save_path, the directory main() wrote its
    # result files into.
    evaluate.main(save_path)

Preparing Data...
Loading model params...
Loading best_model
Loading dictionaries...
Building network...
Compiling theano functions...
Testing...
Saving...
Precision @ 1 = 3.590909090909091
Recall @ 10 = 0.42748917748917753
Mean rank = 26
