In [70]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import gzip
import numpy as np
from random import randint
import torch.utils.data as data_utils
from torch.autograd import Variable
from sklearn.metrics.pairwise import cosine_similarity
from numpy import linalg as LA
import torch.nn.utils.rnn as rnn_utils


torch.manual_seed(1)

<torch._C.Generator at 0x104a525e8>

In [75]:
#-------------------------------------NET GLOBAL VARIABLES-------------------------------------#
#-------------------------------------GLOBAL VARIABLES-------------------------------------#
NUM_TRAINING_EXAMPLES = 1000 #FOR DATA BATCHER, WHEN DEPLOYED SHOULD BE ALL TRAINING EXAMPLES
PARAMETER_MAX_TITLE_LENGTH = 38
NUM_FEATURES_PER_WORD = 200 #DO NOT CHANGE. FIXED at 200
MAX_TITLE_LENGTH, MAX_BODY_LENGTH = find_maximum_title_and_body_length(lookup)
KERNEL_SIZE = 3 #MAKE SURE THIS NUMBER IS ODD SO THAT THE PADDING MAKES SENSE
PADDING = (KERNEL_SIZE - 1) / 2
INPUT_SIZE = 200
HIDDEN_SIZE = 100
LEARNING_RATE = 1e-1
MARGIN = 1.0
NUM_EPOCHS = 25
BATCH_SIZE = 50
NUM_BATCHES = NUM_TRAINING_EXAMPLES/BATCH_SIZE

In [177]:
#-------------------------------------HELPER FUNCTIONS-------------------------------------#
NUM_NEGATIVE_SAMPLES=20

def N_random_values_in_list(full_list, N):
    x=0
    lower_bound  = 0
    upper_bound = len(full_list)-1
    sample_list=[]
    random_nums=[]
    while x < min(N,len(full_list)):
        random_num = randint(lower_bound, upper_bound) # inclusive range
        if random_num in random_nums:
            continue
        else:
            random_nums.append(random_num)
            x += 1
    return [full_list[i] for i in random_nums]

def convert_to_list(filename):
    if filename.endswith('gz'):
        with gzip.open(filename,'r')as f:
            text_tokens = f.readlines()
    else:
        with open(filename, 'r') as f:
            text_tokens = f.readlines()
    text_tokens = [token.replace('\n','').split('\t') for token in text_tokens]
    text_tokens = [[token[0], token[1].split(' '), token[2].split(' ')] for token in text_tokens]
                   
    return text_tokens

#Sample:question_id, similar_question_id, negative_question_id
def convert_to_samples(filename):
    my_list=convert_to_list(filename)
    new_samples=[]
    for original_sample in my_list:
        for similar in original_sample[1]:
            random_negative_samples = N_random_values_in_list(original_sample[2],NUM_NEGATIVE_SAMPLES)
            new_samples.append([original_sample[0], similar, random_negative_samples])# change this to include all negative 
                                                                                     # examples later
    return new_samples
def make_lookup_table_for_training_data(filename):
    lookup={}
    text_token_list=convert_to_list(filename)
    for token in text_token_list:
        lookup[token[0]] = {'title':token[1],'question':token[2]}
    return lookup
        
#takes  sample_ids of [[q1,p1,n1],[q2,p2,n2]....]
#outputs titles like [[q1_title, p1_title, n1_title],[q2_title,p2_title,n2_title]...]
def convert_sampleids_to_titles(sample_ids,lookup):
    #each sample_id [question_id, pos_id, [neg_ids]]
    #print type(sample_ids)==list, "first"
   
    titles = []
    for sample_id in sample_ids:
        
         #flatten list: [question_id, pos_id, [neg_ids]] --> [question_id, pos_id, neg_id1, neg_id2, ...]
        sample_id= sample_id[:2]+sample_id[2][:]
        #sample_id : question_id, similar_question_id, negative_question_id
        try:
            titles.append([lookup[str(identity)]['title'] for identity in sample_id])
           # print type(sample_id)==list
        except:
            print sample_id, "is sample id", type(sample_id)==list
    return titles
def remove_non_ascii(text):
    return ''.join([i if ord(i) < 128 else '' for i in text])

def extract_features(word):
    try:
        word=remove_non_ascii(word)
        word=word.encode('utf-8')
    except:
        print(word)
    return word_to_vec.get(word,None)

def find_maximum_title_and_body_length(lookup_table):
    max_len_title = -1
    max_len_question = -1
    max_len_question_id = 0
    for key, dict_val in lookup_table.iteritems():
        len_title = len(dict_val['title'])
        len_question = len(dict_val['question'])
        if len_title > max_len_title:
             max_len_title = len_title
        if len_question > max_len_question:
            max_len_question = len_question
            max_len_question_id = key
    return max_len_title, max_len_question

def title_to_feature_matrix(title_word_list):
    feature_matrix = []
    total_count_of_embeds=0
    for idx, word in enumerate(title_word_list):
        if idx == PARAMETER_MAX_TITLE_LENGTH:
            break
        else:
            word_features = extract_features(word)
            if word_features is not None:
                feature_matrix.append(word_features)
                total_count_of_embeds+=1
        
    #Pad the feature with zeros to ensure all inputs to the net have the same dimension
    feature_matrix += [[0] * NUM_FEATURES_PER_WORD] * (PARAMETER_MAX_TITLE_LENGTH - total_count_of_embeds)
    print np.array(feature_matrix).T.shape
    
    return np.array(feature_matrix).T



#array is structured like a batch of features 50x200x38
def find_start_of_padding_for_batch(batch):
    vec_lengths_in_batch = []
    for batch_num in range(0, len(batch)):
        single_vec = batch[batch_num]
        length = find_start_of_padding_single_vec(single_vec) + 1
        vec_lengths_in_batch.append(length)
    return vec_lengths_in_batch

#batch = 200x38
def find_start_of_padding_single_vec(single_vec):
    for idx in range(len(single_vec[0])-1, -1, -1):
        if single_vec[0][idx] != 0.:
            return idx
    #if the whole sequence is 0s
    return 0
def create_mask(word_length):
    return np.array([[1. / word_length] * HIDDEN_SIZE] * word_length + [[0] * HIDDEN_SIZE] * (MAX_TITLE_LENGTH - word_length)).T

In [81]:
#-------------------------------------LOAD DATA-------------------------------------#

word_embeddings = 'askubuntu/vector/vectors_pruned.200.txt.gz'
f = gzip.open(word_embeddings, 'r')
wv_text = [ ]
lines = f.readlines()
for line in lines:
    wv_text.append(line.strip())

word_to_vec = {}

for line in wv_text:
    parts = line.split()
    word = parts[0]
    vector = np.array([float(v) for v in parts[1:]])
    word_to_vec[word] = vector
f.close()

In [82]:


#text_tokenized.txt.gz has id \t title \t question body
text_tokenized='askubuntu/text_tokenized.txt.gz'

#train_random.txt
#(1) the query question ID, (2) the list of similar question IDs, and (3) the list of randomly selected question IDs.
train_random_filename='askubuntu/train_random.txt'

#Each line contains (1) the query question ID, (2) the list of similar question IDs, (3) the list of 20 candidate question IDs and (4) the associated BM25 scores of these questions computed by the Lucene search engine. The second field (the set of similar questions) is a subset of the third field.
dev_filename='askubuntu/dev.txt'
test_filename='askubuntu/test.txt'

train_samples = convert_to_samples(train_random_filename)
dev_samples = convert_to_samples(dev_filename)
test_samples = convert_to_samples(test_filename)

lookup = make_lookup_table_for_training_data(text_tokenized)
train_titles_only = convert_sampleids_to_titles(train_samples, lookup)

In [86]:
#-------------------------------------CREATE DATA BATCHER-------------------------------------#
#for each tuple of titles make a feature vector that is num_titles x 200 x 38
# where num_titles = 1 (target) + 1 (positive) + n (negative) 
features = []
for i in range(len(train_titles_only[:NUM_TRAINING_EXAMPLES])):   # we should include all but this is just for simplicity 
    if i%1000 == 0:
        print i

    sample = train_titles_only[i]
    
    target_title = sample[0]
    positive_title = sample[1]
    negative_titles = sample[2:]
    
#     print "\n"
#     print "Target title: {}".format(" ".join(target_title))
#     print "Positive title: {}".format(" ".join(positive_title))
#     for negative in negative_titles:
#         print "Negative title: {}".format(" ".join(negative))
#     print "\n"
#     a
#     if i > 10:
#         a
#     print "target_title: {}".format(target_title)
#     print "positive title: {}".format(positive_title)
#     print "negative titles: {}".format(negative_titles)
    
    target_features = title_to_feature_matrix(target_title)
    positive_features = title_to_feature_matrix(positive_title)
    n_negative_features = [title_to_feature_matrix(negative_title) for negative_title in negative_titles]
    
#     print "Target features shape: {}".format(target_features.shape)
#     print "Positive features shape: {}".format(positive_features.shape)
#     print "Negative features[0] shape: {}".format(n_negative_features[0].shape)
#     print "Num negative features: {}".format(len(n_negative_features)) 
    all_features = [target_features, positive_features] + n_negative_features
    features.append(all_features)


print "Done Loop"
features = np.array(features) 
targets = torch.LongTensor(len(features), 1).zero_()
training_dataset = data_utils.TensorDataset(torch.FloatTensor(features), targets)
train_loader = data_utils.DataLoader(training_dataset, batch_size = BATCH_SIZE, shuffle = True)
print "Succesfully made the data batcher"

0
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38

(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)
(200, 38)


In [60]:
lstm = nn.LSTM(3,3)
inputs = [autograd.Variable(torch.randn(1, 3))
          for _ in range(5)]  # make a sequence of length 5
hidden = (
          autograd.Variable(torch.randn(1, 1, 3)),
          autograd.Variable(torch.randn(1, 1, 3))
         )
for i in inputs:
    out, hidden = lstm(i.view(1, 1, -1), hidden)


In [20]:
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

In [44]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim):
        super(LSTM, self).__init__()
        self.hidden_dim=hidden_dim
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (autograd.Variable(torch.zeros(BATCH_SIZE, 1, self.hidden_dim)),
                autograd.Variable(torch.zeros(BATCH_SIZE, 1, self.hidden_dim)))

    def forward(self, sequence):
        lstm_out, self.hidden = self.lstm(sequence, self.hidden)
        return self.hidden


In [None]:
1+2

In [None]:
EMBEDDING_DIM, HIDDEN_DIM = 200, HIDDEN_SIZE
#lstm = LSTM(EMBEDDING_DIM, HIDDEN_DIM)
lstm = nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM)

criterion = nn.MultiMarginLoss(p=1, margin=MARGIN, weight = None, size_average=True) #HAHA just put these in to look smart 
optimizer = torch.optim.Adam(lstm.parameters(), lr = LEARNING_RATE)

cos = nn.CosineSimilarity(dim=1, eps=1e-6)

#h_0 (num_layers * num_directions, batch, hidden_size): 
#tensor containing the initial hidden state for each element in the batch.
h0=autograd.Variable(torch.zeros(1, BATCH_SIZE,HIDDEN_DIM))

#c_0 (num_layers * num_directions, batch, hidden_size): 
#tensor containing the initial cell state for each element in the batch.
c0=autograd.Variable(torch.zeros(1,BATCH_SIZE,HIDDEN_DIM))

for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    for idx,(sample, label) in enumerate(train_loader):
        #Sample shape: [50, 22, 200, 38]
        #50 - Batch size, 22 - Num questions per data point
        
        #CNN : BATCH X WORD EMBEDDINGS X WORD
        #LSTM : WORD X BATCH X WORD EMBEDDING
        
        sample = Variable(sample, requires_grad = True)
        label = Variable(label, requires_grad = True)

        #RE-ORDER DIMENSIONS OF THE SAMPLE
        sample = sample.permute(1, 0, 2, 3)

        batch_cos_similarities = []
        batch_num = idx
        
        target_question_features = sample[0] # 50 x 200 x 38
        positive_question_features = sample[1] # 50 x 200 x 38
        N_negative_question_features = sample[2:] #20 x 50 x 200 x 38
        
        #Determine lengths to know how many vectors to take the average across.
        target_question_lengths = find_start_of_padding_for_batch(target_question_features.data)
        positive_question_lengths = find_start_of_padding_for_batch(positive_question_features.data)
        N_negative_questions_lengths = [find_start_of_padding_for_batch(negative.data) for negative in N_negative_question_features]
        
        target_questions_masks = [create_mask(_) for _ in target_question_lengths] #DIM = 50 x 100 x 38
        positive_questions_masks = [create_mask(_) for _ in positive_question_lengths] #DIM = 50 x 100 x 38
        N_negative_questions_masks = [[create_mask(length) for length in length_list] #DIM = 50 x 20 x 100 x 38
                                      for length_list in N_negative_questions_lengths]
        
        target_question_features=target_question_features.permute(2,0,1)
        positive_question_features=positive_question_features.permute(2,0,1)
        N_negative_question_features=N_negative_question_features.permute(0,3,1,2)
        

        # outs are #38x50x200 out, hiddens are 1 x 50 x 200
        # each of the 38 outs are the hidden state at time step t (what we want for mean pooling)
        # hiddens is just the last hidden state (don't really need)
        target_question_lstm_outs, target_question_lstm_hiddens = lstm(target_question_features,(h0,c0)) 
        positive_question_lstm_outs, positive_question_lstm_hiddens = lstm(positive_question_features,(h0,c0))
       # 
        N_negative_question_lstm_output_tuple_list = [lstm(negative,(h0,c0)) for negative in N_negative_question_features]
        N_negative_question_lstm_outs = [negative[0] for negative in N_negative_question_lstm_output_tuple_list]
        N_negative_question_lstm_hiddens = [negative[1] for negative in N_negative_question_lstm_output_tuple_list]
        
        
        #do permutations to make outputs 50 x 200 x 38
        target_question_lstm_outs=target_question_lstm_outs.permute(1,2,0)
        positive_question_lstm_outs=positive_question_lstm_outs.permute(1,2,0)
        N_negative_question_lstm_outs=[negative.permute(1,2,0) for negative in N_negative_question_lstm_outs]
        
        #apply mask
        target_question_net_output_masked = target_question_lstm_outs * Variable(torch.FloatTensor(target_questions_masks))
        positive_question_net_output_masked = positive_question_lstm_outs * Variable(torch.FloatTensor(positive_questions_masks))
        N_negative_questions_net_output_masked = [N_negative_question_lstm_outs[idx] * 
                                                  Variable(torch.FloatTensor(N_negative_questions_masks[idx]))
                                                  for idx in range(NUM_NEGATIVE_SAMPLES)]

        #AVG OVER WORDS
        target_question_net_output_masked_avged = torch.mean(target_question_net_output_masked, dim = 2) #DIM = 50 x 100
        positive_question_net_output_masked_avged = torch.mean(positive_question_net_output_masked, dim = 2) #DIM = 50 x 100
        N_negative_questions_net_output_masked_avged = [torch.mean(
                                                        N_negative_questions_net_output_masked[idx], dim = 2
                                                        )for idx in range(NUM_NEGATIVE_SAMPLES)] #DIM = 20 x 50 x 100
        
        cosine_similarity_pos = cos(target_question_net_output_masked_avged, positive_question_net_output_masked_avged)
        # ^ DIM = 50
        cosine_similarities_neg = [cos(target_question_net_output_masked_avged, N_negative_questions_net_output_masked_avged[idx]) for idx in range(NUM_NEGATIVE_SAMPLES)]
        
        # ^ DIM = 20 x 50
        

        cosine_similarities = torch.stack([cosine_similarity_pos] + cosine_similarities_neg) # DIM = 21 x 50

        optimizer.zero_grad()
        
        cosine_similarities = torch.t(cosine_similarities)
        label = torch.squeeze(label)
        loss = criterion(cosine_similarities, label)
        
        loss.backward()
            
        running_loss += loss.data[0]
            
        optimizer.step()
        if batch_num % 10 == 0:
            print "Epoch: {}, Batch: {}, Loss: {}".format(epoch, batch_num, loss.data[0])


    print "Loss after epoch " + str(epoch) + " :" + str(running_loss)
    


Epoch: 0, Batch: 0, Loss: 0.939241051674
Epoch: 0, Batch: 10, Loss: 0.856285095215
Loss after epoch 0 :17.3179203868
Epoch: 1, Batch: 0, Loss: 0.905528724194
Epoch: 1, Batch: 10, Loss: 0.695745944977
Loss after epoch 1 :15.4387479424
Epoch: 2, Batch: 0, Loss: 0.766999483109
Epoch: 2, Batch: 10, Loss: 0.748850703239
Loss after epoch 2 :14.1476266384
Epoch: 3, Batch: 0, Loss: 0.69806021452
Epoch: 3, Batch: 10, Loss: 0.729263603687
Loss after epoch 3 :13.8972815275
Epoch: 4, Batch: 0, Loss: 0.707460463047
Epoch: 4, Batch: 10, Loss: 0.717261254787
Loss after epoch 4 :13.7628142834


In [188]:
target_question_net_output_masked_avged

Variable containing:
1.00000e-03 *
 0.4680 -1.6092  0.8009  ...  -0.5326  0.4894  1.7314
 0.6429 -1.5708  0.6039  ...  -0.5645  0.7465  1.3764
 0.1967 -1.1479  0.7947  ...  -0.7216  0.8763  1.7626
          ...             ⋱             ...          
-0.2403 -0.6516  1.0618  ...  -0.0359  1.0470  1.3206
 0.0187 -0.9134  0.9245  ...  -0.2632  1.3120  1.1917
 0.4201 -0.7576  0.8120  ...  -0.5227  0.8460  1.7238
[torch.FloatTensor of size 50x100]

In [185]:
N_negative_questions_net_output_masked_avged[idx]

Variable containing:
1.00000e-04 *
 0.0218 -1.3793  1.0478  ...   0.6769 -1.1947  1.1406
 0.1382  1.4136 -3.0424  ...  -4.3693  2.5706  1.6472
 0.1578 -1.5553 -1.7582  ...   1.7066 -2.3264  3.8834
          ...             ⋱             ...          
 1.2092  0.7532 -1.5195  ...  -0.0819 -1.5802  2.1737
-1.5051 -2.9445  3.2706  ...  -0.7096  4.1243  1.9916
-2.3294  2.1728 -0.1822  ...   0.7777 -0.1049  1.0297
[torch.FloatTensor of size 38x50]

In [93]:
def toy_batch(seed=11, shape=(25, 1000, 123), classes=10):
    batch_size, max_len, features = shape
    np.random.seed(seed)

    # Samples
    bX = np.float32(np.random.uniform(-1, 1, (shape)))
    b_lenX = np.int32(np.linspace(max_len, max_len, batch_size))
    # print('::: Lengths of samples in batch: {}'.format(b_lenX))

    # Targets
    bY = np.int32(np.random.randint(low=0, high=classes - 1, size=batch_size))

    return bX, b_lenX, bY, classes


In [97]:
bX, b_lenX, bY, classes = toy_batch()
batch_size, max_len, features = bX.shape

# PyTorch compatibility: time first, batch second
bX = np.transpose(bX, (1, 0, 2))

# Create symbolic vars
bX = Variable(torch.from_numpy(bX))
bX = rnn_utils.pack_padded_sequence(bX, b_lenX[::-1])  # Pack those sequences for masking, plz


In [144]:
1+2

3