In [1]:
import os
import torch 
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
from packages.vocab import Vocab
from packages.batch import Batch
from models.languagemodel import RNNLM

In [2]:
# Hyper Parameters
embed_size = 128
hidden_size = 1024
num_layers = 1
num_epochs = 10
num_samples = 1000   # number of words to be sampled
batch_size = 100
seq_length = 50
learning_rate = 0.002

In [3]:
train_path = '/home/irteam/users/data/150kJavaScript/data_preprocessed/'
with open(os.path.join(train_path,'list_of_files.txt')) as f:
    file_list = f.read().split('\n')

In [4]:
batch = Batch(file_dir=train_path,file_list=file_list,batch_size=batch_size,
              in_seq=50,out_seq=50)

In [5]:
import pickle
with open('vocabs/id_counter.pckl','rb') as f:
    id_counter = pickle.load(f)
with open('vocabs/tok_counter.pckl','rb') as f:
    tok_counter = pickle.load(f)
from slimit.lexer import Lexer
lexer = Lexer()

In [6]:
word_list = [x for (x,_) in tok_counter.most_common(100)] + ['STRING'] + [x for (x,_) in id_counter.most_common(10000)]

In [7]:
vocab = Vocab(10000)

In [8]:
vocab.add_to_vocab(word_list)
# vocab.w2i = np.load('vocabs/word2idx_no_ids.txt')
# vocab.i2w = np.load('vocabs/idx2word_no_ids.txt')
# vocab.max_size = len(vocab.w2i)

Vocabulary max size reached!


In [9]:
model = torch.load('models/rnn_lm_types_2_epoch_10.pckl')
# model = RNNLM(vocab.max_size, embed_size=embed_size, hidden_size=hidden_size,
#              num_layers = num_layers)
model.cuda()

RNNLM (
  (embed): Embedding(10000, 128)
  (lstm): LSTM(128, 1024, batch_first=True)
  (linear): Linear (1024 -> 10000)
)

In [10]:
# Truncated Backpropagation 
def detach(states):
    return [state.detach() for state in states] 

In [16]:
total = 0
correct = 0
# Training
batch.next_epoch(batch_size)
batch.initialize_states(num_layers, hidden_size)
total_steps=100
step=0
while(batch.epoch_end==0):
    step+=1
    # update the minibatch inputs / outputs
    batch.get_minibatch(0)
    inputs_np = np.array([vocab.word_list_to_idx_list(line) for
                         line in batch.batch_in],dtype=int)
    targets_np = np.array([vocab.word_list_to_idx_list(line) for
                         line in batch.batch_out],dtype=int)
    inputs = Variable(torch.LongTensor(inputs_np)).cuda()
    targets = Variable(torch.LongTensor(targets_np)).cuda()

    outputs, states = model(inputs,batch.states)
    out = outputs.view(targets.size(0),targets.size(1),-1).max(2)[1]
    batch.states = detach(states)
    
    total_0 = (targets==0).data.cpu().numpy().sum()
    correct_0 = ((targets==0)*(targets==out)).data.cpu().numpy().sum()
    total += targets.size(0)*targets.size(1) - total_0
    correct += (targets==out).data.cpu().numpy().sum() - correct_0
    
    
    
    batch.next_minibatch()
    if step%100==0:
        print('Accuracy: %1.3f' % (correct/total))
#         print ('Epoch [%d/%d], Loss: %.3f, Steps: [%d/%d], Perplexity: %5.2f' %
#        (epoch+1, num_epochs, loss.data[0], step, total_steps, np.exp(loss.data[0])))
    

Accuracy: 0.702
Accuracy: 0.697
Accuracy: 0.696
Accuracy: 0.697
Accuracy: 0.699
Accuracy: 0.700
Accuracy: 0.702
Accuracy: 0.702
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.702
Accuracy: 0.701
Accuracy: 0.701
Accuracy: 0.702
Accuracy: 0.702
Accuracy: 0.702
Accuracy: 0.702
Accuracy: 0.702
Accuracy: 0.702
Accuracy: 0.702
Accuracy: 0.702
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.702
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.703
Accuracy: 0.704
Accuracy

KeyboardInterrupt: 

In [14]:
correct/total

0.70218386100673391

In [None]:
total = targets.size(0)*targets.size(1)
correct = (targets==out).data.cpu().numpy().sum()
print((correct-correct_0)/(total-total_0))

In [None]:
out = outputs.view(targets.size(0),targets.size(1),-1).max(2)[1]
for i in range(len(targets[0:30])):
    line0 = vocab.idx_list_to_word_list(inputs[i].cpu().data.numpy())
    line1 = vocab.idx_list_to_word_list(targets[i].cpu().data.numpy())
    line2 = vocab.idx_list_to_word_list(out[i].cpu().data.numpy())
    for tup in zip(line0,line1,line2):
        print('\t'.join([tup[0],tup[1],tup[2],str(tup[1]==tup[2])]))
        
#     line1 = targets[i]
#     line2 = out[i]
#     print(''.join(vocab.idx_list_to_word_list(line0[:20].cpu().data.numpy())))    
#     print(''.join(vocab.idx_list_to_word_list(line1[:20].cpu().data.numpy())))
#     print(''.join(vocab.idx_list_to_word_list(line2[:20].cpu().data.numpy())))
    print('\n\n')

In [None]:
#     with open(train_path+'list_of_files.txt') as f:
#         input_files = f.readlines()
#     i = 0
#     for file in input_files:
#         i+=1
#         with open(train_path+file.strip()) as f:
#             lines = f.readlines()
#         out_lines = []
#         for line in lines:
#             line = line.strip()
#             out_lines.append(vocab.word_list_to_idx_list(line.split())[:50])
#         batch_size = len(lines)
#         ids = torch.LongTensor(np.array(out_lines[:batch_size]))
#         states = (Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda(),
#                   Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda())

# #     for i in range(0, ids.size(1) - seq_length, seq_length):
# #         # Get batch inputs and targets
#         inputs = Variable(ids[:, :seq_length-1]).cuda()
#         targets = Variable(ids[:, 1:seq_length].contiguous()).cuda()
        
#         # Forward + Backward + Optimize
#         model.zero_grad()
#         states = detach(states)
#         outputs, states = model(inputs, states) 
#         loss = criterion(outputs, targets.view(-1))
#         loss.backward()
#         torch.nn.utils.clip_grad_norm(model.parameters(), 0.5)
#         optimizer.step()

In [None]:
states = (Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda(),
          Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda())

In [18]:
with open('/home/irteam/users/data/150kJavaScript/data/amber-smalltalk/amber/src/Kernel-Collections.js') as f:
    txt = f.read()

In [20]:
print(txt)

define("amber_core/Kernel-Collections", ["amber/boot", "amber_core/Kernel-Objects"], function($boot){
var $core=$boot.api,nil=$boot.nil,$recv=$boot.asReceiver,$globals=$boot.globals;
$core.addPackage('Kernel-Collections');
$core.packages["Kernel-Collections"].innerEval = function (expr) { return eval(expr); };
$core.packages["Kernel-Collections"].transport = {"type":"amd","amdNamespace":"amber_core"};
$core.addClass('Association', $globals.Object, ['key', 'value'], 'Kernel-Collections');
$globals.Association.comment="I represent a pair of associated objects, a key and a value. My instances can serve as entries in a dictionary.\x0a\x0aInstances can be created with the class-side method `#key:value:`";
$core.addMethod(
$core.method({
selector: "=",
protocol: 'comparing',
fn: function (anAssociation){
var self=this;
return $core.withContext(function($ctx1) {
var $3,$2,$5,$4,$6,$1;
$3=self._class();
$ctx1.sendIdx["class"]=1;
$2=$recv($3).__eq($recv(anAssociation)._class());
$ctx1.sendIdx["

In [22]:
lexer.input(txt)

In [23]:
for token in lexer:
    print(token)

LexToken(ID,'define',1,0)
LexToken(LPAREN,'(',1,6)
LexToken(STRING,'"amber_core/Kernel-Collections"',1,7)
LexToken(COMMA,',',1,38)
LexToken(LBRACKET,'[',1,40)
LexToken(STRING,'"amber/boot"',1,41)
LexToken(COMMA,',',1,53)
LexToken(STRING,'"amber_core/Kernel-Objects"',1,55)
LexToken(RBRACKET,']',1,82)
LexToken(COMMA,',',1,83)
LexToken(FUNCTION,'function',1,85)
LexToken(LPAREN,'(',1,93)
LexToken(ID,'$boot',1,94)
LexToken(RPAREN,')',1,99)
LexToken(LBRACE,'{',1,100)
LexToken(VAR,'var',1,102)
LexToken(ID,'$core',1,106)
LexToken(EQ,'=',1,111)
LexToken(ID,'$boot',1,112)
LexToken(PERIOD,'.',1,117)
LexToken(ID,'api',1,118)
LexToken(COMMA,',',1,121)
LexToken(ID,'nil',1,122)
LexToken(EQ,'=',1,125)
LexToken(ID,'$boot',1,126)
LexToken(PERIOD,'.',1,131)
LexToken(ID,'nil',1,132)
LexToken(COMMA,',',1,135)
LexToken(ID,'$recv',1,136)
LexToken(EQ,'=',1,141)
LexToken(ID,'$boot',1,142)
LexToken(PERIOD,'.',1,147)
LexToken(ID,'asReceiver',1,148)
LexToken(COMMA,',',1,158)
LexToken(ID,'$globals',1,159)
LexToken

LexToken(ID,'$core',1,7708)
LexToken(PERIOD,'.',1,7713)
LexToken(ID,'withContext',1,7714)
LexToken(LPAREN,'(',1,7725)
LexToken(FUNCTION,'function',1,7726)
LexToken(LPAREN,'(',1,7734)
LexToken(ID,'$ctx1',1,7735)
LexToken(RPAREN,')',1,7740)
LexToken(LBRACE,'{',1,7742)
LexToken(ID,'self',1,7744)
LexToken(PERIOD,'.',1,7748)
LexToken(ID,'_subclassResponsibility',1,7749)
LexToken(LPAREN,'(',1,7772)
LexToken(RPAREN,')',1,7773)
LexToken(SEMI,';',1,7774)
LexToken(RETURN,'return',1,7776)
LexToken(ID,'self',1,7783)
LexToken(SEMI,';',1,7787)
LexToken(RBRACE,'}',1,7789)
LexToken(COMMA,',',1,7790)
LexToken(FUNCTION,'function',1,7792)
LexToken(LPAREN,'(',1,7800)
LexToken(ID,'$ctx1',1,7801)
LexToken(RPAREN,')',1,7806)
LexToken(LBRACE,'{',1,7808)
LexToken(ID,'$ctx1',1,7809)
LexToken(PERIOD,'.',1,7814)
LexToken(ID,'fill',1,7815)
LexToken(LPAREN,'(',1,7819)
LexToken(ID,'self',1,7820)
LexToken(COMMA,',',1,7824)
LexToken(STRING,'"newBucket"',1,7825)
LexToken(COMMA,',',1,7836)
LexToken(LBRACE,'{',1,7837)
Le

LexToken(ID,'each',1,15057)
LexToken(RPAREN,')',1,15061)
LexToken(LBRACE,'{',1,15062)
LexToken(RETURN,'return',1,15064)
LexToken(ID,'$core',1,15071)
LexToken(PERIOD,'.',1,15076)
LexToken(ID,'withContext',1,15077)
LexToken(LPAREN,'(',1,15088)
LexToken(FUNCTION,'function',1,15089)
LexToken(LPAREN,'(',1,15097)
LexToken(ID,'$ctx2',1,15098)
LexToken(RPAREN,')',1,15103)
LexToken(LBRACE,'{',1,15105)
LexToken(RETURN,'return',1,15107)
LexToken(ID,'$recv',1,15114)
LexToken(LPAREN,'(',1,15119)
LexToken(ID,'each',1,15120)
LexToken(RPAREN,')',1,15124)
LexToken(PERIOD,'.',1,15125)
LexToken(ID,'_asJSON',1,15126)
LexToken(LPAREN,'(',1,15133)
LexToken(RPAREN,')',1,15134)
LexToken(SEMI,';',1,15135)
LexToken(RBRACE,'}',1,15137)
LexToken(COMMA,',',1,15138)
LexToken(FUNCTION,'function',1,15140)
LexToken(LPAREN,'(',1,15148)
LexToken(ID,'$ctx2',1,15149)
LexToken(RPAREN,')',1,15154)
LexToken(LBRACE,'{',1,15156)
LexToken(ID,'$ctx2',1,15157)
LexToken(PERIOD,'.',1,15162)
LexToken(ID,'fillBlock',1,15163)
LexToken

LexToken(ID,'$1',1,22673)
LexToken(SEMI,';',1,22675)
LexToken(RBRACE,'}',1,22677)
LexToken(COMMA,',',1,22678)
LexToken(FUNCTION,'function',1,22680)
LexToken(LPAREN,'(',1,22688)
LexToken(ID,'$ctx1',1,22689)
LexToken(RPAREN,')',1,22694)
LexToken(LBRACE,'{',1,22696)
LexToken(ID,'$ctx1',1,22697)
LexToken(PERIOD,'.',1,22702)
LexToken(ID,'fill',1,22703)
LexToken(LPAREN,'(',1,22707)
LexToken(ID,'self',1,22708)
LexToken(COMMA,',',1,22712)
LexToken(STRING,'"ifEmpty:"',1,22713)
LexToken(COMMA,',',1,22723)
LexToken(LBRACE,'{',1,22724)
LexToken(ID,'aBlock',1,22725)
LexToken(COLON,':',1,22731)
LexToken(ID,'aBlock',1,22732)
LexToken(RBRACE,'}',1,22738)
LexToken(COMMA,',',1,22739)
LexToken(ID,'$globals',1,22740)
LexToken(PERIOD,'.',1,22748)
LexToken(ID,'Collection',1,22749)
LexToken(RPAREN,')',1,22759)
LexToken(RBRACE,'}',1,22760)
LexToken(RPAREN,')',1,22761)
LexToken(SEMI,';',1,22762)
LexToken(RBRACE,'}',1,22764)
LexToken(COMMA,',',1,22765)
LexToken(ID,'args',1,22767)
LexToken(COLON,':',1,22771)
Lex

LexToken(FUNCTION,'function',1,30224)
LexToken(LPAREN,'(',1,30233)
LexToken(RPAREN,')',1,30234)
LexToken(LBRACE,'{',1,30235)
LexToken(VAR,'var',1,30237)
LexToken(ID,'self',1,30241)
LexToken(EQ,'=',1,30245)
LexToken(THIS,'this',1,30246)
LexToken(SEMI,';',1,30250)
LexToken(RETURN,'return',1,30252)
LexToken(ID,'$core',1,30259)
LexToken(PERIOD,'.',1,30264)
LexToken(ID,'withContext',1,30265)
LexToken(LPAREN,'(',1,30276)
LexToken(FUNCTION,'function',1,30277)
LexToken(LPAREN,'(',1,30285)
LexToken(ID,'$ctx1',1,30286)
LexToken(RPAREN,')',1,30291)
LexToken(LBRACE,'{',1,30293)
LexToken(VAR,'var',1,30295)
LexToken(ID,'$1',1,30299)
LexToken(SEMI,';',1,30301)
LexToken(ID,'$1',1,30303)
LexToken(EQ,'=',1,30305)
LexToken(ID,'$recv',1,30306)
LexToken(LPAREN,'(',1,30311)
LexToken(ID,'self',1,30312)
LexToken(PERIOD,'.',1,30316)
LexToken(ID,'_isEmpty',1,30317)
LexToken(LPAREN,'(',1,30325)
LexToken(RPAREN,')',1,30326)
LexToken(RPAREN,')',1,30327)
LexToken(PERIOD,'.',1,30328)
LexToken(ID,'_not',1,30329)
LexT

LexToken(RBRACE,'}',1,37165)
LexToken(COMMA,',',1,37166)
LexToken(ID,'args',1,37168)
LexToken(COLON,':',1,37172)
LexToken(LBRACKET,'[',1,37174)
LexToken(RBRACKET,']',1,37175)
LexToken(COMMA,',',1,37176)
LexToken(ID,'source',1,37178)
LexToken(COLON,':',1,37184)
LexToken(STRING,'"classTag\\x0a\\x09\\x22Returns a tag or general category for this class.\\x0a\\x09Typically used to help tools do some reflection.\\x0a\\x09Helios, for example, uses this to decide what icon the class should display.\\x22\\x0a\\x09\\x0a\\x09^ \'collection\'"',1,37186)
LexToken(COMMA,',',1,37431)
LexToken(ID,'referencedClasses',1,37433)
LexToken(COLON,':',1,37450)
LexToken(LBRACKET,'[',1,37452)
LexToken(RBRACKET,']',1,37453)
LexToken(COMMA,',',1,37454)
LexToken(ID,'messageSends',1,37456)
LexToken(COLON,':',1,37468)
LexToken(LBRACKET,'[',1,37470)
LexToken(RBRACKET,']',1,37471)
LexToken(RBRACE,'}',1,37473)
LexToken(RPAREN,')',1,37474)
LexToken(COMMA,',',1,37475)
LexToken(ID,'$globals',1,37477)
LexToken(PERIOD,'.',1

LexToken(COMMA,',',1,45592)
LexToken(ID,'$globals',1,45594)
LexToken(PERIOD,'.',1,45602)
LexToken(ID,'IndexableCollection',1,45603)
LexToken(RPAREN,')',1,45622)
LexToken(SEMI,';',1,45623)
LexToken(ID,'$core',1,45625)
LexToken(PERIOD,'.',1,45630)
LexToken(ID,'addMethod',1,45631)
LexToken(LPAREN,'(',1,45640)
LexToken(ID,'$core',1,45642)
LexToken(PERIOD,'.',1,45647)
LexToken(ID,'method',1,45648)
LexToken(LPAREN,'(',1,45654)
LexToken(LBRACE,'{',1,45655)
LexToken(ID,'selector',1,45657)
LexToken(COLON,':',1,45665)
LexToken(STRING,'"indexOf:"',1,45667)
LexToken(COMMA,',',1,45677)
LexToken(ID,'protocol',1,45679)
LexToken(COLON,':',1,45687)
LexToken(STRING,"'accessing'",1,45689)
LexToken(COMMA,',',1,45700)
LexToken(ID,'fn',1,45702)
LexToken(COLON,':',1,45704)
LexToken(FUNCTION,'function',1,45706)
LexToken(LPAREN,'(',1,45715)
LexToken(ID,'anObject',1,45716)
LexToken(RPAREN,')',1,45724)
LexToken(LBRACE,'{',1,45725)
LexToken(VAR,'var',1,45727)
LexToken(ID,'self',1,45731)
LexToken(EQ,'=',1,45735)
L

LexToken(LPAREN,'(',1,54147)
LexToken(LBRACE,'{',1,54148)
LexToken(ID,'each',1,54149)
LexToken(COLON,':',1,54153)
LexToken(ID,'each',1,54154)
LexToken(RBRACE,'}',1,54158)
LexToken(COMMA,',',1,54159)
LexToken(ID,'$ctx1',1,54160)
LexToken(COMMA,',',1,54165)
LexToken(NUMBER,'1',1,54166)
LexToken(RPAREN,')',1,54167)
LexToken(RBRACE,'}',1,54168)
LexToken(RPAREN,')',1,54169)
LexToken(SEMI,';',1,54170)
LexToken(RBRACE,'}',1,54172)
LexToken(RPAREN,')',1,54173)
LexToken(RPAREN,')',1,54174)
LexToken(SEMI,';',1,54175)
LexToken(ID,'$1',1,54177)
LexToken(EQ,'=',1,54179)
LexToken(ID,'associations',1,54180)
LexToken(SEMI,';',1,54192)
LexToken(RETURN,'return',1,54194)
LexToken(ID,'$1',1,54201)
LexToken(SEMI,';',1,54203)
LexToken(RBRACE,'}',1,54205)
LexToken(COMMA,',',1,54206)
LexToken(FUNCTION,'function',1,54208)
LexToken(LPAREN,'(',1,54216)
LexToken(ID,'$ctx1',1,54217)
LexToken(RPAREN,')',1,54222)
LexToken(LBRACE,'{',1,54224)
LexToken(ID,'$ctx1',1,54225)
LexToken(PERIOD,'.',1,54230)
LexToken(ID,'fill

LexToken(STRING,'"anObject"',1,62427)
LexToken(COMMA,',',1,62437)
LexToken(STRING,'"aBlock"',1,62439)
LexToken(RBRACKET,']',1,62447)
LexToken(COMMA,',',1,62448)
LexToken(ID,'source',1,62450)
LexToken(COLON,':',1,62456)
LexToken(STRING,'"keyAtValue: anObject ifAbsent: aBlock\\x0a\\x09^ self indexOf: anObject ifAbsent: aBlock"',1,62458)
LexToken(COMMA,',',1,62546)
LexToken(ID,'referencedClasses',1,62548)
LexToken(COLON,':',1,62565)
LexToken(LBRACKET,'[',1,62567)
LexToken(RBRACKET,']',1,62568)
LexToken(COMMA,',',1,62569)
LexToken(ID,'messageSends',1,62571)
LexToken(COLON,':',1,62583)
LexToken(LBRACKET,'[',1,62585)
LexToken(STRING,'"indexOf:ifAbsent:"',1,62586)
LexToken(RBRACKET,']',1,62605)
LexToken(RBRACE,'}',1,62607)
LexToken(RPAREN,')',1,62608)
LexToken(COMMA,',',1,62609)
LexToken(ID,'$globals',1,62611)
LexToken(PERIOD,'.',1,62619)
LexToken(ID,'AssociativeCollection',1,62620)
LexToken(RPAREN,')',1,62641)
LexToken(SEMI,';',1,62642)
LexToken(ID,'$core',1,62644)
LexToken(PERIOD,'.',1,6264

LexToken(EQ,'=',1,70240)
LexToken(THIS,'this',1,70241)
LexToken(SEMI,';',1,70245)
LexToken(RETURN,'return',1,70247)
LexToken(ID,'$core',1,70254)
LexToken(PERIOD,'.',1,70259)
LexToken(ID,'withContext',1,70260)
LexToken(LPAREN,'(',1,70271)
LexToken(FUNCTION,'function',1,70272)
LexToken(LPAREN,'(',1,70280)
LexToken(ID,'$ctx1',1,70281)
LexToken(RPAREN,')',1,70286)
LexToken(LBRACE,'{',1,70288)
LexToken(ID,'self',1,70290)
LexToken(PERIOD,'.',1,70294)
LexToken(ID,'_subclassResponsibility',1,70295)
LexToken(LPAREN,'(',1,70318)
LexToken(RPAREN,')',1,70319)
LexToken(SEMI,';',1,70320)
LexToken(RETURN,'return',1,70322)
LexToken(ID,'self',1,70329)
LexToken(SEMI,';',1,70333)
LexToken(RBRACE,'}',1,70335)
LexToken(COMMA,',',1,70336)
LexToken(FUNCTION,'function',1,70338)
LexToken(LPAREN,'(',1,70346)
LexToken(ID,'$ctx1',1,70347)
LexToken(RPAREN,')',1,70352)
LexToken(LBRACE,'{',1,70354)
LexToken(ID,'$ctx1',1,70355)
LexToken(PERIOD,'.',1,70360)
LexToken(ID,'fill',1,70361)
LexToken(LPAREN,'(',1,70365)
LexT

LexToken(ID,'protocol',1,78939)
LexToken(COLON,':',1,78947)
LexToken(STRING,"'accessing'",1,78949)
LexToken(COMMA,',',1,78960)
LexToken(ID,'fn',1,78962)
LexToken(COLON,':',1,78964)
LexToken(FUNCTION,'function',1,78966)
LexToken(LPAREN,'(',1,78975)
LexToken(RPAREN,')',1,78976)
LexToken(LBRACE,'{',1,78977)
LexToken(VAR,'var',1,78979)
LexToken(ID,'self',1,78983)
LexToken(EQ,'=',1,78987)
LexToken(THIS,'this',1,78988)
LexToken(SEMI,';',1,78992)
LexToken(RETURN,'return',1,78994)
LexToken(ID,'$core',1,79001)
LexToken(PERIOD,'.',1,79006)
LexToken(ID,'withContext',1,79007)
LexToken(LPAREN,'(',1,79018)
LexToken(FUNCTION,'function',1,79019)
LexToken(LPAREN,'(',1,79027)
LexToken(ID,'$ctx1',1,79028)
LexToken(RPAREN,')',1,79033)
LexToken(LBRACE,'{',1,79035)
LexToken(VAR,'var',1,79037)
LexToken(ID,'$1',1,79041)
LexToken(SEMI,';',1,79043)
LexToken(ID,'$1',1,79045)
LexToken(EQ,'=',1,79047)
LexToken(ID,'$recv',1,79048)
LexToken(LPAREN,'(',1,79053)
LexToken(ID,'self',1,79054)
LexToken(LBRACKET,'[',1,7905

LexToken(RBRACE,'}',1,86677)
LexToken(RPAREN,')',1,86678)
LexToken(SEMI,';',1,86679)
LexToken(RBRACE,'}',1,86681)
LexToken(RPAREN,')',1,86682)
LexToken(RPAREN,')',1,86683)
LexToken(SEMI,';',1,86684)
LexToken(RETURN,'return',1,86686)
LexToken(ID,'$1',1,86693)
LexToken(SEMI,';',1,86695)
LexToken(RBRACE,'}',1,86697)
LexToken(COMMA,',',1,86698)
LexToken(FUNCTION,'function',1,86700)
LexToken(LPAREN,'(',1,86708)
LexToken(ID,'$ctx1',1,86709)
LexToken(RPAREN,')',1,86714)
LexToken(LBRACE,'{',1,86716)
LexToken(ID,'$ctx1',1,86717)
LexToken(PERIOD,'.',1,86722)
LexToken(ID,'fill',1,86723)
LexToken(LPAREN,'(',1,86727)
LexToken(ID,'self',1,86728)
LexToken(COMMA,',',1,86732)
LexToken(STRING,'"removeKey:ifAbsent:"',1,86733)
LexToken(COMMA,',',1,86754)
LexToken(LBRACE,'{',1,86755)
LexToken(ID,'aKey',1,86756)
LexToken(COLON,':',1,86760)
LexToken(ID,'aKey',1,86761)
LexToken(COMMA,',',1,86765)
LexToken(ID,'aBlock',1,86766)
LexToken(COLON,':',1,86772)
LexToken(ID,'aBlock',1,86773)
LexToken(RBRACE,'}',1,8677

LexToken(RBRACE,'}',1,94135)
LexToken(RPAREN,')',1,94136)
LexToken(SEMI,';',1,94137)
LexToken(RBRACE,'}',1,94139)
LexToken(RPAREN,')',1,94140)
LexToken(RPAREN,')',1,94141)
LexToken(SEMI,';',1,94142)
LexToken(ID,'$1',1,94144)
LexToken(EQ,'=',1,94146)
LexToken(ID,'newCollection',1,94147)
LexToken(SEMI,';',1,94160)
LexToken(RETURN,'return',1,94162)
LexToken(ID,'$1',1,94169)
LexToken(SEMI,';',1,94171)
LexToken(RBRACE,'}',1,94173)
LexToken(COMMA,',',1,94174)
LexToken(FUNCTION,'function',1,94176)
LexToken(LPAREN,'(',1,94184)
LexToken(ID,'$ctx1',1,94185)
LexToken(RPAREN,')',1,94190)
LexToken(LBRACE,'{',1,94192)
LexToken(ID,'$ctx1',1,94193)
LexToken(PERIOD,'.',1,94198)
LexToken(ID,'fill',1,94199)
LexToken(LPAREN,'(',1,94203)
LexToken(ID,'self',1,94204)
LexToken(COMMA,',',1,94208)
LexToken(STRING,'"deepCopy"',1,94209)
LexToken(COMMA,',',1,94219)
LexToken(LBRACE,'{',1,94220)
LexToken(ID,'newCollection',1,94221)
LexToken(COLON,':',1,94234)
LexToken(ID,'newCollection',1,94235)
LexToken(RBRACE,'}',

LexToken(ID,'$6',1,102584)
LexToken(RPAREN,')',1,102586)
LexToken(PERIOD,'.',1,102587)
LexToken(ID,'__minus',1,102588)
LexToken(LPAREN,'(',1,102595)
LexToken(ID,'aNumber',1,102596)
LexToken(RPAREN,')',1,102603)
LexToken(SEMI,';',1,102604)
LexToken(ID,'$4',1,102606)
LexToken(EQ,'=',1,102608)
LexToken(ID,'$recv',1,102609)
LexToken(LPAREN,'(',1,102614)
LexToken(ID,'$5',1,102615)
LexToken(RPAREN,')',1,102617)
LexToken(PERIOD,'.',1,102618)
LexToken(ID,'__plus',1,102619)
LexToken(LPAREN,'(',1,102625)
LexToken(LPAREN,'(',1,102626)
LexToken(NUMBER,'1',1,102627)
LexToken(RPAREN,')',1,102628)
LexToken(RPAREN,')',1,102629)
LexToken(SEMI,';',1,102630)
LexToken(ID,'$3',1,102632)
LexToken(EQ,'=',1,102634)
LexToken(ID,'self',1,102635)
LexToken(PERIOD,'.',1,102639)
LexToken(ID,'_copyFrom_to_',1,102640)
LexToken(LPAREN,'(',1,102653)
LexToken(ID,'$4',1,102654)
LexToken(COMMA,',',1,102656)
LexToken(ID,'self',1,102657)
LexToken(PERIOD,'.',1,102661)
LexToken(ID,'_size',1,102662)
LexToken(LPAREN,'(',1,10266

LexToken(COLON,':',1,111060)
LexToken(STRING,'"streamContents:"',1,111062)
LexToken(COMMA,',',1,111079)
LexToken(ID,'protocol',1,111081)
LexToken(COLON,':',1,111089)
LexToken(STRING,"'streaming'",1,111091)
LexToken(COMMA,',',1,111102)
LexToken(ID,'fn',1,111104)
LexToken(COLON,':',1,111106)
LexToken(FUNCTION,'function',1,111108)
LexToken(LPAREN,'(',1,111117)
LexToken(ID,'aBlock',1,111118)
LexToken(RPAREN,')',1,111124)
LexToken(LBRACE,'{',1,111125)
LexToken(VAR,'var',1,111127)
LexToken(ID,'self',1,111131)
LexToken(EQ,'=',1,111135)
LexToken(THIS,'this',1,111136)
LexToken(SEMI,';',1,111140)
LexToken(VAR,'var',1,111142)
LexToken(ID,'stream',1,111146)
LexToken(SEMI,';',1,111152)
LexToken(RETURN,'return',1,111154)
LexToken(ID,'$core',1,111161)
LexToken(PERIOD,'.',1,111166)
LexToken(ID,'withContext',1,111167)
LexToken(LPAREN,'(',1,111178)
LexToken(FUNCTION,'function',1,111179)
LexToken(LPAREN,'(',1,111187)
LexToken(ID,'$ctx1',1,111188)
LexToken(RPAREN,')',1,111193)
LexToken(LBRACE,'{',1,111195

LexToken(THIS,'this',1,119171)
LexToken(SEMI,';',1,119175)
LexToken(RETURN,'return',1,119177)
LexToken(ID,'$core',1,119184)
LexToken(PERIOD,'.',1,119189)
LexToken(ID,'withContext',1,119190)
LexToken(LPAREN,'(',1,119201)
LexToken(FUNCTION,'function',1,119202)
LexToken(LPAREN,'(',1,119210)
LexToken(ID,'$ctx1',1,119211)
LexToken(RPAREN,')',1,119216)
LexToken(LBRACE,'{',1,119218)
LexToken(ID,'self',1,119220)
LexToken(PERIOD,'.',1,119224)
LexToken(ID,'length',1,119225)
LexToken(EQ,'=',1,119232)
LexToken(NUMBER,'0',1,119234)
LexToken(SEMI,';',1,119235)
LexToken(RETURN,'return',1,119237)
LexToken(ID,'self',1,119244)
LexToken(SEMI,';',1,119248)
LexToken(RBRACE,'}',1,119250)
LexToken(COMMA,',',1,119251)
LexToken(FUNCTION,'function',1,119253)
LexToken(LPAREN,'(',1,119261)
LexToken(ID,'$ctx1',1,119262)
LexToken(RPAREN,')',1,119267)
LexToken(LBRACE,'{',1,119269)
LexToken(ID,'$ctx1',1,119270)
LexToken(PERIOD,'.',1,119275)
LexToken(ID,'fill',1,119276)
LexToken(LPAREN,'(',1,119280)
LexToken(ID,'self'

LexToken(LBRACE,'{',1,126408)
LexToken(VAR,'var',1,126410)
LexToken(ID,'$2',1,126414)
LexToken(COMMA,',',1,126416)
LexToken(ID,'$3',1,126417)
LexToken(COMMA,',',1,126419)
LexToken(ID,'$1',1,126420)
LexToken(SEMI,';',1,126422)
LexToken(ID,'$2',1,126424)
LexToken(EQ,'=',1,126426)
LexToken(ID,'self',1,126427)
LexToken(PERIOD,'.',1,126431)
LexToken(ID,'_new_',1,126432)
LexToken(LPAREN,'(',1,126437)
LexToken(LPAREN,'(',1,126438)
LexToken(NUMBER,'3',1,126439)
LexToken(RPAREN,')',1,126440)
LexToken(RPAREN,')',1,126441)
LexToken(SEMI,';',1,126442)
LexToken(ID,'$recv',1,126444)
LexToken(LPAREN,'(',1,126449)
LexToken(ID,'$2',1,126450)
LexToken(RPAREN,')',1,126452)
LexToken(PERIOD,'.',1,126453)
LexToken(ID,'_at_put_',1,126454)
LexToken(LPAREN,'(',1,126462)
LexToken(LPAREN,'(',1,126463)
LexToken(NUMBER,'1',1,126464)
LexToken(RPAREN,')',1,126465)
LexToken(COMMA,',',1,126466)
LexToken(ID,'anObject',1,126467)
LexToken(RPAREN,')',1,126475)
LexToken(SEMI,';',1,126476)
LexToken(ID,'$ctx1',1,126478)
LexT

LexToken(RBRACKET,']',1,133933)
LexToken(COMMA,',',1,133934)
LexToken(ID,'source',1,133936)
LexToken(COLON,':',1,133942)
LexToken(STRING,'"remove: anObject\\x0a\\x09self errorReadOnly"',1,133944)
LexToken(COMMA,',',1,133988)
LexToken(ID,'referencedClasses',1,133990)
LexToken(COLON,':',1,134007)
LexToken(LBRACKET,'[',1,134009)
LexToken(RBRACKET,']',1,134010)
LexToken(COMMA,',',1,134011)
LexToken(ID,'messageSends',1,134013)
LexToken(COLON,':',1,134025)
LexToken(LBRACKET,'[',1,134027)
LexToken(STRING,'"errorReadOnly"',1,134028)
LexToken(RBRACKET,']',1,134043)
LexToken(RBRACE,'}',1,134045)
LexToken(RPAREN,')',1,134046)
LexToken(COMMA,',',1,134047)
LexToken(ID,'$globals',1,134049)
LexToken(PERIOD,'.',1,134057)
LexToken(ID,'CharacterArray',1,134058)
LexToken(RPAREN,')',1,134072)
LexToken(SEMI,';',1,134073)
LexToken(ID,'$core',1,134075)
LexToken(PERIOD,'.',1,134080)
LexToken(ID,'addMethod',1,134081)
LexToken(LPAREN,'(',1,134090)
LexToken(ID,'$core',1,134092)
LexToken(PERIOD,'.',1,134097)
LexT

LexToken(COLON,':',1,143147)
LexToken(LBRACKET,'[',1,143149)
LexToken(RBRACKET,']',1,143150)
LexToken(COMMA,',',1,143151)
LexToken(ID,'messageSends',1,143176)
LexToken(COLON,':',1,143188)
LexToken(LBRACKET,'[',1,143190)
LexToken(RBRACKET,']',1,143191)
LexToken(RBRACE,'}',1,143193)
LexToken(RPAREN,')',1,143194)
LexToken(COMMA,',',1,143195)
LexToken(ID,'$globals',1,143197)
LexToken(PERIOD,'.',1,143205)
LexToken(ID,'String',1,143206)
LexToken(RPAREN,')',1,143212)
LexToken(SEMI,';',1,143213)
LexToken(ID,'$core',1,143215)
LexToken(PERIOD,'.',1,143220)
LexToken(ID,'addMethod',1,143221)
LexToken(LPAREN,'(',1,143230)
LexToken(ID,'$core',1,143232)
LexToken(PERIOD,'.',1,143237)
LexToken(ID,'method',1,143238)
LexToken(LPAREN,'(',1,143244)
LexToken(LBRACE,'{',1,143245)
LexToken(ID,'selector',1,143247)
LexToken(COLON,':',1,143255)
LexToken(STRING,'"asSymbol"',1,143257)
LexToken(COMMA,',',1,143267)
LexToken(ID,'protocol',1,143269)
LexToken(COLON,':',1,143277)
LexToken(STRING,"'converting'",1,143279)

LexToken(LPAREN,'(',1,153749)
LexToken(ID,'$core',1,153751)
LexToken(PERIOD,'.',1,153756)
LexToken(ID,'method',1,153757)
LexToken(LPAREN,'(',1,153763)
LexToken(LBRACE,'{',1,153764)
LexToken(ID,'selector',1,153766)
LexToken(COLON,':',1,153774)
LexToken(STRING,'"isVowel"',1,153776)
LexToken(COMMA,',',1,153785)
LexToken(ID,'protocol',1,153787)
LexToken(COLON,':',1,153795)
LexToken(STRING,"'testing'",1,153797)
LexToken(COMMA,',',1,153806)
LexToken(ID,'fn',1,153808)
LexToken(COLON,':',1,153810)
LexToken(FUNCTION,'function',1,153812)
LexToken(LPAREN,'(',1,153821)
LexToken(RPAREN,')',1,153822)
LexToken(LBRACE,'{',1,153823)
LexToken(VAR,'var',1,153825)
LexToken(ID,'self',1,153829)
LexToken(EQ,'=',1,153833)
LexToken(THIS,'this',1,153834)
LexToken(SEMI,';',1,153838)
LexToken(RETURN,'return',1,153895)
LexToken(ID,'$core',1,153902)
LexToken(PERIOD,'.',1,153907)
LexToken(ID,'withContext',1,153908)
LexToken(LPAREN,'(',1,153919)
LexToken(FUNCTION,'function',1,153920)
LexToken(LPAREN,'(',1,153928)
Lex

LexToken(COMMA,',',1,165735)
LexToken(STRING,'"="',1,165737)
LexToken(COMMA,',',1,165740)
LexToken(STRING,'"+"',1,165742)
LexToken(COMMA,',',1,165745)
LexToken(STRING,'"copyFrom:to:"',1,165747)
LexToken(RBRACKET,']',1,165761)
LexToken(RBRACE,'}',1,165763)
LexToken(RPAREN,')',1,165764)
LexToken(COMMA,',',1,165765)
LexToken(ID,'$globals',1,165767)
LexToken(PERIOD,'.',1,165775)
LexToken(ID,'String',1,165776)
LexToken(RPAREN,')',1,165782)
LexToken(SEMI,';',1,165783)
LexToken(ID,'$core',1,165785)
LexToken(PERIOD,'.',1,165790)
LexToken(ID,'addMethod',1,165791)
LexToken(LPAREN,'(',1,165800)
LexToken(ID,'$core',1,165802)
LexToken(PERIOD,'.',1,165807)
LexToken(ID,'method',1,165808)
LexToken(LPAREN,'(',1,165814)
LexToken(LBRACE,'{',1,165815)
LexToken(ID,'selector',1,165817)
LexToken(COLON,':',1,165825)
LexToken(STRING,'"lines"',1,165827)
LexToken(COMMA,',',1,165834)
LexToken(ID,'protocol',1,165836)
LexToken(COLON,':',1,165844)
LexToken(STRING,"'split join'",1,165846)
LexToken(COMMA,',',1,165858)

LexToken(RBRACE,'}',1,177308)
LexToken(RPAREN,')',1,177309)
LexToken(COMMA,',',1,177310)
LexToken(ID,'$globals',1,177312)
LexToken(PERIOD,'.',1,177320)
LexToken(ID,'String',1,177321)
LexToken(RPAREN,')',1,177327)
LexToken(SEMI,';',1,177328)
LexToken(ID,'$core',1,177330)
LexToken(PERIOD,'.',1,177335)
LexToken(ID,'addMethod',1,177336)
LexToken(LPAREN,'(',1,177345)
LexToken(ID,'$core',1,177347)
LexToken(PERIOD,'.',1,177352)
LexToken(ID,'method',1,177353)
LexToken(LPAREN,'(',1,177359)
LexToken(LBRACE,'{',1,177360)
LexToken(ID,'selector',1,177362)
LexToken(COLON,':',1,177370)
LexToken(STRING,'"trimBoth"',1,177372)
LexToken(COMMA,',',1,177382)
LexToken(ID,'protocol',1,177384)
LexToken(COLON,':',1,177392)
LexToken(STRING,"'regular expressions'",1,177394)
LexToken(COMMA,',',1,177415)
LexToken(ID,'fn',1,177417)
LexToken(COLON,':',1,177419)
LexToken(FUNCTION,'function',1,177421)
LexToken(LPAREN,'(',1,177430)
LexToken(RPAREN,')',1,177431)
LexToken(LBRACE,'{',1,177432)
LexToken(VAR,'var',1,177434)

LexToken(RPAREN,')',1,188224)
LexToken(LBRACE,'{',1,188225)
LexToken(VAR,'var',1,188227)
LexToken(ID,'self',1,188231)
LexToken(EQ,'=',1,188235)
LexToken(THIS,'this',1,188236)
LexToken(SEMI,';',1,188240)
LexToken(RETURN,'return',1,188297)
LexToken(ID,'$core',1,188304)
LexToken(PERIOD,'.',1,188309)
LexToken(ID,'withContext',1,188310)
LexToken(LPAREN,'(',1,188321)
LexToken(FUNCTION,'function',1,188322)
LexToken(LPAREN,'(',1,188330)
LexToken(ID,'$ctx1',1,188331)
LexToken(RPAREN,')',1,188336)
LexToken(LBRACE,'{',1,188338)
LexToken(RETURN,'return',1,188363)
LexToken(ID,'String',1,188370)
LexToken(PERIOD,'.',1,188376)
LexToken(ID,'fromCharCode',1,188377)
LexToken(LPAREN,'(',1,188389)
LexToken(ID,'anInteger',1,188390)
LexToken(RPAREN,')',1,188399)
LexToken(SEMI,';',1,188400)
LexToken(RETURN,'return',1,188402)
LexToken(ID,'self',1,188409)
LexToken(SEMI,';',1,188413)
LexToken(RBRACE,'}',1,188470)
LexToken(COMMA,',',1,188471)
LexToken(FUNCTION,'function',1,188473)
LexToken(LPAREN,'(',1,188481)
Le

LexToken(RETURN,'return',1,200102)
LexToken(ID,'self',1,200109)
LexToken(SEMI,';',1,200113)
LexToken(RBRACE,'}',1,200170)
LexToken(COMMA,',',1,200171)
LexToken(FUNCTION,'function',1,200173)
LexToken(LPAREN,'(',1,200181)
LexToken(ID,'$ctx1',1,200182)
LexToken(RPAREN,')',1,200187)
LexToken(LBRACE,'{',1,200189)
LexToken(ID,'$ctx1',1,200190)
LexToken(PERIOD,'.',1,200195)
LexToken(ID,'fill',1,200196)
LexToken(LPAREN,'(',1,200200)
LexToken(ID,'self',1,200201)
LexToken(COMMA,',',1,200205)
LexToken(STRING,'"add:in:"',1,200206)
LexToken(COMMA,',',1,200215)
LexToken(LBRACE,'{',1,200216)
LexToken(ID,'anObject',1,200217)
LexToken(COLON,':',1,200225)
LexToken(ID,'anObject',1,200226)
LexToken(COMMA,',',1,200234)
LexToken(ID,'anotherObject',1,200235)
LexToken(COLON,':',1,200248)
LexToken(ID,'anotherObject',1,200249)
LexToken(RBRACE,'}',1,200262)
LexToken(COMMA,',',1,200263)
LexToken(ID,'$globals',1,200264)
LexToken(PERIOD,'.',1,200272)
LexToken(ID,'Set',1,200273)
LexToken(RPAREN,')',1,200276)
LexToke

KeyboardInterrupt: 

In [None]:
# Sampling
sample_path = '/home/irteam/users/data/150kJavaScript/samples.txt'
with open(sample_path, 'w') as f:
    # Set intial hidden ane memory states
    state = (Variable(torch.zeros(num_layers, 1, hidden_size)).cuda(),
         Variable(torch.zeros(num_layers, 1, hidden_size)).cuda())

    # Select one word id randomly
    prob = torch.ones(vocab_size)
    input = Variable(torch.multinomial(prob, num_samples=1).unsqueeze(1),
                     volatile=True).cuda()

    for i in range(num_samples):
        # Forward propagate rnn 
        output, state = model(input, state)
        
        # Sample a word id
        prob = output.squeeze().data.exp().cpu()
        word_id = torch.multinomial(prob, 1)[0]
        
        # Feed sampled word id to next time step
        input.data.fill_(word_id)
        
        # File write
        word = corpus.dictionary.idx2word[word_id]
        word = '\n' if word == '<eos>' else word + ' '
        f.write(word)

        if (i+1) % 100 == 0:
            print('Sampled [%d/%d] words and save to %s'%(i+1, num_samples, sample_path))

In [None]:
# Testing
for epoch in range(1):
#     states = (Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda(),
#               Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda())
    batch.next_epoch(batch_size)
    batch.initialize_states(num_layers, hidden_size)
    step=0
    while(batch.epoch_end==0):
        step+=1
        # update the minibatch inputs / outputs
        batch.get_minibatch(0)
        inputs_np = np.array([vocab.word_list_to_idx_list(line) for
                             line in batch.batch_in],dtype=int)
        targets_np = np.array([vocab.word_list_to_idx_list(line) for
                             line in batch.batch_in],dtype=int)
        inputs = Variable(torch.LongTensor(inputs_np)).cuda()
        targets = Variable(torch.LongTensor(targets_np)).cuda()
        
        outputs, states = model(inputs,batch.states)
        batch.states = detach(states)
        break

In [None]:
out = outputs.view(batch_size,seq_length,-1).max(2)[1]

In [None]:
input

In [None]:
for i in range(len(targets)):
    line0 = inputs[i]
    line1 = targets[i]
    line2 = out[i]
    print(''.join(vocab.idx_list_to_word_list(line0.cpu().data.numpy())))    
    print(''.join(vocab.idx_list_to_word_list(line1.cpu().data.numpy())))
    print(''.join(vocab.idx_list_to_word_list(line2.cpu().data.numpy())))
    print('\n\n')

In [None]:
inputs_np = ids.cpu().numpy()
targets_np = targets.cpu().data.numpy()
predicted_np = predicted_outs.cpu().data.numpy()

In [None]:
for i in range(targets.size(0)):
    print(vocab.idx_list_to_word_list(inputs_np[i]))
    print(vocab.idx_list_to_word_list(predicted_np[i]))
    print(vocab.idx_list_to_word_list(targets_np[i]))
    print('\n')