In [1]:
%load_ext autoreload

In [2]:
from config import Config
from pprint import pprint, pformat
from logger import model_logger
log = model_logger.getLogger('main')
log.setLevel(Config.Log.MODEL.level)

2018-01-29 11:27:47,145:root:INFO   :           getLogger:: creating logger for main under MODEL


In [3]:
%autoreload 2
from trainer import Trainer, Feeder, Predictor
from datafeed import DataFeed
from torch import nn, optim
from torch.nn import functional as F
from torch.autograd import Variable
import torch

2018-01-29 11:27:47,211:root:INFO   :           getLogger:: creating logger for main under TRAINER
2018-01-29 11:27:47,428:root:INFO   :           getLogger:: creating logger for main under DATAFEED


In [4]:
import csv
train_dataset = csv.reader(open('dataset/train.csv'))
test_dataset = csv.reader(open('dataset/test.csv'))

In [5]:
from collections import namedtuple
Sample = namedtuple('Sample', ['id','comment_text',
                               'toxic','severe_toxic','obscene',
                               'threat','insult','identity_hate'])

### Unicode to ascii text

In [6]:
import unicodedata
train_datapoints = []
for i in list(train_dataset)[1:]:
    _id, c, t, st, o, t, ins, ih = i
    t, st, o, t, ins, ih = (int(_) for _ in [t, st, o, t, ins, ih])
    c = unicodedata.normalize('NFD', c).encode('ascii','ignore').decode()
    train_datapoints.append(Sample(_id, c, t, st, o, t, ins, ih))

test_datapoints = []
for i in list(test_dataset)[1:]:
    _id, c = i
    c = unicodedata.normalize('NFD', c).encode('ascii','ignore').decode()
    test_datapoints.append(Sample(_id, c, 0, 0, 0, 0, 0, 0))

len(train_datapoints), len(test_datapoints)

(159571, 153164)

In [7]:
#train_datapoints = train_datapoints[:1000]

In [8]:
test_datapoints[1000:1010]

[Sample(id='01ac9982edae9977', comment_text='" \n\n Dear ,Welcome to Wikipedia!Unfortunately, using your e-mail address as your username is not a good idea. Wikipedia content is extensively copied and the site itself is one of the most visited sites in the world. Any edit you make on Wikipedia will have your username attached to it, and using your email address will make you a tempting target for spammers. We recommend that you change your username at Wikipedia:Changing username in order to prevent abuse.If you need any help, simply contact me on my talk page, or go to Wikipedia:Help desk. Another option is to place  on your own talk page, and someone will come shortly to help. Remember to sign your posts on talk pages with four tildes (~~~~). Again, welcome! -  at "', toxic=0, severe_toxic=0, obscene=0, threat=0, insult=0, identity_hate=0),
 Sample(id='01ac9ea1271d7409', comment_text='Asshole, your dirty MF people tries to steal our glory and heavy persian (tajik) history by claiming 

## Build vocabulary

#### buils INPUT_VOCAB

In [9]:
%timeit
from nltk import word_tokenize
from tqdm import tqdm
datapoints = train_datapoints
INPUT_VOCAB = [word for dp in tqdm(datapoints) for word in word_tokenize(dp.comment_text)]
INPUT_VOCAB = list(set(INPUT_VOCAB))
len(INPUT_VOCAB)

100%|██████████| 159571/159571 [01:28<00:00, 1800.43it/s]


299974

In [10]:
INPUT_VOCAB[:10]

['insane',
 'ass/face',
 '*t',
 'FLUID',
 'members/archive1',
 'cut-out.',
 "'TS",
 'PED-OH-PHILE',
 'Plainlist',
 'AMAZING.']

In [11]:
OUTPUT_VOCAB = ['toxic','severe_toxic','obscene', 'threat','insult','identity_hate']
INPUT_VOCAB = ['<<PAD>>', '<<UNK>>'] + list(set(INPUT_VOCAB + OUTPUT_VOCAB))

#### builds WORD_INDEX

In [12]:
WORD_INDEX = {w: i for i, w in enumerate(INPUT_VOCAB)}
OUTPUT_IDS = [WORD_INDEX[i] for i in OUTPUT_VOCAB]

In [13]:
sorted(list(WORD_INDEX.items()), key=lambda x: x[1])[:10], WORD_INDEX['<<PAD>>'], INPUT_VOCAB[0]

([('<<PAD>>', 0),
  ('<<UNK>>', 1),
  ('insane', 2),
  ('FLUID', 3),
  ('ass/face', 4),
  ('*t', 5),
  ('members/archive1', 6),
  ('cut-out.', 7),
  ("'TS", 8),
  ('PED-OH-PHILE', 9)],
 0,
 '<<PAD>>')

## tests INPUTVOCAB and WORD_INDEX mapping

In [14]:
import random
_i = train_datapoints[random.choice(range(len(train_datapoints)))]
print(_i.comment_text)
print("""


""")
print(
      ' '.join( [INPUT_VOCAB[i] for i in 
                 [WORD_INDEX[j] for j in word_tokenize(_i.comment_text)]]
              )
     )

Note to Eclecticology: Hum, you just brought to my attention a naming conflict for two cities named Paris over at List of places and things named Paris. Are their other such conflicts within Canada? And if so are most of these between Ontario and the rest of Canada? The city naming convention was debated and worked out on the wikipedia mailing list and in several other places  it would have been nice to know of any possible systemic internal naming conflicts within Canada. Maybe this is one of the few exceptions that we can disambiguate on a case-by-case basis What is the population of each of these Canadian Paris? Is it at all likely that either of these will become encyclopedia articles? maveric149




Note to Eclecticology : Hum , you just brought to my attention a naming conflict for two cities named Paris over at List of places and things named Paris . Are their other such conflicts within Canada ? And if so are most of these between Ontario and the rest of Canada ? The city namin

## Baseline model

In [15]:
class Model(nn.Module):
    def __init__(self, Config, input_vocab_size, output_vocab_size):
        super(Model, self).__init__()
        self.input_vocab_size = input_vocab_size
        self.output_vocab_size = output_vocab_size
        self.hidden_dim = Config.hidden_dim

        self.embed = nn.Embedding(self.input_vocab_size, self.hidden_dim)
        self.encode = nn.GRUCell(self.hidden_dim, self.hidden_dim)

        self.classify = [nn.Linear(self.hidden_dim, 2)
                         for i in range (self.output_vocab_size)]

        self.log = model_logger.getLogger('model')
        self.log.setLevel(logging.INFO)
        if Config.cuda:
            self.cuda()
            [i.cuda() for i in self.classify]
        
    def init_hidden(self, batch_size):
        ret = torch.zeros(batch_size, self.hidden_dim)
        if Config().cuda: ret = ret.cuda()
        return Variable(ret)
    
    def forward(self, seq):
        seq = Variable(torch.LongTensor(seq))
        if Config().cuda: seq = seq.cuda()
        batch_size = seq.size()[0]
        self.log.debug('{} seq size: {}'.format(type(seq.data), seq.size()))
        seq_emb = self.embed(seq).transpose(1,0)
        output = self.init_hidden(batch_size)
        for token_emb in seq_emb:
            self.log.debug('token_emb := {}'.format(token_emb))
            self.log.debug('output := {}'.format(output))
            output = self.encode(token_emb, output)
                    
        self.log.debug('output := {}'.format(output))
    
        ret = torch.stack([F.softmax(classify(output), dim=-1) 
                           for classify in self.classify])
        self.log.debug('ret := {}'.format(ret))

        self.log.debug('ret size: {}'.format(ret.size()))

        return ret

### Batching utils

In [16]:
import numpy as np
def seq_maxlen(seqs):
    return max([len(seq) for seq in seqs])

PAD = WORD_INDEX[INPUT_VOCAB[0]]
print(PAD)
def pad_seq(seqs, maxlen=0, PAD=PAD):
    if type(seqs[0]) == type([]):
        maxlen = maxlen if maxlen else seq_maxlen(seqs)
        def pad_seq_(seq):
            return seq + [PAD]*(maxlen-len(seq))
        seqs = [ pad_seq_(seq) for seq in seqs ]
    return seqs

def batchop(datapoints, *args, **kwargs):
    indices = [d.id for d in datapoints]
    seq   = pad_seq([ [WORD_INDEX[w] for w in word_tokenize(d.comment_text)]
                     for d in datapoints])
    target = [(d.toxic, d.severe_toxic, d.obscene, d.threat, d.insult, d.identity_hate)
              for d in datapoints]
    seq, target = np.array(seq), np.array(target)
    return indices, (seq, ), (target,)

0


## Attention based model

In [56]:
class AttModel(nn.Module):
    def __init__(self, Config, input_vocab_size, output_vocab_size):
        super(AttModel, self).__init__()
        self.input_vocab_size = input_vocab_size
        self.output_vocab_size = output_vocab_size
        self.hidden_dim = Config.hidden_dim

        self.embed = nn.Embedding(self.input_vocab_size, self.hidden_dim)
        self.encode = nn.GRUCell(self.hidden_dim, self.hidden_dim)
        self.attend = nn.Parameter(torch.FloatTensor(self.hidden_dim, self.hidden_dim))

        self.classify = nn.Linear(self.hidden_dim, 2)
        self.log = model_logger.getLogger('model')
        self.size_log = self.log.getLogger('size')
        self.log.setLevel(logging.DEBUG)
        self.size_log.setLevel(logging.DEBUG)
        if Config.cuda:
            self.cuda()
            
    def logsize(self, tensor, name=''):
        self.size_log.debug('{} <- {}'.format(tensor.size(), name))
        
    def init_hidden(self, batch_size):
        ret = torch.zeros(batch_size, self.hidden_dim)
        if Config().cuda: ret = ret.cuda()
        return Variable(ret)
    
    def forward(self, seq, classes=OUTPUT_IDS):
        seq = Variable(torch.LongTensor(seq))
        classes = Variable(torch.LongTensor(classes))
        if Config().cuda: 
            seq = seq.cuda()
            classes = classes.cuda()
            
        batch_size = seq.size()[0]
        self.log.debug('{} seq size: {}'.format(type(seq.data), seq.size()))
        seq_emb = self.embed(seq).transpose(1,0)                  ;self.logsize(seq_emb, 'seq_emb')
        seq_repr = []
        output = self.init_hidden(batch_size)                     ;self.logsize(output, 'output')
        for token_emb in seq_emb:
            self.log.debug('token_emb := {}'.format(token_emb))
            self.log.debug('output := {}'.format(output))
            output = self.encode(token_emb, output)               ;self.logsize(output, 'output')
            seq_repr.append(output)

        seq_repr = torch.stack(seq_repr).transpose(1,0)           ;self.logsize(seq_repr, 'seq_repr')
        outputs = []
        attend = self.attend
        self.logsize(attend, 'attend')
        for class_ in classes:
            class_emb = self.embed(class_)                  ;self.logsize(class_emb, 'class_emb')
            attn = torch.mm(class_emb, attend)        ;self.logsize(attn, 'attn')
            #attn = attn.expand_as(seq_repr) ;self.logsize(attn, 'attn')
            attended_outputs = torch.bmm(attn.expand(seq_repr.size()[0], *attn.size()), seq_repr.transpose(1,2))                
        
            self.logsize(attended_outputs, 'attended_outputs')
            self.log.debug('{}'.format(attended_outputs))
            output = attended_outputs.transpose(-1, -2).expand_as * seq_repr
            self.logsize(output, 'output')

            output = output.sum(0).squeeze()                ;self.logsize(output, 'output')
            output = self.classify(output)                  ;self.logsize(output, 'output')
            output = F.softmax(output)
            outputs.append(output)
            
        ret = torch.stack(outputs)
        return ret

## Loss and accuracy function

In [18]:
def loss(output, target, loss_function=nn.NLLLoss(), *args, **kwargs):
    loss = 0
    target = Variable(torch.LongTensor(target[0]))
    if Config().cuda: target = target.cuda()
    output = output.transpose(1,0)
    batch_size = output.size()[0]
    for i, t in zip(output, target):
        log.debug('i, o sizes: {} {}'.format(i, t))
        loss += loss_function(i, t.squeeze()).mean()
        log.debug('loss size: {}'.format(loss))

    return loss/batch_size

def accuracy(output, target, *args, **kwargs):
    accuracy = 0
    target = Variable(torch.LongTensor(target[0]))
    if Config().cuda: target = target.cuda()
    output = output.transpose(1,0)
    batch_size = output.size()[0]
    class_size = output.size()[1]    
    
    for i, t in zip(output, target):
        correct = (i.max(dim=1)[1] == t).sum()
        accuracy += correct/class_size
        
    return (accuracy/batch_size).data[0]
    

### repr_function to build human readable output from model

In [19]:
from IPython.display import HTML
from IPython.display import display
def repr_function(output, feed, batch_index):
    results = []
    output = output.transpose(1,0)
    indices, (seq,), (classes,) = feed.nth_batch(batch_index)
    print(output.size(), len(indices), len(seq), len(classes))
    for i, o, s, c in zip(indices, output, seq, classes):
        orig_s = feed.data_dict[i].comment_text
        s = [INPUT_VOCAB[i] for i in s]
        s = ' '.join(s)
        results.append([orig_s, s] + list(c))
        o = o.max(dim=1)[1]
        results.append([' ', '  '] + o.data.cpu().numpy().tolist())
    return results

In [20]:
_train_datapoints = train_datapoints

In [21]:
import logging
log.setLevel(logging.INFO)

## Experiment on model 1

In [23]:
import random
def  experiment(epochs=10, checkpoint=1):
    model =  Model(Config(), len(INPUT_VOCAB), len(OUTPUT_VOCAB))
    if Config().cuda:  model = model.cuda()
        
    split_index = int( len(train_datapoints) * 0.85 )
    train_feed = DataFeed(train_datapoints[:split_index], batchop=batchop, batch_size=128)
    test_feed = DataFeed(train_datapoints[split_index:], batchop=batchop, batch_size=120)

    trainer = Trainer(model=model, loss_function=loss, accuracy_function=accuracy, 
                    checkpoint=checkpoint, epochs=epochs,
                    feeder = Feeder(train_feed, test_feed))

    predictor = Predictor(model=model, repr_function=repr_function, feed=test_feed)
    output, results = predictor.predict(random.choice(range(test_feed.num_batch)))

    for e in range(10000):
        output, results = predictor.predict(random.choice(range(test_feed.num_batch)))
        display(HTML(results._repr_html_()))
        trainer.train()
        
experiment()

2018-01-29 12:31:33,034:root:INFO   :           getLogger:: creating logger for model under MODEL


torch.Size([120, 6, 2]) 120 120 120
torch.Size([120, 6, 2]) 120 120 120


0,1,2,3,4,5,6,7
"""  What's your opinion on professional wrestling? Out of interest. Preceding unsigned comment added by (talk contribs) """,`` What 's your opinion on professional wrestling ? Out of interest . Preceding unsigned comment added by ( talk contribs ) `` <> <> <> <> <> <> <> <> <> <> <> <> <> <>,0,0,0,0,0,0
,,1,0,0,0,0,0
Looks like they have a real annoying javascript script to try and force you to use their frames. One less site I'll ever visit.,Looks like they have a real annoying javascript script to try and force you to use their frames . One less site I 'll ever visit . <> <> <> <> <> <> <> <> <> <>,0,0,0,0,0,0
,,1,0,0,0,0,0
They are known as Palestinian because of Arab propaganda. Real Palestinians are Jews. Arab Palestinians are frauds from Arabia.,They are known as Palestinian because of Arab propaganda . Real Palestinians are Jews . Arab Palestinians are frauds from Arabia . <> <> <> <> <> <> <> <> <> <> <> <> <> <> <>,0,0,0,0,0,0
,,1,0,0,0,0,0
""" Alright, and thank you. I do believe I shall, when you do get to sleep yourself, I hope you have a good one. dlus Contribs ""","`` Alright , and thank you . I do believe I shall , when you do get to sleep yourself , I hope you have a good one . dlus Contribs `` <> <> <> <> <>",0,0,0,0,0,0
,,1,0,0,0,0,0
"Your edits to Tourette syndrome Please stop. If you continue to vandalize Wikipedia, you will be blocked from editing. (tc)","Your edits to Tourette syndrome Please stop . If you continue to vandalize Wikipedia , you will be blocked from editing . ( tc ) <> <> <> <> <> <> <> <> <> <> <> <>",0,0,0,0,0,0
,,1,0,0,0,0,0


2018-01-29 12:31:33,569:TRAINER.main:CRITICAL:               train:: memory consumed : 2812436480

  0%|          | 0/1059 [00:00<?, ?it/s][A
100%|██████████| 1059/1059 [58:22<00:00, 297.84s/it]
2018-01-29 13:29:56,286:TRAINER.main:INFO   :               train:: -- 0 -- loss: -0.8868265748023987
100%|██████████| 199/199 [00:25<00:00,  6.78it/s]
2018-01-29 13:30:21,722:TRAINER.main:INFO   : do_every_checkpoint:: -- 0 -- loss: -0.9059795141220093, accuracy: 0
2018-01-29 13:30:21,725:TRAINER.main:CRITICAL:               train:: memory consumed : 11399782400
100%|██████████| 1059/1059 [1:00:07<00:00,  2.82it/s] 
2018-01-29 14:30:29,678:TRAINER.main:INFO   :               train:: -- 1 -- loss: -0.9176936745643616
100%|██████████| 199/199 [00:38<00:00,  4.71it/s]
2018-01-29 14:31:07,759:TRAINER.main:INFO   : do_every_checkpoint:: -- 1 -- loss: -0.9556130766868591, accuracy: 0
2018-01-29 14:31:07,762:TRAINER.main:CRITICAL:               train:: memory consumed : 11471265792
100%|██████████| 

torch.Size([120, 6, 2]) 120 120 120


0,1,2,3,4,5,6,7
"Since the section had already been flagged for a bit, I went ahead with the move. I will not protest an undo, but I feel strongly that this note does not belong in this article. PERHAPS is belongs in another Wikipedia article, such as [carbon footprint] where I moved it.","Since the section had already been flagged for a bit , I went ahead with the move . I will not protest an undo , but I feel strongly that this note does not belong in this article . PERHAPS is belongs in another Wikipedia article , such as [ carbon footprint ] where I moved it . <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <>",0,0,0,0,0,0
,,0,0,0,0,0,0
"Congratulations on making the news - and it wasn't for riding the baggage carousel (you can only do that if you look like cow luggage, or even cow luggage). BTW - you look like my hubby, only hubby is rather greyer. I don't know whether this is a good thing or a bad thing.","Congratulations on making the news - and it was n't for riding the baggage carousel ( you can only do that if you look like cow luggage , or even cow luggage ) . BTW - you look like my hubby , only hubby is rather greyer . I do n't know whether this is a good thing or a bad thing . <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <>",0,0,0,0,0,0
,,0,0,0,0,0,0
"Hola ,mi llamo Holli and this is my wiki im new to wiki and don't really know what the.... ummm never mind I'm supposed to do and how to do it! so as you can see I'm probably screwed! ? -holli1213  . . 1-4-10  ~","Hola , mi llamo Holli and this is my wiki im new to wiki and do n't really know what the ... . ummm never mind I 'm supposed to do and how to do it ! so as you can see I 'm probably screwed ! ? -holli1213 . . 1-4-10 ~ <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <>",0,0,0,0,0,0
,,0,0,0,0,0,0
"Yerevan Thermal Power Plant While your contributions to Wikipedia are appreciated, copyrighted material is not. Please do not reinstate copyrighted text (text copied from other websites) at the Yerevan Thermal Power Plant or I will block you from further editing. Thanks,","Yerevan Thermal Power Plant While your contributions to Wikipedia are appreciated , copyrighted material is not . Please do not reinstate copyrighted text ( text copied from other websites ) at the Yerevan Thermal Power Plant or I will block you from further editing . Thanks , <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <>",0,0,0,0,0,0
,,0,0,0,0,0,0
"Stop. If you continue to vandalize pages, as you did to Moi dix Mois, you will be blocked from editing Wikipedia. Removing warnings from your talk page does not erase their existence. If you continue to hide evidence of your mischievous behavior, you will be blocked.","Stop . If you continue to vandalize pages , as you did to Moi dix Mois , you will be blocked from editing Wikipedia . Removing warnings from your talk page does not erase their existence . If you continue to hide evidence of your mischievous behavior , you will be blocked . <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <>",0,0,0,0,0,0
,,0,0,0,0,0,0


2018-01-29 22:57:14,881:TRAINER.main:CRITICAL:               train:: memory consumed : 13014331392
  4%|▍         | 44/1059 [04:08<1:43:04,  6.09s/it]

KeyboardInterrupt: 

## Experiment on model using attention

In [None]:
import random
def  experiment(epochs=1, checkpoint=1):
    model =  AttModel(Config(), len(INPUT_VOCAB), len(OUTPUT_VOCAB))
    if Config().cuda:  model = model.cuda()
        
    split_index = int( len(train_datapoints) * 0.85 )
    train_feed = DataFeed(train_datapoints[:split_index], batchop=batchop, batch_size=128)
    test_feed = DataFeed(train_datapoints[split_index:], batchop=batchop, batch_size=120)

    trainer = Trainer(model=model, loss_function=loss, accuracy_function=accuracy, 
                    checkpoint=checkpoint, epochs=epochs,
                    feeder = Feeder(train_feed, test_feed))

    predictor = Predictor(model=model, repr_function=repr_function, feed=test_feed)
    output, results = predictor.predict(random.choice(range(test_feed.num_batch)))

    for e in range(10000):
        output, results = predictor.predict(random.choice(range(test_feed.num_batch)))
        display(HTML(results._repr_html_()))
        trainer.train()
        
experiment()

2018-01-29 23:32:56,479:root:INFO   :           getLogger:: creating logger for model under MODEL
2018-01-29 23:32:56,480:root:INFO   :           getLogger:: creating logger for size under MODEL.model


torch.Size([193, 6, 2]) 120 120 120
torch.Size([29, 6, 2]) 120 120 120


0,1,2,3,4,5,6,7
speedy deletion I began and now it is up for deletion as advertising- wikipedia is crazy,speedy deletion I began and now it is up for deletion as advertising- wikipedia is crazy <> <> <> <> <> <> <> <> <> <> <> <> <>,0,0,0,0,0,0
,,0,0,0,0,0,0
"""== Mangalore == Please read Talk:Mangalore#Requested_move_18_October_2014. talk to me """,`` == Mangalore == Please read Talk : Mangalore # Requested_move_18_October_2014 . talk to me `` <> <> <> <> <> <> <> <> <> <> <> <> <>,0,0,0,0,0,0
,,0,0,0,0,0,0
"Yes you are correct, she is queen by name only. Were arguing on a issue we both agree on.","Yes you are correct , she is queen by name only . Were arguing on a issue we both agree on . <> <> <> <> <> <> <>",0,0,0,0,0,0
,,0,0,0,0,0,0
there is no reason to inform the reader of such information: it is not critical info. Your,there is no reason to inform the reader of such information : it is not critical info . Your <> <> <> <> <> <> <> <> <> <>,0,0,0,0,0,0
,,0,0,0,0,0,0
"September 2005 (UTC) Yeah, the logo was a nice addition. Keep up the good work. 03:01, 3","September 2005 ( UTC ) Yeah , the logo was a nice addition . Keep up the good work . 03:01 , 3 <> <> <> <> <> <>",0,0,0,0,0,0
,,0,0,0,0,0,0


2018-01-29 23:32:57,839:TRAINER.main:CRITICAL:               train:: memory consumed : 16855322624

  0%|          | 0/1059 [00:00<?, ?it/s][A
  0%|          | 1/1059 [00:00<12:19,  1.43it/s][A
  0%|          | 2/1059 [00:01<12:35,  1.40it/s][A
  0%|          | 3/1059 [00:02<12:53,  1.37it/s][A
  0%|          | 4/1059 [00:03<13:09,  1.34it/s][A
  0%|          | 5/1059 [00:03<12:55,  1.36it/s][A
  1%|          | 6/1059 [00:04<13:11,  1.33it/s][A
  1%|          | 7/1059 [00:05<12:56,  1.35it/s][A
  1%|          | 8/1059 [00:05<13:08,  1.33it/s][A
  1%|          | 9/1059 [00:06<13:06,  1.34it/s][A
  1%|          | 10/1059 [00:07<13:30,  1.29it/s][A
  1%|          | 11/1059 [00:08<13:16,  1.32it/s][A
  1%|          | 12/1059 [00:09<13:14,  1.32it/s][A
  5%|▍         | 50/1059 [00:40<15:55,  1.06it/s]

In [None]:
dummy_feed = DataFeed(train_datapoints[:100], batchop=batchop, batch_size=1)
indices, (seq,), (target,) = dummy_feed.nth_batch(random.choice(range(dummy_feed.num_batch)))
print(dummy_feed.data_dict[indices[0]])
print([INPUT_VOCAB[i] for i in seq[0]])