In [1]:
from ola_cb import * 
from ola_RNN import * 
from ola_models import * 

import os, time, copy, math, re, json, pickle, random
import numpy as np
import pandas as pd

import torch, torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.ticker as ticker

from functools import partial 

cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda_available else "cpu")
print(f'''using device {device}''')

path = !pwd
path = path[0]
print(path)

using device cuda:0
/home/r2/Documents/RNNexp


In [168]:
def unpad(x,y,hidden):
    idx = (y != 0).nonzero()    
    if idx.shape[0] == 1: idx = idx[0]
    else: idx = idx.squeeze()
    if len(hidden.shape) > 2: return x[idx],y[idx],hidden[:,idx]
    return x[idx],y[idx],hidden[idx]

class SGRU(nn.Module):
    def __init__(self, in_sz, hd_sz, n_stacks):
        super(SGRU,self).__init__()
        self.in_sz = in_sz
        self.hd_sz = hd_sz
        self.n_stacks = n_stacks
        self.GRUs = []
        for i in range(n_stacks):
            self.GRUs.append(cuda(GRU(in_sz,hd_sz)))
                
    def batch_forward(self,xb,yb,hds,loss_fn):
        if xb[0,0,1].item() == 1 or hds is None: hds = self.initHidden(xb.shape[0])                   
        # I want hidden outputs to have size [hd_sz, n_stacks]
        # I will try to save all inputs in a separate torch-tensor to see if that will solve the problem. 
        # PRETTY SURE NOW THAT NO INPUT VARIABLES ARE OVERWRITTEN....         
        # So the error doesn't seem to originate from me saving all the input/hidden states separately.          
        # So what can the error be originating from? Is it from my custom optimizer function? 
        #         predict = cuda(torch.zeros(self.n_stacks, xb.shape[0], xb.shape[1], xb.shape[2]))
        #         hds  = cuda(torch.zeros(self.n_stacks,xb.shape[0],xb.shape[1]+1,self.hd_sz))
        
        # TRYING OTHER solution. 
        print(predict.shape)
        print(hds.shape)
        loss = 0
        for char in range(0,xb.shape[1]):
            x,y           = xb[:,char],yb[:,char]
            x,y,hidden    = unpad(x,y,hds)
            if x.shape[0] == 0: break
            predict[0,:,char] = x
            for stack in range(self.n_stacks-1):
                print(f"""stack is {stack}, char is {char}""")
                gru = self.GRUs[stack]
                predict[stack+1,:,char], hds[stack,:,char+1] = gru.forward(predict[stack,:,char],hds[stack,:,char],True)
            output, hds[stack+1,:,char+1] = gru.forward(predict[stack+1,:,char],hds[stack+1,:,char],False)
            loss += loss_fn(output,yb[:,char])
        return output, hds.detach(), loss/(char+1)
        
    def initHidden(self, bs): return cuda(torch.zeros(self.n_stacks,bs,self.hd_sz))
    
    def parameters(self):
        for stack in range(n_stacks):
            for param in iter(self.GRUs[stack].parameters()):
                yield param
        
class GRU(nn.Module):
    def __init__(self, in_sz, hd_sz):
        super(GRU,self).__init__()
        self.in_sz = in_sz
        self.hd_sz = hd_sz

        self.x_lin = nn.Linear(self.in_sz,3*self.hd_sz)                
        self.h_lin = nn.Linear(self.hd_sz,3*self.hd_sz)
        
        self.up_sig = nn.Sigmoid()
        self.re_sig = nn.Sigmoid()
            
        self.o1      = nn.Linear(self.hd_sz+self.in_sz,self.in_sz)  

        self.softmax = nn.LogSoftmax(dim=1)   
        self.loss    = 0 
            
    def forward(self,input,hidden,to_stacked=False):        
        x = self.x_lin(input)   
        h = self.h_lin(hidden)           
        x_u,x_r,x_n = x.chunk(3,1)
        h_u,h_r,h_n = h.chunk(3,1)
        update_gate = self.up_sig(x_u+h_u)        
        reset_gate  = self.re_sig(x_r+h_r)
        new_gate    = torch.tanh(x_n + reset_gate * h_n)         
        h_new       = update_gate * hidden + (1 - update_gate) * new_gate 
        
        combined   = torch.cat((input,h_new),1)
        combined   = self.o1(combined)
        
        if to_stacked: return combined, h_new
        else: return self.softmax(combined), h_new
    
    def batch_forward(self,xb,yb,hidden,loss_fn):
        self.train()
        if xb[0,0,1].item() == 1: hidden = self.initHidden(xb.shape[0])                   
        loss = 0 
        print(xb.shape)
        print(xb.shape[1])
        for char in range(xb.shape[1]):
            x,y           = xb[:,char],yb[:,char]
            x,y,hidden    = unpad(x,y,hidden)
            if x.shape[0] == 0: break
            output,hidden = self.forward(x,hidden)
            loss += loss_fn(output,y)    
        return output,hidden.detach(),loss/(char+1)

    def initHidden(self, bs):
        return cuda(torch.zeros(bs,self.hd_sz))
    

In [169]:
bs  = 20
sql = 2
lr  = 0.0005 

data          = pp_trumpdata(path+"/data/trump/", [0.9,0.95], bs)
data.train_dl = TweetDataLoader(data,data.train.tweets,bs,sql,shuffle=True)
data.valid_dl = TweetDataLoader(data,data.valid.tweets,bs,sql,shuffle=False)



In [170]:
n_stacks = 2
model  = cuda(SGRU(len(data.decoder), 150, n_stacks))
opt    = optim.RMSprop(model.parameters(), lr)
learn  = Learner(model, nn.NLLLoss(), opt, data, lr=lr)

cbs = CallbackHandler([CounterCallback(), StatsCallback()])

In [171]:
fit_rnn(1,learn,cbs)

torch.Size([2, 20, 2, 433])
torch.Size([2, 20, 3, 150])
stack is 0, char is 0
stack is 0, char is 1


  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/asyncio/base_events.py", line 539, in run_forever
    self._run_once()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/asyncio/base_event

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [20, 150]], which is output 0 of SelectBackward, is at version 5; expected version 4 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

In [147]:

def one_rnn_batch(xb,yb,cb):
    with torch.autograd.set_detect_anomaly(True):    
        pred, cb.learn.hidden, loss = cb.learn.model.batch_forward(xb,yb,cb.learn.hidden,cb.learn.loss_fn)
    if not cb.after_loss(loss): return   
    with torch.autograd.set_detect_anomaly(True):
        loss.backward()
    if not cb.after_backward(): return 
    cb.learn.opt.step()
    if not cb.after_step(): return
    cb.learn.opt.zero_grad()

def fit_rnn(epoches, learn, cb=None, itters=math.inf):
    hidden = learn.model.initHidden(learn.data.train_dl.bs)
    if not cb.begin_fit(learn):           return 
    for epoch in range(epoches):
        if not cb.begin_epoch(epoch):     return             
        for xb, yb in iter(learn.data.train_dl):   
            if not cb.begin_batch(xb,yb): return   
            one_rnn_batch(xb,yb,cb)
            if not cb.begin_validate():   return     
            if cb.do_stop():              break 
        if not cb.after_epoch():          return
    if not cb.after_fit():                return 
    return            

In [124]:

#  is it the hidden layers 
#  or is it the input that has been changed in-place? 
#  Variable, is that different from a parameter in PyTorch? 

stack is 0, char is 0
stack is 0, char is 1


  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/asyncio/base_events.py", line 539, in run_forever
    self._run_once()
  File "/home/r2/anaconda3/envs/v3env/lib/python3.7/asyncio/base_event

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [20, 150]], which is output 0 of SelectBackward, is at version 5; expected version 4 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

In [113]:
model2  = cuda(GRU(len(data.decoder), 150))
opt2    = optim.RMSprop(model2.parameters(), lr)
learn2  = Learner(model2,  nn.NLLLoss(), opt2 , data, lr=lr)
fit_rnn(1,learn2,cbs)

torch.Size([20, 5, 433])
5
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 5, 433])
5
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 5, 433])
5
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 5, 433])
5
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 5, 433])
5
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 5, 433])
5
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 5, 433])
5
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 5, 433])
5
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torch.Size([20, 1])
torc

torch.Size([20, 5, 433])
5
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])
5
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])
5
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])
5
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])
5
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])
5
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])
5
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])
5
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])

torch.Size([17, 1])
torch.Size([20, 5, 433])
5
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([20, 5, 433])
5
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([20, 5, 433])
5
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([20, 5, 433])
5
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([20, 5, 433])
5
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([17, 1])
torch.Size([20, 5, 433])
5
torch.Size([16, 1])
torch.Size([16, 1])
torch.Size([15, 1])
torch.Size([15, 1])
torch.Size([14, 1])
torch.Size([20, 5, 433])
5
torch.Size([13, 1])
torch.Size([13, 1])
torch.Size([13, 1])
torch.Size([13, 1])
torch.Size([12, 1])
torch.Size([20, 5, 433])
5
torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([12, 1])
torc

torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([20, 5, 433])
5
torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([12, 1])
torch.Size([20, 5, 433])
5
torch.Size([11, 1])
torch.Size([11, 1])
torch.Size([11, 1])
torch.Size([11, 1])
torch.Size([11, 1])
torch.Size([20, 5, 433])
5
torch.Size([11, 1])
torch.Size([11, 1])
torch.Size([11, 1])
torch.Size([11, 1])
torch.Size([10, 1])
torch.Size([20, 5, 433])
5
torch.Size([10, 1])
torch.Size([10, 1])
torch.Size([10, 1])
torch.Size([10, 1])
torch.Size([10, 1])
torch.Size([20, 5, 433])
5
torch.Size([10, 1])
torch.Size([10, 1])
torch.Size([10, 1])
torch.Size([8, 1])
torch.Size([7, 1])
torch.Size([20, 5, 433])
5
torch.Size([6, 1])
torch.Size([6, 1])
torch.Size([6, 1])
torch.Size([5, 1])
torch.Size([4, 1])
torch.Size([20, 5, 433])
5
torch.Size([3, 1])
torch.Size([2, 1])
torch.Size([2, 1])
torch.Size([2, 1])
torch.Size([1, 1])
torch.Size([20, 5, 433]

torch.Size([10, 1])
torch.Size([20, 5, 433])
5
torch.Size([9, 1])
torch.Size([9, 1])
torch.Size([9, 1])
torch.Size([9, 1])
torch.Size([9, 1])
torch.Size([20, 5, 433])
5
torch.Size([9, 1])
torch.Size([9, 1])
torch.Size([9, 1])
torch.Size([9, 1])
torch.Size([9, 1])
torch.Size([20, 5, 433])
5
torch.Size([8, 1])
torch.Size([8, 1])
torch.Size([8, 1])
torch.Size([8, 1])
torch.Size([8, 1])
torch.Size([20, 5, 433])
5
torch.Size([8, 1])
torch.Size([7, 1])
torch.Size([7, 1])
torch.Size([7, 1])
torch.Size([6, 1])
torch.Size([20, 5, 433])
5
torch.Size([5, 1])
torch.Size([5, 1])
torch.Size([4, 1])
torch.Size([4, 1])
torch.Size([3, 1])
torch.Size([20, 5, 433])
5
torch.Size([3, 1])
torch.Size([3, 1])
torch.Size([3, 1])


KeyboardInterrupt: 