In [None]:
# default_exp dataset.dataset

In [None]:
#export
import os
import torch
import itertools
import transformers

import pandas as pd
import numpy as np
import DialoGPT.config as config

In [None]:
#export
class DialoGPTDataset():
    """Map-style dataset turning each DataFrame row into one token sequence.

    Every cell of a row is treated as one utterance. The row is reversed before
    tokenising; in the sample data displayed in this notebook the columns run
    from the most recent utterance (``present``) to the oldest (``context_8``),
    so the reversed order is chronological. Each utterance is followed by the
    tokenizer's EOS token and all utterances are concatenated into one sequence.

    Args:
        df: DataFrame whose rows hold the utterances; every cell is assumed to
            be a ``str`` (a non-string cell fails on concatenation with the
            EOS token).
        tokenizer: optional tokenizer exposing ``encode(text)`` and
            ``eos_token``. Defaults to ``config.TOKENIZER``.
    """

    def __init__(self, df, tokenizer=None):
        self.df = df

        # Fall back to the project-wide tokenizer unless one is injected.
        self.tokenizer = config.TOKENIZER if tokenizer is None else tokenizer

    def __len__(self):
        """Return the number of rows (dialogue samples) in the DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, i):
        """Return ``input_ids`` and ``labels`` for the row at position ``i``.

        Returns:
            dict with two independent 1-D ``torch.long`` tensors holding the
            same token ids under the keys ``'input_ids'`` and ``'labels'``.
        """
        utterances = self.df.iloc[i].values
        eos = self.tokenizer.eos_token

        # ``encode`` without ``return_tensors`` already yields a plain list of
        # ids, so no tensor -> numpy -> list round trip is needed per utterance.
        token_ids = list(itertools.chain.from_iterable(
            self.tokenizer.encode(text + eos) for text in reversed(utterances)
        ))

        input_ids = torch.tensor(token_ids, dtype=torch.long)
        return {
            'input_ids': input_ids,
            # Separate storage so mutating one tensor never affects the other.
            'labels': input_ids.clone()
        }

In [None]:
#export
# Load the cleaned dialogue table from the directory given by config.DATA_PATH.
# NOTE(review): this cell carries the `#export` flag, so presumably the CSV is
# read whenever the exported module is imported — confirm that is intended.
df = pd.read_csv(config.DATA_PATH/'cleaned_df_with_contexts.csv')

In [None]:
#hide
df.head()

Unnamed: 0,present,context_0,context_1,context_2,context_3,context_4,context_5,context_6,context_7,context_8
0,Morty. I had to... I had to do it. I had— I ha...,"Yeah, Rick... I-it's great. Is this the surprise?","What do you think of this... flying vehicle, M...","We gotta go, gotta get outta here, come on. Go...",Ow! Ow! You're tugging me too hard!,"Come on, I got a surprise for you. Come on, h...",It's the middle of the night. What are you tal...,"I got a surprise for you, Morty.","What, Rick? What’s going on?",Morty! You gotta come on. Jus'... you gotta co...
1,What?! A bomb?!,Morty. I had to... I had to do it. I had— I ha...,"Yeah, Rick... I-it's great. Is this the surprise?","What do you think of this... flying vehicle, M...","We gotta go, gotta get outta here, come on. Go...",Ow! Ow! You're tugging me too hard!,"Come on, I got a surprise for you. Come on, h...",It's the middle of the night. What are you tal...,"I got a surprise for you, Morty.","What, Rick? What’s going on?"
2,We're gonna drop it down there just get a whol...,What?! A bomb?!,Morty. I had to... I had to do it. I had— I ha...,"Yeah, Rick... I-it's great. Is this the surprise?","What do you think of this... flying vehicle, M...","We gotta go, gotta get outta here, come on. Go...",Ow! Ow! You're tugging me too hard!,"Come on, I got a surprise for you. Come on, h...",It's the middle of the night. What are you tal...,"I got a surprise for you, Morty."
3,T-t-that's absolutely crazy!,We're gonna drop it down there just get a whol...,What?! A bomb?!,Morty. I had to... I had to do it. I had— I ha...,"Yeah, Rick... I-it's great. Is this the surprise?","What do you think of this... flying vehicle, M...","We gotta go, gotta get outta here, come on. Go...",Ow! Ow! You're tugging me too hard!,"Come on, I got a surprise for you. Come on, h...",It's the middle of the night. What are you tal...
4,"Come on, Morty. Just take it easy, Morty. It's...",T-t-that's absolutely crazy!,We're gonna drop it down there just get a whol...,What?! A bomb?!,Morty. I had to... I had to do it. I had— I ha...,"Yeah, Rick... I-it's great. Is this the surprise?","What do you think of this... flying vehicle, M...","We gotta go, gotta get outta here, come on. Go...",Ow! Ow! You're tugging me too hard!,"Come on, I got a surprise for you. Come on, h..."


In [None]:
#hide
df.iloc[0].values

array(['Morty. I had to... I had to do it. I had— I had to— I had to make a bomb, Morty. I had to create a bomb.',
       "Yeah, Rick... I-it's great. Is this the surprise?",
       'What do you think of this... flying vehicle, Morty? I built it outta stuff I found in the garage.',
       'We gotta go, gotta get outta here, come on. Got a surprise for you Morty.',
       "Ow! Ow! You're tugging me too hard!",
       'Come on, I got a surprise for you.  Come on, hurry up.',
       "It's the middle of the night. What are you talking about?",
       'I got a surprise for you, Morty.', 'What, Rick? What’s going on?',
       "Morty! You gotta come on. Jus'... you gotta come with me."],
      dtype=object)

In [None]:
#hide
[i for i in reversed(df.iloc[0].values)]

["Morty! You gotta come on. Jus'... you gotta come with me.",
 'What, Rick? What’s going on?',
 'I got a surprise for you, Morty.',
 "It's the middle of the night. What are you talking about?",
 'Come on, I got a surprise for you.  Come on, hurry up.',
 "Ow! Ow! You're tugging me too hard!",
 'We gotta go, gotta get outta here, come on. Got a surprise for you Morty.',
 'What do you think of this... flying vehicle, Morty? I built it outta stuff I found in the garage.',
 "Yeah, Rick... I-it's great. Is this the surprise?",
 'Morty. I had to... I had to do it. I had— I had to— I had to make a bomb, Morty. I had to create a bomb.']

In [None]:
#hide
d = DialoGPTDataset(df)

In [None]:
#hide
d[0]

{'input_ids': tensor([   44,   419,    88,     0,   921, 17753,  1282,   319,    13,   449,
           385,     6,   986,   345, 17753,  1282,   351,   502,    13, 50256,
          2061,    11,  8759,    30,  1867,   447,   247,    82,  1016,   319,
            30, 50256,    40,  1392,   257,  5975,   329,   345,    11, 30395,
            13, 50256,  1026,   338,   262,  3504,   286,   262,  1755,    13,
          1867,   389,   345,  3375,   546,    30, 50256, 16773,   319,    11,
           314,  1392,   257,  5975,   329,   345,    13,   220,  7911,   319,
            11, 23290,   510,    13, 50256,    46,    86,     0, 11960,     0,
           921,   821, 27762,  2667,   502,  1165,  1327,     0, 50256,  1135,
         17753,   467,    11, 17753,   651,   503,  8326,   994,    11,  1282,
           319,    13, 11853,   257,  5975,   329,   345, 30395,    13, 50256,
          2061,   466,   345,   892,   286,   428,   986,  7348,  4038,    11,
         30395,    30,   314,  3170,   

In [None]:
import DialoGPT.model.model as model

In [None]:
#export
import torch
import torch.nn as nn
import transformers

import DialoGPT.config as config

In [None]:
# Load the pretrained causal LM named by config.MODEL_NAME for inspection.
# NOTE(review): this rebinds `model`, which an earlier cell bound to the
# DialoGPT.model.model module via `import ... as model`; the module is no
# longer reachable under that name after this cell runs.
model = transformers.AutoModelForCausalLM.from_pretrained(config.MODEL_NAME)

In [None]:
model??

In [None]:
#export
class DialoGPTModel(nn.Module):
    """Thin ``nn.Module`` wrapper around a pretrained causal language model.

    The wrapped model is loaded with
    ``transformers.AutoModelForCausalLM.from_pretrained(config.MODEL_NAME)``
    and stored as ``self.model``.
    """

    def __init__(self):
        super().__init__()
        self.model = transformers.AutoModelForCausalLM.from_pretrained(config.MODEL_NAME)

    def forward(self, input_ids, labels=None):
        """Run the wrapped model and return its output unchanged.

        Args:
            input_ids: token ids; per the original author's note the expected
                shape is (batch_size, sequence_length).
            labels: optional target ids forwarded to the wrapped model. Per the
                original author's note the labels are shifted inside the
                model, so ``labels`` can be equal to ``input_ids``.

        Returns:
            Whatever the wrapped model returns for these arguments.
        """
        # `self` was missing from the signature: calling the module passed the
        # instance as `input_ids`, raising "got multiple values for argument".
        return self.model(input_ids=input_ids, labels=labels)

In [None]:
#hide
modeller = DialoGPTModel()

In [None]:
modeller

In [None]:
dl = torch.utils.data.DataLoader(d, batch_size=1)

In [None]:
# Smoke test: push every batch through the wrapper in eval mode with gradient
# tracking disabled; `out` ends up holding the output for the last batch.
modeller.eval()
with torch.no_grad():
    for sample in dl:
        out = modeller(input_ids=sample['input_ids'], labels=sample['labels'])

tensor([[   44,   419,    88,     0,   921, 17753,  1282,   319,    13,   449,
           385,     6,   986,   345, 17753,  1282,   351,   502,    13, 50256,
          2061,    11,  8759,    30,  1867,   447,   247,    82,  1016,   319,
            30, 50256,    40,  1392,   257,  5975,   329,   345,    11, 30395,
            13, 50256,  1026,   338,   262,  3504,   286,   262,  1755,    13,
          1867,   389,   345,  3375,   546,    30, 50256, 16773,   319,    11,
           314,  1392,   257,  5975,   329,   345,    13,   220,  7911,   319,
            11, 23290,   510,    13, 50256,    46,    86,     0, 11960,     0,
           921,   821, 27762,  2667,   502,  1165,  1327,     0, 50256,  1135,
         17753,   467,    11, 17753,   651,   503,  8326,   994,    11,  1282,
           319,    13, 11853,   257,  5975,   329,   345, 30395,    13, 50256,
          2061,   466,   345,   892,   286,   428,   986,  7348,  4038,    11,
         30395,    30,   314,  3170,   340,   503,  

TypeError: forward() got multiple values for argument 'input_ids'

In [None]:
modeller??