In [9]:
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
#!pip install transformers
#!pip install torchsummary

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [7]:
import pandas as pd
import numpy as np
import transformers
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn.functional as F
from torchsummary import summary
from tqdm import tqdm

In [38]:
from transformers import BertGenerationEncoder, BertGenerationDecoder, EncoderDecoderModel, BertTokenizer

In [39]:
# leverage checkpoints for Bert2Bert model...
# use BERT's cls token as BOS token and sep token as EOS token
encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
# add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
decoder = BertGenerationDecoder.from_pretrained(
    "bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102
)
bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)

# create tokenizer...
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")

input_ids = tokenizer(
    "This is a long article to summarize", add_special_tokens=False, return_tensors="pt"
).input_ids
labels = tokenizer("This is a short summary", return_tensors="pt").input_ids

# train...
loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
loss.backward()

You are using a model of type bert to instantiate a model of type bert-generation. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertGenerationEncoder: ['cls.seq_relationship.weight', 'bert.pooler.dense.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'bert.pooler.dense.weight', 'bert.embeddings.token_type_embeddings.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertGenerationEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertGenerationEncoder from the checkpoint of a

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]



In [40]:
# instantiate sentence fusion model
sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")

input_ids = tokenizer(
    "This is the first sentence. This is the second sentence.", add_special_tokens=False, return_tensors="pt"
).input_ids

outputs = sentence_fuser.generate(input_ids)

print(tokenizer.decode(outputs[0]))

Downloading:   0%|          | 0.00/3.44k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

NameError: name 'AutoTokenizer' is not defined

In [19]:
class BertDataset(Dataset):
    def __init__(self, tokenizer,max_length):
        super(BertDataset, self).__init__()        
        self.root_dir="./"
        self.train_csv=pd.read_csv('https://github.com/clairett/pytorch-sentiment-classification/raw/master/data/SST2/train.tsv', delimiter='\t', header=None)
        self.tokenizer=tokenizer
        self.target=self.train_csv.iloc[:,1]
        self.max_length=max_length
        

    def load_words(self):
        with open('./datasets/borges.txt','r',encoding='utf8') as f:
            text = f.read()        
            text = ''.join([i for i in text if i.isalpha() or i.isspace()])
        if self.char_level==True:
            return list(text)
        else:
            return text.split(' ')
               
        
    def __len__(self):
        return len(self.train_csv)
    
    def __getitem__(self, index):
        
        text1 = self.train_csv.iloc[index,0]
        
        inputs = self.tokenizer.encode_plus(
            text1 ,
            None,
            pad_to_max_length=True,
            add_special_tokens=True,
            return_attention_mask=True,
            max_length=self.max_length,
        )
        ids = inputs["input_ids"]
        token_type_ids = inputs["token_type_ids"]
        mask = inputs["attention_mask"]

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'target': torch.tensor(self.train_csv.iloc[index, 1], dtype=torch.long)
            }
    
tokenizer = transformers.BertTokenizer.from_pretrained("bert-base-uncased")
dataset= BertDataset(tokenizer, max_length=100)
dataloader=DataLoader(dataset=dataset,batch_size=32)


In [20]:
class BERT(nn.Module):
    def __init__(self):
        super(BERT, self).__init__()
        self.bert_model = transformers.BertModel.from_pretrained("bert-base-uncased")
        self.out = nn.Linear(768, 1)
        
    def forward(self,ids,mask,token_type_ids):
        _,o2= self.bert_model(ids,attention_mask=mask,token_type_ids=token_type_ids, return_dict=False)
        
        out= self.out(o2)
        
        return out
    
model=BERT()

loss_fn = nn.BCEWithLogitsLoss()

#Initialize Optimizer
optimizer= optim.Adam(model.parameters(),lr= 0.0001)

# we are not retraining the weights of the BERT model, but only using them 
for param in model.bert_model.parameters():
    param.requires_grad = False

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [32]:
data =  next(iter(dataset))
data


{'ids': tensor([  101,  1037, 18385,  1010,  6057,  1998,  2633, 18276,  2128, 16603,
          1997,  5053,  1998,  1996,  6841,  1998,  5687,  5469,  3152,   102,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0]),
 'mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0

In [17]:
def finetune(epochs,dataloader,model,loss_fn,optimizer):
    model.train()
    for  epoch in range(epochs):
        print(epoch)
        
        loop=tqdm(enumerate(dataloader),leave=False,total=len(dataloader))
        for batch, dl in loop:
            ids=dl['ids']
            token_type_ids=dl['token_type_ids']
            mask= dl['mask']
            label=dl['target']
            label = label.unsqueeze(1)
            
            optimizer.zero_grad()
            
            output=model(
                ids=ids,
                mask=mask,
                token_type_ids=token_type_ids)
            label = label.type_as(output)

            loss=loss_fn(output,label)
            loss.backward()
            
            optimizer.step()
            
            pred = np.where(output >= 0, 1, 0)

            num_correct = sum(1 for a, b in zip(pred, label) if a[0] == b[0])
            num_samples = pred.shape[0]
            accuracy = num_correct/num_samples
            
            print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')
            
            # Show progress while training
            loop.set_description(f'Epoch={epoch}/{epochs}')
            loop.set_postfix(loss=loss.item(),acc=accuracy)

    return model




In [None]:
model=finetune(1, dataloader, model, loss_fn, optimizer)


0


  0%|          | 0/217 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Epoch=0/1:   0%|          | 1/217 [00:03<11:30,  3.20s/it, acc=0.5, loss=0.72]

Got 16 / 32 with accuracy 50.00


Epoch=0/1:   1%|          | 2/217 [00:05<09:32,  2.66s/it, acc=0.75, loss=0.625]

Got 24 / 32 with accuracy 75.00


Epoch=0/1:   1%|▏         | 3/217 [00:07<08:53,  2.49s/it, acc=0.594, loss=0.665]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:   2%|▏         | 4/217 [00:10<08:35,  2.42s/it, acc=0.594, loss=0.683]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:   2%|▏         | 5/217 [00:12<08:24,  2.38s/it, acc=0.531, loss=0.698]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:   3%|▎         | 6/217 [00:14<08:21,  2.38s/it, acc=0.594, loss=0.685]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:   3%|▎         | 7/217 [00:17<08:13,  2.35s/it, acc=0.562, loss=0.686]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:   4%|▎         | 8/217 [00:19<08:09,  2.34s/it, acc=0.625, loss=0.65] 

Got 20 / 32 with accuracy 62.50


Epoch=0/1:   4%|▍         | 9/217 [00:21<08:04,  2.33s/it, acc=0.656, loss=0.658]

Got 21 / 32 with accuracy 65.62


Epoch=0/1:   5%|▍         | 10/217 [00:24<08:01,  2.33s/it, acc=0.5, loss=0.697] 

Got 16 / 32 with accuracy 50.00


Epoch=0/1:   5%|▌         | 11/217 [00:26<07:58,  2.32s/it, acc=0.531, loss=0.681]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:   6%|▌         | 12/217 [00:28<07:55,  2.32s/it, acc=0.469, loss=0.716]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:   6%|▌         | 13/217 [00:30<07:52,  2.31s/it, acc=0.688, loss=0.667]

Got 22 / 32 with accuracy 68.75


Epoch=0/1:   6%|▋         | 14/217 [00:33<07:49,  2.31s/it, acc=0.5, loss=0.686]  

Got 16 / 32 with accuracy 50.00


Epoch=0/1:   7%|▋         | 15/217 [00:35<07:47,  2.31s/it, acc=0.406, loss=0.721]

Got 13 / 32 with accuracy 40.62


Epoch=0/1:   7%|▋         | 16/217 [00:37<07:44,  2.31s/it, acc=0.562, loss=0.657]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:   8%|▊         | 17/217 [00:40<07:41,  2.31s/it, acc=0.438, loss=0.711]

Got 14 / 32 with accuracy 43.75


Epoch=0/1:   8%|▊         | 18/217 [00:42<07:42,  2.32s/it, acc=0.469, loss=0.718]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:   9%|▉         | 19/217 [00:44<07:38,  2.31s/it, acc=0.594, loss=0.676]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:   9%|▉         | 20/217 [00:47<07:35,  2.31s/it, acc=0.719, loss=0.649]

Got 23 / 32 with accuracy 71.88


Epoch=0/1:  10%|▉         | 21/217 [00:49<07:31,  2.31s/it, acc=0.594, loss=0.704]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:  10%|█         | 22/217 [00:51<07:30,  2.31s/it, acc=0.531, loss=0.695]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  11%|█         | 23/217 [00:54<07:27,  2.31s/it, acc=0.625, loss=0.663]

Got 20 / 32 with accuracy 62.50


Epoch=0/1:  11%|█         | 24/217 [00:56<07:24,  2.30s/it, acc=0.562, loss=0.692]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:  12%|█▏        | 25/217 [00:58<07:22,  2.30s/it, acc=0.438, loss=0.697]

Got 14 / 32 with accuracy 43.75


Epoch=0/1:  12%|█▏        | 26/217 [01:00<07:20,  2.31s/it, acc=0.5, loss=0.698]  

Got 16 / 32 with accuracy 50.00


Epoch=0/1:  12%|█▏        | 27/217 [01:03<07:18,  2.31s/it, acc=0.719, loss=0.653]

Got 23 / 32 with accuracy 71.88


Epoch=0/1:  13%|█▎        | 28/217 [01:05<07:15,  2.30s/it, acc=0.438, loss=0.697]

Got 14 / 32 with accuracy 43.75


Epoch=0/1:  13%|█▎        | 29/217 [01:07<07:13,  2.31s/it, acc=0.688, loss=0.665]

Got 22 / 32 with accuracy 68.75


Epoch=0/1:  14%|█▍        | 30/217 [01:10<07:11,  2.31s/it, acc=0.531, loss=0.687]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  14%|█▍        | 31/217 [01:12<07:09,  2.31s/it, acc=0.625, loss=0.672]

Got 20 / 32 with accuracy 62.50


Epoch=0/1:  15%|█▍        | 32/217 [01:15<07:41,  2.49s/it, acc=0.531, loss=0.693]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  15%|█▌        | 33/217 [01:18<08:14,  2.69s/it, acc=0.5, loss=0.701]  

Got 16 / 32 with accuracy 50.00


Epoch=0/1:  16%|█▌        | 34/217 [01:20<07:49,  2.57s/it, acc=0.469, loss=0.698]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  16%|█▌        | 35/217 [01:23<07:33,  2.49s/it, acc=0.531, loss=0.686]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  17%|█▋        | 36/217 [01:25<07:20,  2.43s/it, acc=0.562, loss=0.687]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:  17%|█▋        | 37/217 [01:27<07:10,  2.39s/it, acc=0.438, loss=0.693]

Got 14 / 32 with accuracy 43.75


Epoch=0/1:  18%|█▊        | 38/217 [01:30<07:02,  2.36s/it, acc=0.562, loss=0.678]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:  18%|█▊        | 39/217 [01:32<06:57,  2.35s/it, acc=0.469, loss=0.706]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  18%|█▊        | 40/217 [01:34<06:52,  2.33s/it, acc=0.531, loss=0.693]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  19%|█▉        | 41/217 [01:36<06:48,  2.32s/it, acc=0.531, loss=0.677]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  19%|█▉        | 42/217 [01:39<06:45,  2.31s/it, acc=0.5, loss=0.696]  

Got 16 / 32 with accuracy 50.00


Epoch=0/1:  20%|█▉        | 43/217 [01:41<06:43,  2.32s/it, acc=0.594, loss=0.683]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:  20%|██        | 44/217 [01:43<06:39,  2.31s/it, acc=0.406, loss=0.699]

Got 13 / 32 with accuracy 40.62


Epoch=0/1:  21%|██        | 45/217 [01:46<06:37,  2.31s/it, acc=0.656, loss=0.68] 

Got 21 / 32 with accuracy 65.62


Epoch=0/1:  21%|██        | 46/217 [01:48<06:34,  2.31s/it, acc=0.438, loss=0.687]

Got 14 / 32 with accuracy 43.75


Epoch=0/1:  22%|██▏       | 47/217 [01:50<06:33,  2.32s/it, acc=0.688, loss=0.677]

Got 22 / 32 with accuracy 68.75


Epoch=0/1:  22%|██▏       | 48/217 [01:53<06:30,  2.31s/it, acc=0.531, loss=0.689]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  23%|██▎       | 49/217 [01:55<06:27,  2.31s/it, acc=0.5, loss=0.698]  

Got 16 / 32 with accuracy 50.00


Epoch=0/1:  23%|██▎       | 50/217 [01:57<06:25,  2.31s/it, acc=0.75, loss=0.647]

Got 24 / 32 with accuracy 75.00


Epoch=0/1:  24%|██▎       | 51/217 [02:00<06:22,  2.31s/it, acc=0.531, loss=0.692]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  24%|██▍       | 52/217 [02:02<06:25,  2.34s/it, acc=0.469, loss=0.71] 

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  24%|██▍       | 53/217 [02:04<06:20,  2.32s/it, acc=0.781, loss=0.643]

Got 25 / 32 with accuracy 78.12


Epoch=0/1:  25%|██▍       | 54/217 [02:07<06:17,  2.31s/it, acc=0.531, loss=0.699]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  25%|██▌       | 55/217 [02:09<06:13,  2.31s/it, acc=0.5, loss=0.692]  

Got 16 / 32 with accuracy 50.00


Epoch=0/1:  26%|██▌       | 56/217 [02:11<06:10,  2.30s/it, acc=0.562, loss=0.678]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:  26%|██▋       | 57/217 [02:13<06:08,  2.30s/it, acc=0.75, loss=0.659] 

Got 24 / 32 with accuracy 75.00


Epoch=0/1:  27%|██▋       | 58/217 [02:16<06:05,  2.30s/it, acc=0.469, loss=0.701]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  27%|██▋       | 59/217 [02:18<06:03,  2.30s/it, acc=0.531, loss=0.681]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  28%|██▊       | 60/217 [02:20<06:01,  2.30s/it, acc=0.656, loss=0.673]

Got 21 / 32 with accuracy 65.62


Epoch=0/1:  28%|██▊       | 61/217 [02:23<05:59,  2.30s/it, acc=0.469, loss=0.704]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  29%|██▊       | 62/217 [02:25<05:57,  2.31s/it, acc=0.531, loss=0.675]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  29%|██▉       | 63/217 [02:27<05:56,  2.31s/it, acc=0.469, loss=0.707]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  29%|██▉       | 64/217 [02:30<05:53,  2.31s/it, acc=0.594, loss=0.658]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:  30%|██▉       | 65/217 [02:32<05:50,  2.30s/it, acc=0.594, loss=0.681]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:  30%|███       | 66/217 [02:34<05:47,  2.30s/it, acc=0.594, loss=0.672]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:  31%|███       | 67/217 [02:36<05:45,  2.30s/it, acc=0.625, loss=0.682]

Got 20 / 32 with accuracy 62.50


Epoch=0/1:  31%|███▏      | 68/217 [02:39<05:43,  2.31s/it, acc=0.531, loss=0.695]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  32%|███▏      | 69/217 [02:41<05:41,  2.31s/it, acc=0.625, loss=0.688]

Got 20 / 32 with accuracy 62.50


Epoch=0/1:  32%|███▏      | 70/217 [02:43<05:40,  2.32s/it, acc=0.531, loss=0.692]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  33%|███▎      | 71/217 [02:46<05:37,  2.31s/it, acc=0.469, loss=0.706]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  33%|███▎      | 72/217 [02:48<05:34,  2.31s/it, acc=0.562, loss=0.706]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:  34%|███▎      | 73/217 [02:50<05:32,  2.31s/it, acc=0.469, loss=0.708]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  34%|███▍      | 74/217 [02:53<05:28,  2.30s/it, acc=0.562, loss=0.679]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:  35%|███▍      | 75/217 [02:55<05:26,  2.30s/it, acc=0.344, loss=0.728]

Got 11 / 32 with accuracy 34.38


Epoch=0/1:  35%|███▌      | 76/217 [02:57<05:24,  2.30s/it, acc=0.594, loss=0.677]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:  35%|███▌      | 77/217 [03:00<05:24,  2.32s/it, acc=0.469, loss=0.701]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  36%|███▌      | 78/217 [03:02<05:21,  2.31s/it, acc=0.469, loss=0.704]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  36%|███▋      | 79/217 [03:04<05:18,  2.31s/it, acc=0.5, loss=0.702]  

Got 16 / 32 with accuracy 50.00


Epoch=0/1:  37%|███▋      | 80/217 [03:06<05:15,  2.30s/it, acc=0.75, loss=0.653]

Got 24 / 32 with accuracy 75.00


Epoch=0/1:  37%|███▋      | 81/217 [03:09<05:13,  2.30s/it, acc=0.719, loss=0.664]

Got 23 / 32 with accuracy 71.88


Epoch=0/1:  38%|███▊      | 82/217 [03:11<05:11,  2.31s/it, acc=0.781, loss=0.64] 

Got 25 / 32 with accuracy 78.12


Epoch=0/1:  38%|███▊      | 83/217 [03:13<05:09,  2.31s/it, acc=0.469, loss=0.687]

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  39%|███▊      | 84/217 [03:16<05:08,  2.32s/it, acc=0.719, loss=0.66] 

Got 23 / 32 with accuracy 71.88


Epoch=0/1:  39%|███▉      | 85/217 [03:18<05:05,  2.31s/it, acc=0.531, loss=0.697]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  40%|███▉      | 86/217 [03:20<05:02,  2.31s/it, acc=0.719, loss=0.645]

Got 23 / 32 with accuracy 71.88


Epoch=0/1:  40%|████      | 87/217 [03:23<05:00,  2.31s/it, acc=0.656, loss=0.65] 

Got 21 / 32 with accuracy 65.62


Epoch=0/1:  41%|████      | 88/217 [03:25<04:57,  2.31s/it, acc=0.375, loss=0.722]

Got 12 / 32 with accuracy 37.50


Epoch=0/1:  41%|████      | 89/217 [03:27<04:55,  2.30s/it, acc=0.688, loss=0.672]

Got 22 / 32 with accuracy 68.75


Epoch=0/1:  41%|████▏     | 90/217 [03:30<04:52,  2.30s/it, acc=0.469, loss=0.72] 

Got 15 / 32 with accuracy 46.88


Epoch=0/1:  42%|████▏     | 91/217 [03:32<04:50,  2.30s/it, acc=0.656, loss=0.633]

Got 21 / 32 with accuracy 65.62


Epoch=0/1:  42%|████▏     | 92/217 [03:34<04:47,  2.30s/it, acc=0.688, loss=0.673]

Got 22 / 32 with accuracy 68.75


Epoch=0/1:  43%|████▎     | 93/217 [03:36<04:45,  2.30s/it, acc=0.531, loss=0.679]

Got 17 / 32 with accuracy 53.12


Epoch=0/1:  43%|████▎     | 94/217 [03:39<04:43,  2.30s/it, acc=0.594, loss=0.682]

Got 19 / 32 with accuracy 59.38


Epoch=0/1:  44%|████▍     | 95/217 [03:41<04:42,  2.31s/it, acc=0.562, loss=0.667]

Got 18 / 32 with accuracy 56.25


Epoch=0/1:  45%|████▍     | 97/217 [03:46<04:42,  2.35s/it, acc=0.406, loss=0.715]

Got 13 / 32 with accuracy 40.62


Epoch=0/1:  45%|████▌     | 98/217 [03:48<04:38,  2.34s/it, acc=0.5, loss=0.698]  

Got 16 / 32 with accuracy 50.00


Epoch=0/1:  46%|████▌     | 99/217 [03:50<04:35,  2.33s/it, acc=0.438, loss=0.708]

Got 14 / 32 with accuracy 43.75


In [13]:
has_gpu = torch.cuda.is_available()
print(has_gpu)

if torch.cuda.is_available(): 
    dev = "cuda:0" 
else: 
    dev = "cpu" 
device = torch.device(dev) 

True


## Get Text Data

In [3]:
with open('./datasets/borges_full.txt','r',encoding='utf8') as f:
    text = f.read()

In [4]:
# import urllib.request  # the lib that handles the url stuff
# target_url = 'https://ia601201.us.archive.org/2/items/BorgesObrasCompletasBorges/Borges-Obras-Completas-Borges_djvu.txt'
# data = urllib.request.urlopen(target_url)
# text = data.read().decode('utf-8')
# with open('./datasets/borges_full.txt', 'w') as f:
#     f.write(text)


In [5]:
import re
re.sub("[^a-zA-Z]+", "", "ABC12abc345def")

text = ''.join([i for i in text if i.isalpha() or i.isspace()])
print(text[0:1000])


Quiero dejar escrita una confesión que a m tiempo será 
íntima y general  ya que las cosas que le ocurren a un 
hombre les ocurren a todos Estoy hablando de algo ya 
remoto y perdido los días de mi santo t los más antiguos 
Yo recibía los regalos y yo pensaba que no era más que 
un chico y que no había hecho nada  absolutamente nada 
para merecerlos  PoY supuesto nunca lo dije la nifiez es 
tímida  Desde entonces me has dado tantas cosas y son 
tantos los años y los recuerdos  Padre Norah los abuelos 
tu memoria y en ella la memoria de los mayores  los 
patios los esclavos el agúatele la carga de los húsares 
del Perú y el oprobio de Rosas   tu prisión valerosa 
cuando tantos hombres callábamos  las mañanas del Paso 
del Molino f de Ginebra y de Austin f las compartidas cla 
ridades  T sombras tu fresca ancianidad tu amor a Dv 
ckens y a Ea de Queiroz Madre  vos misma  

Aquí estamos hablando los dos  et tout le resie est litié 
rature como escribió t con excelente literatura seríame 


In [6]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from collections import Counter

class Dataset(Dataset):
        
    def __init__(self,sequence_length,char_level):
        

        self.char_level = char_level
        self.words = self.load_words()
        self.uniq_words = self.get_uniq_words()

        self.index_to_word = {index: word for index, word in enumerate(self.uniq_words)}
        self.unk_word = 'UNK'
        self.unk_word_index = len(self.index_to_word)+1
        self.index_to_word[self.unk_word_index] = self.unk_word
        self.word_to_index = {word: index for index, word in enumerate(self.uniq_words)}
        self.sequence_length = sequence_length
        self.words_indexes = [self.word_to_index[w] for w in self.words]
        

    def load_words(self):
        with open('./datasets/borges.txt','r',encoding='utf8') as f:
            text = f.read()        
        if self.char_level==True:
            return list(text)
        else:
            return text.split(' ')

    def get_uniq_words(self):
        word_counts = Counter(self.words)
        return sorted(word_counts, key=word_counts.get, reverse=True)

    def __len__(self):
        return len(self.words_indexes) - self.sequence_length
    
    def __getitem__(self, index):
        return (
            torch.tensor(self.words_indexes[index:index+self.sequence_length]),
            torch.tensor(self.words_indexes[index+1:index+self.sequence_length+1]),
        )    

batch_size=10    
sequence_length=100
char_level = False
    
dataset = Dataset(sequence_length, char_level=char_level)
dataloader = DataLoader(dataset, batch_size=batch_size)
    


In [7]:
from collections import Counter
words = dataset.load_words()
len(set(words))

print(dataset.unk_word_index)
dataset.words_indexes.count(92)

6090


10

In [8]:
import torch
from torch import nn

dev = "cuda:0"

class TokenRNN(nn.Module):
    def __init__(self, dataset, use_gpu):
        super(TokenRNN, self).__init__()
        self.embedding_dim = 128
        self.lstm_size = 512
        self.num_layers = 2
        self.bidirectional = True
        self.use_gpu = use_gpu

        n_vocab = len(dataset.uniq_words)
        self.embedding = nn.Embedding(
            num_embeddings=n_vocab,
            embedding_dim=self.embedding_dim,
        )
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=self.lstm_size,
            num_layers=self.num_layers,
            dropout=0.2,
            bidirectional=self.bidirectional
        )

        bir=1
        if self.bidirectional:
            bir=2
        self.fc = nn.Linear(self.lstm_size*bir, n_vocab)

    def forward(self, x, prev_state):
        if self.use_gpu:
            x = x.to(device)
        embed = self.embedding(x)
        if self.use_gpu:
            embed = embed.to(device)
        output, state = self.lstm(embed, prev_state)
        if self.use_gpu:
            output = output.to(device)
        logits = self.fc(output)

        return logits, state

    def init_state(self, sequence_length):
        bir = 1
        if self.bidirectional:
            bir = 2
        
        h = torch.zeros(self.num_layers*bir, sequence_length, self.lstm_size)
        if self.use_gpu:
            h = h.to(device)
        return (h,h)
    
    
model = TokenRNN(dataset, True)
model

TokenRNN(
  (embedding): Embedding(6089, 128)
  (lstm): LSTM(128, 512, num_layers=2, dropout=0.2, bidirectional=True)
  (fc): Linear(in_features=1024, out_features=6089, bias=True)
)

In [9]:
#model.to(device)


In [10]:
import argparse
import torch
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader
import time

epochs=10
start = time.time()
device = torch.device(device) 

def train(dataset, model):
    print(f" doing model.to(device) {model.use_gpu}")

    if model.use_gpu:
        model.to(device)
        
    model.train()

    dataloader = DataLoader(dataset, batch_size=batch_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(epochs):
        state_h, state_c = model.init_state(sequence_length)
        if model.use_gpu:
            state_h = state_h.to(device)
            state_c = state_c.to(device)

        for batch, (x, y) in enumerate(dataloader):

            optimizer.zero_grad()

            if model.use_gpu:
                x.to(device)

            y_pred, (state_h, state_c) = model(x, (state_h, state_c))

            if model.use_gpu:
                y_pred = y_pred.to(device)
                y = y.to(device)

            loss = criterion(y_pred.transpose(1, 2), y)

            state_h = state_h.detach()
            state_c = state_c.detach()

            loss.backward()
            optimizer.step()
            if batch%500==0:
                # print(time.time() - start)
                # Be careful to overwrite our original name file!
#                model_name = 'borges_second_pass.net'
#                torch.save(model.state_dict(),model_name)
                print({ 'epoch': epoch, 'batch': batch, 'loss': loss.item() })

train(dataset, model)

 doing model.to(device) True
{'epoch': 0, 'batch': 0, 'loss': 8.713558197021484}
{'epoch': 0, 'batch': 500, 'loss': 5.4991278648376465}
{'epoch': 0, 'batch': 1000, 'loss': 5.258494853973389}
{'epoch': 1, 'batch': 0, 'loss': 6.857874393463135}
{'epoch': 1, 'batch': 500, 'loss': 3.3809866905212402}
{'epoch': 1, 'batch': 1000, 'loss': 2.907339096069336}
{'epoch': 2, 'batch': 0, 'loss': 5.977495193481445}
{'epoch': 2, 'batch': 500, 'loss': 1.9419424533843994}
{'epoch': 2, 'batch': 1000, 'loss': 1.5021312236785889}
{'epoch': 3, 'batch': 0, 'loss': 4.880244731903076}
{'epoch': 3, 'batch': 500, 'loss': 0.9647094011306763}
{'epoch': 3, 'batch': 1000, 'loss': 0.800978422164917}
{'epoch': 4, 'batch': 0, 'loss': 3.733320713043213}
{'epoch': 4, 'batch': 500, 'loss': 0.5864612460136414}
{'epoch': 4, 'batch': 1000, 'loss': 0.4179871380329132}
{'epoch': 5, 'batch': 0, 'loss': 2.725355625152588}
{'epoch': 5, 'batch': 500, 'loss': 0.3142109811306}
{'epoch': 5, 'batch': 1000, 'loss': 0.23613715171813965

-------
------

## Saving the Model

https://pytorch.org/tutorials/beginner/saving_loading_models.html

In [11]:
# Be careful to overwrite our original name file!
model_name = 'borges_second_pass.net'
torch.save(model.state_dict(),model_name)

## Load Model

In [12]:
# MUST MATCH THE EXACT SAME SETTINGS AS MODEL USED DURING TRAINING!
model_name = 'borges_second_pass.net'

model = TokenRNN(dataset, False)
model.load_state_dict(torch.load(model_name))
model.eval()

TokenRNN(
  (embedding): Embedding(6089, 128)
  (lstm): LSTM(128, 512, num_layers=2, dropout=0.2, bidirectional=True)
  (fc): Linear(in_features=1024, out_features=6089, bias=True)
)

In [13]:
torch.backends.cudnn.enabled = False

# Generating Predictions

In [14]:
def predict(dataset, model, text, next_words=100, use_gpu=False):
    model.eval()
    
    if use_gpu:
        model.to(device)

    words = text.split(' ')
    state_h, state_c = model.init_state(len(words))
    if use_gpu:
        state_h = state_h.to(device)
        state_c = state_c.to(device)

    def get_index(word):
        if word in dataset.word_to_index.keys():
            return dataset.word_to_index[word]
        else:
            return dataset.unk_word_index
    
    for i in range(0, next_words):
        
        x = torch.tensor([[get_index(w) for w in words[i:]]])

        y_pred, (state_h, state_c) = model(x, (state_h, state_c))

        last_word_logits = y_pred[0][-1]
        last_word_logits = last_word_logits.to('cpu')

        p = torch.nn.functional.softmax(last_word_logits, dim=0).detach().numpy()
        word_index = np.random.choice(len(last_word_logits), p=p)
        words.append(dataset.index_to_word[word_index])

    return words

import random
r = random.randint(0, len(dataset.words))
text = "".join(dataset.words[r:r+dataset.sequence_length])
language_generated = predict(dataset, model, text="el universo", next_words=100, use_gpu=False)

print(' '.join(language_generated).lower())



el universo vocales esquina indigno (|saias ya manuscrito para mi, calificar
de esta vio en 1912 como como un detenido un dios se claros, de
las zaguan de confianza y una casi una pie, una refutacién de la jefes la desdefaron. es blasfematorio®. es space

hamiet, es blasfematorio®. y mortifica y ser el famoso el famoso el sentenciado diecinueve, fierro de shahrazad de piel de boletines...

observé de vertiginosa de vertiginosa de dialéctica, de san
lucas.

estos regresa destino. y carpécrates; un remoto
espejo asi sagrado a mera a refleja olviden que erratas, ver a voces a voces que se aterréd el tiempo de las palabras y de el


--------