In [2]:
import math
import os, logging

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import math
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms


class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a batch of embeddings.

    A ``(max_len, d_model)`` table is pre-computed once and stored as the
    non-trainable buffer ``pe``: even feature columns hold sines, odd feature
    columns hold cosines.

    Args:
        d_model: size of the feature dimension of the embeddings.
        dropout: dropout probability applied after the table is added.
        max_len: number of positions stored in the table.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position=torch.arange(max_len).unsqueeze(1)
        div_term=1/(10000**(torch.arange(0,d_model,2)/d_model))
        angles=position*div_term                      # (max_len, ceil(d_model/2))
        pe=torch.zeros(max_len,d_model)
        pe[:,0::2]=torch.sin(angles)
        # An odd d_model has one fewer odd column than even columns, so the
        # cosine half must be trimmed or the assignment fails on shape.
        pe[:,1::2]=torch.cos(angles[:,:d_model//2])
        self.register_buffer('pe', pe)
    def forward(self,patches):
        """Return ``dropout(patches + pe[:seq_len])``.

        ``patches.shape[1]`` is used as the sequence length, so the input is
        indexed as (batch, sequence, d_model).
        """
        return self.dropout(patches+self.pe[:patches.shape[1]])



# class ImgPatcher(nn.Module):
#     def __init__(self,n_patches:int,emb_d:int,channels_first:bool,img_shape:tuple):
#         super(ImgPatcher,self).__init__()
#         self.channels_first=channels_first
#         H,W,C=img_shape
#         if self.channels_first:
#             C,H,W=img_shape
#         self.chw=(C,H,W)
#         assert H==W, "Please make sure image has same height and width"
#         assert H%n_patches==0, "Please make sure n_patches is factor of image dims H and W"
#         self.n_patches=n_patches
#         self.p=H//n_patches
#         self.embedding=nn.Linear(H*W*C//n_patches**2,emb_d)                            #n_patches^2 patches of size (H/n_patches*W/n_patches)*C will be created from each image
#         print(self.embedding.weight.shape)
    
#     def forward(self,img):
#         img=img.float()
#         N=img.size(0)
#         if self.channels_first:
#             img=img.permute(0,2,3,1)                #N,C,H,W -> N,H,W,C
#         patches=torch.zeros(N,self.n_patches**2,self.embedding.weight.shape[1])
#         for idx in range(N):
#             img_=img[idx]
#             for i in range(self.n_patches):
#                 for j in range(self.n_patches):
#                     patch=img_[i*self.p:(i+1)*self.p,j*self.p:(j+1)*self.p]
#                     patches[idx,(i*self.n_patches)+j]=patch.flatten()
#         return self.posenc(self.embedding(patches))







In [3]:
import torchvision.io
import torch
from torch.utils.data import Dataset

def load_tokenizer(tokenizer_model:str,data_dir:str=None):
    """Return a tokenizer, preferring the copy cached under ``data/tokenizer``.

    When ``data/tokenizer`` exists the tokenizer is loaded from that
    directory. Otherwise ``tokenizer_model`` is fetched through ``torch.hub``,
    saved to ``data/tokenizer`` and returned.

    Args:
        tokenizer_model: model identifier handed to ``torch.hub.load``.
        data_dir: currently unused (placeholder for a vocabulary update pass).
    """
    hub_repo='huggingface/pytorch-transformers'
    if os.path.exists('data/tokenizer'):
        return torch.hub.load(hub_repo, 'tokenizer','data/tokenizer/')

    fresh=torch.hub.load(hub_repo, 'tokenizer',tokenizer_model)
    logging.info("Updating Tokenizer Vocab on Data")
    #insert code to get to process data_dir
    #[tokenizer(i) for i in tqdm(data_dir[:,1])]
    fresh.save_pretrained('data/tokenizer')
    return fresh




class Stream_Dataset(Dataset):
    """Image/caption dataset backed by ``<data_dir>/result0.csv``.

    Column 0 of the CSV is used as the image file name and column 1 as the
    caption string. Images are read lazily from ``<data_dir>/<img_dir>/``.

    Args:
        seq_length: stored as ``max_length``; not otherwise used by this class.
        data_dir: directory containing ``result0.csv`` and the image folder.
        img_dir: image folder name, relative to ``data_dir``.
        tokenizer_model: accepted for call-site compatibility; unused here.
    """
    def __init__(self,seq_length,data_dir,img_dir,tokenizer_model='medicalai/ClinicalBERT'):
        self.data_dir=data_dir
        # `pd` comes from the notebook's import cell (pandas).
        self.data=pd.read_csv(os.path.join(data_dir,'result0.csv')).to_numpy()
        self.img_dir=img_dir
        self.max_length=seq_length
    def __len__(self):
        """Number of CSV rows."""
        return self.data.shape[0]
    def __getitem__(self,idx):
        """Return ``(image_tensor, caption)`` for row ``idx``."""
        img_name=self.data[idx,0]
        semantic_str=self.data[idx,1]
        img=torchvision.io.read_image(os.path.join(self.data_dir,self.img_dir,img_name))
        return img,semantic_str
# dataset=Stream_Dataset(
#         seq_length=50,
#         data_dir="/home/mehedi/Desktop/raghib/FLICKR-30K IMAGE CAPTIONING/flickr30k_images",
#         img_dir='flickr30k_images',
#         tokenizer_model='bert-base-uncased'
# )


In [4]:

import torch.nn as nn
import torch
from torch import Tensor
from typing import Optional, Any, Union, Callable
from torch.nn import functional as F
from torch.nn import MultiheadAttention, LayerNorm,Dropout, Linear,TransformerEncoderLayer
def _get_activation_fn(activation: str) -> Callable[[Tensor], Tensor]:
    """Map the legacy activation names ``"relu"`` / ``"gelu"`` to their functions.

    Raises:
        RuntimeError: for any other value.
    """
    for name, fn in (("relu", F.relu), ("gelu", F.gelu)):
        if activation == name:
            return fn

    raise RuntimeError("activation should be relu/gelu, not {}".format(activation))

class CustomTransformerDecoderLayer(nn.Module):
    """Transformer decoder layer that also returns its cross-attention weights.

    The layer is made of self-attention, cross-attention over ``memory`` and a
    feed-forward network, each with dropout, a residual connection and a
    LayerNorm. ``forward`` returns ``(output, attn)`` instead of only the
    output.

    Args:
        d_model: feature size of the inputs.
        nhead: number of attention heads.
        dim_feedforward: hidden size of the feed-forward network.
        dropout: dropout probability used throughout the layer.
        activation: feed-forward activation, a callable or ``"relu"``/``"gelu"``.
        layer_norm_eps: epsilon of the LayerNorm modules.
        batch_first: forwarded to both ``MultiheadAttention`` modules.
        norm_first: if ``True`` LayerNorm is applied before each sub-block
            (pre-norm), otherwise after the residual sum (post-norm).
        need_attn: forwarded to the cross-attention call as
            ``average_attn_weights``.
        device, dtype: factory kwargs for the parameterised sub-modules.
    """
    __constants__ = ['batch_first', 'norm_first']

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False, 
                 need_attn: bool = True, device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                            **factory_kwargs)
        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                                 **factory_kwargs)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward, **factory_kwargs)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model, **factory_kwargs)

        self.norm_first = norm_first
        self.norm1 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm2 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm3 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)
        # Legacy string support for activation function.
        if isinstance(activation, str):
            self.activation = _get_activation_fn(activation)
        else:
            self.activation = activation
        # NOTE(review): despite its name this flag is passed as
        # `average_attn_weights` (head averaging), not `need_weights`.
        # Left as-is so existing callers keep the same outputs; confirm intent.
        self.need_attn=need_attn
    def __setstate__(self, state):
        # Objects pickled without an activation entry fall back to ReLU.
        if 'activation' not in state:
            state['activation'] = F.relu
        super().__setstate__(state)

    def forward(
        self,
        tgt: Tensor,
        memory: Tensor,
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        tgt_is_causal: bool = False,
        memory_is_causal: bool = False,
    ) -> "tuple[Tensor, Optional[Tensor]]":
        r"""Pass the inputs (and mask) through the decoder layer.

        Args:
            tgt: the sequence to the decoder layer (required).
            memory: the sequence from the last layer of the encoder (required).
            tgt_mask: the mask for the tgt sequence (optional).
            memory_mask: the mask for the memory sequence (optional).
            tgt_key_padding_mask: the mask for the tgt keys per batch (optional).
            memory_key_padding_mask: the mask for the memory keys per batch (optional).
            tgt_is_causal: forwarded as ``is_causal`` to the self-attention call.
                Default: ``False``.
            memory_is_causal: forwarded as ``is_causal`` to the cross-attention call.
                Default: ``False``.

        Returns:
            ``(x, attn)``: the layer output and the cross-attention weights
            returned by ``multihead_attn``.
        """
        # see Fig. 1 of https://arxiv.org/pdf/2002.04745v1.pdf

        x = tgt
        if self.norm_first:
            x = x + self._sa_block(x=self.norm1(x), attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask, is_causal=tgt_is_causal)
            # _mha_block returns (output, attn); unpack before the residual add
            # (adding the tuple directly raised a TypeError and left `attn` unbound).
            mha_out, attn = self._mha_block(self.norm2(x), memory, memory_mask, memory_key_padding_mask, memory_is_causal)
            x = x + mha_out
            x = x + self._ff_block(self.norm3(x))
        else:
            x_sa = self.norm1(x + self._sa_block(x=x, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask, is_causal=tgt_is_causal))
            # NOTE(review): the cross-attention query here is the raw `tgt`, not the
            # self-attended `x_sa`; `x_sa` only enters through the residual below.
            # This differs from the usual post-norm decoder layer - confirm it is intended.
            x,attn=self._mha_block(x, memory, memory_mask, memory_key_padding_mask, memory_is_causal,)
            x = self.norm2(x_sa + x)
            x = self.norm3(x + self._ff_block(x))

        return x,attn


    # self-attention block
    def _sa_block(self, x: Tensor,
                  attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor], is_causal: bool = False) -> Tensor:
        """Self-attention over ``x`` followed by dropout; weights are not computed."""
        x = self.self_attn(x, x, x,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           is_causal=is_causal,
                           need_weights=False)[0]
        return self.dropout1(x)

    # multihead attention block
    def _mha_block(self, x: Tensor, mem: Tensor,
                   attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor], is_causal: bool = False) -> "tuple[Tensor, Optional[Tensor]]":
        """Cross-attention of ``x`` over ``mem``; returns ``(dropout(output), attn)``."""
        x,attn= self.multihead_attn(x, mem, mem,
                                attn_mask=attn_mask,
                                key_padding_mask=key_padding_mask,
                                is_causal=is_causal,
                                average_attn_weights=self.need_attn)

        return self.dropout2(x),attn

    # feed forward block
    def _ff_block(self, x: Tensor) -> Tensor:
        """Position-wise feed-forward network followed by dropout."""
        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.dropout3(x)

In [46]:
from typing import Tuple,Callable,List,Union

def generate_square_subsequent_mask(sz):
    """Build an additive causal mask of shape ``(sz, sz)``.

    Entries on and below the diagonal are ``0.0``; entries above the diagonal
    (future positions) are ``-inf``.
    """
    visible = torch.ones(sz, sz).tril().bool()
    return torch.zeros(sz, sz).masked_fill(~visible, float('-inf'))




def load_bert_embedder(tokenizer_model:str,tokenizer):
    """Load the BERT model and align it with ``tokenizer``.

    The model is read from ``data/model/`` when that directory exists,
    otherwise ``tokenizer_model`` is fetched through ``torch.hub``. In both
    cases the result is passed through ``update_bert_model`` before being
    returned.
    """
    source=tokenizer_model if not os.path.exists('data/model') else 'data/model/'
    bert=torch.hub.load('huggingface/pytorch-transformers', 'model',source)
    return update_bert_model(bert,tokenizer)




def update_bert_model(model,tokenizer):
    """Resize ``model``'s token embeddings to ``len(tokenizer)`` if they differ.

    The model is always saved to ``data/model`` afterwards and returned.
    """
    logging.info("Updating Model based on Tokenizer")
    embedding_rows=model.embeddings.word_embeddings.weight.shape[0]
    wanted_rows=len(tokenizer)
    if embedding_rows!=wanted_rows:
        model.resize_token_embeddings(wanted_rows)
    model.save_pretrained('data/model')
    return model


class ImgPatcher(nn.Module):
    """Split a square image into a grid of patches and embed each patch.

    A single ``Conv2d`` whose kernel size equals its stride performs both the
    patch extraction and the linear projection to ``patch_dims`` features.

    Args:
        n_patches: total number of patches; must be a perfect square.
        patch_dims: embedding size produced for each patch.
        img_chw: ``(C, H, W)`` of the input images; ``H`` must equal ``W``.

    Raises:
        AssertionError: if ``n_patches`` is not a positive perfect square, the
            image is not square, or the image size cannot be tiled into
            exactly ``n_patches`` patches.
    """
    def __init__(self,n_patches:int,patch_dims:int,img_chw:Tuple[int,int,int]):
        super(ImgPatcher,self).__init__()
        C,H,W=img_chw
        # Exact integer square root instead of a float sqrt comparison.
        grid=math.isqrt(n_patches)
        assert grid*grid==n_patches, "Please make sure n_patches is of square dimensions"
        assert H==W, "Please make sure that the image is a square image"
        assert 0<grid<=H, "Please make sure n_patches is positive and the grid side does not exceed the image size"
        self.n_patches=n_patches
        self.patch_dims=patch_dims
        self.kernel_size=H//grid
        # The conv yields H//kernel_size patches per side; fail here with a clear
        # message instead of an opaque reshape error inside forward().
        assert H//self.kernel_size==grid, "Please make sure the image size can be tiled into n_patches patches"
        self.conv=nn.Conv2d(in_channels=C,out_channels=self.patch_dims,kernel_size=self.kernel_size,stride=self.kernel_size)
    def forward(self,x):
        """Return patch embeddings of shape ``(N, n_patches, patch_dims)`` in row-major grid order."""
        x=self.conv(x)                      # (N, patch_dims, grid, grid)
        x=x.permute(0,2,3,1)                # (N, grid, grid, patch_dims)
        x_emb=x.reshape(x.size(0),self.n_patches,x.size(-1))
        return x_emb


class SpatialStream(nn.Module):
    """Image encoder: patch embedding, positional encoding, then a stack of encoder layers.

    ``forward`` returns the output of every encoder layer (not just the last
    one) as a list, in layer order.
    """
    def __init__(self, n_patches:int,img_chw:Tuple[int,int,int],dropout:float=0.1,d_model:int=768,nhead:int=8,dim_feedforward:int=2048,activation:str='relu',num_ts_blocks:int=2):
        super(SpatialStream,self).__init__()
        self.patcher=ImgPatcher(n_patches,d_model,img_chw)
        self.pos_enc=PositionalEncoding(d_model,dropout,n_patches)
        # `dropout` is only handed to the positional encoding; the encoder
        # layers are built with their own default dropout.
        layers=[]
        for _ in range(num_ts_blocks):
            layers.append(TransformerEncoderLayer(d_model,nhead,dim_feedforward,batch_first=True,activation=activation))
        self.spattransformer_blocks=nn.ModuleList(layers)

    def forward(self,x):
        """Encode images ``x`` and return one memory tensor per encoder layer."""
        hidden=self.pos_enc(self.patcher(x))
        memories=[]
        for layer in self.spattransformer_blocks:
            hidden=layer(hidden)
            memories.append(hidden)
        return memories

class SemanticStream(nn.Module):
    """Caption decoder: BERT embeddings, positional encoding, custom decoder layers, vocab projection.

    Decoder layer ``i`` attends over ``enc_memory_list[i]``; layers and
    memories are paired with ``zip``, so the shorter of the two decides how
    many layers run.

    Args:
        max_seql: maximum caption length (tokenizer ``max_length`` and number
            of greedy decoding steps).
        dropout: dropout of the positional encoding.
        num_ts_blocks: number of decoder layers.
        d_model: feature size of the decoder layers.
            NOTE(review): this must equal the hidden size of the loaded BERT
            model; the default of 512 presumably does not match a 768-wide
            BERT - confirm before relying on the default.
        nhead, dim_feedforward, activation: forwarded to each decoder layer.
        tokenizer_model: identifier used to load the tokenizer and BERT model.
    """
    def __init__(self,max_seql:int,dropout:float=0.1,num_ts_blocks:int=2,d_model:int=512,nhead:int=8,dim_feedforward:int=2048,activation:str='relu',tokenizer_model='medicalai/ClinicalBERT'):
        super(SemanticStream,self).__init__()
        self.max_seql=max_seql
        self.tokenizer=load_tokenizer(tokenizer_model=tokenizer_model)            #add this to dataloader instead of model along with patching
        self.bert_embedding = load_bert_embedder(tokenizer_model=tokenizer_model,tokenizer=self.tokenizer)
        self.pos_enc=PositionalEncoding(d_model,dropout,max_seql)
        self.semtransformer_blocks=nn.ModuleList([CustomTransformerDecoderLayer(d_model,nhead,dim_feedforward,batch_first=True,activation=activation) for _ in range(num_ts_blocks)])
        self.out=nn.Linear(d_model,len(self.tokenizer))
    def forward(self,target=None,enc_memory_list=None):
        """Decode ``target`` against the encoder memories.

        Args:
            target: a list of caption strings (tokenized here) or a tensor of
                token ids in which id 0 is treated as padding.
            enc_memory_list: one encoder memory tensor per decoder layer.

        Returns:
            ``(logits, attn)``: vocabulary logits for every position and the
            cross-attention weights of the last decoder layer that ran
            (``None`` if no layer ran).

        Raises:
            TypeError: if ``target`` or ``enc_memory_list`` is ``None``.
        """
        if target is None or enc_memory_list is None:
            raise TypeError("SemanticStream.forward requires both target and enc_memory_list")
        # Tokenizer output and the causal mask are created on CPU; move them to
        # wherever the module's parameters live.
        device=self.out.weight.device
        if isinstance(target[0],str):
            tgt=self.tokenizer(target,max_length=self.max_seql,padding='max_length' if self.training else True,truncation=True,return_tensors='pt',add_special_tokens=self.training)
            tgt_padmask=(tgt['attention_mask']==0).to(device)
            tgt=tgt['input_ids'].to(device)
        else:
            tgt=target.to(device)
            tgt_padmask=(tgt==0)
        tgt_mask=generate_square_subsequent_mask(tgt.shape[-1]).to(device)
        # BERT's attention_mask uses 1 for real tokens and 0 for padding, i.e.
        # the inverse of the key-padding mask used by the decoder layers.
        # NOTE(review): BERT attends bidirectionally, so each position's embedding
        # can see later target tokens during teacher forcing - confirm this is acceptable.
        embedded=self.bert_embedding(input_ids=tgt,attention_mask=(~tgt_padmask).long()).last_hidden_state
        encoded=self.pos_enc(embedded)
        attn=None
        for decoder_layer,memory in zip(self.semtransformer_blocks,enc_memory_list):
            encoded,attn=decoder_layer(encoded,memory,tgt_mask=tgt_mask,tgt_key_padding_mask=tgt_padmask,tgt_is_causal=False)
        out=self.out(encoded)
        return out,attn

    def evaluate(self,enc_memory_list=None):
        """Greedy decoding for exactly ``max_seql`` steps, starting from ``[CLS]``.

        Every step re-runs ``forward`` on the whole prefix and appends the
        arg-max token. Returns the concatenated last-position logits of all
        steps, shape ``(batch, max_seql, vocab)``, and the attention of the
        final step.
        """
        batch_size=enc_memory_list[0].shape[0]
        device=enc_memory_list[0].device
        target=['[CLS]' for _ in range(batch_size)]
        temptok=self.tokenizer(target,padding=True,return_tensors='pt',add_special_tokens=False)['input_ids'].to(device)
        logits=[]
        attn=None
        while len(logits)<self.max_seql:
            out,attn=self.forward(target=temptok,enc_memory_list=enc_memory_list)
            last=out[:,-1]
            logits.append(last.unsqueeze(1))
            probs=F.log_softmax(last,dim=-1)
            top_prob=probs.topk(1)[1]
            temptok=torch.cat([temptok,top_prob],dim=1)
        logits=torch.cat(logits,dim=1)
        return logits,attn

    def save_bert(self):
        """Persist the BERT model to ``data/model/`` and the tokenizer to ``data/tokenizer/``."""
        self.bert_embedding.save_pretrained('data/model/')
        self.tokenizer.save_pretrained('data/tokenizer/')


class TwoStreamTransformer(nn.Module):
    """Image-to-text model pairing a ``SpatialStream`` encoder with a ``SemanticStream`` decoder.

    In training mode the decoder is run on ``target``; otherwise the decoder's
    ``evaluate`` routine is used and ``target`` is ignored.
    """
    def __init__(self, n_patches:int,img_chw:Tuple[int,int,int],max_seql:int,dropout:float=0.1,d_model:int=768,nhead:int=8,dim_feedforward:int=2048,activation:str='relu',num_ts_blocks:int=2,tokenizer_model:any='medicalai/ClinicalBERT'):
        super(TwoStreamTransformer,self).__init__()
        self.encoder=SpatialStream(n_patches,img_chw,dropout,d_model,nhead,dim_feedforward,activation,num_ts_blocks)
        self.decoder=SemanticStream(max_seql,dropout,num_ts_blocks,d_model,nhead,dim_feedforward,activation,tokenizer_model)

    def forward(self,images,target=None):
        """Return ``(logits, attn)`` for a batch of ``images``."""
        memories=self.encoder(images)
        if self.training:
            logits,attn=self.decoder(enc_memory_list=memories,target=target)
            return logits,attn
        logits,attn=self.decoder.evaluate(enc_memory_list=memories)
        return logits,attn



        

        

        
    

In [6]:
import torch
import numpy as np


In [7]:
class HFTokenizer:
    """Thin wrapper around a Hugging Face tokenizer loaded through ``torch.hub``.

    The tokenizer is read from ``data/<model-name>/tokenizer`` when that path
    exists, otherwise it is fetched by its model identifier. Calling the
    wrapper returns only the ``input_ids`` of the encoded batch.

    Args:
        tokenizer_model: model identifier; the part after the last ``/`` names
            the local cache folder.
        max_length: assigned to the tokenizer's ``model_max_length``.
    """
    def __init__(self,tokenizer_model,max_length):
        tok_pth=os.path.join("data",tokenizer_model.split('/')[-1],"tokenizer")
        self.name=tokenizer_model
        source=tok_pth if os.path.exists(tok_pth) else tokenizer_model
        self.tokenizer=torch.hub.load('huggingface/pytorch-transformers', 'tokenizer',source)
        self.save_pth=tok_pth
        self.tokenizer.model_max_length=max_length
        self.vocab=self.tokenizer.vocab
    def __call__(self,data,padding=None,truncation=True,return_tensors='pt',add_special_tokens=True):
        """Encode ``data`` and return its ``input_ids``."""
        return self.tokenizer(data,padding=padding,truncation=truncation,return_tensors=return_tensors,add_special_tokens=add_special_tokens).input_ids

    def decode(self,tok_ids):
        """Convert token ids back to text, dropping special tokens."""
        return self.tokenizer.decode(tok_ids,skip_special_tokens=True,clean_up_tokenization_spaces=True)

    def __len__(self):
        return len(self.tokenizer)

    def save(self):
        """Write the tokenizer to ``self.save_pth``."""
        self.tokenizer.save_pretrained(self.save_pth)

    def update(self,data:torch.utils.data.DataLoader):
        """Run the tokenizer over ``data``, report the row count and save.

        ``data`` may be a ``DataLoader`` yielding ``(image, text)`` batches, a
        NumPy array / tensor (converted with ``tolist()``), or any other
        sequence the tokenizer accepts.

        NOTE(review): this only *encodes* the data; nothing here adds tokens to
        the vocabulary - confirm whether a real vocabulary update is expected.
        """
        dpoints=0
        if isinstance(data,torch.utils.data.DataLoader):
            for (_img,text) in data:
                self.tokenizer(text)
                dpoints+=len(text)
        else:
            # np.array / torch.tensor are factory functions, not types, so they
            # cannot be used in isinstance(); check the real classes instead.
            if isinstance(data,(np.ndarray,torch.Tensor)):
                data=data.tolist()
            self.tokenizer(data)
            dpoints+=len(data)
        print(f"Updated tokens from {dpoints} rows of data")
        self.save()

In [19]:
import os
# tokenizer=HFTokenizer('medicalai/ClinicalBERT',20)
# Toy batch for the decoding experiments: prefixes of one sentence taken in
# 10-character steps (the first prefix, s[:0], is the empty string).
s="I love chicken so much, I want to eat it every day, its"
data=[s[:i] for i in range(0,len(s),10)]


In [138]:
fill_pad.unsqueeze(1).shape


torch.Size([119547, 1])

In [216]:
import torch.nn.functional as F
def forward(target,incompleted_idx,seq_idx):
    """Stand-in for the decoder: one-hot encode the first ``seq_idx+1`` target tokens.

    Only the rows listed in ``incompleted_idx`` are kept; the one-hot width is
    ``len(tokenizer)`` (a notebook-level global).
    """
    prefix=target[incompleted_idx,:seq_idx+1]
    vocab_size=len(tokenizer)
    return torch.nn.functional.one_hot(prefix,num_classes=vocab_size)


# Scratch experiment: early-stopping greedy decoding, driven by the one-hot
# `forward` stub above instead of a real model.
# Depends on kernel state not defined in this cell: `tokenizer`, `data`, `fill_pad`.
max_seql=20
# `target.shape` is read below, so `tokenizer` presumably returns a tensor here
# (e.g. the HFTokenizer wrapper) - TODO confirm.
target=tokenizer(data,truncation=True,padding='max_length')

batch_size=target.shape[0]

# `logits` holds generated token ids (not scores), seeded with [CLS] per row.
logits=torch.tensor([tokenizer.vocab['[CLS]'] for _ in range(batch_size)]).reshape(-1,1)
finished_probs=torch.zeros_like(target,dtype=torch.long)


out_main=torch.zeros((batch_size,max_seql,len(tokenizer))).float()+fill_pad.unsqueeze(0)
finished_logits=torch.zeros((batch_size,max_seql,len(tokenizer)))

seq_idx=0
completed_idx=[]
incompleted_idx=np.arange(batch_size)

while True:
    # Stop at max length or once every row has produced [SEP].
    if logits.shape[1]==max_seql or incompleted_idx.size==0:
        break
    out=forward(target=target,incompleted_idx=incompleted_idx,seq_idx=seq_idx).float()
    lastout=out[:,-1]
    probs=F.log_softmax(lastout,dim=-1)
    # On the first pass out_main is replaced by the 2-D `lastout`; the next pass
    # then concatenates that 2-D tensor with a 3-D one, which is the
    # "got 2 and 3" RuntimeError recorded in this cell's output.
    out_main=torch.cat([out_main,lastout.unsqueeze(1)],axis=1) if seq_idx>0 else lastout
    top_prob=probs.topk(1)[1]
    # On the first pass the [CLS] seed is replaced by the predicted token.
    logits=torch.cat([logits,top_prob],axis=1) if seq_idx>0 else top_prob
    completed_bool=(top_prob==tokenizer.vocab['[SEP]']).flatten()
    # Map positions inside the shrinking batch back to original row indices.
    c_idx=incompleted_idx[np.nonzero(completed_bool)]
    c_idx=c_idx.tolist() if hasattr(c_idx,'__iter__') else [c_idx]
    incompleted_idx=np.array([i for i in incompleted_idx if i not in c_idx])
    if len(c_idx)>0:
        completed_idx.extend(c_idx)
        # Copy the finished rows out, then drop them from the active batch.
        finished_probs[c_idx,:seq_idx+1]+=logits[np.where(completed_bool)[0],:seq_idx+1]
        logits=logits[(completed_bool==False).flatten()]
        out_main=out_main[(completed_bool==False).flatten(),:,:]
    seq_idx+=1
    print(f"logits_shape: {logits.shape}")
    print(out.shape)
    print(f"####{seq_idx}:  \n\tincompleted_idx: {incompleted_idx} \n\tcompleted_idx:  {completed_idx} \n\t finished_probs: {finished_probs}")
    #out,attn=model(logits,encmemorylist=memory)
    # logits.append(out[:,-1].unsqueeze(1))
    # probs=F.log_softmax(out[:,-1],dim=-1)
    # top_prob=probs.topk(1)[1]
    
# Sanity check: the reassembled ids should match the tokenized targets.
print(finished_probs==target)



torch.Size([6, 1, 119547])
logits_shape: torch.Size([6, 1])
####1:  
	incompleted_idx: [0 1 2 3 4 5] 
	completed_idx:  [] 
	 finished_probs: tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
torch.Size([6, 2, 119547])


RuntimeError: Tensors must have same number of dimensions: got 2 and 3

In [220]:
completed_bool.shape

torch.Size([6])

In [213]:
out_main.ndim

4

In [156]:
def forward(target,incompleted_idx,seq_idx):
    """One-hot encode the first ``seq_idx+1`` tokens of the selected target rows.

    Same definition as the ``forward`` stub in the ``In [216]`` cell; whichever
    cell ran last is the one in effect. Uses the notebook-level ``tokenizer``.
    """
    return torch.nn.functional.one_hot(target[incompleted_idx,:seq_idx+1],num_classes=len(tokenizer))    

In [155]:
torch.nn.functional.one_hot(target[incompleted_idx,:0+1],num_classes=len(tokenizer))

tensor([[[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]]])

In [157]:
forward(target,incompleted_idx,0)

tensor([[[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0]]])

In [126]:
forward(torch.tensor([1,5,6,8,10]).expand(size=(10,5))).shape

torch.Size([10, 5, 119547])

In [131]:
target.shape

torch.Size([6, 20])

In [130]:
gen=(target[:,i] for i in range(target.shape[1]))
next(gen)

tensor([101, 101, 101, 101, 101, 101])

In [None]:
def evaluate(self,enc_memory_list=None):
    """Greedy decoding with per-sequence early stopping at ``[SEP]``.

    Draft replacement for ``SemanticStream.evaluate``: ``self`` must provide
    ``forward(target=..., enc_memory_list=...)``, ``tokenizer`` (with ``vocab``
    and ``__len__``) and ``max_seql``. Sequences that emit ``[SEP]`` are
    removed from the active batch, and the encoder memories are subset to
    match, so later steps only process unfinished rows.

    Args:
        enc_memory_list: list of encoder memory tensors, batch on dim 0.

    Returns:
        ``(logits, attn)``: ``logits`` has shape ``(batch, max_seql, vocab)``.
        Steps a row actually decoded hold that step's last-position logits;
        every later step holds a one-hot ``[PAD]`` row. ``attn`` is the
        attention from the final ``forward`` call, which covers only the rows
        still active at that point (``None`` if no step ran).
    """
    vocab=self.tokenizer.vocab
    cls_id,sep_id,pad_id=vocab['[CLS]'],vocab['[SEP]'],vocab['[PAD]']
    vocab_size=len(self.tokenizer)
    batch_size=enc_memory_list[0].shape[0]
    device=enc_memory_list[0].device

    # Token ids generated so far for the rows that are still active.
    generated=torch.full((batch_size,1),cls_id,dtype=torch.long,device=device)
    # Pre-fill with one-hot [PAD] so finished rows read as padding.
    finished_logits=torch.zeros((batch_size,self.max_seql,vocab_size),device=device)
    finished_logits[:,:,pad_id]=1.0
    # Original batch index of every row still being decoded.
    active=torch.arange(batch_size,device=device)
    attn=None

    for step in range(self.max_seql):
        if active.numel()==0:
            break
        # Keep the memories aligned with the shrinking batch.
        memory=[mem[active] for mem in enc_memory_list]
        out,attn=self.forward(target=generated,enc_memory_list=memory)
        lastout=out[:,-1]
        finished_logits[active,step]=lastout
        next_tok=lastout.argmax(dim=-1,keepdim=True)
        generated=torch.cat([generated,next_tok],dim=1)
        unfinished=next_tok.flatten()!=sep_id
        generated=generated[unfinished]
        active=active[unfinished]
    return finished_logits,attn
    

In [107]:
fill_pad=torch.nn.functional.one_hot(torch.tensor(tokenizer.vocab['[PAD]']),num_classes=len(tokenizer)).float()

In [112]:
temptok=torch.zeros((batch_size,1,len(tokenizer)))+fill_pad

In [119]:
idx=torch.tensor([1,3])
temptok[idx,0,:40]+=torch.vstack([fill_pad[:40],fill_pad[:40]])

In [121]:
fill_pad=torch.nn.functional.one_hot(torch.tensor(tokenizer.vocab['[PAD]']),num_classes=len(tokenizer))
temptok=torch.zeros((batch_size,max_seql,len(tokenizer))).float()+fill_pad.unsqueeze(0)

In [180]:
logits

tensor([], size=(0, 16), dtype=torch.int64)

In [177]:
# Scratch experiment: same early-stopping bookkeeping as the one-hot version,
# but the "prediction" at each step is read straight from `target`, so the
# final comparison checks only the index bookkeeping.
# Depends on kernel state not defined in this cell: `tokenizer`, `data`.
max_seql=20
target=tokenizer(data,truncation=True,padding='max_length')
batch_size=target.shape[0]
logits_=logits=torch.tensor([tokenizer.vocab['[CLS]'] for _ in range(batch_size)]).reshape(-1,1)
seq_idx=0
completed_idx=[]
incompleted_idx=np.arange(batch_size)
finished_probs=torch.zeros(size=(batch_size,max_seql),dtype=torch.long)
while True:
    # Stop at max length or once every row has reached [SEP].
    if logits.shape[1]==max_seql or incompleted_idx.size==0:
        break
    #out,attn=model(logits,encmemorylist=memory)
    # logits.append(out[:,-1].unsqueeze(1))
    # probs=F.log_softmax(out[:,-1],dim=-1)
    # top_prob=probs.topk(1)[1]
    top_prob=target[incompleted_idx,seq_idx].reshape(-1,1)
    # On the first pass the [CLS] seed is replaced rather than extended.
    logits=torch.cat([logits,top_prob],axis=1) if seq_idx>0 else top_prob
    completed_bool=(top_prob==tokenizer.vocab['[SEP]']).flatten()
    # Map positions inside the shrinking batch back to original row indices.
    c_idx=incompleted_idx[np.nonzero(completed_bool)]
    c_idx=c_idx.tolist() if hasattr(c_idx,'__iter__') else [c_idx]
    incompleted_idx=np.array([i for i in incompleted_idx if i not in c_idx])
    if len(c_idx)>0:
        completed_idx.extend(c_idx)
        finished_probs[c_idx,:seq_idx+1]+=logits[np.where(completed_bool)[0],:seq_idx+1]
    # Drop finished rows from the active batch.
    logits=logits[(completed_bool==False).flatten()]
    print(f"logits_shape: {logits.shape};completed_bool")
    seq_idx+=1
    print(f"logits_shape: {logits.shape}")
    print(f"####{seq_idx}:  \n\tincompleted_idx: {incompleted_idx} \n\tcompleted_idx:  {completed_idx} \n\t finished_probs: {finished_probs}")
    #out,attn=model(logits,encmemorylist=memory)
    # logits.append(out[:,-1].unsqueeze(1))
    # probs=F.log_softmax(out[:,-1],dim=-1)
    # top_prob=probs.topk(1)[1]
    # Snapshot of step 10 kept in globals for inspection in later cells.
    if seq_idx==10:
            COMPBOOL=completed_bool
            LOGIT=logits

    
print(finished_probs==target)

logits_shape: torch.Size([6, 1]);completed_bool
logits_shape: torch.Size([6, 1])
####1:  
	incompleted_idx: [0 1 2 3 4 5] 
	completed_idx:  [] 
	 finished_probs: tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
logits_shape: torch.Size([5, 2]);completed_bool
logits_shape: torch.Size([5, 2])
####2:  
	incompleted_idx: [1 2 3 4 5] 
	completed_idx:  [0] 
	 finished_probs: tensor([[101, 102,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0],
        [  0,   0,

In [46]:
COMPBOOL=[True,False,True,False,True,False,True,False]
np.where(COMPBOOL)[0]

array([0, 2, 4, 6])

In [49]:
np.where()

[0]

In [84]:
a=torch.rand((5,1,100))
print(
    torch.max(F.log_softmax(a,dim=-1),dim=-1)[1].shape,
    F.log_softmax(a,dim=-1).topk(1)[1].shape
)
(decoder.tokenizer(y,padding=True,max_length=decoder.max_seql,truncation=False,return_tensors='pt').input_ids==0).shape

torch.Size([5, 1]) torch.Size([5, 1, 1])




torch.Size([10, 26])

In [6]:
from dataset import *
# Smoke test of the dataset pipeline using the `Stream_Dataset` exported by the
# external `dataset` module (its keyword arguments differ from the class
# defined earlier in this notebook).
# NOTE(review): the data directory is a hard-coded absolute path; the recorded
# output is a FileNotFoundError for result0.csv under that directory.
dataset=Stream_Dataset(
        data_dir="/home/mehedi/Desktop/raghib/FLICKR-30K IMAGE CAPTIONING/flickr30k_images/",
        imgsz=300,
        img_dir='flickr30k_images',
        tokenizer_model='medicalai/ClinicalBERT',
        csv_dir='result0.csv'

)
dataload=DataLoader(dataset=dataset,batch_size=4)
# Pull a single batch of 4 (image, caption) pairs.
x,y=next(iter(dataload))

FileNotFoundError: [Errno 2] No such file or directory: '/home/mehedi/Desktop/raghib/FLICKR-30K IMAGE CAPTIONING/flickr30k_images/result0.csv'

types.GenericAlias

In [47]:
# Build the full encoder/decoder model: 36 patches on a 300x300 RGB image
# (a 6x6 grid of 50-pixel patches) and captions of at most 45 tokens.
activation='relu'
num_ts_blocks=2
model=TwoStreamTransformer(n_patches=36,img_chw=(3,300,300),max_seql=45,activation=activation,num_ts_blocks=num_ts_blocks)
# encoder=SpatialStream(n_patches=36,img_chw=(3,300,300),activation=activation,num_ts_blocks=num_ts_blocks)
# decoder=SemanticStream(max_seql=45,d_model=768,num_ts_blocks=num_ts_blocks)


Using cache found in C:\Users\noorr/.cache\torch\hub\huggingface_pytorch-transformers_main
Using cache found in C:\Users\noorr/.cache\torch\hub\huggingface_pytorch-transformers_main


In [48]:
# Training-mode smoke test: random images paired with a repeated dummy caption.
model.train()
x,y=torch.rand((10,3,300,300)),['i love chicken' for _ in range(10)]
logits,attn=model(images=x,target=y)



In [49]:
logits.shape

torch.Size([10, 45, 119547])

In [10]:
len(load_tokenizer(tokenizer_model='medicalai/ClinicalBERT'))

Using cache found in /home/mehedi/.cache/torch/hub/huggingface_pytorch-transformers_main


119547

In [14]:
start=time.time()
torch.tril(torch.ones(45,5))
print(time.time()-start)

0.010996341705322266


In [24]:
out.shape

torch.Size([10, 45, 119547])

In [22]:
# Greedy-decoding smoke test on random images.
# NOTE(review): `encoder` and `decoder` are not created by any active cell in
# this notebook (their constructors are commented out above), and this call
# uses an `evaluate(..., get_logits=True)` variant returning three values,
# which does not match SemanticStream.evaluate as defined here.
x,y=torch.rand((10,3,300,300)),torch.zeros(10,1)
# Fill y with 101 (the id shown for the first token in the recorded outputs).
y+=101
mem=encoder(x)
out,attn,temptok=decoder.evaluate(mem,get_logits=True)
print()
# print(decoder.training)
# y_hat,attention=decoder(enc_memory_list=mem,target=y)



tensor([[   101,  95648],
        [   101,  41026],
        [   101,  45135],
        [   101,  95305],
        [   101,  45135],
        [   101,  20901],
        [   101,  95648],
        [   101,  57118],
        [   101,  45135],
        [   101, 102133]]) ############################################################ torch.Size([10, 2])
tensor([[   101,  95648,  86171],
        [   101,  41026,  29440],
        [   101,  45135,  31014],
        [   101,  95305,  90805],
        [   101,  45135,  67841],
        [   101,  20901,  64118],
        [   101,  95648,  31014],
        [   101,  57118,  90805],
        [   101,  45135,  96073],
        [   101, 102133,  45135]]) ############################################################ torch.Size([10, 3])
tensor([[   101,  95648,  86171,  45839],
        [   101,  41026,  29440,  94638],
        [   101,  45135,  31014,  37297],
        [   101,  95305,  90805,  40710],
        [   101,  45135,  67841,  22437],
        [   101,  20901,  

In [12]:
out.shape

torch.Size([10, 45, 119547])

In [None]:
torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]).sum().item()

0

{'input_ids': [101, 177, 16138, 11135, 21885, 102, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]}

In [257]:
(torch.tensor([[0.1,0.1,0.8],[0.1,0.8,0.8],[0.1,0.1,0.8],[0.1,0.1,0.8]])==0.8).sum(axis=1)>0

tensor([True, True, True, True])

In [16]:
decoder.tokenizer(tgt,padding=False,add_special_tokens=False,return_tensors='pt')["input_ids"].shape

NameError: name 'encoder' is not defined

In [146]:
from transformers import BertTokenizerFast

In [122]:
len(torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'data/tokenizer/'))

Using cache found in C:\Users\noorr/.cache\torch\hub\huggingface_pytorch-transformers_main


30525

In [88]:
tokenizer=torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased')
model=torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased')

Using cache found in C:\Users\noorr/.cache\torch\hub\huggingface_pytorch-transformers_main
Using cache found in C:\Users\noorr/.cache\torch\hub\huggingface_pytorch-transformers_main
