In [240]:
from torch import math
import torch
import torch.nn as nn
import os, logging
from torch.utils.data import Dataset,DataLoader

from torchvision import transforms
import cv2
import numpy as np


class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position=torch.arange(max_len).unsqueeze(1)
        div_term=1/(10000**(torch.arange(0,d_model,2)/d_model))
        pe=torch.zeros(max_len,d_model)
        pe[:,0::2]=torch.sin(position*div_term)
        pe[:,1::2]=torch.cos(position*div_term)
        self.register_buffer('pe', pe)
    def forward(self,patches):
        return self.dropout(patches+self.pe[:patches.shape[1]])



# class ImgPatcher(nn.Module):
#     def __init__(self,n_patches:int,emb_d:int,channels_first:bool,img_shape:tuple):
#         super(ImgPatcher,self).__init__()
#         self.channels_first=channels_first
#         H,W,C=img_shape
#         if self.channels_first:
#             C,H,W=img_shape
#         self.chw=(C,H,W)
#         assert H==W, "Please make sure image has same height and width"
#         assert H%n_patches==0, "Please make sure n_patches is factor of image dims H and W"
#         self.n_patches=n_patches
#         self.p=H//n_patches
#         self.embedding=nn.Linear(H*W*C//n_patches**2,emb_d)                            #n_patches^2 patches of size (H/n_patches*W/n_patches)*C will be created from each image
#         print(self.embedding.weight.shape)
    
#     def forward(self,img):
#         img=img.float()
#         N=img.size(0)
#         if self.channels_first:
#             img=img.permute(0,2,3,1)                #N,C,H,W -> N,H,W,C
#         patches=torch.zeros(N,self.n_patches**2,self.embedding.weight.shape[1])
#         for idx in range(N):
#             img_=img[idx]
#             for i in range(self.n_patches):
#                 for j in range(self.n_patches):
#                     patch=img_[i*self.p:(i+1)*self.p,j*self.p:(j+1)*self.p]
#                     patches[idx,(i*self.n_patches)+j]=patch.flatten()
#         return self.posenc(self.embedding(patches))







'medicalai/ClinicalBERT'

In [14]:
import configparser


def load_config_ini(config_path):
    config=configparser.ConfigParser()
    config.read(config_path)
    config_=dict()
    for section in config:
        for param in section:
            if len(config[section][param])>0:
                config_[param]=int(config[section][param]) if config[section][param].isdigit() else config[section][param]
            else:
                config_[param]=None
    return config_


<class 'str'>
'medicalai/ClinicalBERT',


In [40]:
from datetime import datetime
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

SyntaxError: expected ':' (3637728078.py, line 5)

In [53]:
from dataset import Stream_Dataset,HFTokenizer
training_loader=Stream_Dataset(
    data_dir='flikr8k',
    csv_dir='train.csv',
    img_dir='images',
    imgsz=config.imgsz,
    tokenizer_model=config.tokenizer_model
)

ValueError: Cannot remove the default section.

In [241]:
import torchvision.io
import torch
from torch.utils.data import Dataset

def load_tokenizer(tokenizer_model:str,data_dir:str=None):
    if not os.path.exists('data/tokenizer'):
        tokenizer=torch.hub.load('huggingface/pytorch-transformers', 'tokenizer',tokenizer_model)
        logging.info("Updating Tokenizer Vocab on Data")
        #insert code to get to process data_dir
        #[tokenizer(i) for i in tqdm(data_dir[:,1])]
        tokenizer.save_pretrained('data/tokenizer')
        return tokenizer
    else:
        tokenizer=torch.hub.load('huggingface/pytorch-transformers', 'tokenizer','data/tokenizer/')
        return tokenizer




class Stream_Dataset(Dataset):
    def __init__(self,seq_length,data_dir,img_dir,tokenizer_model='medicalai/ClinicalBERT'):
        self.data_dir=data_dir
        self.data=pd.read_csv(data_dir+'/'+'result0.csv').to_numpy()
        self.img_dir=img_dir
        self.max_length=seq_length
    def __len__(self):
        return self.data.shape[0]
    def __getitem__(self,idx):
        img_name=self.data[idx,0]
        semantic_str=self.data[idx,1]
        img=torchvision.io.read_image(self.data_dir+'/'+self.img_dir+'/'+img_name)
        return img,semantic_str
# dataset=Stream_Dataset(
#         seq_length=50,
#         data_dir="/home/mehedi/Desktop/raghib/FLICKR-30K IMAGE CAPTIONING/flickr30k_images",
#         img_dir='flickr30k_images',
#         tokenizer_model='bert-base-uncased'
# )


In [242]:

import torch.nn as nn
import torch
from torch import Tensor
from typing import Optional, Any, Union, Callable
from torch.nn import functional as F
from torch.nn import MultiheadAttention, LayerNorm,Dropout, Linear,TransformerEncoderLayer
def _get_activation_fn(activation: str) -> Callable[[Tensor], Tensor]:
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu

    raise RuntimeError("activation should be relu/gelu, not {}".format(activation))

class CustomTransformerDecoderLayer(nn.Module):
    __constants__ = ['batch_first', 'norm_first']

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False, 
                 need_attn: bool = True, device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                            **factory_kwargs)
        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                                 **factory_kwargs)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward, **factory_kwargs)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model, **factory_kwargs)

        self.norm_first = norm_first
        self.norm1 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm2 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm3 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)
        # Legacy string support for activation function.
        if isinstance(activation, str):
            self.activation = _get_activation_fn(activation)
        else:
            self.activation = activation
        self.need_attn=need_attn
    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super().__setstate__(state)

    def forward(
        self,
        tgt: Tensor,
        memory: Tensor,
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        tgt_is_causal: bool = False,
        memory_is_causal: bool = False,
    ) -> Tensor:
        r"""Pass the inputs (and mask) through the decoder layer.

        Args:
            tgt: the sequence to the decoder layer (required).
            memory: the sequence from the last layer of the encoder (required).
            tgt_mask: the mask for the tgt sequence (optional).
            memory_mask: the mask for the memory sequence (optional).
            tgt_key_padding_mask: the mask for the tgt keys per batch (optional).
            memory_key_padding_mask: the mask for the memory keys per batch (optional).
            tgt_is_causal: If specified, applies a causal mask as tgt mask.
                Mutually exclusive with providing tgt_mask. Default: ``False``.
            memory_is_causal: If specified, applies a causal mask as tgt mask.
                Mutually exclusive with providing memory_mask. Default: ``False``.
        Shape:
            see the docs in Transformer class.
        """
        # see Fig. 1 of https://arxiv.org/pdf/2002.04745v1.pdf

        x = tgt
        if self.norm_first:
            x = x + self._sa_block(x=self.norm1(x), attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask, is_causal=tgt_is_causal)
            x = x + self._mha_block(self.norm2(x), memory, memory_mask, memory_key_padding_mask, memory_is_causal)
            x = x + self._ff_block(self.norm3(x))
        else:
            x_sa = self.norm1(x + self._sa_block(x=x, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask, is_causal=tgt_is_causal))
            x,attn=self._mha_block(x, memory, memory_mask, memory_key_padding_mask, memory_is_causal,)
            x = self.norm2(x_sa + x)
            x = self.norm3(x + self._ff_block(x))

        return x,attn


    # self-attention block
    def _sa_block(self, x: Tensor,
                  attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor], is_causal: bool = False) -> Tensor:
        x = self.self_attn(x, x, x,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           is_causal=is_causal,
                           need_weights=False)[0]
        return self.dropout1(x)

    # multihead attention block
    def _mha_block(self, x: Tensor, mem: Tensor,
                   attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor], is_causal: bool = False) -> Tensor:
        x,attn= self.multihead_attn(x, mem, mem,
                                attn_mask=attn_mask,
                                key_padding_mask=key_padding_mask,
                                is_causal=is_causal,
                                average_attn_weights=self.need_attn)

        return self.dropout2(x),attn

    # feed forward block
    def _ff_block(self, x: Tensor) -> Tensor:
        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.dropout3(x)


In [243]:
def load_bert_embedder(tokenizer_model:str,tokenizer=None):
    tokenizer_file_name= tokenizer.name if tokenizer else tokenizer_model
    assert tokenizer_file_name==tokenizer_model, "Please make sure the HuggingFace tokenizer and embedder are from same repo"
    mod_pth=os.path.join("data",tokenizer_file_name.split('/')[-1],"model")
    model=torch.hub.load('huggingface/pytorch-transformers', 'model',(mod_pth if os.path.exists(mod_pth) else tokenizer_model))
    # model=torch.hub.load('huggingface/pytorch-transformers', 'model',mod_pth) 
    model=update_bert_model(model,tokenizer)
    return model



def update_bert_model(model,tokenizer=None):
    logging.info("Updating Model based on Tokenizer")
    tokenizer_file_name=model.name_or_path
    tok_pth=os.path.join("data",tokenizer_file_name.split('/')[-1],"tokenizer")
    if not tokenizer:
        print('NOT TOKENIZER')
        if not os.path.exists(tok_pth):
            raise ValueError(f"Please update tokenizer with desired dataset and keep at {tok_pth}")
        tokenizer=torch.hub.load('huggingface/pytorch-transformers', 'tokenizer',tok_pth)
    model_vocab_len=model.embeddings.word_embeddings.weight.shape[0]
    if model_vocab_len!=len(tokenizer):
        model.resize_token_embeddings(len(tokenizer))
        model.save_pretrained(tok_pth.replace('tokenizer','model'))
    return model


load_bert_embedder(tokenizer_model='bert-base-uncased',tokenizer=tokenizer)



AssertionError: Please make sure the HuggingFace tokenizer and embedder are from same repo

In [33]:
from typing import Tuple,Callable,List,Union
from torch import nn
def generate_square_subsequent_mask(sz):
    mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
    mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask




def load_bert_embedder(tokenizer_model:str,tokenizer=None):
    tokenizer_file_name= tokenizer.name if tokenizer else tokenizer_model
    assert tokenizer_file_name==tokenizer_model, "Please make sure the HuggingFace tokenizer and embedder are from same repo"
    mod_pth=os.path.join("data",tokenizer_file_name.split('/')[-1],"model")
    model=torch.hub.load('huggingface/pytorch-transformers', 'model',(mod_pth if os.path.exists(mod_pth) else tokenizer_model))
    # model=torch.hub.load('huggingface/pytorch-transformers', 'model',mod_pth) 
    model=update_bert_model(model,tokenizer,tokenizer_model)
    return model



def update_bert_model(model,tokenizer=None,tokenizer_model=None):
    logging.info("Updating Model based on Tokenizer")
    tokenizer_file_name=tokenizer.name if not tokenizer_model else tokenizer_model
    tok_pth=os.path.join("data",tokenizer_file_name.split('/')[-1],"tokenizer")
    if not tokenizer:
        if not os.path.exists(tok_pth):
            raise ValueError(f"Please update tokenizer with desired dataset and keep at {tok_pth}")
        else:
            tokenizer=torch.hub.load('huggingface/pytorch-transformers', 'model',tok_pth)
    model_vocab_len=model.embeddings.word_embeddings.weight.shape[0]
    if model_vocab_len<len(tokenizer):
        model.resize_token_embeddings(len(tokenizer))
    model.save_pretrained(tok_pth.replace('tokenizer','model'))
    return model


class ImgPatcher(nn.Module):
    def __init__(self,n_patches:int,patch_dims:int,img_chw:Tuple[int,int,int]):
        super(ImgPatcher,self).__init__()
        C,H,W=img_chw
        assert torch.math.ceil(torch.math.sqrt(n_patches))-torch.math.sqrt(n_patches)==0, "Please make sure n_patches is of square dimensions"
        assert H==W, "Please make sure that the image is a square image"
        self.n_patches=n_patches
        self.patch_dims=patch_dims
        self.kernel_size=int(H//torch.math.sqrt(n_patches))
        self.conv=nn.Conv2d(in_channels=C,out_channels=self.patch_dims,kernel_size=self.kernel_size,stride=self.kernel_size)
    def forward(self,x):
        x=self.conv(x)
        x=x.permute(0,2,3,1)
        x_emb=x.reshape(x.size(0),self.n_patches,x.size(-1))
        return x_emb


class SpatialStream(nn.Module):
    def __init__(self, n_patches:int,img_chw:Tuple[int,int,int],dropout:float=0.1,d_model:int=768,nhead:int=8,dim_feedforward:int=2048,activation:str='relu',num_ts_blocks:int=2):
        super(SpatialStream,self).__init__()
        self.patcher=ImgPatcher(n_patches,d_model,img_chw)
        self.pos_enc=PositionalEncoding(d_model,dropout,n_patches)
        self.spattransformer_blocks=nn.ModuleList([TransformerEncoderLayer(d_model,nhead,dim_feedforward,batch_first=True,activation=activation) for i in range(num_ts_blocks)])
        
    def forward(self,x):
        embedded=self.patcher(x)
        encoded=self.pos_enc(embedded)
        enc_memory_list=[]
        for encoder_layer in self.spattransformer_blocks:
            encoded=encoder_layer(encoded)
            enc_memory_list.append(encoded)
        return enc_memory_list

class SemanticStream(nn.Module):
    def __init__(self,max_seql:int,tokenizer=None,dropout:float=0.1,num_ts_blocks:int=2,d_model:int=512,nhead:int=8,dim_feedforward:int=2048,activation:str='relu',tokenizer_model='medicalai/ClinicalBERT'):
        super(SemanticStream,self).__init__()
        self.max_seql=max_seql
        self.bert_embedding = load_bert_embedder(tokenizer_model=tokenizer_model,tokenizer=tokenizer)
        self.pos_enc=PositionalEncoding(d_model,dropout,max_seql)
        self.semtransformer_blocks=nn.ModuleList([CustomTransformerDecoderLayer(d_model,nhead,dim_feedforward,batch_first=True,activation=activation) for _ in range(num_ts_blocks)])
        self.out=nn.Linear(d_model,self.bert_embedding.embeddings.word_embeddings.weight.shape[0])
    def forward(self,target=None,enc_memory_list=None):
        if isinstance(target[0],str):
            tgt=tokenizer(target,max_length=self.max_seql,padding='max_length' if self.training else True,truncation=True,return_tensors='pt',add_special_tokens=self.training)
        else:
            tgt=target
        tgt_padmask=(tgt==0)
        tgt_mask=generate_square_subsequent_mask(tgt.shape[-1])
        embedded=self.bert_embedding(input_ids=tgt,attention_mask=tgt_padmask).last_hidden_state
        encoded=self.pos_enc(embedded)
        for idx,(decoder_layer,memory) in enumerate(zip(self.semtransformer_blocks,enc_memory_list)):
            encoded,attn=decoder_layer(encoded,memory,tgt_mask=tgt_mask,tgt_key_padding_mask=tgt_padmask,tgt_is_causal=False)
        out=self.out(encoded)
        return out,attn
 

    def evaluate(self,enc_memory_list=None):
        batch_size=enc_memory_list[0].shape[0]
        logits=torch.tensor([tokenizer.vocab['[CLS]'] for _ in range(batch_size)]).reshape(-1,1)
        finished_probs=torch.zeros(batch_size,self.max_seql)    
        seq_idx=0
        completed_idx=[]
        incompleted_idx=np.arange(batch_size)

        while True:
            start=time.time()
            if logits.shape[1]==self.max_seql or incompleted_idx.size==0:
                break
            out,_=self.forward(target=logits,enc_memory_list=enc_memory_list)
            mid=time.time()
            lastout=out[:,-1].unsqueeze(1)
            probs=F.log_softmax(lastout,dim=-1)
            top_prob=probs.topk(1)[1].squeeze(-1)
            logits=torch.cat([logits,top_prob],axis=1) if seq_idx>0 else top_prob
            completed_bool=(top_prob==tokenizer.vocab['[SEP]']).flatten()
            c_idx=incompleted_idx[np.nonzero(completed_bool)]
            c_idx=c_idx.tolist() if hasattr(c_idx,'__iter__') else [c_idx]
            
            if len(c_idx)>0:
                incompleted_idx=np.array(list(set(incompleted_idx).difference(c_idx)))
                completed_idx.extend(c_idx)
                finished_probs[c_idx,:seq_idx+1]+=logits[np.where(completed_bool)[0],:seq_idx+1]
            logits=logits[completed_bool==False]
            seq_idx+=1
            end=time.time()
            print(f"inference:{mid-start}, Complete_Indexing={end-mid}")
        start=time.time()
        if incompleted_idx.size!=0:
            finished_probs[incompleted_idx]+=logits
        out,attn=self.forward(target=finished_probs.int(),enc_memory_list=enc_memory_list)
        end=time.time()
        print(end-start)
        return out,attn
      
        


class TwoStreamTransformer(nn.Module):
    def __init__(self, n_patches:int,img_chw:Tuple[int,int,int],max_seql:int,dropout:float=0.1,d_model:int=768,nhead:int=8,dim_feedforward:int=2048,activation:str='relu',num_ts_blocks:int=2,tokenizer_model:any='medicalai/ClinicalBERT'):
        super(TwoStreamTransformer,self).__init__()
        self.encoder=SpatialStream(n_patches,img_chw,dropout,d_model,nhead,dim_feedforward,activation,num_ts_blocks)
        self.decoder=SemanticStream(max_seql,dropout,num_ts_blocks,d_model,nhead,dim_feedforward,activation,tokenizer_model)

    def forward(self,images,target=None):
        enc_memory_list=self.encoder(images)
        if not self.training:
            logits,attn=self.decoder.evaluate(enc_memory_list=enc_memory_list)
        else:
            logits,attn=self.decoder(enc_memory_list=enc_memory_list,target=target)
        return logits,attn            



        




Model
imgsz
n_patches
max_seql
d_model
nhead
dim_feedforward
num_ts_blocks
tokenizer_model
activation
Training
num_epochs
optimizer
learning_rate
momentum
rms_alpha
device
precision
device_n
train_size
eval_size
Data
data_dir
img_dir
csv_dir


{'imgsz': 300,
 'n_patches': 36,
 'max_seql': 20,
 'd_model': 768,
 'nhead': 8,
 'dim_feedforward': 2048,
 'num_ts_blocks': 4,
 'tokenizer_model': 'medicalai/ClinicalBERT',
 'activation': 'relu',
 'num_epochs': 20,
 'optimizer': 'adam                 #adam, sgd, rmsprop',
 'learning_rate': '1e-5',
 'momentum': '0.9',
 'rms_alpha': '0.99',
 'device': 'cuda',
 'precision': 'mixed-16         #Floating Point Precision | Options: [16-mixed|bf16-mixed|32-true|64-true|64|32|16|bf16]',
 'device_n': '1                 #Number of Devices',
 'train_size': 20,
 'eval_size': 10,
 'data_dir': 'flickr8k',
 'img_dir': 'images',
 'csv_dir': None}

In [8]:
from torchmetrics.text import BLEUScore

In [32]:
def load_config_ini(config_path):
    config=configparser.ConfigParser()
    config.read(config_path)
    config_=dict()
    for section in [i for i in config if 'DEFAULT' not in i]:
        print(section)
        for param in config[section]:
            print(param)
            if len(config[section][param])>0:
                config_[param]=int(config[section][param]) if config[section][param].isdigit() else config[section][param]
            else:
                config_[param]=None
    return config_


In [26]:
from config import Config
config=Config(config_path='config.ini')

KeyError: 'M'

In [6]:
from torchmetrics import ROUGEScore, BLEU, METEOR

ImportError: cannot import name 'ROUGEScore' from 'torchmetrics' (f:\Users\noorr\anaconda3\envs\torch_\lib\site-packages\torchmetrics\__init__.py)

In [286]:
import time
memory=[torch.rand([len(y),36,768]) for _ in range(2)]
y=torch.tensor([[   101,  10551,  14739,  75980,  12682,  10169,  48201,  99274,  10157,
          40830,  25157,  10160,  10455,  27925,  11371,  60083,  10230,  10950,
          10106,    102],
        [   101,  11736,  10588,  10106,  19118,  11250,  10107,  10301,  24210,
            169,  49429,  80870,  13005,  11787,    119,    102,      0,      0,
              0,      0],
        [   101,    169,  18048,  10106,    169,  78200,  67348,  10124, 106793,
          10741,    169,  11847,  10108,  16527,  35819,  10106,  10151,  14722,
          13170,    102],
        [   101,  30455,  10106,    169,  23254,  81050,  10111,  11250,  10124,
          32173,  10135,  16527,  10835,  10111,  20169,  11269,  11327,    169,
          39051,    102]])

decoder=SemanticStream(max_seql=20,tokenizer=tokenizer,d_model=768,num_ts_blocks=2)


decoder.eval()
out,attn=decoder.evaluate(enc_memory_list=memory)
out.shape

Using cache found in C:\Users\noorr/.cache\torch\hub\huggingface_pytorch-transformers_main


inference:2.41316294670105, Complete_Indexing=1.064598798751831
inference:0.5931708812713623, Complete_Indexing=0.05593156814575195
inference:0.16003131866455078, Complete_Indexing=0.17299985885620117
inference:0.5084817409515381, Complete_Indexing=0.021002531051635742
inference:0.17104220390319824, Complete_Indexing=0.022999286651611328
inference:0.1816844940185547, Complete_Indexing=0.008000850677490234
inference:0.16700100898742676, Complete_Indexing=0.01400136947631836
inference:0.11499786376953125, Complete_Indexing=0.010000944137573242
inference:0.11900186538696289, Complete_Indexing=0.009997844696044922
inference:0.12399959564208984, Complete_Indexing=0.01100301742553711
inference:0.16499781608581543, Complete_Indexing=0.011003255844116211
inference:0.17501568794250488, Complete_Indexing=0.012001752853393555
inference:0.13351750373840332, Complete_Indexing=0.05500316619873047
inference:0.21015024185180664, Complete_Indexing=0.01000213623046875
inference:0.1739969253540039, Compl

torch.Size([4, 20, 119547])

In [19]:
from torchmetrics.text import TranslationEditRate
metric = TranslationEditRate(return_sentence_level_score=True)
preds = ['the cat is on the mat', 'there is no cat on the mat']
target = [ 'no feline over the mat','there is a cat on the mat',]
metric(preds, target)

(tensor(0.4167), tensor([0.8000, 0.1429]))

In [2]:
bleu_fn=BLEUScore()

In [10]:
l=['i like food ', 'i love food']; l2=['i like food']
bleu_fn(l,l2).item()

0.0

In [None]:
# activation='relu'
# num_ts_blocks=2
model=TwoStreamTransformer(n_patches=36,img_chw=(3,300,300),max_seql=45,activation=activation,num_ts_blocks=num_ts_blocks)
# encoder=SpatialStream(n_patches=36,img_chw=(3,300,300),activation=activation,num_ts_blocks=num_ts_blocks)
# decoder=SemanticStream(max_seql=45,d_model=768,num_ts_blocks=num_ts_blocks)
model.eval()
model(images=x,target=y)

Using cache found in /home/mehedi/.cache/torch/hub/huggingface_pytorch-transformers_main


tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]) torch.float32


RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [None]:
model.train()
x,y=torch.rand((10,3,300,300)),['i love chicken' for _ in range(10)]
logits,attn=model(images=x,target=y)

Traceback (most recent call last):
  File "_pydevd_bundle/pydevd_cython.pyx", line 1078, in _pydevd_bundle.pydevd_cython.PyDBFrame.trace_dispatch
  File "_pydevd_bundle/pydevd_cython.pyx", line 297, in _pydevd_bundle.pydevd_cython.PyDBFrame.do_wait_suspend
  File "/home/mehedi/anaconda3/envs/hf_env/lib/python3.10/site-packages/debugpy/_vendored/pydevd/pydevd.py", line 1976, in do_wait_suspend
    keep_suspended = self._do_wait_suspend(thread, frame, event, arg, suspend_type, from_this_thread, frames_tracker)
  File "/home/mehedi/anaconda3/envs/hf_env/lib/python3.10/site-packages/debugpy/_vendored/pydevd/pydevd.py", line 2011, in _do_wait_suspend
    time.sleep(0.01)
KeyboardInterrupt


KeyboardInterrupt: 

In [260]:
tokenizer=HFTokenizer('medicalai/ClinicalBERT',20)

Using cache found in C:\Users\noorr/.cache\torch\hub\huggingface_pytorch-transformers_main


In [None]:
len(load_tokenizer(tokenizer_model='medicalai/ClinicalBERT'))

Using cache found in /home/mehedi/.cache/torch/hub/huggingface_pytorch-transformers_main


119547

In [None]:
start=time.time()
torch.tril(torch.ones(45,5))
print(time.time()-start)

0.010996341705322266


In [None]:
out.shape

torch.Size([10, 45, 119547])

In [None]:
x,y=torch.rand((10,3,300,300)),torch.zeros(10,1)
y+=101
mem=encoder(x)
out,attn,temptok=decoder.evaluate(mem,get_logits=True)
print()
# print(decoder.training)
# y_hat,attention=decoder(enc_memory_list=mem,target=y)



tensor([[   101,  95648],
        [   101,  41026],
        [   101,  45135],
        [   101,  95305],
        [   101,  45135],
        [   101,  20901],
        [   101,  95648],
        [   101,  57118],
        [   101,  45135],
        [   101, 102133]]) ############################################################ torch.Size([10, 2])
tensor([[   101,  95648,  86171],
        [   101,  41026,  29440],
        [   101,  45135,  31014],
        [   101,  95305,  90805],
        [   101,  45135,  67841],
        [   101,  20901,  64118],
        [   101,  95648,  31014],
        [   101,  57118,  90805],
        [   101,  45135,  96073],
        [   101, 102133,  45135]]) ############################################################ torch.Size([10, 3])
tensor([[   101,  95648,  86171,  45839],
        [   101,  41026,  29440,  94638],
        [   101,  45135,  31014,  37297],
        [   101,  95305,  90805,  40710],
        [   101,  45135,  67841,  22437],
        [   101,  20901,  