In [9]:
import torch
import torch.nn as nn
import torch.optim as optim

In [10]:
class Transformer(nn.Module):
  def __init__(
    self,
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    forward_expansion,
    dropout,
    max_len_s,
    max_len_t,
    device,
):
    super(Transformer, self).__init__()
    self.src_word_embedding = nn.Embedding(src_vocab_size, embedding_size)
    self.src_position_embedding = nn.Embedding(max_len_s, embedding_size)
    self.trg_word_embedding = nn.Embedding(trg_vocab_size, embedding_size)
    self.trg_position_embedding = nn.Embedding(max_len_t, embedding_size)

    self.device = device
    self.transformer = nn.Transformer(
        embedding_size,
        num_heads,
        num_encoder_layers,
        num_decoder_layers,
        forward_expansion,
        dropout,
    )
    self.fc_out = nn.Linear(embedding_size, trg_vocab_size)
    self.dropout = nn.Dropout(dropout)
    self.src_pad_idx = src_pad_idx

  def make_src_mask(self, src):
    src_mask = src.transpose(0, 1) == self.src_pad_idx

    # (N, src_len)
    return src_mask.to(self.device)

  def forward(self, src, trg):
    src_seq_length, N = src.shape
    trg_seq_length, N = trg.shape

    src_positions = (
        torch.arange(0, src_seq_length)
        .unsqueeze(1)
        .expand(src_seq_length, N)
        .to(self.device)
    )

    trg_positions = (
        torch.arange(0, trg_seq_length)
        .unsqueeze(1)
        .expand(trg_seq_length, N)
        .to(self.device)
    )

    embed_src = self.dropout(
        (self.src_word_embedding(src) + self.src_position_embedding(src_positions))
    )
    embed_trg = self.dropout(
        (self.trg_word_embedding(trg) + self.trg_position_embedding(trg_positions))
    )

    src_padding_mask = self.make_src_mask(src)
    trg_mask = self.transformer.generate_square_subsequent_mask(trg_seq_length).to(self.device)
    out = self.transformer(
        embed_src,
        embed_trg,
        src_key_padding_mask=src_padding_mask,
        tgt_mask=trg_mask,
    )
    out = self.fc_out(out)
    return out

In [29]:
import pickle

#load it
with open(f'test_cpu4ep.pickle', 'rb') as file:
    model = pickle.load(file)

#load it
with open(f'token_Sentence.pickle', 'rb') as file:
    token_Sentence = pickle.load(file)

In [14]:
def display_image(name):
  img = image.load_img(name,target_size=(512,512,3))
  img = image.img_to_array(img)
  img = img/255
  plt.imshow(img)

In [23]:
def tokenize(x):
    """
    Tokenize x
    :param x: List of sentences/strings to be tokenized
    :return: Tuple of (tokenized x data, tokenizer used to tokenize x)
    """
    tokenizer=Tokenizer()
    tokenizer.fit_on_texts(x)
    t=tokenizer.texts_to_sequences(x)
    # TODO: Implement
    return t, tokenizer


In [24]:
def pad(x, length=None):
  """
  Pad x
  :param x: List of sequences.
  :param length: Length to pad the sequence to.  If None, use length of longest sequence in x.
  :return: Padded numpy array of sequences
  """
  # TODO: Implement
  padding=pad_sequences(x,padding='post',maxlen=length)
  return padding

In [25]:
def preprocess(sentences):
  text_tokenized, text_tokenizer = tokenize(sentences)
  text_pad = pad(text_tokenized)
  return text_pad, text_tokenizer

In [33]:
import numpy as np

def create_batch(src, tar , batchsize , i):

  src, tar =  np.transpose(src[(i-1)*batchsize : (i-1)*batchsize + batchsize]) , np.transpose(tar[(i-1)*batchsize : (i-1)*batchsize + batchsize])

  return torch.tensor(src).long(),torch.tensor(tar).long()

In [36]:
import numpy as np
from tensorflow.keras.preprocessing import image
from keras.preprocessing.sequence import pad_sequences

name = "maksssksksss48.png"
img = image.load_img(name, target_size=(32,32,3))
img = image.img_to_array(img)
#img = img/255
#plt.imshow(img)
img = np.reshape(img,(32*32*3))
    
image_arr = []
image_arr.append(img)
img_arr = np.array(image_arr)

sent = " i am a boy with bg wings on a charoit by the pool on the river side"
sentence = []
sentence.append(sent)
sentence[0] = '<SOS> '+sentence[0]+'<EOS>'
max_len_t = len(sentence[0])

sentence = pad(token_Sentence.texts_to_sequences(sentence) , length = max_len_t)
src , tar = create_batch(img_arr,sentence, 1,1)
src = src.to(device)
tar = tar.to(device)
model.eval()
output =  model(src,tar)

IndexError: index out of range in self

In [None]:

#   image_location, sent = df.iloc[index,0], df.iloc[index,2]
  image_arr = []
  img = load_image('/content/sample_data/flickr30k_images/flickr30k_images/'+str(image_location))
  image_arr.append(img)
  img_arr = np.array(image_arr)
  sentence = []
  sentence.append(sent)
  sentence[0] = '<SOS> '+sentence[0]+'<EOS>'
  sentence = pad(token_Sentence.texts_to_sequences(sentence) , length = max_len_t)
  src , tar = create_batch(img_arr,sentence, 1,1)
  src = src.to(device)
  tar = tar.to(device)
  model.eval()
  output =  model(src,tar)
  loss = criterion(output.view(-1, output.shape[2]), tar.reshape(-1))
  sentence_formed = ''
  val, ind = torch.max(output.view(-1, output.shape[2]), 1)
  for word in ind:
      #print('--->'+sentence_formed+'    '+str(word.item()))
      if word.item() == 3: # EOS
              break
      for key, value in token_Sentence.word_index.items():
          #print(value == word.item())
          if value == word.item() and value != 2: # sos
              sentence_formed = sentence_formed + key +' '
              break
  display_image('/content/sample_data/flickr30k_images/flickr30k_images/'+str(image_location))
  return sentence_formed , loss