<a href="https://colab.research.google.com/github/prajaktakini/Implement-RNN-From-Scratch/blob/main/From_Scratch_RNN_Encoder_Decoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive for this Colab runtime
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Drive folder where filtered_sentences.xlsx is written to and read back from
drive_path = '/content/drive/My Drive/Projects/Language Models'


In [None]:
!pip install datasets==2.15.0




In [None]:
import torch
import numpy as np
import random
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset
from collections import Counter

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import nltk

In [None]:
class Tokenizer:
    """Whitespace tokenizer mapping words to integer ids and back.

    Ids 0-3 are reserved, in this order, for the pad, unknown, start and end
    tokens. Every other word receives the next free id the first time ``fit``
    accepts it.
    """

    def __init__(self, pad_token='<pad>', unk_token='<unk>', start_token='<s>', end_token='</s>'):
        self.word2idx = {
            pad_token: 0,
            unk_token: 1,
            start_token: 2,
            end_token: 3
        }
        self.idx2word = {v: k for k, v in self.word2idx.items()}
        self.special_tokens = [pad_token, unk_token, start_token, end_token]

    def fit(self, sentences: list[str], min_freq: int = 1):
        """Extend the vocabulary with the words found in ``sentences``.

        Args:
            sentences: Raw sentences; each is stripped and split on whitespace.
            min_freq: A word is added only if it occurs at least this many
                times across all ``sentences``.
        """
        word_counts = Counter()
        for sentence in sentences:
            word_counts.update(sentence.strip().split())

        for word, count in word_counts.items():
            if count >= min_freq and word not in self.word2idx:
                # Compute the id once so both lookup tables always agree.
                new_idx = len(self.word2idx)
                self.word2idx[word] = new_idx
                self.idx2word[new_idx] = word

    def encode(self, sentence: str, add_special_tokens: bool = True):
        """Convert a sentence to a list of ids.

        Args:
            sentence: Raw sentence, split on whitespace.
            add_special_tokens: When true, wrap the ids in the start and end
                token ids.

        Returns:
            List of ids; words missing from the vocabulary map to the unknown id.
        """
        words = sentence.strip().split()
        if add_special_tokens:
            words = [self.special_tokens[2]] + words + [self.special_tokens[3]]

        unk_idx = self.word2idx[self.special_tokens[1]]
        return [self.word2idx.get(word, unk_idx) for word in words]

    def decode(self, indices: list[int], skip_special_tokens: bool = True):
        """Convert ids back to a space-joined sentence.

        Args:
            indices: Iterable of ids. Each is passed through ``int()`` so that
                NumPy integers and single-element tensors are accepted too.
            skip_special_tokens: When true, drop pad/unknown/start/end tokens.

        Returns:
            The decoded sentence. Ids that are not in the vocabulary decode to
            the unknown token instead of raising ``KeyError``.
        """
        unk_token = self.special_tokens[1]
        words = [self.idx2word.get(int(idx), unk_token) for idx in indices]
        if skip_special_tokens:
            words = [word for word in words if word not in self.special_tokens]
        return " ".join(words)

    def __len__(self):
        """Vocabulary size, special tokens included."""
        return len(self.word2idx)


In [None]:
# https://huggingface.co/datasets/anujsahani01/English-Marathi
class TranslationDataset(Dataset):
    """Parallel sentence pairs, tokenised and padded to fixed lengths up front.

    Every item is a dict with the raw texts (``en_text``, ``mr_text``), the
    padded id lists (``en_input_ids``, ``mr_input_ids``) and 1/0 attention
    masks (``en_attention_mask``, ``mr_attention_mask``) marking real tokens.

    Args:
        english_setences: Source sentences.
        marathi_setences: Target sentences, aligned with the source list.
        en_tokenizer: Tokenizer used for the source sentences.
        mar_tokenizer: Tokenizer used for the target sentences.
        max_en_length: Fixed length of every source id list (default 105).
        max_mr_length: Fixed length of every target id list (default 105).

    Raises:
        ValueError: If either maximum length is smaller than 1.
    """

    def __init__(self, english_setences: list[str], marathi_setences: list[str],
                 en_tokenizer: "Tokenizer", mar_tokenizer: "Tokenizer",
                 max_en_length: int = 105, max_mr_length: int = 105):
        if max_en_length < 1 or max_mr_length < 1:
            raise ValueError("max_en_length and max_mr_length must be at least 1")

        self.english_sentences = english_setences
        self.marathi_sentences = marathi_setences
        self.en_tokenizer = en_tokenizer
        self.mar_tokenizer = mar_tokenizer
        self.max_en_length = max_en_length
        self.max_mr_length = max_mr_length

        # Pad and mask sequences during dataset preparation
        self.padded_data = self._pad_and_mask_sequences()

    @staticmethod
    def _fit_to_length(token_ids, max_length, pad_id):
        """Return (ids, mask), both exactly ``max_length`` long.

        Over-long sequences are cut, keeping their final id (the end token
        when the tokenizer added one) so the sequence still terminates.
        Without this, long sentences yield rows of differing length that
        cannot be stacked into one tensor.
        """
        if len(token_ids) > max_length:
            token_ids = token_ids[:max_length - 1] + token_ids[-1:]
        n_real = len(token_ids)
        n_pad = max_length - n_real
        return token_ids + [pad_id] * n_pad, [1] * n_real + [0] * n_pad

    def _pad_and_mask_sequences(self):
        """Encode every sentence pair and build its padded ids and masks."""
        # Look the pad id up through the tokenizer's own pad token so that
        # tokenizers built with a custom pad token keep working.
        en_pad_id = self.en_tokenizer.word2idx[self.en_tokenizer.special_tokens[0]]
        mr_pad_id = self.mar_tokenizer.word2idx[self.mar_tokenizer.special_tokens[0]]

        padded_data = []
        for en_sentence, mr_sentence in zip(self.english_sentences, self.marathi_sentences):
            en_padded, en_mask = self._fit_to_length(
                self.en_tokenizer.encode(en_sentence), self.max_en_length, en_pad_id)
            mr_padded, mr_mask = self._fit_to_length(
                self.mar_tokenizer.encode(mr_sentence), self.max_mr_length, mr_pad_id)

            padded_data.append({
                "en_text": en_sentence,
                "mr_text": mr_sentence,
                "en_input_ids": en_padded,
                "mr_input_ids": mr_padded,
                "en_attention_mask": en_mask,
                "mr_attention_mask": mr_mask,
            })

        return padded_data

    def __len__(self):
        # zip() stops at the shorter input list, so count the prepared items
        # rather than the raw English sentences.
        return len(self.padded_data)

    def __getitem__(self, index):
        return self.padded_data[index]


In [None]:
class Helper:
    """Namespace for DataLoader helper functions."""

    @staticmethod
    def collate_batch(batch, device):
        """Stack a list of dataset items into batched tensors on ``device``.

        Declared as a static method so it works both as
        ``Helper.collate_batch(...)`` and on a ``Helper`` instance.

        Args:
            batch: List of dicts holding equal-length ``en_input_ids``,
                ``mr_input_ids``, ``en_attention_mask`` and
                ``mr_attention_mask`` lists plus ``en_text`` / ``mr_text``.
            device: Device the tensors are moved to.

        Returns:
            Dict with the id tensors (``torch.long``), the mask tensors
            (``torch.bool``) and the raw texts as plain lists.
        """

        def stack(key, dtype):
            # One row per example; rows must already share the same length.
            return torch.tensor([item[key] for item in batch], dtype=dtype).to(device)

        return {
            "en_input_ids": stack("en_input_ids", torch.long),
            "mr_input_ids": stack("mr_input_ids", torch.long),
            "en_attention_mask": stack("en_attention_mask", torch.bool),
            "mr_attention_mask": stack("mr_attention_mask", torch.bool),
            "en_text": [item['en_text'] for item in batch],
            "mr_text": [item['mr_text'] for item in batch]
        }

In [None]:
# Define the activation functions
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + e^(-x)) computed with NumPy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def tanh(x):
    """Element-wise hyperbolic tangent, delegated to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def softmax(x):
    """Softmax along axis 0 (each column of a 2-D input is normalised on its own).

    The maximum is subtracted per column rather than over the whole array.
    With a single global maximum, a column whose values all lie far below it
    underflows to zeros and the division yields NaN.
    """
    shifted = x - np.max(x, axis=0, keepdims=True)  # For numerical stability
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=0)

In [None]:
class EncoderRNN:
    """Vanilla RNN encoder with hand-written backpropagation through time.

    Recurrence: ``h_t = tanh(W_ih x_t + W_hh h_{t-1} + b_h)`` with ``h_{-1} = 0``.

    Gradients are computed by hand in ``backward``, so both passes run under
    ``torch.no_grad()``. The ``nn.Embedding`` output otherwise requires grad,
    which would attach an autograd graph to every intermediate tensor, and the
    in-place weight updates would chain each batch's graph onto the weights,
    so memory would grow with every batch.

    The embedding table is not updated by ``backward``; it keeps its initial
    values.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, device):
        self.vocab_size = vocab_size  # Size of the vocabulary
        self.embedding_dim = embedding_dim  # Size of the embedding vectors
        self.hidden_size = hidden_size
        self.device = device

        # Embedding layer: maps token indices to dense vectors
        self.embedding = torch.nn.Embedding(vocab_size, embedding_dim).to(device)

        # Encoder parameters, scaled by sqrt(2 / fan_in)
        self.W_ih = torch.randn(hidden_size, embedding_dim, device=device) * (2.0 / embedding_dim) ** 0.5  # Input to hidden
        self.W_hh = torch.randn(hidden_size, hidden_size, device=device) * (2.0 / hidden_size) ** 0.5  # Hidden to hidden
        self.b_h = torch.zeros(hidden_size, 1, device=device)  # Hidden bias

    def forward(self, input_seq):
        """Run the encoder over a batch of token ids.

        Args:
            input_seq: Integer tensor of shape (batch_size, seq_len).

        Returns:
            Tuple ``(last_hidden, hidden_states)``: the hidden state after the
            final step, shape (batch_size, hidden_size, 1), and the list of
            per-step hidden states. For an empty sequence the zero initial
            state and an empty list are returned.
        """
        input_seq = input_seq.to(self.device)
        batch_size, seq_len = input_seq.shape

        with torch.no_grad():
            inputs_embedded = self.embedding(input_seq)  # (batch, seq_len, embedding_dim)
            hidden_state = torch.zeros(batch_size, self.hidden_size, 1, device=self.device)
            hidden_states = []

            for t in range(seq_len):
                hidden_state = torch.tanh(
                    torch.einsum('ij,bj->bi', self.W_ih, inputs_embedded[:, t, :])[:, :, None] +
                    torch.einsum('ij,bj->bi', self.W_hh, hidden_state[:, :, 0])[:, :, None] +
                    self.b_h
                )  # (batch, hidden, 1)
                hidden_states.append(hidden_state)

        return hidden_state, hidden_states

    def backward(self, d_hidden, inputs, enc_hidden_states, learning_rate=1e-3):
        """Backpropagate through time and apply one gradient-descent step.

        Args:
            d_hidden: Gradient of the loss with respect to the final hidden
                state, shape (batch_size, hidden_size, 1).
            inputs: The token ids given to ``forward``, shape (batch_size, seq_len).
            enc_hidden_states: The hidden-state list returned by ``forward``.
            learning_rate: Step size; summed gradients are divided by batch_size.
        """
        inputs = inputs.to(self.device)
        batch_size, seq_len = inputs.shape

        with torch.no_grad():
            dW_ih, dW_hh = torch.zeros_like(self.W_ih), torch.zeros_like(self.W_hh)
            db_h = torch.zeros_like(self.b_h)
            inputs_embedded = self.embedding(inputs)  # (batch, seq_len, embedding_dim)

            for t in reversed(range(seq_len)):
                # Through tanh: gradient with respect to the pre-activation at step t
                d_pre = d_hidden * (1 - enc_hidden_states[t] ** 2)  # (batch, hidden, 1)

                # The state before step 0 is the all-zero initial state
                if t > 0:
                    prev_hidden = enc_hidden_states[t - 1][:, :, 0]
                else:
                    prev_hidden = torch.zeros_like(enc_hidden_states[0][:, :, 0])

                dW_ih += torch.einsum('bi,bj->ij', d_pre[:, :, 0], inputs_embedded[:, t, :])  # (hidden, embedding)
                dW_hh += torch.einsum('bi,bj->ij', d_pre[:, :, 0], prev_hidden)  # (hidden, hidden)
                db_h += torch.sum(d_pre, dim=0)  # (hidden, 1)

                # Gradient with respect to the previous hidden state
                d_hidden = torch.einsum('ij,bj->bi', self.W_hh.T, d_pre[:, :, 0])[:, :, None]

            # Update parameters using gradient descent
            self.W_ih -= learning_rate * dW_ih / batch_size
            self.W_hh -= learning_rate * dW_hh / batch_size
            self.b_h -= learning_rate * db_h / batch_size


In [None]:

class DecoderRNN:
    """Vanilla RNN decoder with teacher forcing and hand-written backpropagation.

    At step ``t`` the decoder reads ``target_seq[:, t]`` and is trained to
    predict the next token ``target_seq[:, t + 1]``; the last step has no
    label. Training the output of step ``t`` against its own input token would
    only teach the network to echo its input.

    Gradients are computed by hand, so both passes run under
    ``torch.no_grad()``. The ``nn.Embedding`` output otherwise requires grad,
    and the in-place weight updates would chain each batch's autograd graph
    onto the weights, so memory would grow with every batch.

    The embedding table is not updated by ``backward``; it keeps its initial
    values. Padding positions are not masked out of the loss.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, device):
        self.vocab_size = vocab_size  # Size of the vocabulary
        self.embedding_dim_enc = embedding_dim  # Size of the embedding vectors
        self.hidden_size = hidden_size
        self.device = device

        # Source and target languages differ, so the decoder owns its embedding table
        self.embedding = torch.nn.Embedding(vocab_size, embedding_dim).to(device)

        # Decoder parameters, scaled by sqrt(2 / fan_in)
        self.W_ih = torch.randn(hidden_size, embedding_dim, device=device) * (2.0 / embedding_dim) ** 0.5  # Input to hidden
        self.W_hh = torch.randn(hidden_size, hidden_size, device=device) * (2.0 / hidden_size) ** 0.5  # Hidden to hidden
        self.W_ho = torch.randn(vocab_size, hidden_size, device=device) * (2.0 / hidden_size) ** 0.5  # Hidden to output

        self.b_h = torch.zeros(hidden_size, 1, device=device)  # Hidden bias
        self.b_o = torch.zeros(vocab_size, 1, device=device)  # Output bias

        # Initial hidden state of the most recent forward pass (used by backward)
        self._initial_hidden = None

    def forward(self, target_seq, hidden_state):
        """Run the decoder over ``target_seq`` starting from ``hidden_state``.

        Args:
            target_seq: Integer tensor of shape (batch_size, seq_len).
            hidden_state: Initial hidden state, shape (batch_size, hidden_size, 1).

        Returns:
            Tuple ``(outputs, loss, hidden_states)``:
            ``outputs`` holds the softmax probabilities, shape
            (seq_len, batch_size, vocab_size) -- time is the first axis;
            ``loss`` is the mean next-token cross-entropy as a 0-dim tensor
            (zero when ``seq_len`` is 1, because no step has a label);
            ``hidden_states`` is the list of per-step hidden states.
        """
        target_seq = target_seq.to(self.device)
        batch_size, seq_len = target_seq.shape
        batch_index = torch.arange(batch_size, device=self.device)
        epsilon = 1e-8  # keeps log() finite when a probability underflows to 0

        self._initial_hidden = hidden_state
        outputs, hidden_states = [], []
        loss = torch.zeros((), device=self.device)

        with torch.no_grad():
            target_embedded = self.embedding(target_seq)  # (batch, seq_len, embedding_dim)

            for t in range(seq_len):
                # Update hidden state
                hidden_state = torch.tanh(
                    torch.einsum('ij,bj->bi', self.W_ih, target_embedded[:, t, :])[:, :, None] +
                    torch.einsum('ij,bj->bi', self.W_hh, hidden_state[:, :, 0])[:, :, None] +
                    self.b_h
                )
                hidden_states.append(hidden_state)

                logits = torch.einsum('ij,bj->bi', self.W_ho, hidden_state[:, :, 0]) + self.b_o[:, 0]

                # Softmax with the row maximum subtracted for numerical stability
                logits_exp = torch.exp(logits - torch.max(logits, dim=1, keepdim=True).values)
                probs = logits_exp / torch.sum(logits_exp, dim=1, keepdim=True)
                outputs.append(probs)

                # Cross-entropy against the NEXT token; labels are the token
                # ids themselves (not an argmax over the embedding vector).
                if t + 1 < seq_len:
                    next_tokens = target_seq[:, t + 1]
                    loss -= torch.sum(torch.log(probs[batch_index, next_tokens] + epsilon))

        labelled_steps = max(seq_len - 1, 1)
        return torch.stack(outputs), loss / (batch_size * labelled_steps), hidden_states

    def backward(self, outputs, target_seq, dec_hidden_states, learning_rate=1e-3, initial_hidden=None):
        """Backpropagate through time and apply one gradient-descent step.

        Args:
            outputs: Probabilities returned by ``forward``,
                shape (seq_len, batch_size, vocab_size).
            target_seq: The token ids given to ``forward``, shape (batch_size, seq_len).
            dec_hidden_states: The hidden-state list returned by ``forward``.
            learning_rate: Step size; summed gradients are divided by batch_size.
            initial_hidden: Hidden state the decoder started from. Defaults to
                the one remembered from the latest ``forward`` call, and to
                zeros when none of matching shape is available.

        Returns:
            Gradient of the summed loss with respect to the decoder's initial
            hidden state, shape (batch_size, hidden_size, 1), ready to be
            handed to the encoder's backward pass.
        """
        target_seq = target_seq.to(self.device)
        batch_size, seq_len = target_seq.shape
        batch_index = torch.arange(batch_size, device=self.device)

        if initial_hidden is None:
            initial_hidden = self._initial_hidden
        state_shape = (batch_size, self.hidden_size, 1)
        if initial_hidden is None or tuple(initial_hidden.shape) != state_shape:
            initial_hidden = torch.zeros(*state_shape, device=self.device)

        with torch.no_grad():
            target_embedded = self.embedding(target_seq)  # (batch, seq_len, embedding_dim)

            dW_ih, dW_hh, dW_ho = torch.zeros_like(self.W_ih), torch.zeros_like(self.W_hh), torch.zeros_like(self.W_ho)
            db_h, db_o = torch.zeros_like(self.b_h), torch.zeros_like(self.b_o)
            d_hidden = torch.zeros(*state_shape, device=self.device)

            for t in reversed(range(seq_len)):
                if t + 1 < seq_len:
                    # Softmax + cross-entropy gradient: probs - one_hot(next token).
                    # Subtracting 1 at the label index avoids building a
                    # (batch, seq_len, vocab) one-hot tensor.
                    output_error = outputs[t].clone()  # (batch, vocab)
                    output_error[batch_index, target_seq[:, t + 1]] -= 1.0

                    # Gradients for output layer
                    dW_ho += torch.einsum('bi,bj->ij', output_error, dec_hidden_states[t][:, :, 0])  # (vocab, hidden)
                    db_o += torch.sum(output_error, dim=0)[:, None]  # (vocab, 1)

                    # Add the output-layer gradient to the one arriving from step t + 1
                    d_hidden = d_hidden + torch.einsum('ji,bj->bi', self.W_ho, output_error)[:, :, None]

                # Through tanh: gradient with respect to the pre-activation at step t
                d_pre = d_hidden * (1 - dec_hidden_states[t] ** 2)

                # Step 0 continues from the initial hidden state, not from zeros
                prev_hidden = dec_hidden_states[t - 1] if t > 0 else initial_hidden

                dW_ih += torch.einsum('bi,bj->ij', d_pre[:, :, 0], target_embedded[:, t, :])  # (hidden, embedding)
                dW_hh += torch.einsum('bi,bj->ij', d_pre[:, :, 0], prev_hidden[:, :, 0])  # (hidden, hidden)
                db_h += torch.sum(d_pre, dim=0)  # (hidden, 1)

                # Always propagate through W_hh, including at t == 0, so the
                # returned value is the gradient for the initial hidden state.
                d_hidden = torch.einsum('ij,bj->bi', self.W_hh.T, d_pre[:, :, 0])[:, :, None]

            # Update parameters using gradient descent
            self.W_ih -= learning_rate * dW_ih / batch_size
            self.W_hh -= learning_rate * dW_hh / batch_size
            self.W_ho -= learning_rate * dW_ho / batch_size
            self.b_h -= learning_rate * db_h / batch_size
            self.b_o -= learning_rate * db_o / batch_size

        return d_hidden

In [None]:
class EncoderDecoderRNN:
    """Sequence-to-sequence translator built from ``EncoderRNN`` and ``DecoderRNN``.

    Args:
        en_tokenizer: Tokenizer for the source language.
        mr_tokenizer: Tokenizer for the target language.
        device: Device handed to the encoder and the decoder.
        embedding_dim: Embedding size used by both networks (default 300).
        hidden_size: Hidden-state size used by both networks (default 512).
    """

    def __init__(self, en_tokenizer, mr_tokenizer, device, embedding_dim=300, hidden_size=512):
        self.en_tokenizer = en_tokenizer
        self.mr_tokenizer = mr_tokenizer
        self.vocab_size_en = len(en_tokenizer)
        self.vocab_size_mr = len(mr_tokenizer)
        self.embedding_dim = embedding_dim
        self.hidden_size = hidden_size
        self.device = device

        self.encoder = EncoderRNN(self.vocab_size_en, self.embedding_dim, self.hidden_size, device)
        self.decoder = DecoderRNN(self.vocab_size_mr, self.embedding_dim, self.hidden_size, device)

    def train(self, train_loader, num_epochs=10, learning_rate=1e-3):
        """Train with the hand-written backward passes of encoder and decoder.

        Args:
            train_loader: Iterable of batches, each a dict with
                ``en_input_ids`` and ``mr_input_ids`` tensors.
            num_epochs: Number of passes over ``train_loader``.
            learning_rate: Step size forwarded to both backward passes.

        Returns:
            List with the mean batch loss of every epoch; the same value is
            printed once per epoch.
        """
        epoch_losses = []

        for epoch in range(num_epochs):
            total_loss = 0.0
            num_batches = 0

            for batch in train_loader:
                en_input_ids = batch["en_input_ids"].to(self.device)
                mr_input_ids = batch["mr_input_ids"].to(self.device)

                # Gradients are derived by hand, so autograd bookkeeping is
                # switched off; otherwise every batch leaves a graph behind.
                with torch.no_grad():
                    # Forward pass
                    hidden_state, enc_hidden_states = self.encoder.forward(en_input_ids)
                    outputs, loss, dec_hidden_states = self.decoder.forward(mr_input_ids, hidden_state)

                    # Backward pass and optimization
                    decoder_grad = self.decoder.backward(
                        outputs, mr_input_ids, dec_hidden_states, learning_rate=learning_rate)
                    self.encoder.backward(
                        decoder_grad, en_input_ids, enc_hidden_states, learning_rate=learning_rate)

                total_loss += loss.item()
                num_batches += 1

            # Report inside the epoch loop so every epoch gets its own line
            mean_loss = total_loss / max(num_batches, 1)
            epoch_losses.append(mean_loss)
            print(f"Epoch {epoch + 1} / {num_epochs}, Loss: {mean_loss:.4f}")

        return epoch_losses

    def predict(self, sentence, en_tokenizer=None, mr_tokenizer=None, max_length=50):
        """Greedily translate one sentence.

        Args:
            sentence: Source sentence.
            en_tokenizer: Source tokenizer; defaults to the one given at construction.
            mr_tokenizer: Target tokenizer; defaults to the one given at construction.
            max_length: Maximum number of tokens to generate.

        Returns:
            The decoded target sentence with special tokens removed.
        """
        if en_tokenizer is None:
            en_tokenizer = self.en_tokenizer
        if mr_tokenizer is None:
            mr_tokenizer = self.mr_tokenizer

        start_token = mr_tokenizer.word2idx['<s>']
        end_token = mr_tokenizer.word2idx['</s>']
        output_seq = []

        with torch.no_grad():
            # Encode the source sentence as a batch of one
            input_seq = torch.tensor(en_tokenizer.encode(sentence), dtype=torch.long).unsqueeze(0).to(self.device)
            hidden_state, _ = self.encoder.forward(input_seq)

            # Start decoding from the start token
            decoder_input = torch.tensor([[start_token]], dtype=torch.long).to(self.device)

            for _ in range(max_length):
                outputs, _, dec_hidden_states = self.decoder.forward(decoder_input, hidden_state)
                hidden_state = dec_hidden_states[-1]  # carry the state to the next step

                # Decoder outputs are stacked with time first, so the last
                # entry holds the newest step's probabilities: (1, vocab).
                probs = outputs[-1]
                predicted_token = int(torch.argmax(probs, dim=-1).item())  # greedy choice
                output_seq.append(predicted_token)

                # Break if the end token is generated
                if predicted_token == end_token:
                    break

                # Feed the prediction back in as the next input
                decoder_input = torch.tensor([[predicted_token]], dtype=torch.long).to(self.device)

        return mr_tokenizer.decode(output_seq, skip_special_tokens=True)


## The code cells below can be skipped if you load the data from the Excel file

In [None]:
# Download the English-Marathi parallel corpus from the Hugging Face Hub
dataset = load_dataset("anujsahani01/English-Marathi")

train_data = dataset["train"]
test_data = dataset["test"]

# Read each column in one go instead of building a dict for every row
english_sentences = list(train_data["english"])
marathi_sentences = list(train_data["marathi"])


NameError: name 'load_dataset' is not defined

In [None]:

# Whitespace-split every sentence once so the token lists can be reused
english_tokenized = [text.split() for text in english_sentences]
marathi_tokenized = [text.split() for text in marathi_sentences]

# Token counts per sentence
english_lengths = list(map(len, english_tokenized))
marathi_lengths = list(map(len, marathi_tokenized))

# Keep a pair only when both sides have at most max_token_len tokens
max_token_len = 95
filtered_english_sentences = []
filtered_marathi_sentences = []
for i, en_tokens in enumerate(english_tokenized):
    mr_tokens = marathi_tokenized[i]
    if len(en_tokens) > max_token_len or len(mr_tokens) > max_token_len:
        continue
    filtered_english_sentences.append(english_sentences[i])
    filtered_marathi_sentences.append(marathi_sentences[i])

In [None]:
# Randomly sample up to 10,000 sentence pairs
random.seed(42)  # For reproducibility
# random.sample raises ValueError when asked for more items than exist,
# so cap the sample size at the number of pairs that survived filtering.
sample_size = min(10000, len(filtered_english_sentences))
sampled_indices = random.sample(range(len(filtered_english_sentences)), sample_size)
sampled_english = [filtered_english_sentences[i] for i in sampled_indices]
sampled_marathi = [filtered_marathi_sentences[i] for i in sampled_indices]

NameError: name 'filtered_english_sentences' is not defined

In [None]:
# Persist the sampled pairs so later runs can skip the download and filtering
file_path = f"{drive_path}/filtered_sentences.xlsx"
sampled_columns = {'English': sampled_english, 'Marathi': sampled_marathi}
df = pd.DataFrame(sampled_columns)
df.to_excel(file_path, index=False)


## Load Data from Excel file

In [None]:
file_path = drive_path + '/filtered_sentences.xlsx'
# Read every cell as text: without dtype=str a purely numeric sentence comes
# back as a number and an empty cell as NaN, neither of which the
# whitespace tokenizer can split.
df = pd.read_excel(file_path, dtype=str).fillna("")
sampled_english = df['English'].astype(str).tolist()
sampled_marathi = df['Marathi'].astype(str).tolist()

In [None]:


# Use the first 800 sampled pairs for training and the next 200 for testing
TRAIN_SIZE = 800
TEST_SIZE = 200

train_english = sampled_english[:TRAIN_SIZE]
train_marathi = sampled_marathi[:TRAIN_SIZE]
test_english = sampled_english[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
test_marathi = sampled_marathi[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]

# Print statistics
print(f"Total sampled sentences: {len(sampled_english)}")
print(f"Train sentences: {len(train_english)}")
print(f"Test sentences: {len(test_english)}")

Total sampled sentences: 10000
Train sentences: 800
Test sentences: 200


In [None]:
# Show the first ten training pairs, English first
print(train_english[:10])
print(train_marathi[:10])

["BJP's Amrish Patel wins from Dhule-Nandurbar", 'im not a small dude.', 'Causes for depression', '"The servant answered Saul again, and said, ""Behold, I have in my hand the fourth part of a shekel of silver. I will give that to the man of God, to tell us our way."""', 'In addition, antiviral drug 3D printed mask, nanofiber coated N-95 mask, Povidone Iodine thin-film coated mask is being proposed for mass consumption.', '"""Our opponents are conspiring against us by trying to lay a foundation for caste and communal riots through international funding."', 'Today I am the most happy guy in the world.', "The movie is directed by Farhan Akhtar's sister Zoya Akhtar.", 'People whose yearly income is less than Rs 8 lakh can avail of this reservation.', 'Healthy wholesome breakfast: It is very important to have a nutritious breakfast to kick-start a hiking trip as it will keep you going.']
['विधान परिषदेच्या धुळे-नंदुरबार पोटनिवडणुकीत भाजपाचे अमरिश पटेल यांचा विजय झाला.', 'मी मामूली मुलगी नाह

In [None]:
# Build one vocabulary per language from the training sentences only
en_tokenizer = Tokenizer()
en_tokenizer.fit(train_english)

mr_tokenizer = Tokenizer()
mr_tokenizer.fit(train_marathi)

In [None]:
# Size of the Marathi vocabulary, including the four special tokens
len(mr_tokenizer)

22747

In [None]:
# Seed PyTorch so weight initialisation and batch shuffling are repeatable
torch.manual_seed(42)

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create dataset
dataset = TranslationDataset(train_english, train_marathi, en_tokenizer, mr_tokenizer)

batch_size = 32
# Create DataLoader; the collate function also moves each batch to `device`
dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=lambda batch: Helper.collate_batch(batch, device), shuffle=True)

num_epochs = 10

# Create an instance of EncoderDecoderRNN
encoder_decoder = EncoderDecoderRNN(en_tokenizer, mr_tokenizer, device)

# Call the train method on the instance
encoder_decoder.train(dataloader, num_epochs)



OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 2.12 MiB is free. Process 17456 has 14.74 GiB memory in use. Of the allocated memory 14.40 GiB is allocated by PyTorch, and 217.35 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Translate one sentence with the trained model
test_sentence = "Causes for depression"

translated_sentence = encoder_decoder.predict(test_sentence, en_tokenizer, mr_tokenizer)
print(f"Translated sentence: {translated_sentence}")
# Length of the translation in characters (it is a string), not in tokens
print(len(translated_sentence))

In [None]:
# Preview the first 20 id -> word entries instead of dumping the whole vocabulary
print(dict(list(mr_tokenizer.idx2word.items())[:20]))

{0: '<pad>', 1: '<unk>', 2: '<s>', 3: '</s>', 4: 'विधान', 5: 'परिषदेच्या', 6: 'धुळे-नंदुरबार', 7: 'पोटनिवडणुकीत', 8: 'भाजपाचे', 9: 'अमरिश', 10: 'पटेल', 11: 'यांचा', 12: 'विजय', 13: 'झाला.', 14: 'मी', 15: 'मामूली', 16: 'मुलगी', 17: 'नाही.', 18: 'डिप्रेशनमध्ये', 19: 'जाण्यायामागची', 20: 'कारणं', 21: 'तेव्हा', 22: 'पुन्हा', 23: 'नोकाराने', 24: 'सांगितले,', 25: '“माझ्याकडे', 26: 'थोडे', 27: 'पैसे', 28: 'आहेत', 29: 'तेच', 30: 'आपण', 31: 'त्या', 32: 'परमेश्वराच्या', 33: 'माणसाला', 34: 'देऊ.', 35: 'मग', 36: 'तो', 37: 'आपल्याला', 38: 'पुढची', 39: 'वाट', 40: 'दाखवेल.”', 41: 'फेस', 42: 'मास्क', 43: 'आणि', 44: 'नेहमीचे', 45: 'यावर', 46: 'प्रामुख्याने', 47: 'बरेचसे', 48: 'प्रस्ताव', 49: 'होते.याशिवाय', 50: 'एन्टीव्हायरल', 51: 'ड्रग', 52: '3', 53: 'डी', 54: 'प्रिंटेड', 55: 'मास्क,', 56: 'नॅनोफायबर', 57: 'कोटेड', 58: 'एन-95', 59: 'साठीही', 60: 'आले.', 61: "'आमचे", 62: 'विरोधक', 63: 'आंतरराष्\u200dट्रीय', 64: 'फंडिंगच्या', 65: 'माध्यमाने', 66: 'जात', 67: 'संप्रदायावर', 68: 'आधारीत', 69: 'दंगलींचा', 7