In [1]:
import torch.nn as nn
import numpy as np
import pandas as pd

In [2]:
# Load the question/answer CSV into a DataFrame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# only runs where this exact file exists. Consider a configurable relative path.
# NOTE(review): the preview shows an "Unnamed: 0" column, presumably an index
# that was saved into the CSV — confirm, and consider index_col=0 if so.
data = pd.read_csv(r"c:\Users\dhruv\Downloads\100_Unique_QA_Dataset.csv")

data.head(5)

Unnamed: 0,question,answer
0,What is the capital of France?,Paris
1,What is the capital of Germany?,Berlin
2,Who wrote 'To Kill a Mockingbird'?,Harper-Lee
3,What is the largest planet in our solar system?,Jupiter
4,What is the boiling point of water in Celsius?,100


In [3]:
## tokenizer 
def tokenizer(text):
    """Lowercase ``text``, delete ``?``, ``'`` and ``.``, then split on whitespace.

    Returns the list of remaining tokens (empty list for empty input).
    """
    # A single translation table removes all three punctuation characters at once.
    drop_punctuation = str.maketrans("", "", "?'.")
    return text.lower().translate(drop_punctuation).split()

In [4]:
tokenizer("i am dhruv.")

['i', 'am', 'dhruv']

In [5]:
tokenizer("What is the capital of France?")

['what', 'is', 'the', 'capital', 'of', 'france']

In [6]:
vocab = {"<UNK>":0}
len(vocab)

1

In [7]:
def build_vocab(row):
    """Add every unseen token of one row's question and answer to the global ``vocab``.

    ``row`` is indexed with ``"question"`` and ``"answer"``; both values are run
    through ``tokenizer``. A new token receives the size of ``vocab`` at the
    moment it is inserted as its index. Nothing is returned.
    """
    row_tokens = tokenizer(row["question"]) + tokenizer(row["answer"])

    for token in row_tokens:
        # setdefault leaves tokens that already have an index untouched.
        vocab.setdefault(token, len(vocab))

print(vocab)

{'<UNK>': 0}


In [8]:
data.apply(build_vocab, axis=1)

0     None
1     None
2     None
3     None
4     None
      ... 
85    None
86    None
87    None
88    None
89    None
Length: 90, dtype: object

In [9]:
print(vocab)

{'<UNK>': 0, 'what': 1, 'is': 2, 'the': 3, 'capital': 4, 'of': 5, 'france': 6, 'paris': 7, 'germany': 8, 'berlin': 9, 'who': 10, 'wrote': 11, 'to': 12, 'kill': 13, 'a': 14, 'mockingbird': 15, 'harper-lee': 16, 'largest': 17, 'planet': 18, 'in': 19, 'our': 20, 'solar': 21, 'system': 22, 'jupiter': 23, 'boiling': 24, 'point': 25, 'water': 26, 'celsius': 27, '100': 28, 'painted': 29, 'mona': 30, 'lisa': 31, 'leonardo-da-vinci': 32, 'square': 33, 'root': 34, '64': 35, '8': 36, 'chemical': 37, 'symbol': 38, 'for': 39, 'gold': 40, 'au': 41, 'which': 42, 'year': 43, 'did': 44, 'world': 45, 'war': 46, 'ii': 47, 'end': 48, '1945': 49, 'longest': 50, 'river': 51, 'nile': 52, 'japan': 53, 'tokyo': 54, 'developed': 55, 'theory': 56, 'relativity': 57, 'albert-einstein': 58, 'freezing': 59, 'fahrenheit': 60, '32': 61, 'known': 62, 'as': 63, 'red': 64, 'mars': 65, 'author': 66, '1984': 67, 'george-orwell': 68, 'currency': 69, 'united': 70, 'kingdom': 71, 'pound': 72, 'india': 73, 'delhi': 74, 'discov

In [10]:
len(vocab)

324

In [94]:
def text_to_indices(text, vocab=vocab):
    """Convert ``text`` into a list of vocabulary indices.

    The text is split with ``tokenizer``; every token found in ``vocab`` is
    replaced by its index, and every other token by 0 (the index that
    ``"<UNK>"`` has in the notebook's global ``vocab``).
    """
    return [vocab.get(token, 0) for token in tokenizer(text)]

In [12]:
text_to_indices("Hello Bro", vocab)

[0, 0]

In [13]:
text_to_indices("what are you doing?", vocab)

[1, 81, 0, 0]

In [14]:
data

Unnamed: 0,question,answer
0,What is the capital of France?,Paris
1,What is the capital of Germany?,Berlin
2,Who wrote 'To Kill a Mockingbird'?,Harper-Lee
3,What is the largest planet in our solar system?,Jupiter
4,What is the boiling point of water in Celsius?,100
...,...,...
85,Who directed the movie 'Titanic'?,JamesCameron
86,Which superhero is also known as the Dark Knight?,Batman
87,What is the capital of Brazil?,Brasilia
88,Which fruit is known as the king of fruits?,Mango


In [15]:
import torch 
from torch.utils.data import Dataset, DataLoader

In [16]:
class QADataset(Dataset):
    """Dataset that serves each question/answer row as a pair of index tensors."""

    def __init__(self, data, vocab):
        # ``data`` is used through len() and .iloc[...]["question"/"answer"];
        # ``vocab`` is forwarded to text_to_indices.
        self.data, self.vocab = data, vocab

    def __len__(self):
        """Number of rows in the underlying data."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(question_tensor, answer_tensor)`` for the row at position ``index``."""
        row = self.data.iloc[index]
        question_ids = text_to_indices(row["question"], self.vocab)
        answer_ids = text_to_indices(row["answer"], self.vocab)
        return torch.tensor(question_ids), torch.tensor(answer_ids)

In [17]:
dataset = QADataset(data, vocab)

In [18]:
dataset[0]

(tensor([1, 2, 3, 4, 5, 6]), tensor([7]))

In [19]:
dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

dataloader

<torch.utils.data.dataloader.DataLoader at 0x21f2d1a0700>

In [20]:
for question, answer in dataloader:
    print(question, answer)

tensor([[ 42, 174,   2,  62,  39, 175, 176,  12, 177, 178]]) tensor([[179]])
tensor([[ 42, 137,   2, 226,  12,   3, 227, 228]]) tensor([[155]])
tensor([[ 10,  11, 189, 158, 190]]) tensor([[191]])
tensor([[  1,   2,   3,   4,   5, 113]]) tensor([[114]])
tensor([[42, 18,  2, 62, 63,  3, 64, 18]]) tensor([[65]])
tensor([[ 78,  79, 195,  81,  19,   3, 196, 197, 198]]) tensor([[199]])
tensor([[10, 29,  3, 30, 31]]) tensor([[32]])
tensor([[  1,   2,   3, 212,   5,  14, 213, 214]]) tensor([[215]])
tensor([[ 10,  75,   3, 296,  19, 297]]) tensor([[298]])
tensor([[ 42, 216, 118, 217, 218,  19,  14, 219,  43]]) tensor([[220]])
tensor([[ 42, 250, 251, 118, 252, 253]]) tensor([[254]])
tensor([[ 42, 200,   2,  14, 201, 202, 203, 204]]) tensor([[205]])
tensor([[1, 2, 3, 4, 5, 6]]) tensor([[7]])
tensor([[  1,   2,   3, 234,   5, 235]]) tensor([[131]])
tensor([[  1,   2,   3, 146,  86,  19, 192, 193]]) tensor([[194]])
tensor([[10, 55,  3, 56,  5, 57]]) tensor([[58]])
tensor([[ 42, 101,   2,   3,  17]]

In [21]:
### RNN Architecture 

In [22]:
len(vocab)

324

In [23]:
import torch.nn as nn

In [25]:
class RNN(nn.Module):
    """Skeleton of the question-answering network: layers are created, ``forward`` is still a stub."""

    def __init__(self, vocab_size):
        super().__init__()

        embedding_dim, hidden_dim = 50, 64
        # Embedding -> recurrent layer -> linear layer sized to the vocabulary.
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self):
        """Placeholder: takes no input and returns ``None``."""
        return None

In [26]:
dataset[0]

(tensor([1, 2, 3, 4, 5, 6]), tensor([7]))

In [27]:
x = nn.Embedding(324, 50)

In [28]:
x(dataset[0][0])

tensor([[ 0.0621, -0.7427,  0.2121,  0.8294,  0.3969, -0.7774,  0.6204,  1.2912,
          0.0299, -0.9470, -1.0739, -0.8856,  0.1457,  0.5873, -1.3774,  0.3687,
         -0.5773, -0.7195, -0.4101, -0.4019, -1.5068,  0.5913, -0.2260,  0.5965,
          1.4326, -0.0757, -0.0316,  0.7341,  1.5086, -0.1661,  1.4666, -1.2047,
          0.7110,  0.7187, -1.3652, -0.8472,  2.1517, -2.8920,  0.7809,  0.1302,
         -0.0712, -1.2409,  0.0439, -0.1625,  0.4222,  0.7002, -0.6309,  0.1296,
         -1.5759, -0.3125],
        [-0.8526,  0.0535,  2.3013, -0.9323, -1.5039,  0.1026, -0.2821,  0.1111,
         -0.3175, -0.0659, -1.3438, -0.6470, -1.0736, -0.9502, -0.1284, -1.8788,
          0.8402, -0.7376,  2.0433,  0.8176,  1.1893, -2.7850,  0.9778, -0.3563,
         -0.1651, -0.2396, -0.2400, -0.2247,  0.8292, -2.3022, -0.1794, -1.1799,
         -0.2787,  0.5147, -0.0519, -0.3777, -0.1250, -1.7501, -0.1389,  0.5590,
          1.8613, -0.1705,  0.1126,  1.0943, -1.7425, -0.4176,  0.9169, -0.0896,


In [29]:
x(dataset[0][0]).shape

torch.Size([6, 50])

In [31]:
x(dataset[0][0]).shape

torch.Size([6, 50])

In [32]:
x(dataset[17][0]).shape

torch.Size([3, 50])

In [33]:
dataset[17][0]

tensor([10, 75, 76])

In [34]:
a = x(dataset[0][0])
a

tensor([[ 0.0621, -0.7427,  0.2121,  0.8294,  0.3969, -0.7774,  0.6204,  1.2912,
          0.0299, -0.9470, -1.0739, -0.8856,  0.1457,  0.5873, -1.3774,  0.3687,
         -0.5773, -0.7195, -0.4101, -0.4019, -1.5068,  0.5913, -0.2260,  0.5965,
          1.4326, -0.0757, -0.0316,  0.7341,  1.5086, -0.1661,  1.4666, -1.2047,
          0.7110,  0.7187, -1.3652, -0.8472,  2.1517, -2.8920,  0.7809,  0.1302,
         -0.0712, -1.2409,  0.0439, -0.1625,  0.4222,  0.7002, -0.6309,  0.1296,
         -1.5759, -0.3125],
        [-0.8526,  0.0535,  2.3013, -0.9323, -1.5039,  0.1026, -0.2821,  0.1111,
         -0.3175, -0.0659, -1.3438, -0.6470, -1.0736, -0.9502, -0.1284, -1.8788,
          0.8402, -0.7376,  2.0433,  0.8176,  1.1893, -2.7850,  0.9778, -0.3563,
         -0.1651, -0.2396, -0.2400, -0.2247,  0.8292, -2.3022, -0.1794, -1.1799,
         -0.2787,  0.5147, -0.0519, -0.3777, -0.1250, -1.7501, -0.1389,  0.5590,
          1.8613, -0.1705,  0.1126,  1.0943, -1.7425, -0.4176,  0.9169, -0.0896,


In [35]:
a.shape

torch.Size([6, 50])

In [36]:
y = nn.RNN(50,64)

In [37]:
b = y(a)
b

(tensor([[-0.7582,  0.0106,  0.1175,  0.0665,  0.5176, -0.2352,  0.7940,  0.1266,
          -0.1372,  0.5551, -0.0764, -0.6043,  0.3012, -0.2871, -0.3280, -0.7240,
          -0.0642, -0.5721,  0.1801,  0.1043,  0.0148,  0.3511,  0.0631,  0.0646,
          -0.4578, -0.5183,  0.4811, -0.1907, -0.5849,  0.3604,  0.1271, -0.5093,
          -0.5934,  0.0985, -0.6398, -0.6216, -0.1749,  0.1441, -0.2551, -0.5052,
           0.5427, -0.3812,  0.0859,  0.2571, -0.2822, -0.0867,  0.3776, -0.6227,
           0.3754, -0.5618, -0.1449,  0.4801,  0.1361,  0.7683,  0.7132,  0.1830,
          -0.4308, -0.4199,  0.3710, -0.2537,  0.3576,  0.3167,  0.4798, -0.4393],
         [ 0.5783, -0.7445, -0.1654, -0.0263,  0.0071, -0.1196, -0.6305,  0.2348,
           0.7108,  0.7858, -0.6345, -0.3477, -0.3519, -0.2200,  0.8256, -0.1633,
           0.5795,  0.1401, -0.1655,  0.2812,  0.6416, -0.4351,  0.5532, -0.7994,
          -0.0186, -0.5728, -0.5099, -0.0507,  0.0487,  0.8425,  0.7070, -0.3187,
          -0.82

In [39]:
len(b)

2

In [41]:
b[0]  ## these are the hidden states for every time step

tensor([[-0.7582,  0.0106,  0.1175,  0.0665,  0.5176, -0.2352,  0.7940,  0.1266,
         -0.1372,  0.5551, -0.0764, -0.6043,  0.3012, -0.2871, -0.3280, -0.7240,
         -0.0642, -0.5721,  0.1801,  0.1043,  0.0148,  0.3511,  0.0631,  0.0646,
         -0.4578, -0.5183,  0.4811, -0.1907, -0.5849,  0.3604,  0.1271, -0.5093,
         -0.5934,  0.0985, -0.6398, -0.6216, -0.1749,  0.1441, -0.2551, -0.5052,
          0.5427, -0.3812,  0.0859,  0.2571, -0.2822, -0.0867,  0.3776, -0.6227,
          0.3754, -0.5618, -0.1449,  0.4801,  0.1361,  0.7683,  0.7132,  0.1830,
         -0.4308, -0.4199,  0.3710, -0.2537,  0.3576,  0.3167,  0.4798, -0.4393],
        [ 0.5783, -0.7445, -0.1654, -0.0263,  0.0071, -0.1196, -0.6305,  0.2348,
          0.7108,  0.7858, -0.6345, -0.3477, -0.3519, -0.2200,  0.8256, -0.1633,
          0.5795,  0.1401, -0.1655,  0.2812,  0.6416, -0.4351,  0.5532, -0.7994,
         -0.0186, -0.5728, -0.5099, -0.0507,  0.0487,  0.8425,  0.7070, -0.3187,
         -0.8239,  0.0039, 

In [44]:
b[1] ## this is the hidden state of the last time step

tensor([[-0.0931,  0.3695, -0.0796, -0.4695,  0.6362, -0.7905, -0.8241, -0.1165,
          0.7327,  0.0240, -0.5103,  0.6462,  0.3690, -0.0809, -0.1801, -0.2828,
          0.2258, -0.1833,  0.0947,  0.7829,  0.3282, -0.0721,  0.1956, -0.5023,
          0.3788,  0.8920,  0.8579,  0.0197,  0.2646, -0.6914, -0.5154,  0.4494,
          0.2058,  0.2976, -0.7672, -0.2452,  0.3046,  0.6955,  0.5416,  0.0878,
          0.8952,  0.0897,  0.3136,  0.0248, -0.4917, -0.4041, -0.0407, -0.4497,
          0.7078,  0.3850,  0.0647, -0.6471, -0.3927, -0.6475,  0.0171,  0.4565,
          0.3090, -0.0071,  0.3795,  0.0255,  0.3940,  0.2355, -0.7120, -0.4200]],
       grad_fn=<SqueezeBackward1>)

In [43]:
b[0].shape, b[1].shape

(torch.Size([6, 64]), torch.Size([1, 64]))

In [46]:
a.shape

torch.Size([6, 50])

In [47]:
## An RNN consumes its input one time step at a time.
## Here the question has 6 tokens and each token is embedded as a 50-dim vector,
## so the embedded question is a 6 x 50 tensor.
## Passing it through the RNN returns two things: the hidden state at every time step (6 x 64)
## and the hidden state of the last time step (1 x 64).
## The last hidden state is then passed to the linear layer, which produces one score per vocabulary entry (1 x 324).

In [48]:
## So only the hidden state of the last time step is passed on:
## it is the input to the fully connected layer.

In [49]:
z = nn.Linear(64, 324)
z

Linear(in_features=64, out_features=324, bias=True)

In [50]:
out = z(b[1])
out

tensor([[-7.1781e-02, -1.8217e-02,  3.4726e-01,  1.0153e-01,  7.3660e-03,
         -1.2130e-02, -3.2297e-01, -2.0542e-01, -4.1461e-01, -1.2326e-01,
          8.4545e-02, -3.4144e-02, -2.4872e-01, -2.1583e-01, -2.2227e-01,
          2.7429e-01, -6.3701e-01, -1.1361e-01,  5.2409e-02,  5.2254e-01,
          2.7719e-01,  5.6149e-02,  3.6041e-01, -2.1452e-01,  1.3011e-01,
          2.1814e-01,  2.6714e-01, -8.0143e-03, -9.0027e-02,  1.9925e-02,
          1.8504e-01, -7.1738e-01,  4.8499e-02, -6.3305e-01,  1.9920e-01,
         -5.1527e-01, -1.8398e-01,  3.4664e-01, -4.3962e-01,  1.6305e-01,
          3.7175e-04,  1.5142e-01, -8.0838e-02,  2.9616e-01, -1.5816e-01,
          3.8022e-01,  5.3197e-01, -1.0072e-01, -7.6375e-02,  4.7947e-01,
          9.8064e-02, -3.8944e-02,  1.2730e-01, -4.7630e-01,  8.2522e-02,
          4.9854e-02, -3.4218e-02,  9.8782e-02, -2.6045e-01,  4.9358e-01,
          4.1057e-02, -2.8656e-01, -5.2371e-02, -3.9260e-01, -7.8172e-02,
          9.5577e-02, -2.6691e-01,  3.

In [56]:
b[1].shape

torch.Size([1, 64])

In [62]:
b[1]

tensor([[-0.0931,  0.3695, -0.0796, -0.4695,  0.6362, -0.7905, -0.8241, -0.1165,
          0.7327,  0.0240, -0.5103,  0.6462,  0.3690, -0.0809, -0.1801, -0.2828,
          0.2258, -0.1833,  0.0947,  0.7829,  0.3282, -0.0721,  0.1956, -0.5023,
          0.3788,  0.8920,  0.8579,  0.0197,  0.2646, -0.6914, -0.5154,  0.4494,
          0.2058,  0.2976, -0.7672, -0.2452,  0.3046,  0.6955,  0.5416,  0.0878,
          0.8952,  0.0897,  0.3136,  0.0248, -0.4917, -0.4041, -0.0407, -0.4497,
          0.7078,  0.3850,  0.0647, -0.6471, -0.3927, -0.6475,  0.0171,  0.4565,
          0.3090, -0.0071,  0.3795,  0.0255,  0.3940,  0.2355, -0.7120, -0.4200]],
       grad_fn=<SqueezeBackward1>)

In [64]:
b[1][-1].shape

torch.Size([64])

In [51]:
out.shape

torch.Size([1, 324])

In [55]:
torch.max(out, 1)

torch.return_types.max(
values=tensor([0.6947], grad_fn=<MaxBackward0>),
indices=tensor([131]))

In [84]:
class RNN(nn.Module):
    """Single-layer RNN that maps a tokenized question to scores over the vocabulary.

    Args:
        vocab_size: number of vocabulary entries; sizes both the embedding
            table and the output layer.
        embedding_dim: size of each token embedding (default 50).
        hidden_size: size of the RNN hidden state (default 64).
    """

    def __init__(self, vocab_size, embedding_dim=50, hidden_size=64):
        super().__init__()

        self.embed = nn.Embedding(vocab_size, embedding_dim)
        # batch_first=True: inputs are laid out as (batch, seq_len, embedding_dim).
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, question):
        """Score every vocabulary entry as the answer to ``question``.

        Args:
            question: integer tensor of token indices, shaped (batch, seq_len).

        Returns:
            Tensor of unnormalized scores, shaped (batch, vocab_size).
        """
        embedding = self.embed(question)
        # nn.RNN returns (outputs for every time step, final hidden state);
        # only the final hidden state is needed here.
        _, final = self.rnn(embedding)
        # ``final`` has a leading num_layers dimension; [-1] picks the last
        # (here: only) layer.
        output = self.fc(final[-1])
        return output

In [85]:
learning_rate = 0.01
epochs = 20
learning_rate, epochs

(0.01, 20)

In [86]:
model = RNN(len(vocab))
model

RNN(
  (embed): Embedding(324, 50)
  (rnn): RNN(50, 64, batch_first=True)
  (fc): Linear(in_features=64, out_features=324, bias=True)
)

In [87]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [93]:
## training loop

for epoch in range(epochs):
    # predict() puts the model into eval mode; switch back explicitly so the
    # loop always trains in train mode, even when this cell is re-run later.
    model.train()
    total_loss = 0

    for question, answer in dataloader:
        # Each batch is a pair of index tensors shaped (batch, n_tokens).

        optimizer.zero_grad()

        output = model(question)
        # CrossEntropyLoss expects one class index per sample, i.e. shape (batch,).
        # Take the first answer token of every row; indexing the batch dimension
        # (answer[-1]) only yields that shape when batch_size is 1.
        loss = criterion(output, answer[:, 0])

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean loss per batch for this epoch.
    print(f"epoch : {epoch+1} | loss : {total_loss/len(dataloader)}")


epoch : 1 | loss : 0.4186107061834049
epoch : 2 | loss : 0.4532295361126248
epoch : 3 | loss : 0.37396684722111273
epoch : 4 | loss : 0.38722448030598794
epoch : 5 | loss : 0.22695530112110715
epoch : 6 | loss : 0.17251554012118347
epoch : 7 | loss : 0.13331403512997106
epoch : 8 | loss : 0.12419762150230883
epoch : 9 | loss : 0.1835422975390505
epoch : 10 | loss : 0.1136810540152952
epoch : 11 | loss : 0.25454814779380713
epoch : 12 | loss : 0.1500426088045957
epoch : 13 | loss : 0.2538717197177713
epoch : 14 | loss : 0.17345311959942364
epoch : 15 | loss : 0.1860178290735866
epoch : 16 | loss : 0.11274468630163408
epoch : 17 | loss : 0.12703109990293468
epoch : 18 | loss : 0.07740500083820431
epoch : 19 | loss : 0.14105142513154886
epoch : 20 | loss : 0.08710948849515414


In [97]:
## testing the model

def predict(model, question, threshold=0.5):
    """Switch ``model`` to eval mode and print the vocabulary indices of ``question``.

    ``threshold`` is accepted but not used by this version.
    """
    model.eval()
    print(text_to_indices(question))

In [98]:
predict(model, "i am dhruv panchal")

[0, 0, 0, 0]


In [99]:
predict(model, "what are you doing?")

[1, 81, 0, 0]


In [111]:
## testing the model

def predict(model, question, threshold=0.5):
    """Print the model's single-token answer to ``question``.

    Args:
        model: module called with a (1, seq_len) tensor of token indices and
            returning scores shaped (1, vocab_size).
        question: raw question text; converted with ``text_to_indices``.
        threshold: minimum softmax probability the best-scoring token must
            reach to be printed.

    Prints the predicted token, or "sorry i dont know" when the question
    contains no tokens or the top probability is below ``threshold``.
    Returns ``None``.
    """
    model.eval()
    numerical_question = text_to_indices(question)

    # An empty or punctuation-only question yields no tokens; an empty tensor
    # cannot be fed through the embedding/RNN layers, so answer directly.
    if not numerical_question:
        print("sorry i dont know")
        return

    question_tensor = torch.tensor(numerical_question, dtype=torch.long).unsqueeze(0)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(question_tensor)
        probabilities = torch.nn.functional.softmax(logits, dim=1)
        value, index = torch.max(probabilities, 1)

    if value.item() < threshold:
        print("sorry i dont know")
    else:
        # Relies on vocab's insertion order matching its index values, which
        # holds when each token's index is len(vocab) at insertion time.
        print(list(vocab.keys())[index.item()])

In [112]:
predict(model, "i am dhruv panchal")

sorry i dont know


In [113]:
predict(model, "where are you from?")

sorry i dont know


In [114]:
predict(model, "what is the capital of france?")

paris


In [116]:
predict(model, "who created you?")

6


In [117]:
predict(model, "who needs you?")

6


In [119]:
predict(model, "you are insane")

8
