**MODEL OUTPUT**

Run the cell below; it downloads the vocabulary and trained-model files this notebook needs 👇

In [10]:
# Fetch the three pickled artifacts used by the cells below, via the `gdown` CLI.
# In the recorded run they were saved, in this order, as
# /content/vocab.pkl, /content/trained_model.pkl and /content/inv_vocab.pkl.
!gdown "https://drive.google.com/uc?id=1J78MUhgrvq720oQkCEkIlxo9becbG0BY"
!gdown "https://drive.google.com/uc?id=13TeQjXINKQvFR_1sj6OT2j63OtwJXwzn"
!gdown "https://drive.google.com/uc?id=1tebdUijCsq8z7r5bo3_Hoq5jAM0fotzR"

Downloading...
From: https://drive.google.com/uc?id=1J78MUhgrvq720oQkCEkIlxo9becbG0BY
To: /content/vocab.pkl
100% 38.2k/38.2k [00:00<00:00, 21.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=13TeQjXINKQvFR_1sj6OT2j63OtwJXwzn
To: /content/trained_model.pkl
100% 856k/856k [00:00<00:00, 82.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1tebdUijCsq8z7r5bo3_Hoq5jAM0fotzR
To: /content/inv_vocab.pkl
100% 38.3k/38.3k [00:00<00:00, 49.4MB/s]


In [11]:
import torch                        # use to do tensor computation fast
import torch.nn as nn               # nn pytorch dedicated library for neural networks
import torch.nn.functional as F

In [12]:


class TinyNextWordModel(nn.Module):
    """Small GRU network that scores every vocabulary entry as the next word.

    Pipeline: token ids -> embedding -> GRU -> linear projection of the last
    time step onto the vocabulary.

    Args:
        vocab_size: number of embedding rows and number of output scores.
        embed_dim: width of each token embedding.
        hidden_dim: width of the GRU hidden state.
    """

    def __init__(self, vocab_size=2000, embed_dim=32, hidden_dim=64):
        super().__init__()
        # The attribute names (embed / gru / fc) name the submodules and must
        # stay stable for previously saved models to load.
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        self.gru = nn.GRU(input_size=embed_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x):
        """Return next-word scores for a batch of token-id sequences.

        Args:
            x: integer tensor of token ids laid out as (batch, seq_len).

        Returns:
            Tensor of shape (batch, vocab_size) holding unnormalised scores.
        """
        embedded = self.embed(x)
        hidden_states, _final_state = self.gru(embedded)
        # Only the output at the last time step feeds the classifier.
        last_step = hidden_states[:, -1, :]
        return self.fc(last_step)

In [13]:

def simple_tokenizer(text):
    """Lower-case ``text`` and split it on runs of whitespace.

    Returns the list of tokens; an empty or whitespace-only string yields ``[]``.
    """
    lowered = text.lower()
    tokens = lowered.split()
    return tokens


In [14]:
# Token -> id mapping; predict() looks words up with vocab.get(token, 0).
vocab = torch.load("vocab.pkl")
# Id -> token mapping; predict() uses it to turn predicted ids back into words.
inv_vocab = torch.load("inv_vocab.pkl")
# Vocabulary size capped at 2000, the default vocab_size of TinyNextWordModel.
vocab_size = min(len(vocab), 2000)
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python
# objects, so this runs whatever the downloaded file contains — only load
# files you trust. Presumably the pickle stores a whole TinyNextWordModel,
# which is why the class is defined before this cell — TODO confirm.
model99 = torch.load("trained_model.pkl", weights_only=False)

This is the main code: the `predict` function 👇

In [15]:
def predict(model, text_input, seq_len=20, top_k=5):
    """Return the ``top_k`` most likely next words for ``text_input``.

    The text is tokenized with ``simple_tokenizer`` and mapped to ids through
    the module-level ``vocab`` (tokens missing from ``vocab`` become id 0).
    Only the last ``seq_len`` tokens are kept, and shorter inputs are
    left-padded with id 0 up to ``seq_len``.

    Args:
        model: network called as ``model(x)`` on a ``(1, seq_len)`` id tensor
            and returning ``(1, n_classes)`` scores.
        text_input: prompt string to continue.
        seq_len: length of the id window fed to the model.
        top_k: number of candidates wanted; capped at the number of classes
            the model outputs so an oversized value no longer raises.

    Returns:
        List of ``(word, probability)`` tuples, most probable first. Ids that
        are absent from ``inv_vocab`` are reported as ``"unknown"``.

    Side effects:
        Switches ``model`` to eval mode.
    """
    model.eval()
    tokens = simple_tokenizer(text_input)  # same tokenizer as training
    encoded = [vocab.get(t, 0) for t in tokens[-seq_len:]]
    if len(encoded) < seq_len:
        encoded = [0] * (seq_len - len(encoded)) + encoded

    # Build the input on the model's own device so a model that lives on an
    # accelerator does not fail with a device mismatch.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    x = torch.tensor(encoded, dtype=torch.long, device=device).unsqueeze(0)

    with torch.no_grad():
        out = model(x)
        probs = F.softmax(out, dim=1)
        # topk raises when k exceeds the class dimension; cap it instead.
        k = min(top_k, probs.size(1))
        top_probs, top_idxs = probs.topk(k, dim=1)

    results = [(inv_vocab.get(idx.item(), "unknown"), prob.item())
               for idx, prob in zip(top_idxs[0], top_probs[0])]
    return results

This is the driver code that calls the `predict` function 👇

Because this model is biased towards a space theme, I recommend using prompts like:



solar ?


pluto is a ?


black ?


(Of course, don't include the question mark.)

In [17]:
# Prompt whose next word we want; ask the model for its five best guesses.
ques = " sun is very "
preds = predict(model99, ques, top_k=5)
# One line per candidate: the word, a fixed spacer, then the probability in percent.
for word, prob in preds:
    print(word, "             ", format(prob * 100, ".2f"), "%  probability")


<unk>               62.54 %  probability
working               12.36 %  probability
large               9.46 %  probability
place               3.13 %  probability
visible               2.10 %  probability
