In [1]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def onehot(number, total):
    """One-hot encode a single number.

    number (int): position that should be marked with a 1.
    total (int):  length of the resulting vector, i.e. how many slots exist.

    Returns a 1D tensor holding `total` zeros, except for a 1 at `number`.
    """
    # start from an all-zero vector of the requested length
    vector = torch.zeros(total)
    # switch on the requested position
    vector[number] = 1
    return vector

In [3]:
def encode_text(text, vocab_path="encoder_words.txt"):
    """One-hot encode a sentence / text against a vocabulary file.

    text (str):       text to encode; it is lowercased and split on whitespace.
    vocab_path (str): path of the vocabulary file, one word per line
                      (defaults to "encoder_words.txt").

    Returns a 1D float32 tensor of length (number of words * vocabulary size):
    the one-hot vectors of the words, concatenated in order. A word that is
    not in the vocabulary contributes an all-zero slice. Empty text yields an
    empty tensor.
    """
    # The vocabulary is lowercased below, so the text has to be lowercased as
    # well; otherwise any word containing an uppercase letter could never match.
    words = text.lower().split()

    # Read the vocabulary: lowercase everything and split on "\n".
    # Note: a trailing newline in the file produces an empty last entry, which
    # still counts towards the vocabulary size (and therefore the slice width).
    with open(vocab_path) as vocab_file:
        vocabulary = vocab_file.read().lower().split("\n")

    # Map each vocabulary word to every position it occupies, so a lookup is a
    # dict access instead of a scan over the whole vocabulary per word.
    # Duplicate entries keep all their positions, and all of them get marked.
    positions = {}
    for index, entry in enumerate(vocabulary):
        positions.setdefault(entry, []).append(index)

    # Allocate the whole result once (one row per word) instead of growing a
    # tensor with torch.cat on every iteration, which re-copies it each time.
    encoded = torch.zeros((len(words), len(vocabulary)), dtype=torch.float32)
    for row, word in enumerate(words):
        hits = positions.get(word)
        # unknown words keep their all-zero row
        if hits:
            encoded[row, hits] = 1

    # Rows laid end to end give the flat, concatenated encoding.
    return encoded.flatten()

In [5]:
# Demo call: six whitespace-separated tokens, one of them ("asdasdasdasd") a
# nonsense word; in the recorded output its slice of the result is all zeros.
encode_text("one two three asdasdasdasd four five")

tensor([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0.])