In [1]:
import torch
import torch.nn as nn
import random
import torch.optim as optim

# Symbol inventories used for one-hot encoding. The position of a symbol in
# its list is the index of the hot entry in the encoded vector.
# '0' and '1' are the markers that nemes_to_1_hot_seq puts around every word.
phonemes = ['0', 'ō', 'ē', 'f', '1']

graphemes = [
    '0',
    'a',
    'b',
    '1',
]

# one hot encodes the word: returns an array of one hot encoded characters
def nemes_to_1_hot_seq(string, nemes="phonemes"):
    """One-hot encode a word, wrapped in the '0' / '1' marker symbols.

    Args:
        string: Word to encode; it is iterated character by character.
        nemes: "phonemes" selects the phoneme inventory; any other value
            selects the grapheme inventory.

    Returns:
        A float tensor of shape (1, len(string) + 2, inventory size), one
        one-hot row per symbol including the two markers.

    Raises:
        ValueError: If a character is not present in the chosen inventory.
    """
    padded = '0' + string + '1'

    if nemes == "phonemes":
        inventory = phonemes
    else:
        inventory = graphemes
    width = len(inventory)

    rows = []
    for symbol in padded:
        # list.index raises ValueError for symbols outside the inventory.
        hot = inventory.index(symbol)
        rows.append([1 if k == hot else 0 for k in range(width)])

    # The extra list level adds the leading axis of size 1.
    return torch.FloatTensor([rows])


In [None]:
# define model architecture
class Encoder(nn.Module):
    """Single-layer, unidirectional GRU over one-hot phoneme vectors."""

    def __init__(self):
        super().__init__()
        self.encoder = nn.GRU(
            input_size=len(phonemes),
            hidden_size=512,
            num_layers=1,
            batch_first=True,
            bidirectional=False,
        )

    def forward(self, x):
        """Encode ``x`` and return the GRU's final hidden state.

        Args:
            x: Input sequence; with ``batch_first=True`` the GRU expects
                (batch, seq_len, len(phonemes)).

        Returns:
            The final hidden state ``h_n`` of the GRU, used as the context
            vector. The per-step outputs are discarded.
        """
        _, context = self.encoder(x)
        return context

class Decoder(nn.Module):
    """Single-layer GRU followed by a linear projection onto the graphemes."""

    def __init__(self):
        super().__init__()
        self.decoder = nn.GRU(
            input_size=len(graphemes),
            hidden_size=512,
            num_layers=1,
            batch_first=True,
            bidirectional=False,
        )
        self.fc = nn.Linear(in_features=512, out_features=len(graphemes))

    def forward(self, input, hidden_layer):
        """Advance the decoder and score every grapheme.

        The decoder is meant to be called one step at a time rather than on a
        whole target sequence, so the caller passes in the previous output
        (as a grapheme-sized vector) together with the current hidden state.

        Args:
            input: Previous-step vector(s); with ``batch_first=True`` the GRU
                expects (batch, steps, len(graphemes)).
            hidden_layer: Hidden state to start from.

        Returns:
            A ``(scores, hidden)`` pair. ``scores`` is the linear layer
            applied to the GRU's final hidden state (not to its per-step
            outputs); ``hidden`` is that final hidden state.
        """
        _, new_hidden = self.decoder(input, hidden_layer)
        scores = self.fc(new_hidden)
        return scores, new_hidden

class seq2seq(nn.Module):
    """Encoder/decoder pair that decodes a grapheme sequence step by step.

    Args:
        device: Device on which the tensor of per-step scores is allocated.
    """

    def __init__(self, device):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.device = device

    def forward(self, in_seq, out_seq, tf_ratio=0.5):
        """Encode ``in_seq`` and decode one score vector per target position.

        Args:
            in_seq: Encoder input (one-hot phonemes).
            out_seq: One-hot target graphemes. The indexing below assumes a
                single sequence, i.e. shape (1, out_len, len(graphemes)).
            tf_ratio: Probability, at each step, of teacher forcing (feeding
                the ground-truth token as the next decoder input). Otherwise
                the decoder's own highest-scoring grapheme is fed back.

        Returns:
            Tensor of shape (out_len, 1, len(graphemes)) holding the scores
            for each step. Row 0 stays all zeros: decoding starts from the
            first target token, so no prediction is made for position 0.
        """
        out_len = out_seq.shape[1]
        # storing the outputs of the sequence
        outputs = torch.zeros(out_len, 1, len(graphemes)).to(self.device)

        hidden = self.encoder(in_seq)

        # Drop the batch axis so out_seq[i] is the one-hot vector of step i.
        out_seq = out_seq.squeeze(0)

        # The first decoder input is the first target token, shaped (1, 1, G).
        input = out_seq[0].unsqueeze(0).unsqueeze(0)

        for i in range(1, out_len):
            out, hidden = self.decoder(input, hidden)
            # out is (1, 1, G); store it as the (1, G) row for step i.
            outputs[i] = out.reshape(1, -1)

            if random.random() < tf_ratio:
                # teacher forcing (make next input what the current output token should be)
                input = out_seq[i].unsqueeze(0).unsqueeze(0)
            else:
                # Feed back the decoder's own prediction: one-hot encode the
                # highest-scoring grapheme of *this step's output* (not of the
                # previous input), on the same device as the decoder output.
                predicted = out.argmax(-1).item()
                input = torch.zeros(1, 1, len(graphemes), device=out.device)
                input[0][0][predicted] = 1

        return outputs
        

In [131]:
"""training"""

# Prefer CUDA when PyTorch reports it as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

EPOCHS = 100

# NOTE(review): seq2seq only stores `device`; no `.to(device)` is applied to
# the model here -- confirm the parameters end up on the intended device.
model = seq2seq(device)
# what a beautiful architecture
print(model)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# dataset OBJ!

def train(epochs):
    """Train the model for ``epochs`` epochs (not implemented yet).

    The definition previously had no colon and no body, which is a
    SyntaxError and stopped the whole cell from running. It is now an explicit
    stub so the cell executes; the training loop still has to be written once
    the dataset object exists.

    Args:
        epochs: Number of passes over the training data.

    Raises:
        NotImplementedError: Always, until the training loop is implemented.
    """
    raise NotImplementedError("train() is a stub: the training loop has not been written yet")
# print(x(nemes_to_1_hot_seq("ff"), nemes_to_1_hot_seq('a', "graphemes")))


seq2seq(
  (encoder): Encoder(
    (encoder): GRU(5, 512, batch_first=True)
  )
  (decoder): Decoder(
    (decoder): GRU(4, 512, batch_first=True)
    (fc): Linear(in_features=512, out_features=4, bias=True)
  )
)


In [2]:
# Scratch cell: walks through the first steps of seq2seq.forward by hand.
encoder = Encoder()
decoder = Decoder()

in_seq = nemes_to_1_hot_seq("ff")
out_seq = nemes_to_1_hot_seq('a', nemes="graphemes")

tf_ratio = 0.5

# NOTE(review): axis 0 of out_seq is the leading size-1 axis added by
# nemes_to_1_hot_seq, so out_len is 1 here; seq2seq.forward reads shape[1].
out_len = out_seq.size(0)
# for storing
outputs = torch.zeros((out_len, 1, len(graphemes)))

hidden = encoder(in_seq)
# NOTE(review): this selects the whole encoded sequence (all steps), not just
# the first token.
input = out_seq[0]


In [136]:
input

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 0., 1.]])

In [3]:
def phonemes_to_1_hot_seq(string):
    """One-hot encode ``string`` against ``phonemes``, without marker symbols.

    Args:
        string: Phoneme string, iterated character by character.

    Returns:
        A float tensor with a leading axis of size 1 and one one-hot row of
        width ``len(phonemes)`` per character.

    Raises:
        ValueError: If a character is not in ``phonemes``.
    """
    rows = []
    for symbol in string:
        # list.index raises ValueError for unknown symbols.
        hot = phonemes.index(symbol)
        rows.append([1 if k == hot else 0 for k in range(len(phonemes))])

    return torch.FloatTensor([rows])

# Scratch: encode a two-phoneme word, then run a single decoder step by hand.
vec = phonemes_to_1_hot_seq("ōō")

e = Encoder()
d = Decoder()

# Context vector: the encoder's final hidden state.
hidden = e(vec)

# Start decoding from an all-zero vector of shape (1, 1, len(graphemes)).
previous_output = torch.zeros((1, 1, len(graphemes)))

previous_output, hidden = d(input=previous_output, hidden_layer=hidden)

# previous_output = previous_output.argmax(1) 

# decoder = nn.GRU(len(graphemes), 512, 1, batch_first=True, bidirectional=False)

# output, hidden = decoder(previous_output, hidden)

# fc = nn.Linear(512, len(graphemes))

# fc(hidden)
# d(e(vec))


In [28]:
# previous_output.shape
# Advance the decoder by one more step, feeding its previous scores back in.
# NOTE: not idempotent -- every run of this cell overwrites previous_output
# and hidden with the next step's values.
previous_output, hidden = d(input=previous_output, hidden_layer=hidden)
previous_output
# previous_output, hidden = d(previous_output, hidden)
# previous_output.argmax(3) 
# previous_output = previous_output.argmax(1) 
# print(previous_output)
# previous_output, hidden = decoder(previous_output, hidden)


tensor([[[-0.0457,  0.0219,  0.0179, -0.0148]]], grad_fn=<AddBackward0>)

In [4]:
# Scratch: feed a two-step all-zero input through a bare GRU (no linear layer).
hidden = e(vec)

previous_output = torch.zeros((1, 2, len(graphemes)))
# previous_output.shape
# hidden.shape
# d(previous_output, hidden)

# NOTE(review): this rebinds `decoder`, which an earlier cell assigns a
# Decoder instance, to a plain nn.GRU.
decoder = nn.GRU(
    input_size=len(graphemes),
    hidden_size=512,
    num_layers=1,
    batch_first=True,
    bidirectional=False,
)

decoder(previous_output, hidden)

(tensor([[[ 0.0233,  0.0011, -0.0088,  ...,  0.0153,  0.0336, -0.0280],
          [ 0.0108,  0.0076, -0.0168,  ...,  0.0231,  0.0282, -0.0414]]],
        grad_fn=<TransposeBackward1>),
 tensor([[[ 1.0777e-02,  7.6281e-03, -1.6848e-02,  4.1977e-03, -1.2871e-02,
            3.1418e-03,  2.1550e-02, -5.7571e-03, -3.0954e-02,  1.0037e-02,
            2.6939e-02,  9.0874e-03,  1.7136e-02,  2.6673e-02, -1.0385e-02,
           -1.8989e-02,  3.2243e-02, -1.9373e-02,  8.1069e-03, -1.0501e-02,
           -4.1139e-04,  1.3408e-02, -6.0382e-03,  3.1077e-02, -5.5523e-03,
            2.1457e-02,  1.1449e-02,  4.0108e-02,  3.2752e-02,  1.7254e-02,
            1.0245e-02,  4.0089e-03,  3.5461e-02,  2.5902e-03,  2.3008e-02,
            7.3063e-03, -1.6150e-03, -1.1977e-02, -3.3592e-02, -1.1931e-02,
           -2.9569e-02, -4.2114e-02,  1.3337e-02, -6.8814e-03, -3.8481e-02,
            1.5909e-03, -3.7625e-03,  1.6526e-02, -1.2831e-02,  9.7519e-03,
           -8.3313e-03, -1.1014e-02,  5.0301e-04, -1.12

In [11]:
# Scratch: the body of phonemes_to_1_hot_seq written out inline for one symbol.
# NOTE(review): this rebinds the globals `vec` (a tensor in earlier cells) and
# `seq` to plain lists; the recorded output below has 2 columns although
# `phonemes` currently has 5 entries, so it predates the current inventory.
string = 'ē'
seq = list()
for i in string:
    vec = [0 for _ in phonemes]
    vec[phonemes.index(i)] = 1
    seq += [vec]
torch.FloatTensor([seq])

tensor([[[0., 1.]]])