# Phoneme to Grapheme Conversion with a Recurrent Generative Model 
This project explores converting phoneme sequences into grapheme sequences with a GRU-based encoder–decoder (seq2seq) model.

In [3]:
import torch
import torch.nn as nn
import random
import torch.optim as optim

# Symbol inventories used for one-hot encoding. In both lists '0' is the
# start-of-sequence marker and '1' is the marker that nemes_to_1_hot_seq
# appends after the word; a symbol's position in its list is its one-hot index.
phonemes = ['0', 'ō', 'ē', 'f', '1']

graphemes = [
    '0',
    'a',
    'b',
    '1',
]

def nemes_to_1_hot_seq(string, nemes="phonemes"):
    """One-hot encode a word, framed by the start ('0') and end ('1') markers.

    Args:
        string: The word to encode; every character must be a member of the
            selected inventory.
        nemes: ``"phonemes"`` selects the ``phonemes`` inventory; any other
            value selects ``graphemes``.

    Returns:
        A float32 tensor of shape ``(1, len(string) + 2, len(inventory))``
        holding one one-hot row per symbol, including the two markers.

    Raises:
        ValueError: If ``string`` contains a symbol missing from the inventory.
    """
    vocab = phonemes if nemes == "phonemes" else graphemes

    # Map each symbol to its first position, mirroring list.index().
    index_of = {}
    for position, symbol in enumerate(vocab):
        index_of.setdefault(symbol, position)

    framed = '0' + string + '1'
    unknown = [symbol for symbol in framed if symbol not in index_of]
    if unknown:
        raise ValueError(
            f"symbols {unknown!r} in {string!r} are not in the known inventory {vocab!r}"
        )

    one_hot = torch.zeros(1, len(framed), len(vocab), dtype=torch.float32)
    for step, symbol in enumerate(framed):
        one_hot[0, step, index_of[symbol]] = 1.0
    return one_hot


In [4]:
# define model architecture
class Encoder(nn.Module):
    """Single-layer, unidirectional GRU over one-hot phoneme vectors."""

    def __init__(self):
        super().__init__()
        self.encoder = nn.GRU(
            input_size=len(phonemes),
            hidden_size=512,
            num_layers=1,
            batch_first=True,
            bidirectional=False,
        )

    def forward(self, x):
        """Run ``x`` through the GRU and return its final hidden state.

        The per-step outputs are discarded; only the final hidden state is
        handed on as the context vector.
        """
        _, context = self.encoder(x)
        return context

class Decoder(nn.Module):
    """Single-layer GRU plus a linear projection onto grapheme scores."""

    def __init__(self):
        super().__init__()
        self.decoder = nn.GRU(
            input_size=len(graphemes),
            hidden_size=512,
            num_layers=1,
            batch_first=True,
            bidirectional=False,
        )
        self.fc = nn.Linear(in_features=512, out_features=len(graphemes))

    def forward(self, input, hidden_layer):
        """Advance the decoder using ``input`` and the previous hidden state.

        The decoder is driven one call at a time rather than over a whole
        sequence, so the caller supplies the previous token (as a one-hot
        vector) together with the hidden state from the previous call.

        Returns:
            A pair ``(scores, hidden)``: the new hidden state projected onto
            the grapheme inventory, and the new hidden state itself.
        """
        _, next_hidden = self.decoder(input, hidden_layer)
        scores = self.fc(next_hidden)
        return scores, next_hidden

class seq2seq(nn.Module):
    """Encoder-decoder model mapping a phoneme sequence to grapheme scores.

    Processes a single sequence per call (the batch dimension is fixed at 1).
    """

    def __init__(self, device):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        # Device on which the collected outputs are allocated.
        self.device = device

    def forward(self, in_seq, out_seq, tf_ratio=0.5):
        """Decode ``out_seq.shape[1]`` steps, conditioning on ``in_seq``.

        Args:
            in_seq: One-hot phoneme tensor fed to the encoder.
            out_seq: One-hot grapheme tensor with a leading batch axis of
                size 1; its first row seeds the decoder and the remaining
                rows are the teacher-forcing inputs.
            tf_ratio: Probability, per step, of feeding the ground-truth
                token (teacher forcing) instead of the model's own prediction.

        Returns:
            Tensor of shape ``(out_len, 1, len(graphemes))`` with the decoder
            scores for each step. Row 0 (the start-token position) is left at
            zero because decoding starts at step 1.
        """
        out_len = out_seq.shape[1]
        # storing the outputs of the sequence
        outputs = torch.zeros(out_len, 1, len(graphemes)).to(self.device)

        hidden = self.encoder(in_seq)

        out_seq = out_seq.squeeze(0)

        # The first decoder input is the start token, shaped (1, 1, vocab).
        decoder_input = out_seq[0].unsqueeze(0).unsqueeze(0)

        for i in range(1, out_len):
            out, hidden = self.decoder(decoder_input, hidden)
            outputs[i] = out

            if random.random() < tf_ratio:
                # teacher forcing: the next input is the token the model
                # should have produced at this step
                decoder_input = out_seq[i].unsqueeze(0).unsqueeze(0)
            else:
                # free running: feed back the model's own most likely token
                # as a one-hot vector on the same device/dtype as the inputs
                predicted = out.argmax(dim=-1).item()
                decoder_input = torch.zeros_like(decoder_input)
                decoder_input[0, 0, predicted] = 1

        return outputs
        

In [22]:
"""training"""

device = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 100
# Keep the model's parameters on the same device as the tensors it produces.
model = seq2seq(device).to(device)
# what a beautiful architecture
print(model)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
loss_func = nn.CrossEntropyLoss()

in_seq = nemes_to_1_hot_seq("ōō").to(device)
out_seq = nemes_to_1_hot_seq("a", "graphemes").to(device)

# Start from clean gradients so repeated runs of this cell do not accumulate.
optimizer.zero_grad()

model_output = model(in_seq, out_seq)
# get rid of first items because they are the start token,
# which shouldnt be included in loss
model_output = model_output[1:]
model_output = model_output.squeeze(1)
# Class indices of the expected graphemes, again without the start token;
# derived from the one-hot target instead of being hard-coded.
targets = out_seq.squeeze(0)[1:].argmax(dim=1)
print(model_output)
print(model_output.shape)
loss = loss_func(model_output, targets)
loss.backward()
optimizer.step()

# TODO: build a dataset object and train over it, e.g.
# for epoch in range(EPOCHS):
#     for (in_seq, out_seq) in dataloader():
#         outputs = model(in_seq, out_seq)


seq2seq(
  (encoder): Encoder(
    (encoder): GRU(5, 512, batch_first=True)
  )
  (decoder): Decoder(
    (decoder): GRU(4, 512, batch_first=True)
    (fc): Linear(in_features=512, out_features=4, bias=True)
  )
)
tensor([[-0.0344, -0.0213, -0.0112,  0.0098],
        [-0.0370, -0.0186, -0.0226, -0.0061]], grad_fn=<SqueezeBackward1>)
torch.Size([2, 4])


tensor(1.3824, grad_fn=<NllLossBackward0>)

In [None]:
def predict_graphemes(phonemes):
    