<a href="https://colab.research.google.com/github/smargetic/Natural_Language_Processing/blob/main/Machine_Translation/Neural_Net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#pytorch
import torch
import torch.nn as nn

In [None]:
class Embedding_Node(nn.Module):
  """Trainable lookup table with one embedding row per vocabulary index."""

  def __init__(self, vocab_size, embedding_dim):
    super().__init__()

    # (vocab_size, embedding_dim) table, initialised from a standard normal
    table = torch.randn(vocab_size, embedding_dim)
    self.W_e = nn.Parameter(table)

  def forward(self, x):
    """Return the row(s) of the embedding table selected by index ``x``."""
    embedded = self.W_e[x]
    return embedded

In [None]:
class RNN_Node(nn.Module):
  """Single recurrent cell.

  Computes ``h_t = sigmoid(hidden_weight @ h_prev + layer_weight @ x + bias)``.
  """

  def __init__(self, hidden_dim):
    super(RNN_Node, self).__init__()

    #all parameters that go into the rnn node
    self.layer_weight = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.hidden_weight = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.bias = nn.Parameter(torch.zeros(hidden_dim))

  #how it's connected to the next rnn node
  def forward(self, input, prev_hidden):
    """Advance the cell by one step.

    Args:
      input: tensor whose last dimension is ``hidden_dim`` (a single vector
        or a batch of row vectors).
      prev_hidden: previous hidden state, same shape as ``input``.

    Returns:
      The new hidden state, same shape as ``prev_hidden``.
    """
    # torch.dot only accepts two 1-D tensors, so it cannot apply a weight
    # matrix. ``v @ W.T`` equals ``W @ v`` for a vector and also works
    # row-wise on a batch.
    recurrent_term = torch.matmul(prev_hidden, self.hidden_weight.t())
    input_term = torch.matmul(input, self.layer_weight.t())

    return torch.sigmoid(recurrent_term + input_term + self.bias)


In [None]:
class LSTM_Node(nn.Module):
  """Single LSTM cell with forget, input, output and candidate-cell gates.

  In every gate the ``W_*`` matrix is applied to the previous hidden state
  and the ``U_*`` matrix is applied to the current input.
  """

  def __init__(self, hidden_dim):
    super(LSTM_Node, self).__init__()

    #forget gate: hidden-state weight / input weight / bias
    self.W_f = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.U_f = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.b_f = nn.Parameter(torch.zeros(hidden_dim))

    #input gate: hidden-state weight / input weight / bias
    self.W_i = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.U_i = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.b_i = nn.Parameter(torch.zeros(hidden_dim))

    #output gate: hidden-state weight / input weight / bias
    self.W_o = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.U_o = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.b_o = nn.Parameter(torch.zeros(hidden_dim))

    #candidate cell: hidden-state weight / input weight / bias
    self.W_c = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.U_c = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
    self.b_c = nn.Parameter(torch.zeros(hidden_dim))

  @staticmethod
  def _pre_activation(hidden_weight, input_weight, bias, input, prev_hidden):
    """Return ``hidden_weight @ prev_hidden + input_weight @ input + bias``."""
    # torch.dot only accepts two 1-D tensors; ``v @ W.T`` equals ``W @ v``
    # for a vector and also works row-wise on a batch.
    return (torch.matmul(prev_hidden, hidden_weight.t())
            + torch.matmul(input, input_weight.t())
            + bias)

  def forward(self, input, prev_hidden, prev_cell):
    """Advance the cell by one step.

    Args:
      input: tensor whose last dimension is ``hidden_dim``.
      prev_hidden: previous hidden state, same shape as ``input``.
      prev_cell: previous cell state, same shape as ``input``.

    Returns:
      Tuple ``(h_t, c_t)`` of the new hidden and cell states. The states are
      returned to the caller rather than stored on the module.
    """
    #forget
    f_t = torch.sigmoid(self._pre_activation(self.W_f, self.U_f, self.b_f,
                                             input, prev_hidden))
    #input
    i_t = torch.sigmoid(self._pre_activation(self.W_i, self.U_i, self.b_i,
                                             input, prev_hidden))
    #output
    o_t = torch.sigmoid(self._pre_activation(self.W_o, self.U_o, self.b_o,
                                             input, prev_hidden))

    #new cell data
    nc_t = torch.tanh(self._pre_activation(self.W_c, self.U_c, self.b_c,
                                           input, prev_hidden))

    #cell: keep part of the old cell, add part of the candidate
    c_t = f_t*prev_cell + i_t*nc_t

    #hidden
    h_t = o_t*torch.tanh(c_t)

    return h_t, c_t




In [None]:
#for this, we basically said that what should be in the __init__ function should be any parameters that could be learned

#ok, so for node elements, we'll say that in the __init__ function we should have the parameters whose weights should be learned
#but for nn layers, we'll have things that define the layers / hyperparameters

#__init__ layer = anything that needs to be consistently referenced in the forward pass (especially parameters that need to be updated)

In [None]:
class EncoderNN(nn.Module):
  """Stacked recurrent encoder built from ``RNN_Node`` or ``LSTM_Node`` cells.

  Args:
    node_type: ``"rnn"`` selects ``RNN_Node`` cells; any other value selects
      ``LSTM_Node`` cells.
    n_layers: number of stacked cells.
    hidden_dim: size of the embeddings and of every hidden/cell state.
    vocab_size: number of rows in the embedding table.
  """

  def __init__(self, node_type, n_layers, hidden_dim, vocab_size):
    super(EncoderNN, self).__init__()

    #model parameters
    self.n_layers = n_layers
    self.node_type = node_type

    #define embedding node (a single lookup table shared by all time steps)
    self.embedding_node = Embedding_Node(vocab_size, hidden_dim)

    #create layer of nodes based on type
    if node_type == "rnn":
      self.nodes = nn.ModuleList([RNN_Node(hidden_dim) for _ in range(n_layers)])
    else:
      self.nodes = nn.ModuleList([LSTM_Node(hidden_dim) for _ in range(n_layers)])

    #store initial hidden states - one row per layer.
    #Registered as non-persistent buffers so they follow .to(device) without
    #adding keys to the state_dict.
    self.register_buffer("hidden", torch.randn(n_layers, hidden_dim), persistent=False)
    #only really applicable for lstm
    self.register_buffer("cell", torch.randn(n_layers, hidden_dim), persistent=False)

  def forward(self, inputs):
    """Run the encoder over a sequence.

    Args:
      inputs: iterable of indices; each element is passed to the embedding
        node one at a time.

    Returns:
      For ``"rnn"``: the final hidden states stacked along dim 0 (one per
      layer). Otherwise a tuple ``(hidden, cell)`` of stacked final hidden
      and cell states.
    """
    is_rnn = self.node_type == "rnn"

    #take initial states - 1 for each layer. Work on per-layer Python lists so
    #the stored initial states are never overwritten in place (that would leak
    #state across calls and break autograd).
    hidden_states = list(self.hidden.unbind(0))
    cell_states = list(self.cell.unbind(0))

    for word in inputs:
      #input to the first layer is the word embedding
      layer_input = self.embedding_node(word)

      for i, node in enumerate(self.nodes):
        #forward nodes
        if is_rnn:
          hidden_states[i] = node(layer_input, hidden_states[i])
        else:
          hidden_states[i], cell_states[i] = node(layer_input, hidden_states[i], cell_states[i])

        #every layer after the first consumes the output of the layer below
        layer_input = hidden_states[i]

    hidden = torch.stack(hidden_states)
    if is_rnn:
      return hidden

    return hidden, torch.stack(cell_states)

In [None]:
class DecoderNN(nn.Module):
  """Stacked recurrent decoder (work in progress: ``forward`` is a stub).

  Args:
    node_type: ``"rnn"`` selects ``RNN_Node`` cells; any other value selects
      ``LSTM_Node`` cells.
    n_layers: number of stacked cells.
    hidden_dim: size of the embeddings and of every hidden/cell state.
    vocab_size: number of rows in the embedding table and in ``W_out``.
  """

  def __init__(self, node_type, n_layers, hidden_dim, vocab_size):
    super(DecoderNN, self).__init__()

    #model parameters
    self.n_layers = n_layers
    self.node_type = node_type

    #define embedding node (a single lookup table)
    self.embedding_node = Embedding_Node(vocab_size, hidden_dim)

    #create layer of nodes based on type
    if node_type == "rnn":
      self.nodes = nn.ModuleList([RNN_Node(hidden_dim) for _ in range(n_layers)])
    else:
      self.nodes = nn.ModuleList([LSTM_Node(hidden_dim) for _ in range(n_layers)])

    #store initial hidden states - one row per layer.
    #Registered as non-persistent buffers so they follow .to(device) without
    #adding keys to the state_dict.
    self.register_buffer("hidden", torch.randn(n_layers, hidden_dim), persistent=False)
    #only really applicable for lstm
    self.register_buffer("cell", torch.randn(n_layers, hidden_dim), persistent=False)

    #output weights, shape (vocab_size, hidden_dim)
    self.W_out = nn.Parameter(torch.randn(vocab_size, hidden_dim))

  def forward(self, *args):
    """Placeholder for the decoding loop; currently always returns ``None``.

    Args:
      *args: accepted but ignored for now.
    """
    ### pseudo code

    #while the output is not the eof token

    return None







In [None]:
class Seq2Seq(nn.Module):
  """Encoder-decoder model wiring an ``EncoderNN`` into a ``DecoderNN``.

  Args:
    encoder_args: constructor arguments for ``EncoderNN``, either a sequence
      ``(node_type, n_layers, hidden_dim, vocab_size)`` or a dict of keyword
      arguments.
    decoder_args: constructor arguments for ``DecoderNN``, same format.
  """

  def __init__(self, encoder_args, decoder_args):
    super(Seq2Seq, self).__init__()

    # node_type, n_layers, hidden_dim, vocab_size

    #model structure
    self.encoder = self._build(EncoderNN, encoder_args)
    self.decoder = self._build(DecoderNN, decoder_args)

  @staticmethod
  def _build(module_cls, args):
    """Instantiate ``module_cls`` from a dict (keywords) or a sequence (positionals)."""
    # The argument bundle has to be unpacked; passing it as a single
    # positional argument does not match the four-parameter constructors.
    if isinstance(args, dict):
      return module_cls(**args)
    return module_cls(*args)

  def forward(self, inputs):
    """Encode ``inputs`` and hand the final encoder state(s) to the decoder.

    Returns:
      Whatever the decoder returns.
    """
    encoded = self.encoder(inputs)
    if self.encoder.node_type == "rnn":
      #rnn encoder yields hidden states only
      hidden_list, cell_list = encoded, None
    else:
      hidden_list, cell_list = encoded

    if self.decoder.node_type == "rnn":
      return self.decoder(hidden_list)

    if cell_list is None:
      #rnn encoder feeding an lstm decoder: there is no encoder cell state, so
      #start from the decoder's own initial cell state
      cell_list = self.decoder.cell

    return self.decoder(hidden_list, cell_list)
