In [3]:
import os

import numpy as np

import pickle as pkl
import gzip

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset

In [5]:
# Data and embedding folders live next to (not inside) the notebook's working
# directory. The parent directory is resolved with os.path instead of splitting
# on a hard-coded "/" so it works with any platform's path separator.
# Both values keep a trailing separator because later cells build file paths
# by plain string concatenation (e.g. emb_dir + "file.p").
_project_root = os.path.dirname(os.getcwd())
data_dir = os.path.join(_project_root, 'data', '')
emb_dir = os.path.join(_project_root, 'embeddings', '')

def load_zipped_pickle(filename):
    """Read a single pickled object from a gzip-compressed file.

    Args:
        filename: Path of the gzip file; it is opened for binary reading.

    Returns:
        The object obtained by unpickling the decompressed stream.

    Note:
        Unpickling can execute arbitrary code, so only use this on files
        from a trusted source.
    """
    with gzip.open(filename, mode='rb') as compressed_file:
        unpickled = pkl.load(compressed_file)
    return unpickled

In [None]:
# Load the three gzip-pickled artifacts from emb_dir, in this order:
#   vi_emb      <- "vi_embeddings_100K.p"  (presumably the embedding matrix -- TODO confirm)
#   vi_id2word  <- "id2word_vi_dic.p"      (lookup indexed by integer id)
#   vi_word2id  <- "word2id_vi_dic.p"      (presumably the reverse, word -> id -- TODO confirm)
vi_emb, vi_id2word, vi_word2id = (
    load_zipped_pickle(emb_dir + artifact_name)
    for artifact_name in (
        "vi_embeddings_100K.p",
        "id2word_vi_dic.p",
        "word2id_vi_dic.p",
    )
)


In [13]:
# Display the entry stored under id 0 of the id -> word lookup.
vi_id2word[0]

'<pad>'

In [None]:
class HybridEmbeddings(nn.Module):
    """Embedding layer that sums lookups from two separate embedding tables.

    The incoming ids are split into two consecutive ranges that start after
    the first ``NUM_SPECIAL`` ids: ``num_fixed`` ids for the first table,
    followed by ``num_learned`` ids for the second table. Each table is
    queried with ids re-mapped by ``transform_ids`` and the two results are
    added element-wise.

    NOTE(review): ``transform_ids`` and ``NUM_SPECIAL`` are defined outside
    this class. Presumably ``transform_ids`` maps ids inside ``[start, end)``
    to table-local indices and everything else to a reserved row, which would
    explain why one row is subtracted from each table size -- confirm against
    its definition.

    Args:
        fixed_embeddings: Table exposing ``num_embeddings`` and
            ``embedding_dim`` and callable on an id tensor (presumably a
            frozen ``nn.Embedding`` -- TODO confirm).
        learned_embeddings: Table with the same interface (presumably a
            trainable ``nn.Embedding`` -- TODO confirm).
    """

    def __init__(self, fixed_embeddings, learned_embeddings):
        super(HybridEmbeddings, self).__init__()

        # One row of each table is left out of its usable id count.
        self.fixed_embeddings = fixed_embeddings
        self.num_fixed = fixed_embeddings.num_embeddings - 1

        self.learned_embeddings = learned_embeddings
        self.num_learned = learned_embeddings.num_embeddings - 1

    @property
    def embedding_dim(self):
        """Embedding dimension, as reported by the fixed table."""
        return self.fixed_embeddings.embedding_dim

    def forward(self, ids_tensor):
        """Embed ``ids_tensor`` by adding the fixed and learned lookups.

        Args:
            ids_tensor: Ids to embed; passed unchanged to ``transform_ids``
                once per table.

        Returns:
            ``fixed_embeddings(fixed_ids) + learned_embeddings(learned_ids)``.
        """
        # Boundaries of the two back-to-back id ranges.
        fixed_start = NUM_SPECIAL
        learned_start = fixed_start + self.num_fixed
        learned_end = learned_start + self.num_learned

        fixed_ids = transform_ids(ids_tensor, start=fixed_start, end=learned_start)
        learned_ids = transform_ids(ids_tensor, start=learned_start, end=learned_end)

        fixed_part = self.fixed_embeddings(fixed_ids)
        learned_part = self.learned_embeddings(learned_ids)
        return fixed_part + learned_part