# In [2]:
import torch.nn as nn

class Model(nn.Module):
    """Author classifier over fixed-length sequences of token ids.

    Each token id is embedded, every embedding is collapsed to a single
    scalar score, and the per-position scores are mapped to one logit per
    author.

    Args:
        emb_size: Dimension of each token embedding.
        authors: Number of output classes (one logit per author).
        maxwords: Fixed number of tokens in every input sequence.
        vocab_size: Number of rows in the embedding table; every token id
            in a batch must be strictly smaller than this value.
    """

    def __init__(self, emb_size=100, authors=2, maxwords=50, vocab_size=10000):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        # nn.Embedding takes (num_embeddings, embedding_dim), in that order.
        self.embs = nn.Embedding(vocab_size, emb_size)
        self.reduce = nn.Linear(emb_size, 1)
        self.linear = nn.Linear(maxwords, authors)

    def forward(self, batch):
        """Return raw (un-normalised) author logits.

        Args:
            batch: Integer tensor of token ids, shape (batchsize, maxwords).

        Returns:
            Tensor of shape (batchsize, authors).
        """
        # (batchsize, maxwords)
        encoded = self.embs(batch)
        # (batchsize, maxwords, emb_size)
        reduced = self.reduce(encoded)
        # (batchsize, maxwords, 1) -> drop the trailing singleton axis so the
        # word axis becomes the feature axis expected by ``self.linear``.
        reduced = reduced.squeeze(-1)
        # (batchsize, maxwords)
        classified = self.linear(reduced)
        # (batchsize, authors)
        return classified

    def predict(self, batch):
        """Return per-author scores in [0, 1] (element-wise sigmoid of logits).

        Args:
            batch: Integer tensor of token ids, shape (batchsize, maxwords).

        Returns:
            Tensor of shape (batchsize, authors).
        """
        # Call the module itself (not ``forward`` directly) so hooks run.
        return self(batch).sigmoid()
    

class Vocabulary(object):
    """Bidirectional word <-> index mapping with fixed-length encoding.

    Index 0 is reserved for ``"<UNK>"`` (unknown word) and index 1 for
    ``"<PAD>"`` (padding).

    Args:
        maxsize: Length of every sequence returned by :meth:`transcode`.
    """

    def __init__(self, maxsize=50):
        self.itw = {0: "<UNK>", 1: "<PAD>"}  # index -> word
        self.wti = {"<UNK>": 0, "<PAD>": 1}  # word -> index
        self.maxsize = maxsize
        self.unk = self.wti["<UNK>"]
        self.pad = self.wti["<PAD>"]

    @property
    def count(self):
        """Number of entries currently in the vocabulary."""
        return len(self.itw)

    def unique_transcode(self, word, learn=True):
        """Return the index of ``word``.

        Args:
            word: Token to look up.
            learn: When true, an unseen word is added to the vocabulary and
                its new index returned; otherwise the ``<UNK>`` index is
                returned and the vocabulary is left untouched.

        Returns:
            Integer index of the word.
        """
        i = self.wti.get(word)
        # Compare against None explicitly: index 0 ("<UNK>") is a valid hit.
        if i is None:
            if not learn:
                return self.unk
            # ``count`` is a property; the next free index is its value.
            i = self.wti[word] = self.count
            self.itw[i] = word
        return i

    def transcode(self, sentence, learn=True):
        """Encode ``sentence`` as exactly ``self.maxsize`` indices.

        Sentences longer than ``maxsize`` are truncated (dropped words are
        not learned); shorter ones are right-padded with the ``<PAD>`` index.

        Args:
            sentence: Sequence of words.
            learn: Forwarded to :meth:`unique_transcode`.

        Returns:
            List of ``self.maxsize`` integer indices.
        """
        encoded = [
            self.unique_transcode(word, learn=learn)
            for word in sentence[:self.maxsize]
        ]
        return encoded + [self.pad] * (self.maxsize - len(encoded))

    
class Reader:
    """Container for a mapping of file paths to integer class ids."""

    def __init__(self, files: Dict[str, int]):
        """Store the given mapping on the instance.

        Args:
            files: Mapping of file path to class id.
        """
        # The mapping is kept by reference, not copied.
        self.files: Dict[str, int] = files
    