In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext import data
import math

torch.manual_seed(1)

<torch._C.Generator at 0x20fc123d370>

In [28]:
import random
from torch import Tensor
from typing import Tuple
import time

In [2]:
path ='./data'

## 데이터 로드

In [3]:
import sentencepiece as spm
spm.SentencePieceTrainer.Train('--input={}/train.dat --model_prefix=m --vocab_size=50000'.format(path))
sp = spm.SentencePieceProcessor()
sp.Load('m.model')

True

In [4]:
tokenizer = lambda x: sp.EncodeAsPieces(x)

In [5]:
PAD, EOS, BOS = 1,2,3
fix_length=None

In [6]:
SRC = data.Field(sequential=True, # False면 tokenization이 적용되지 않는다.
            use_vocab=True, # False면 data는 이미 numerical한 상태여야 한다.
            fix_length=fix_length,
            preprocessing=None,
            lower=True,
            tokenize=tokenizer,
        #                  include_lengths=True, # list의 길이까지 튜플 형식으로 반환
#             batch_first=True, # Whether to produce tensors with the batch dimension first
            )

In [7]:
TGT = data.Field(sequential=True,
            use_vocab=True,
            fix_length=fix_length,
            preprocessing=None,
            lower=True,
            tokenize=tokenizer,
#                  include_lengths=True,
#             batch_first=True
            )

In [8]:
train_data, val_data, test_data = data.TabularDataset.splits(path=path+'/',
                                                        train='train.dat',
                                                        validation='val.dat',
                                                        test='test.dat',
                                                        format='tsv',
                                                        fields=[('tgt', TGT), ('src',SRC)])

In [9]:
print(f"Number of training examples: {len(train_data.examples)}")
print(f"Number of validation examples: {len(val_data.examples)}")
print(f"Number of testing examples: {len(test_data.examples)}")

Number of training examples: 35360
Number of validation examples: 4420
Number of testing examples: 4420


In [10]:
print(vars(train_data.examples[0]))

{'tgt': ['▁fast', '▁methods', '▁for', '▁recover', 'ing', '▁sparse', '▁parameters', '▁in', '▁linear', '▁low', '▁rank', '▁models'], 'src': ['▁in', '▁this', '▁paper', ',', '▁we', '▁investigate', '▁the', '▁recovery', '▁of', '▁a', '▁sparse', '▁weight', '▁vector', '▁(', 'parameters', '▁vector', ')', '▁from', '▁a', '▁set', '▁of', '▁noisy', '▁linear', '▁combinations', '.', '▁however', ',', '▁only', '▁partial', '▁information', '▁about', '▁the', '▁matrix', '▁representing', '▁the', '▁linear', '▁combinations', '▁is', '▁available', '.', '▁assum', 'ing', '▁a', '▁low', '-', 'rank', '▁structure', '▁for', '▁the', '▁matrix', ',', '▁one', '▁natural', '▁solution', '▁would', '▁be', '▁to', '▁first', '▁apply', '▁a', '▁matrix', '▁completion', '▁on', '▁the', '▁data', ',', '▁and', '▁then', '▁to', '▁solve', '▁the', '▁resulting', '▁compressed', '▁sensing', '▁problem', '.', '▁in', '▁big', '▁data', '▁applications', '▁such', '▁as', '▁massive', '▁mimo', '▁and', '▁medical', '▁data', ',', '▁the', '▁matrix', '▁completio

In [11]:
SRC.build_vocab(train_data)
TGT.build_vocab(train_data)

In [12]:
print(f"Unique tokens in source (abstract) vocabulary: {len(SRC.vocab)}")
print(f"Unique tokens in target (title) vocabulary: {len(TGT.vocab)}")

Unique tokens in source (abstract) vocabulary: 41855
Unique tokens in target (title) vocabulary: 17770


In [13]:
torch.cuda.is_available()

False

In [14]:
batch_size = 16
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [15]:
# bucketiterator defines an iterator that batches examples of similar lengths together
train_iter, valid_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data),
     batch_size=batch_size,
     sort_key=lambda x: data.interleave_keys(len(x.tgt), len(x.src)),
     sort_within_batch=True,
     device=device
    )

In [16]:
SRC.vocab.stoi

defaultdict(<bound method Vocab._default_unk_index of <torchtext.vocab.Vocab object at 0x0000020FC2B089C8>>,
            {'<unk>': 0,
             '<pad>': 1,
             '▁the': 2,
             '.': 3,
             ',': 4,
             '▁of': 5,
             '-': 6,
             '▁and': 7,
             '▁a': 8,
             '▁to': 9,
             '▁in': 10,
             '▁we': 11,
             '▁is': 12,
             '▁for': 13,
             '▁that': 14,
             '▁on': 15,
             '▁with': 16,
             '▁this': 17,
             '▁(': 18,
             '▁learning': 19,
             '▁are': 20,
             '▁as': 21,
             '▁by': 22,
             ')': 23,
             '▁neural': 24,
             's': 25,
             '▁': 26,
             '▁an': 27,
             '▁network': 28,
             'ing': 29,
             '▁our': 30,
             '▁networks': 31,
             '▁deep': 32,
             '▁data': 33,
             '▁can': 34,
             '▁from': 35,
        

In [17]:
TGT.vocab.stoi

defaultdict(<bound method Vocab._default_unk_index of <torchtext.vocab.Vocab object at 0x0000020FB19D21C8>>,
            {'<unk>': 0,
             '<pad>': 1,
             '-': 2,
             '▁for': 3,
             '▁of': 4,
             '▁learning': 5,
             '▁and': 6,
             '▁neural': 7,
             '▁deep': 8,
             '▁a': 9,
             '▁in': 10,
             ':': 11,
             '▁networks': 12,
             '▁the': 13,
             '▁with': 14,
             's': 15,
             'ing': 16,
             '▁': 17,
             '▁on': 18,
             '▁network': 19,
             '▁gradient': 20,
             '▁to': 21,
             '▁convolutional': 22,
             'ed': 23,
             '▁machine': 24,
             '▁using': 25,
             '▁multi': 26,
             '▁recurrent': 27,
             '▁classification': 28,
             '▁data': 29,
             '▁from': 30,
             ',': 31,
             'using': 32,
             '▁via': 33,
           

In [18]:
# test
for batch in train_iter:
    break
print(batch.src)
print(batch.tgt)

tensor([[ 2892,    10,    32,  ...,    27,    10,     8],
        [    4,    17,   258,  ...,   481,    17,   148],
        [   16,   980,    19,  ...,  2352,   980,   140],
        ...,
        [  254,   886,   284,  ...,     1,     1,     1],
        [  271,   802, 12142,  ...,     1,     1,     1],
        [    3,     3,     1,  ...,     1,     1,     1]])
tensor([[  804,    13,    17,   671,     7,   243,     9, 11046,    26,     7,
          6307,   998, 16696,    62,   117,     8],
        [    4,   151,    82,  2735,    19,   935,   277,    10,   110,    19,
           211,    23,   265,     2,  1509,    27],
        [ 2059,   544,  2034,  1982,    65,    62,  1526, 10759,   393,  1198,
          6366,  1226,     6,   236,    50,   669],
        [ 1601,   835,   617,   554,    50,   193,    19,   116,    39,   389,
           203,    11, 15019,   600,  3239,    36],
        [   94,   240,     2,    10,    47,  3643,    14,    15,     8,    37,
          7048,   688,    11,    27

In [19]:
for batch in train_iter:
    break
small_tgt = batch.tgt
small_src = batch.src
print(small_src.shape)
print(small_tgt.shape)

torch.Size([253, 16])
torch.Size([15, 16])


In [20]:
small_tgt[0]

tensor([ 966, 1442, 1408,    8, 9788,  182,  252, 1297, 4287,  714,   55,  127,
           9, 1236,   44,  100])

In [29]:
class Encoder(nn.Module):
    def __init__(self, 
                 input_dim: int,
                 emb_dim: int, 
                 hid_dim: int, 
                 n_layers:int, 
                 dropout:float):
        super().__init__()
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self,
                src: Tensor) -> Tuple[Tensor]:
        embedded = self.dropout(self.embedding(src))
        outputs, (hx, cx) = self.rnn(embedded)
        return hx, cx
        

In [30]:
class Decoder(nn.Module):
    def __init__(self,
                 output_dim: int,
                 emb_dim: int,
                 hid_dim: int,
                 n_layers: int,
                 dropout: float):
        super().__init__()
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.linear_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, 
                hx: Tensor,
                cx: Tensor):
        x = x.unsqueeze(0)
        embedded = self.dropout(self.embedding(x))
        output, (hx, cx) = self.rnn(embedded, (hx, cx))
        prediction = self.linear_out(output.squeeze(0))
        return prediction, hx, cx
        

In [31]:
class Seq2Seq(nn.Module):
    def __init__(self, 
                 encoder: nn.Module,
                 decoder: nn.Module,
                 device: torch.device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
        assert encoder.hid_dim == decoder.hid_dim
        assert encoder.n_layers == encoder.n_layers
    
    def forward(self,
                src: Tensor,
                tgt: Tensor, 
                teacher_forcing_ratio: float = .5) -> Tensor:
        batch_size = tgt.shape[1]
        tgt_len = tgt.shape[0]
        tgt_vocab_size = self.decoder.output_dim
        
        outputs = torch.zeros(tgt_len, batch_size, tgt_vocab_size).to(self.device)
        hx, cx = self.encoder(src)
        inp = tgt[0,:]
        for t in range(1, tgt_len):
            output, hx, cx = self.decoder(inp, hx, cx)
            outputs[t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(1)
            inp = tgt[t] if teacher_force else top1
        return outputs
 


In [32]:
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TGT.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Seq2Seq(enc, dec, device).to(device)

In [33]:
def init_weights(m: nn.Module):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(41855, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(17770, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (linear_out): Linear(in_features=512, out_features=17770, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [34]:
def count_parameters(model: nn.Module):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 31,736,426 trainable parameters


In [35]:
PAD_IDX = TGT.vocab.stoi[TGT.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index = PAD_IDX)

In [36]:
def train(model: nn.Module,
          iterator: data.BucketIterator,
          optimizer, criterion,
          clip: float):
    model.train()
    epoch_loss = 0
    for i, batch in enumerate(iterator):
        src = batch.src
        tgt = batch.tgt
        
        optimizer.zero_grad()
        output = model(src, tgt)
        output = output[1:].view(-1, output.shape[-1])
        tgt = tgt[1:].view(-1)
        
        loss = criterion(output, tgt)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip) 
        # clip the gradients to prevent form them from exploding
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator) # averaged over all batches

In [37]:
def evaluate(model: nn.Module,
             iterator: data.BucketIterator,
             criterion):
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            src = batch.src
            tgt = batch.tgt
            output = model(src, tgt, 0) # trun off teacher forcing
            output = output[1:].view(-1, output.shape[-1])
            tgt = tgt[1:].view(-1)
            loss = criterion(output, tgt)
            epoch_loss += loss.item()
    return epoch_loss / len(iterator)

In [38]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [39]:
optimizer = optim.Adam(model.parameters())

In [40]:
N_EPOCHS = 3
CLIP = 1

best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss = train(model, train_iter, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iter, criterion)
    
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')
    

Epoch: 01 | Time: 173m 46s
	Train Loss: 6.600 | Train PPL: 735.263
	 Val. Loss: 6.730 |  Val. PPL: 837.055
Epoch: 02 | Time: 175m 12s
	Train Loss: 5.998 | Train PPL: 402.726
	 Val. Loss: 6.581 |  Val. PPL: 721.138
Epoch: 03 | Time: 174m 40s
	Train Loss: 5.684 | Train PPL: 294.190
	 Val. Loss: 6.451 |  Val. PPL: 633.074


In [41]:
model.load_state_dict(torch.load('tut1-model.pt'))

test_loss = evaluate(model, test_iter, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

| Test Loss: 6.479 | Test PPL: 651.592 |


In [74]:
def sample(abstract: str,
           title_len: int):
    sample_src = SRC.process([abstract])
    model.eval()
    init_tgt = torch.zeros([title_len,1], dtype = torch.int64, device = device)
    with torch.no_grad():
        output = model(sample_src, init_tgt,0)
#         output = output[1:].view(-1, output.shape[-1])
#         tgt = init_tgt[1:].view(-1)
#         loss = criterion(output, tgt)
        return output

In [75]:
torch.ones([15,1], dtype = torch.int64, device = device)

tensor([[1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1]])

In [76]:
abstract = "The notion of approachability in repeated games with vector payoffs was introduced by Blackwell in the 1950s, along with geometric conditions for approachability and corresponding strategies that rely on computing {\\em steering directions} as projections from the current average payoff vector to the (convex) target set. Recently, Abernethy, Batlett and Hazan (2011) proposed a class of approachability algorithms that rely on the no-regret properties of Online Linear Programming for computing a suitable sequence of steering directions. This is first carried out for target sets that are convex cones, and then generalized to any convex set by embedding it in a higher-dimensional convex cone. In this paper we present a more direct formulation that relies on the support function of the set, along with suitable Online Convex Optimization algorithms, which leads to a general class of approachability algorithms. We further show that Blackwell's original algorithm and its convergence follow as a special case.\n"

In [87]:
sample_emb = sample(abstract, 15)

In [88]:
sample_emb

tensor([[[  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000]],

        [[-10.2533, -10.5065,   4.9751,  ...,  -8.1297,  -7.0462, -10.5310]],

        [[ -8.5878,  -8.5561,   3.1518,  ...,  -5.4253,  -3.1851,  -8.8318]],

        ...,

        [[ -8.9973,  -8.8578,   4.2204,  ...,  -7.3072,  -5.8002,  -8.9551]],

        [[ -8.0867,  -7.9898,   3.6826,  ...,  -6.9334,  -4.9049,  -7.9369]],

        [[ -7.3639,  -7.3315,   3.4692,  ...,  -6.6031,  -4.5958,  -7.2013]]])

In [60]:
usetgtvocab = [TGT.vocab.itos[idx] for idx in sample_idxs]

TypeError: only integer tensors of a single element can be converted to an index

In [None]:
usetgtvocab

In [0]:
https://github.com/pytorch/text/issues/346

In [0]:
def evaluate(model: nn.Module,
             iterator: data.BucketIterator,
             criterion):
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            src = batch.src
            tgt = batch.tgt
            output = model(src, tgt, 0) # trun off teacher forcing
            output = output[1:].view(-1, output.shape[-1])
            tgt = tgt[1:].view(-1)
            loss = criterion(output, tgt)
            epoch_loss += loss.item()
    return epoch_loss / len(iterator)

In [0]:
def sample(data_loader, net, prime, sex, origin):

    origin_tensor = data_loader.ORIGIN.process([origin]).float().to(device)
    sex_tensor = data_loader.SEX.process([sex]).float().to(device)

    prime = prime.lower()
    prime_tensor = data_loader.BABYNAME.process([prime])[:, :-1].to(device)
    bsz, prime_tensor_length = prime_tensor.size()

    # 인풋을 모델에 넣어 출력합니다.
    net.eval()
    with torch.no_grad():
        # batch_size = 1
        hidden = net.init_hidden(1)

        for step in range(prime_tensor_length):
            with torch.no_grad():
                predication, hx, cx = net(encoder, )
            probabilities = F.softmax(outputs, 1)

    return probabilities.squeeze()

In [0]:
tensor([   5,  425,    9,    5, 1629, 2202,   52,   38,   18,   13,  145, 9863,
         179,   18,  125,   13], device='cuda:0')

In [0]:
torch.ones([15,1])

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])