<a href="https://colab.research.google.com/github/ovbystrova/dpl/blob/master/notebooks/GAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# https://github.com/williamSYSU/TextGAN-PyTorch
# https://github.com/DSleeps/Text-GAN
# https://github.com/rtst777/TextGAN
# https://github.com/MuratArda-coder/GAN-Text-Generation

In [0]:
# TODO Gumbel_softmax
# TODO train/test loop
# TODO Move the models into models
# and maybe the whole train/test procedure into modules

In [1]:
# Fetch the project repository; a later cell imports from its `modules` package.
!git clone https://github.com/ovbystrova/dpl.git
import os
# Make the cloned repository the working directory (path assumes the notebook runs under /content).
os.chdir('/content/dpl')

Cloning into 'dpl'...
remote: Enumerating objects: 4, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 189 (delta 0), reused 0 (delta 0), pack-reused 185[K
Receiving objects: 100% (189/189), 228.87 MiB | 33.56 MiB/s, done.
Resolving deltas: 100% (101/101), done.
Checking out files: 100% (28/28), done.


In [0]:
%%capture
# Install the `tokenizers` package; the %%capture magic above hides the pip log.
!pip install tokenizers

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchtext.datasets import WikiText2
from torchtext import data

# Use CUDA when PyTorch can see a GPU, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [0]:
from modules.sntpiece_tokenization import make_tokenizer, clean_data, special_tokens

In [0]:
# State dict of the pretrained generator, loaded later via model.load_state_dict.
LM_PATH = '/content/language_model'
# Saved weights that are copied into the `embed_y` target embedding.
EMB_PATH  = '/content/embeddings_y.pt'
# Batch size and BPTT window length handed to the BPTTIterator splits.
BATCH_SIZE = 64
SEQ_LENGTH = 40

# Data

In [0]:
%%capture
# Download and unpack the WikiText-2 archive into the current directory (output hidden by %%capture).
!wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
!unzip 'wikitext-2-v1.zip'

In [7]:
# Build the tokenizer with the project helper; the bare name displays its configuration.
tokenizer = make_tokenizer()
tokenizer

Tokenizer(vocabulary_size=30000, model=SentencePieceBPE, unk_token=<unk>, replacement=▁, add_prefix_space=True, dropout=None)

In [0]:
def tokenize(text, tokenizer=tokenizer):
    """Turn a raw string into a list of token strings.

    The text is first passed through the project helpers `clean_data` and
    `special_tokens` (in that order) and then encoded with `tokenizer`.

    :param text: raw input string
    :param tokenizer: object exposing `encode(str)` whose result has a `.tokens`
        attribute; defaults to the notebook-level tokenizer
    :return: the `.tokens` of the encoded text
    """
    prepared = special_tokens(clean_data(text))
    encoding = tokenizer.encode(prepared)
    return encoding.tokens

In [0]:
%%capture
TEXT = data.Field(sequential=True, 
                  include_lengths=False, 
                  batch_first=True, 
                  tokenize=tokenize,
                  lower=True, 
                  pad_first=True)

train, valid, test = WikiText2.splits(TEXT)

TEXT.build_vocab(train, valid, unk_init = torch.Tensor.normal_, vectors='glove.6B.200d')
vocab = TEXT.vocab

In [10]:
# Report the vocabulary size and show the first ten entries of the index-to-token list.
print('Vocab size:', len(TEXT.vocab.itos))
TEXT.vocab.itos[:10]

Vocab size: 22734


['<unk>', '<pad>', '▁', '▁the', '<eos>', '<start>', '▁,', '▁of', '▁and', '▁in']

# Model

## Generator

In [0]:
class MyModel(nn.Module):
    """Generator: embedding -> bidirectional LSTM -> per-position vocabulary scores.

    :param vocab_size: number of embedding rows and width of the output scores
    :param embed_dim: dimensionality of the token embeddings
    :param hidden_size: LSTM hidden size per direction
    """

    def __init__(self, vocab_size, embed_dim, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_size,
            bidirectional=True,
            batch_first=True,
        )
        # Both LSTM directions are concatenated, hence 2 * hidden_size input features.
        self.fc = nn.Linear(2 * hidden_size, vocab_size)
        # Projection from vocabulary scores to embedding space. forward() does not
        # call it at the moment, but it remains part of the module's parameters.
        self.fc2 = nn.Linear(vocab_size, embed_dim)

        self.init_weights()

    def init_weights(self):
        """Initialise the embedding table (uniform) and the `fc` layer (Xavier weight, zero bias)."""
        nn.init.uniform_(self.embedding.weight)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.zeros_(self.fc.bias)

    def forward(self, batch):
        """
        Score every vocabulary entry at every position.

        :param batch: either an object with a `.text` tensor (which is transposed
            before use) or a tensor of token ids used as-is
        :return: tensor of shape (bs, sq, vocab_size)
        """
        if hasattr(batch, 'text'):
            token_ids = batch.text.T
        else:
            token_ids = batch

        embedded = self.embedding(token_ids)
        hidden_states, _ = self.rnn(embedded)  # (bs, sq, 2 * hidden_size)
        scores = self.fc(hidden_states)  # (bs, sq, vocab_size)
        # The softmax + fc2 projection to (bs, sq, embed_dim) is intentionally not applied here.
        return scores

In [0]:
# Generator instance; the vocabulary size comes from the torchtext vocab built earlier.
model = MyModel(vocab_size=len(TEXT.vocab.itos),
                embed_dim=200,
                hidden_size=128,
               )
model.to(device)
# model.embedding.weight.data.copy_(TEXT.vocab.vectors);

optimizer = optim.Adam(model.parameters())
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)

# Index 1 is '<pad>' in TEXT.vocab.itos, so padded positions are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=1)
cosine = nn.CosineSimilarity(dim=-1)
mse = nn.MSELoss()

# Target embedding space: a separate embedding table filled from the saved weights.
embed_y = nn.Embedding(len(TEXT.vocab.itos), 200).to(device)
# map_location remaps the saved tensor onto the active device, so the load also works
# on a CPU-only runtime when the file was written from a GPU session.
embed_y.weight.data.copy_(torch.load(EMB_PATH, map_location=device));

In [77]:
# Display the loaded target-embedding weights next to the generator's own embedding weights.
embed_y.weight.data, model.embedding.weight.data

(tensor([[ 3.0568,  0.5324,  0.5345,  ...,  0.9580, -2.3471, -0.8556],
         [ 0.7384,  2.5978,  0.7651,  ..., -1.1332, -0.6784,  0.7710],
         [-1.2101,  0.7566, -0.1192,  ..., -0.0474,  0.1571, -0.9504],
         ...,
         [ 0.9455,  1.1042, -1.0881,  ..., -0.7559, -1.0561, -1.2703],
         [ 0.3659,  0.6635,  0.2631,  ...,  0.1843,  0.2070, -0.4724],
         [-0.3153, -0.4700,  0.1212,  ...,  1.4813,  0.4461,  0.7010]],
        device='cuda:0'),
 tensor([[0.9057, 0.7520, 0.7495,  ..., 0.6955, 0.7204, 0.5740],
         [0.5404, 0.5462, 0.7705,  ..., 0.4520, 0.8386, 0.5561],
         [0.4474, 0.6563, 0.7178,  ..., 0.1666, 0.2965, 0.6375],
         ...,
         [0.9409, 0.5975, 0.5933,  ..., 0.7461, 0.0658, 0.6108],
         [0.1584, 0.4989, 0.3269,  ..., 0.7748, 0.3414, 0.0637],
         [0.5473, 0.2768, 0.6072,  ..., 0.7621, 0.7178, 0.3665]],
        device='cuda:0'))

In [18]:
# BPTT iterators for the generator: windows of SEQ_LENGTH tokens in batches of BATCH_SIZE.
train_iterator_g, valid_iterator_g, test_iterator_g = data.BPTTIterator.splits(
    (train, valid, test),
    batch_size=BATCH_SIZE,
    bptt_len=SEQ_LENGTH,
    device=device,
    repeat=False, 
    shuffle=True)

# Pull one batch and list its attributes (it exposes both `text` and `target`).
b = next(iter(train_iterator_g)); vars(b).keys()

dict_keys(['batch_size', 'dataset', 'fields', 'text', 'target'])

In [19]:
# Sanity check on a single batch: compare the generator output with the embedded
# targets using MSE and (1 - mean cosine similarity).
# NOTE(review): mse/cosine need `pred` to share its last dimension with `y_emb` (200),
# while MyModel.forward as written returns vocab-size scores — confirm which version
# of forward produced the recorded output before relying on this cell.
for el in train_iterator_g:
    x = el.text
    y = el.target.T  # transposed to batch-first
    print(x.size(), y.size())
    pred = model(el)
    y_emb = embed_y(y)  # targets looked up in the target embedding table
    
    loss_mse = mse(pred, y_emb)
    print('mse', loss_mse)
    loss_cosine = cosine(pred, y_emb)  # similarity along the last axis, one value per position
    print('cosine', loss_cosine.size())

    # Average over positions, then over the batch.
    mean_cosine  = torch.mean(loss_cosine, dim=-1)
    mean_mean_cosine = torch.mean(mean_cosine)
    print(mean_cosine.size(), mean_mean_cosine.size())
    print(1-mean_mean_cosine)
    break  # one batch is enough for the check

torch.Size([40, 64]) torch.Size([64, 40])
mse tensor(0.9678, device='cuda:0', grad_fn=<MeanBackward0>)
cosine torch.Size([64, 40])
torch.Size([64]) torch.Size([])
tensor(1.0025, device='cuda:0', grad_fn=<RsubBackward1>)


## Discriminator

### from RelGAN

https://github.com/williamSYSU/TextGAN-PyTorch

In [0]:
class CNNDiscriminator(nn.Module):
    """CNN text discriminator: embedding -> parallel convolutions -> max-pool -> highway -> 2 outputs.

    Construction reads the module-level `cfg.dis_init` (and uses `math`) to pick
    the parameter initialisation scheme, so both must exist before instantiation.

    :param embed_dim: width of the token embeddings (also the width of every conv kernel)
    :param vocab_size: number of rows in the embedding table
    :param filter_sizes: kernel heights, one per convolution
    :param num_filters: output channels, one per convolution (paired with filter_sizes)
    :param padding_idx: index handed to nn.Embedding as padding_idx
    :param gpu: stored on the instance; not used by this class itself
    :param dropout: dropout probability applied before the output layer
    """

    def __init__(self, embed_dim, vocab_size, filter_sizes, num_filters, padding_idx, gpu=False,
                 dropout=0.2):
        super(CNNDiscriminator, self).__init__()
        self.embedding_dim = embed_dim
        self.vocab_size = vocab_size
        self.padding_idx = padding_idx
        self.feature_dim = sum(num_filters)
        self.gpu = gpu

        # get_feature() looks tokens up through this table, so it must exist on the
        # base class; subclasses may replace it after calling super().__init__().
        self.embeddings = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, n, (f, embed_dim)) for (n, f) in zip(num_filters, filter_sizes)
        ])
        self.highway = nn.Linear(self.feature_dim, self.feature_dim)
        self.feature2out = nn.Linear(self.feature_dim, 2)
        self.dropout = nn.Dropout(dropout)

        self.init_params()

    def forward(self, inp):
        """
        Get final predictions of discriminator
        :param inp: batch_size * max_seq_len (token ids)
        :return: pred: batch_size * 2
        """
        feature = self.get_feature(inp)
        pred = self.feature2out(self.dropout(feature))

        return pred

    def get_feature(self, inp):
        """
        Get feature vector of given sentences
        :param inp: batch_size * max_seq_len (token ids)
        :return: batch_size * feature_dim
        """
        emb = self.embeddings(inp).unsqueeze(1)  # batch_size * 1 * max_seq_len * embed_dim
        convs = [F.relu(conv(emb)).squeeze(3) for conv in self.convs]  # [batch_size * num_filter * length]
        pools = [F.max_pool1d(conv, conv.size(2)).squeeze(2) for conv in convs]  # [batch_size * num_filter]
        pred = torch.cat(pools, 1)  # tensor: batch_size * feature_dim
        highway = self.highway(pred)
        gate = torch.sigmoid(highway)
        pred = gate * F.relu(highway) + (1. - gate) * pred  # highway

        return pred

    def init_params(self):
        """Re-initialise every trainable parameter according to `cfg.dis_init`.

        Supported values: 'uniform' (U(-0.05, 0.05)), 'normal' (std = 1/sqrt(shape[0]))
        and 'truncated_normal' (same std, values limited to two standard deviations).
        Any other value leaves the parameters at their default initialisation.
        All parameters are touched, including the embedding row at padding_idx.
        """
        for param in self.parameters():
            if not (param.requires_grad and len(param.shape) > 0):
                continue
            stddev = 1 / math.sqrt(param.shape[0])
            if cfg.dis_init == 'uniform':
                torch.nn.init.uniform_(param, a=-0.05, b=0.05)
            elif cfg.dis_init == 'normal':
                torch.nn.init.normal_(param, std=stddev)
            elif cfg.dis_init == 'truncated_normal':
                # Built-in truncated normal; no project-level helper is required.
                torch.nn.init.trunc_normal_(param, mean=0.0, std=stddev,
                                            a=-2 * stddev, b=2 * stddev)

In [0]:
class RelGAN_D(CNNDiscriminator):
    """RelGAN-style discriminator built on top of CNNDiscriminator.

    The filter configuration is taken from the module-level `dis_filter_sizes`
    and `dis_num_filters` lists. The embedding is a bias-free linear map, so the
    input may be a vocabulary-sized vector per position instead of token ids.

    :param embed_dim: total embedding width
    :param max_seq_len: stored on the instance
    :param num_rep: number of slices the embedding width is divided into
    :param vocab_size: width of a vocabulary-sized input vector
    :param padding_idx: forwarded to the base class
    :param gpu: forwarded to the base class
    :param dropout: dropout probability applied before the output layers
    """

    def __init__(self, embed_dim, max_seq_len, num_rep, vocab_size, padding_idx, gpu=False, dropout=0.25):
        super().__init__(embed_dim, vocab_size, dis_filter_sizes, dis_num_filters, padding_idx,
                         gpu, dropout)

        self.embed_dim = embed_dim
        self.max_seq_len = max_seq_len
        self.feature_dim = sum(dis_num_filters)
        # Width of one embedding slice; convolutions stride over slices of this width.
        self.emb_dim_single = int(embed_dim / num_rep)

        self.embeddings = nn.Linear(vocab_size, embed_dim, bias=False)

        conv_layers = []
        for n_filters, height in zip(dis_num_filters, dis_filter_sizes):
            conv_layers.append(
                nn.Conv2d(1, n_filters, (height, self.emb_dim_single), stride=(1, self.emb_dim_single))
            )
        self.convs = nn.ModuleList(conv_layers)

        self.highway = nn.Linear(self.feature_dim, self.feature_dim)
        self.feature2out = nn.Linear(self.feature_dim, 100)
        self.out2logits = nn.Linear(100, 1)
        self.dropout = nn.Dropout(dropout)

        # Re-run the initialisation so the layers created above are covered as well.
        self.init_params()

    def forward(self, inp):
        """
        Get logits of discriminator
        :param inp: batch_size * seq_len * vocab_size (projected through `embeddings`)
                    or a tensor with any other last dimension, which is used as-is
        :return logits: [batch_size * num_rep] (1-D tensor)
        """
        # Only vocabulary-sized inputs go through the linear embedding.
        if inp.size()[-1] == self.vocab_size:
            emb = self.embeddings(inp)
        else:
            emb = inp
        emb = emb.unsqueeze(1)  # add the channel axis expected by Conv2d

        pooled = []
        for conv in self.convs:
            activated = F.relu(conv(emb))  # batch_size * num_filter * (seq_len-k_h+1) * num_rep
            # Max over the sequence axis only; the representation axis is kept.
            pooled.append(F.max_pool2d(activated, (activated.size(2), 1)).squeeze(2))

        features = torch.cat(pooled, 1)  # batch_size * feature_dim * num_rep
        features = features.permute(0, 2, 1).contiguous().view(-1, self.feature_dim)  # (batch_size * num_rep) * feature_dim

        transformed = self.highway(features)
        gate = torch.sigmoid(transformed)
        features = gate * F.relu(transformed) + (1. - gate) * features  # highway

        hidden = self.feature2out(self.dropout(features))
        return self.out2logits(hidden).squeeze(1)  # [batch_size * num_rep]

In [0]:
# Iterators for the discriminator: a second, independent set of BPTT iterators
# created with the same settings as the generator's.
train_iterator_d, valid_iterator_d, test_iterator_d = data.BPTTIterator.splits(
    (train, valid, test),
    batch_size=BATCH_SIZE,
    bptt_len=SEQ_LENGTH,
    device=device,
    repeat=False, 
    shuffle=True)

In [0]:
import math
from collections import namedtuple
from types import SimpleNamespace

# Kernel heights and the matching number of filters used by RelGAN_D's convolutions.
dis_filter_sizes = [2, 3, 4, 5]
dis_num_filters = [300, 300, 300, 300]

# Settings read by CNNDiscriminator.init_params. A plain mutable namespace instance is
# used instead of patching an attribute onto a namedtuple class; `cfg.dis_init` can
# still be read and reassigned exactly as before.
cfg = SimpleNamespace(dis_init='uniform')

In [88]:
# Discriminator over 200-wide embeddings with a single representation (num_rep=1).
discriminator = RelGAN_D(embed_dim=200,
                         max_seq_len=SEQ_LENGTH+10,
                         num_rep=1,
                         vocab_size=len(TEXT.vocab),
                         padding_idx=TEXT.vocab.stoi['<pad>'],
                         gpu=True)
discriminator.to(device)

# Binary loss applied directly to the discriminator's raw logits.
bce = nn.BCEWithLogitsLoss()
bce.to(device)

BCEWithLogitsLoss()

In [94]:
# Smoke test: score one real batch and one generated batch with the discriminator.
# Everything runs under no_grad — nothing is trained here.
for el in train_iterator_d:
    with torch.no_grad():
        # Real samples: tokens looked up in the target embedding table.
        x = embed_y(el.text.T)
        print(x.size(), len(x.size()))
        print(x.type())
        y = torch.ones((x.size()[0])).to(device)  # label 1 = real
        print(y.size(), y)

        preds = discriminator(x)
        print(preds.size())
        bce_crit = bce(preds, y)
        print(bce_crit)
        print(preds)

        print('===And now the generator===')
        for elem in train_iterator_g:
            x_g = elem.text
            y_g = elem.target.T
            print(x_g.size(), y_g.size())

            # Run the generator on its own batch `elem`, not on the discriminator batch `el`.
            # NOTE(review): the output is raw vocabulary scores; the Gumbel-softmax step
            # from the TODO list is not applied before the discriminator sees them.
            pred = model(elem)

            g_preds = discriminator(pred)
            # One label per generated logit; label 0 = fake.
            y_gen = torch.zeros((g_preds.size()[0])).to(device)
            print(g_preds.size())
            # Loss and printout use the logits of the generated batch.
            bce_g = bce(g_preds, y_gen)
            print(bce_g)
            print(g_preds)
            break  # one generated batch is enough
    break  # one real batch is enough

torch.Size([64, 40, 200]) 3
torch.cuda.FloatTensor
torch.Size([64]) tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0')
torch.Size([64])
tensor(0.5397, device='cuda:0')
tensor([ 4.8655e-01, -1.7155e-01,  7.0880e-01,  7.8546e-01,  5.3187e-01,
        -1.3842e-01,  5.4085e-01,  8.0878e-01,  1.7995e-01,  4.6545e-01,
         1.7111e-01,  1.9617e-01,  6.9969e-01,  4.6513e-01,  9.4334e-01,
         5.4059e-01,  1.6986e-01,  7.1093e-01,  5.4225e-01,  6.6428e-01,
         7.6511e-02,  6.3933e-01,  1.9427e-01,  2.8120e-01,  2.9529e-01,
         1.1103e-02, -1.4094e-01,  2.3796e-01,  5.8419e-01,  3.0412e-01,
         1.5034e-01, -6.3595e-02,  2.1817e-01,  2.9322e-01,  6.4291e-01,
         5.5429e-01,  4.8597e-01,  7.1266e-01,  2.3863e-01,  9.193

In [0]:
# How should the generated output be passed through?

### from LeakGAN

### from me

## test if everything is ok

In [0]:
def _test_epoch(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    losses = []
    cosines = []

    n_batches = len(iterator)
    with torch.no_grad():
        for batch in iterator:
            pred = model(batch)
            y_emb = embed_y(batch.target.T)
            loss = mse(pred, y_emb)
            losses.append(loss.item())
            epoch_loss += loss.data.item()

            cosine_batch = (1- torch.mean(torch.mean(cosine(pred, y_emb)), dim=-1)).item()
            cosines.append(cosine_batch)

    return epoch_loss / n_batches, losses, cosines

In [52]:
# Baseline evaluation on the generator's test iterator, run before the pretrained
# weights are loaded. `test_iterator_g` is the test iterator defined in this notebook.
test_loss, epoch_test, epoch_tcosines = _test_epoch(model, test_iterator_g, mse)
test_loss

0.9693561161557833

In [90]:
# Load the weights of the pretrained generator (notebooks/Language_Model_exp.ipynb).
# map_location remaps the stored tensors onto the active device, so the checkpoint
# also loads on a CPU-only runtime.
model.load_state_dict(torch.load(LM_PATH, map_location=device))

<All keys matched successfully>

In [0]:
# Evaluate again on the generator's test iterator, now with the loaded weights.
test_loss, epoch_test, epoch_tcosines = _test_epoch(model, test_iterator_g, mse)
test_loss

# GAN stuff

# Train stuff