In [7]:
import numpy as np
import torch 
import torch.nn as nn
import torch.nn.functional as F
import math, copy, time

import matplotlib.pyplot as plt
import seaborn
seaborn.set_context(context='talk')


In [13]:
def attention(Q, K, V, mask=None, dropout=None):
    d_k = Q.size(-1)
    scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim = -1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, V), p_attn
    

In [14]:
class LayerNorm(nn.Module):
    def __init__(self, features, eps=1e-6):
        super().__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.ones(features))
        self.eps = eps
    
    def forward(self, x):
        mean = torch.mean(x, -1, keepdim=True)
        std  = torch.mean(x, -1, keepdim=True)
        return self.a_2 *  (x - mean) / (std + self.eps) + self.b_2

In [16]:
class Encoder(nn.Module):
    def __init__(self, layer, N):
        super().__init__()
        self.layers = nn.ModuleList([copy.deepcopy(layer) for _ in range(N)])
        self.norm = LayerNorm(layer.size)
        
    def forward(self, x, mask):
        for layer in self.layers:
            x = layer(x, mask)
        return self.norm(x)

In [None]:
class SubLayerConnection(nn.Module):
    def __init__(self, size, dropout):
        super().__init__()
        self.norm = LayerNorm(size)
        self.dropout  = dropout
    
    def forward(self, x, sublayer):
        return x + self.dropout(sublayer(self.norm(x)))
        

In [None]:
class Encoder(nn.Module):
    def __init__(self, size, self_attn, feed_forward, dropout):
        super().__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer1 = SubLayerConnection(size, dropout)
        self.sublayer2 = SubLayerConnection(size, dropout)
        self.size = size
    
    def forward(self, x, mask):
        x = self.sublayer(x + self.self_a

In [5]:
class Generator(nn.Module):
    def __init__(self, d_model, vocab):
        super().__init__()
        self.proj = nn.Linear(d_model, vocab)
        
    def forward(self, x):
        return F.log_softmax(self.proj(x), dim=-1)

In [6]:
class EncoderDecoder(nn.Module):
    def __init__(self, decoder, encoder, src_embed, tgt_embed, generator):
        super().__init__()
        self.decoder = decoder
        self.encoder = encoder
        self.src_embed = src_embed
        self.tgt_embed = tgt_embed
        self.generator = generator
    
    def encode(self, src, src_mask):
        return self.encoder(self.src_embed(src), src_mask)
        
    def forward(self, src, tgt, src_mask, tgt_mask):
        

SyntaxError: incomplete input (3357663587.py, line 14)