In [6]:
import numpy as np
import torch

In [243]:
data_str = """
A transformer is a passive component that transfers electrical energy from one electrical circuit to another circuit, or multiple circuits. A varying current in any coil of the transformer produces a varying magnetic flux in the transformer's core, which induces a varying electromotive force (EMF) across any other coils wound around the same core. Electrical energy can be transferred between separate coils without a metallic (conductive) connection between the two circuits. Faraday's law of induction, discovered in 1831, describes the induced voltage effect in any coil due to a changing magnetic flux encircled by the coil.

Transformers are used to change AC voltage levels, such transformers being termed step-up or step-down type to increase or decrease voltage level, respectively. Transformers can also be used to provide galvanic isolation between circuits as well as to couple stages of signal-processing circuits. Since the invention of the first constant-potential transformer in 1885, transformers have become essential for the transmission, distribution, and utilization of alternating current electric power.[1] A wide range of transformer designs is encountered in electronic and electric power applications. Transformers range in size from RF transformers less than a cubic centimeter in volume, to units weighing hundreds of tons used to interconnect the power grid.
"""

data = data_str.replace("\n", "").split(" ")
unique_data = list(set(data))
n_embedding = len(unique_data)
emb_dict = dict([(val, i) for i, val in enumerate(unique_data)])
n_embedding

131

In [504]:
class SelfAttentionWithEmbedding(torch.nn.Module):

    def __init__(self, n_init, n_out, n_embedding):
        super().__init__()

        self.scale = np.sqrt(n_init)
        
        self.Embedding = torch.nn.Embedding(n_embedding, n_init)
        
        self.Q = torch.nn.Linear(n_init, n_init)
        self.K = torch.nn.Linear(n_init, n_init)
        self.V = torch.nn.Linear(n_init, n_init)
        
        self.softmax = torch.nn.Softmax(dim=1)

        self.output = torch.nn.Linear(n_init**2, n_embedding)

    def forward(self, x):

        x_emb = self.Embedding(x)
        
        x_q = self.Q(x_emb)
        x_k = self.K(x_emb)
        x_v = self.V(x_emb)

        x_k_t = torch.transpose(x_k, 1, 2)
        
        output = self.softmax(torch.matmul(x_q, x_k_t) / self.scale)
        output = torch.matmul(output, x_v)
        output = torch.flatten(output, start_dim=1)

        output = self.output(output)

        return output

        

In [505]:
layer.parameters

<bound method Module.parameters of SelfAttentionWithEmbedding(
  (Embedding): Embedding(131, 12)
  (Q): Linear(in_features=12, out_features=12, bias=True)
  (K): Linear(in_features=12, out_features=12, bias=True)
  (V): Linear(in_features=12, out_features=12, bias=True)
  (softmax): Softmax(dim=1)
  (output): Linear(in_features=144, out_features=131, bias=True)
)>

In [506]:
data_emb = [emb_dict[val] for val in data[:12]]

In [507]:
data_emb

[108, 28, 64, 17, 71, 114, 106, 23, 99, 12, 60, 78]

In [508]:
data[12]

'electrical'

In [509]:
emb_dict[data[12]]

99

In [510]:
x = np.array(data_emb).reshape(1, 12)

In [511]:
x = torch.from_numpy(x)

In [512]:
layer = SelfAttentionWithEmbedding(12, 8, n_embedding)

In [513]:
layer.forward(x)

tensor([[ 0.0248,  0.0173, -0.1710,  0.0514, -0.0041, -0.3186, -0.0025,  0.2035,
         -0.0157,  0.0225,  0.0505, -0.1653,  0.0206, -0.1190,  0.0205, -0.1348,
          0.1364, -0.2349,  0.2018, -0.2313,  0.0162, -0.0228,  0.1332,  0.0253,
          0.0073,  0.1287, -0.2020,  0.0620, -0.0932,  0.0098,  0.2622,  0.0920,
         -0.0019,  0.0501,  0.0100,  0.0077,  0.2352,  0.0547, -0.0805, -0.1764,
          0.1908, -0.1180,  0.0113,  0.1625,  0.1603, -0.3160, -0.1270,  0.0599,
         -0.0989,  0.1712,  0.1203, -0.1016,  0.0243, -0.0719, -0.0382,  0.0602,
          0.1114,  0.0172,  0.0882, -0.1649, -0.3203,  0.0409,  0.1870, -0.0880,
         -0.0185, -0.0700,  0.0333,  0.1328, -0.0137,  0.2448,  0.1801, -0.2372,
         -0.1669,  0.1002, -0.0740,  0.0838,  0.0070,  0.0054, -0.0818,  0.1834,
          0.1512, -0.1780, -0.0691, -0.0704,  0.0524,  0.1105, -0.5593,  0.4018,
         -0.1180,  0.0725, -0.0204,  0.0799, -0.1308,  0.3303, -0.2414,  0.0246,
          0.1403,  0.0641,  

In [514]:
def create_dataset(data, n_seq=12):

    data_clear = data.replace("\n", "").split(" ")
    unique_data = list(set(data_clear))
    n_embedding = len(unique_data)
    emb_dict = dict([(val, i) for i, val in enumerate(unique_data)])
    data_emb = [emb_dict[i] for i in data_clear]
    
    dataset_emb = np.array([[k for k in data_emb[i:(i+12)]] for i in range(len(data_emb) - n_seq - 1)])
    y = [d[-1] for d in dataset_emb[1:]]
    
    x = torch.from_numpy(np.array(dataset_emb[:-1]))
    y = torch.from_numpy(np.array(y, dtype='int64'))
    y_onehot = torch.nn.functional.one_hot(y, num_classes=n_embedding).type(torch.DoubleTensor)
    
    
    return torch.utils.data.TensorDataset(x, y_onehot)

    

In [515]:
dataset = create_dataset(data_str)

In [516]:
x

tensor([[108,  28,  64,  17,  71, 114, 106,  23,  99,  12,  60,  78]],
       dtype=torch.int32)

In [517]:
model = torch.nn.Sequential()
model.add_module('fc1', layer)
model(x[:1])

tensor([[ 0.0248,  0.0173, -0.1710,  0.0514, -0.0041, -0.3186, -0.0025,  0.2035,
         -0.0157,  0.0225,  0.0505, -0.1653,  0.0206, -0.1190,  0.0205, -0.1348,
          0.1364, -0.2349,  0.2018, -0.2313,  0.0162, -0.0228,  0.1332,  0.0253,
          0.0073,  0.1287, -0.2020,  0.0620, -0.0932,  0.0098,  0.2622,  0.0920,
         -0.0019,  0.0501,  0.0100,  0.0077,  0.2352,  0.0547, -0.0805, -0.1764,
          0.1908, -0.1180,  0.0113,  0.1625,  0.1603, -0.3160, -0.1270,  0.0599,
         -0.0989,  0.1712,  0.1203, -0.1016,  0.0243, -0.0719, -0.0382,  0.0602,
          0.1114,  0.0172,  0.0882, -0.1649, -0.3203,  0.0409,  0.1870, -0.0880,
         -0.0185, -0.0700,  0.0333,  0.1328, -0.0137,  0.2448,  0.1801, -0.2372,
         -0.1669,  0.1002, -0.0740,  0.0838,  0.0070,  0.0054, -0.0818,  0.1834,
          0.1512, -0.1780, -0.0691, -0.0704,  0.0524,  0.1105, -0.5593,  0.4018,
         -0.1180,  0.0725, -0.0204,  0.0799, -0.1308,  0.3303, -0.2414,  0.0246,
          0.1403,  0.0641,  

In [518]:
model = torch.nn.Sequential()
model.add_module('fc1', layer)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

for t in range(300):
    # Training loop for mini-batches
    epoch_loss = 0
    errors = 0
    
    for batch_idx, (x, y) in enumerate(train_loader):
        # Make predictions with the current parameters.
        # x, y = x.to(device), y.to(device)
        y_pred = model(x)
        
        # Compute the loss value.
        # print(y_pred.shape, y.shape)
        loss = loss_fn(y_pred, y)
        # print(loss)
        # input()
        epoch_loss += torch.sum(loss) / 16
        
        # input()
        errors += torch.sum(torch.argmax(y_pred, dim=1) != torch.argmax(y, dim=1))
        
        # Update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(epoch_loss / (batch_idx + 1), 'errors:', errors)

tensor(0.3069, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(186)
tensor(0.3014, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(182)
tensor(0.2977, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(180)
tensor(0.2939, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(180)
tensor(0.2902, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(180)
tensor(0.2867, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(180)
tensor(0.2831, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(179)
tensor(0.2794, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(178)
tensor(0.2754, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(178)
tensor(0.2716, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(178)
tensor(0.2671, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(176)
tensor(0.2631, dtype=torch.float64, grad_fn=<DivBackward0>) errors: tensor(175)
tensor(0.2584, dtype=torch.float64, grad

In [519]:
torch.max(model(x), dim=1), torch.argmax(model(x), dim=1), torch.argmax(y, dim=1)

(torch.return_types.max(
 values=tensor([13.8297, 20.3500, 15.4333, 16.2169, 14.9565, 15.1938, 18.7971, 16.5171,
         13.4548, 12.8462, 17.9277, 12.8777, 14.1945], grad_fn=<MaxBackward0>),
 indices=tensor([  4, 111,  86,   1,  54, 102,  64,  84,  25,  42, 103,  67,  17])),
 tensor([  4, 111,  86,   1,  54, 102,  64,  84,  25,  42, 103,  67,  17]),
 tensor([  4, 111,  86,   1,  54, 102,  64,  84,  25,  42, 103,  67,  17]))

In [503]:
len(dataset)

189