In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import MultiheadAttention
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

In [2]:
class FeedFoward(nn.Module):
    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
        )

    def forward(self, x):
        return self.net(x)

In [3]:
class Enc(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_heads):
        super(Enc, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        
        self.multihead_attn = MultiheadAttention(embed_dim=input_dim, num_heads=num_heads)
        self.ffwd = FeedFoward(input_dim)
        
    def forward(self, inputs):
        #debo hacer algo con el input para convertirlo en K, Q y V???
        mah, _ = self.multihead_attn(Q, K, V)
        realim1 = inputs + mah
        output = self.ffwd.forward(realim1) + inputs
        
        return output

In [40]:
class TRF(nn.Module):
    def __init__(self,
                 d_input:int,
                 d_output:int,
                 d_model:int, 
                 n_heads:int, 
                 d_ff:int, 
                 num_enc_layers:int,
                 
                 d_out_clasif:int,
                 largo:int,
                 drp:float = 0.2,
                 norm_first:bool = False


                 ):
        super(TRF, self).__init__()

        self.d_model = d_model
        capa_encoder = nn.TransformerEncoderLayer(d_model, # Dimension del modelo
                                                n_heads, # Cantidad de cabezas
                                                d_ff, # Dimensión del feed forward
                                                drp,  # Dropout
                                                'relu', # Activación
                                                batch_first= True, # La entrada es de la forma: (batch, seq, features)
                                                norm_first= norm_first # Define si debemos normalizar primero
                                                )

        self.in_lin = nn.Linear(d_input, d_model)
        self.encoder = nn.TransformerEncoder(capa_encoder,num_enc_layers)
        self.out_lin = nn.Linear(d_model, d_output)
        self.out_clasif = nn.Linear(d_output*largo, d_out_clasif)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def gen_pe(self, batch, largo):
        # calculate the div_term
        position = torch.arange(largo).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.d_model, 2) * (-np.log(10000.0) / self.d_model))
        pe = torch.zeros(batch, largo, self.d_model)
        pe[:,:, 0::2] = torch.sin(position * div_term)
        pe[:,:, 1::2] = torch.cos(position * div_term)

        # the output has a shape of (1, max_length, d_model)
        return pe                           


    def forward(self, x):
        assert not torch.isnan(x).any(), 'Hay nan en la entrada'
        # proyectamos la entrada a la dim. del modelo
        x = self.in_lin(x)
        assert not torch.isnan(x).any()

        # Añadimos la pe
        x = x + self.gen_pe(x.shape[0], x.shape[1])
        assert not torch.isnan(x).any()

        # Obtenemos la salida del encoder
        # x = self.encoder(x, mask = nn.Transformer.generate_square_subsequent_mask(20), is_causal  = True) # variante causal del forward
        x = self.encoder(x)
        assert not torch.isnan(x).any()
        
        # Proyectamamos la salida del encoder a la salida 
        x = self.out_lin(x)
        # print(x.shape)
        # print(torch.flatten(x, 1, -1).shape)
        assert not torch.isnan(x).any()
        x = self.sigmoid(self.relu(self.out_clasif(torch.flatten(x, 1, -1))))
        print(x)
        return x

In [15]:
x = torch.randn(23, 900)
x = x.flatten()
x.shape

torch.Size([20700])

In [42]:
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

X_train = torch.randn(320, 900, 23)  
y_train = torch.randint(0, 2, (320,)).unsqueeze(-1)

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


model = TRF(23, 10, 10, 1, 10, 3, 1, 900)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Loop de entrenamiento
epochs = 10
for epoch in range(epochs):
    model.train()  # Poner el modelo en modo de entrenamiento
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Paso de entrenamiento
        optimizer.zero_grad()  # Reiniciar los gradientes
        outputs = model(inputs)  # Propagación hacia adelante
        loss = criterion(outputs, labels.float())  # Calcular la pérdida
        loss.backward()  # Retropropagación
        optimizer.step()  # Actualizar los pesos
        running_loss += loss.item() * inputs.size(0)
    
    # Calcular el loss promedio en la época
    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

print("Entrenamiento completado.")




tensor([[0.6171],
        [0.6236],
        [0.5456],
        [0.6096],
        [0.5387],
        [0.5077],
        [0.5518],
        [0.5696],
        [0.5530],
        [0.5000],
        [0.5461],
        [0.5275],
        [0.5708],
        [0.5401],
        [0.5171],
        [0.5837],
        [0.5000],
        [0.5979],
        [0.5378],
        [0.5350],
        [0.5444],
        [0.5194],
        [0.5000],
        [0.5641],
        [0.5531],
        [0.5008],
        [0.5845],
        [0.5124],
        [0.5667],
        [0.6181],
        [0.5682],
        [0.5468]], grad_fn=<SigmoidBackward0>)
tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],

In [36]:
torch.randint(0, 2, (100,)).unsqueeze(-1).shape

torch.Size([100, 1])

In [None]:
pe[:,:, 1::2] = torch.ones(64,128,16)

In [24]:
pe[:,:, 1::2]

tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        ...,

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1., 

In [12]:
torch.arange(0, 7, 2)

tensor([0, 2, 4, 6])