In [9]:
import numpy as np
import torch

In [10]:
all_sequences = np.load("all_sequences.npy")
all_labels = np.load("all_labels.npy")

# creating the validation and test set.

np.random.seed(42)

shuffled_indices = np.random.permutation(len(all_sequences))
all_sequences = all_sequences[shuffled_indices]
all_labels = all_labels[shuffled_indices]

train_size = int(len(all_sequences) * 0.9)

# split the sequences
train_sequences = all_sequences[:train_size]
train_labels = all_labels[:train_size]

other_sequences = all_sequences[train_size:]
other_labels = all_labels[train_size:]

shuffled_indices = np.random.permutation(len(other_sequences))

other_sequences = other_sequences[shuffled_indices]
other_labels = other_labels[shuffled_indices]

validation_size = int(len(other_sequences) * 0.5)

validation_sequences = other_sequences[:validation_size]
validation_labels = other_labels[:validation_size]

test_sequences = other_sequences[validation_size:]
test_labels = other_labels[validation_size:]

In [34]:
print(train_sequences[1])

tensor([[-2.7811e-01, -3.8744e-01, -6.4389e-01, -6.0222e-01,  2.6498e-01,
         -2.8519e+00, -2.8283e+00],
        [-1.4785e-01, -3.1204e-01, -2.9317e-01, -3.0522e-01,  3.1792e-02,
          1.3975e+00,  1.3922e+00],
        [-2.0680e-01, -1.7268e-01, -5.1679e-01, -5.5498e-01, -4.8027e-01,
         -6.2843e-01, -6.2566e-01],
        [-2.7717e-01,  2.5727e-03, -8.8759e-01, -9.5812e-01, -2.3933e-01,
         -7.5079e-01, -7.4823e-01],
        [-3.2778e-01,  2.2458e-01, -8.9002e-01, -9.6282e-01, -2.6540e-01,
         -5.3913e-01, -5.3832e-01],
        [-3.2562e-01,  3.8340e-01, -8.3577e-01, -8.8719e-01, -3.5218e-01,
          2.5833e-02,  2.4892e-02],
        [-2.6780e-01,  4.0867e-01, -7.3276e-01, -7.9934e-01, -3.2229e-01,
          6.2185e-01,  6.1963e-01],
        [-3.5401e-01,  5.0041e-01, -8.8187e-01, -1.0294e+00, -1.6229e-01,
         -9.2037e-01, -9.1819e-01],
        [-2.4625e-01,  4.8404e-01, -4.7255e-01, -5.9032e-01, -4.3589e-01,
          1.1566e+00,  1.1539e+00],
        [-

In [19]:

# Convert train_sequences and train_labels to tensors if they are not already
train_sequences = torch.tensor(train_sequences, dtype=torch.float32)  # Assuming sequences are floats
train_labels = torch.tensor(train_labels, dtype=torch.long)  # Assuming labels are integers

# Convert train_sequences and train_labels to tensors if they are not already
validation_sequences = torch.tensor(validation_sequences, dtype=torch.float32)  # Assuming sequences are floats
validation_labels = torch.tensor(validation_labels, dtype=torch.long)  # Assuming labels are integers

# Check the shapes to ensure they match
print("Train Sequences Shape:", train_sequences.shape)  # Should be (num_samples, sequence_length, feature_dim)
print("Train Labels Shape:", train_labels.shape)  # Should be (num_samples,)

print("Validation Sequence Shape:" , validation_sequences.shape)
print("Validation Labels:", validation_labels.shape)


Train Sequences Shape: torch.Size([7483, 24, 7])
Train Labels Shape: torch.Size([7483, 4])
Validation Sequence Shape: torch.Size([416, 24, 7])
Validation Labels: torch.Size([416, 4])


  train_sequences = torch.tensor(train_sequences, dtype=torch.float32)  # Assuming sequences are floats
  train_labels = torch.tensor(train_labels, dtype=torch.long)  # Assuming labels are integers


In [20]:
from torch.utils.data import DataLoader, TensorDataset

BATCH_SIZE = 64  # Same as in your Keras code
EPOCHS = 100

# Assuming train_sequences, train_labels, validation_sequences, validation_labels are PyTorch tensors
train_dataset = TensorDataset(train_sequences, train_labels)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)


val_dataset = TensorDataset(validation_sequences, validation_labels)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)



In [41]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import math


import torch
import torch.nn as nn

class TransformerEncoder(nn.Module):
    def __init__(self, input_dim, head_size, num_heads, ff_dim, dropout=0):
        super(TransformerEncoder, self).__init__()
        self.norm1 = nn.LayerNorm(input_dim)
        self.attention = nn.MultiheadAttention(embed_dim=head_size, num_heads=num_heads, dropout=dropout)
        self.norm2 = nn.LayerNorm(head_size)
        self.ff = nn.Sequential(
            nn.Linear(head_size, ff_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(ff_dim, head_size)
        )
        self.dropout = nn.Dropout(dropout)
        self.projection = nn.Linear(input_dim, head_size)  # New projection layer

    def forward(self, x):
        # Project input to the head_size dimension
        x = self.projection(x)  # Project to match head_size
        attn_output, _ = self.attention(x, x, x)
        x = x + self.dropout(attn_output)
        x = self.norm1(x)

        ff_output = self.ff(x)
        x = x + self.dropout(ff_output)
        return self.norm2(x)

# Adjust the build_transformer_model as well
class TransformerModel(nn.Module):
    def __init__(self, input_dim, head_size, num_heads, ff_dim, num_layers, dropout=0):
        super(TransformerModel, self).__init__()
        self.layers = nn.ModuleList([
            TransformerEncoder(input_dim, head_size, num_heads, ff_dim, dropout) for _ in range(num_layers)
        ])
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.norm = nn.LayerNorm(head_size)
        self.fc = nn.Linear(head_size, 1)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = x.transpose(1, 2)  # Transpose for pooling
        x = self.pool(x).squeeze(-1)  # Pooling to reduce dimensions
        x = self.norm(x)
        return self.fc(x)

# Assuming input_dim = 7, same as train_sequences.shape[-1]
input_dim = (24,7) # This matches your input data
head_size = 256
num_heads = 16
ff_dim = 1024
num_layers = 12
dropout = 0.2

model = TransformerModel(input_dim, head_size, num_heads, ff_dim, num_layers, dropout)
print(model)






TypeError: empty(): argument 'size' failed to unpack the object at pos 2 with error "type must be tuple of ints,but got tuple"

In [40]:
from torchinfo import summary

# For example, if you're using a model for a 1D sequence of length 1000 with 3 input channels
summary(model, input_size=(24,7))  # (channels, sequence_length)


RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: [Linear: 3, MultiheadAttention: 3, Dropout: 3]

In [43]:
import tensorflow as tf

import tensorflow.python.keras as keras


In [45]:
from keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, Add, GlobalAveragePooling1D

In [46]:
from keras.callbacks import ModelCheckpoint

In [48]:
from keras.models import Model


In [49]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention and Normalization
    x = LayerNormalization(epsilon=1e-6)(inputs)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = Add()([x, inputs])

    # Feed Forward Part
    y = LayerNormalization(epsilon=1e-6)(x)
    y = Dense(ff_dim, activation="relu")(y)
    y = Dropout(dropout)(y)
    y = Dense(inputs.shape[-1])(y)
    return Add()([y, x])

def build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout=0):
    inputs = Input(shape=input_shape)
    x = inputs

    # Create multiple layers of the Transformer block
    for _ in range(num_layers):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # Final part of the model
    x = GlobalAveragePooling1D()(x)
    x = LayerNormalization(epsilon=1e-6)(x)
    outputs = Dense(1, activation="linear")(x)

    # Compile model
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Model parameters
input_shape = train_sequences.shape[1:]
head_size = 256
num_heads = 16
ff_dim = 1024
num_layers = 12
dropout = 0.20

# Build the model
model = build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout)
model.summary()