In [23]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Layer, Input, Dense, Bidirectional, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import numpy as np

# Generate random data
input_shape = np.random.randint(0, 20, size=(1000, 64))
output_data = np.random.randint(0, 3, size=(1000,))

# One-hot encode the output data
output_data_one_hot = to_categorical(output_data, num_classes=3)

# Split the data into training and validation sets
input_train, input_val, output_train, output_val = train_test_split(input_shape, output_data_one_hot, test_size=0.2, random_state=42)

# Custom SelfAttention layer
class SelfAttention(Layer):
    def __init__(self, attention_units, **kwargs):
        self.attention_units = attention_units
        super(SelfAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W_q = self.add_weight(name="W_q", shape=(input_shape[-1], self.attention_units), initializer="glorot_uniform", trainable=True)
        self.W_k = self.add_weight(name="W_k", shape=(input_shape[-1], self.attention_units), initializer="glorot_uniform", trainable=True)
        self.W_v = self.add_weight(name="W_v", shape=(input_shape[-1], input_shape[-1]), initializer="glorot_uniform", trainable=True)
        super(SelfAttention, self).build(input_shape)

    def call(self, inputs):
        Q = tf.matmul(inputs, self.W_q)
        K = tf.matmul(inputs, self.W_k)
        V = tf.matmul(inputs, self.W_v)
        attention_scores = tf.matmul(Q, K, transpose_b=True)
        attention_scores = tf.math.divide(attention_scores, tf.sqrt(tf.cast(tf.shape(K)[-1], tf.float32)))
        attention_weights = tf.nn.softmax(attention_scores, axis=-1)
        attended_values = tf.matmul(attention_weights, V)
        return attended_values

    def compute_output_shape(self, input_shape):
        return input_shape

# Model architecture
input_sequence = Input(shape=(64,))
attention_output = SelfAttention(32)(input_sequence)

# Expand the dimensions to add a time dimension
attention_output_expanded = tf.expand_dims(attention_output, axis=1)

# Apply Bidirectional LSTM to the expanded attention output
lstm_output = Bidirectional(LSTM(64, return_sequences=True))(attention_output_expanded)

# Squeeze the time dimension
lstm_output_squeezed = tf.squeeze(lstm_output, axis=1)

output = Dense(3, activation='softmax')(lstm_output_squeezed)

# Create the model
model = Model(inputs=input_sequence, outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(input_train, output_train, validation_data=(input_val, output_val), epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1beb183fee0>

In [24]:
model.summary()

Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 64)]              0         
                                                                 
 self_attention_7 (SelfAtten  (None, 64)               8192      
 tion)                                                           
                                                                 
 tf.expand_dims_2 (TFOpLambd  (None, 1, 64)            0         
 a)                                                              
                                                                 
 bidirectional_10 (Bidirecti  (None, 1, 128)           66048     
 onal)                                                           
                                                                 
 tf.compat.v1.squeeze_2 (TFO  (None, 128)              0         
 pLambda)                                                  

In [11]:
data[0]

array([[0.17660614, 0.83233492, 0.19437931],
       [0.32713838, 0.41190329, 0.81737711],
       [0.56497652, 0.66910481, 0.15248696],
       [0.92067507, 0.81342356, 0.79177651],
       [0.01094447, 0.0255699 , 0.22837341],
       [0.11355345, 0.00990296, 0.07672833],
       [0.19579977, 0.22639979, 0.74433458],
       [0.71469106, 0.23003615, 0.63695422],
       [0.35846147, 0.3626864 , 0.16582222],
       [0.75333162, 0.65145193, 0.99180525]])

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Bidirectional, Dense, Attention, MultiHeadAttention

# Generate random data
input_shape = np.random.randint(0, 20, size=(1000, 64))
output_data = np.random.randint(0, 3, size=(1000,))

# Define the model
input_layer = Input(shape=(64,))

# Embedding layer
embedding_layer = Embedding(input_dim=20, output_dim=50)(input_layer)  # Assuming your vocabulary size is 20

# Bidirectional LSTM layer
#bidirectional_lstm = Bidirectional(LSTM(64, return_sequences=True))(embedding_layer)

# Attention layer
attention = Attention(use_scale=False, score_mode="dot")([embedding_layer, embedding_layer])

# Multiply attention output with bidirectional LSTM output
attended_output = tf.keras.layers.Multiply()([embedding_layer, attention])

# LSTM layer
lstm_layer = LSTM(32)(attended_output)

# Dense layer
output_layer = Dense(3, activation='softmax')(lstm_layer)  # Assuming you have 3 classes for classification

# Create the model
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()

# Train the model (replace 'output_data' with your actual target data)
model.fit(input_shape, output_data, epochs=10, batch_size=32, validation_split=0.2)


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 64)]         0           []                               
                                                                                                  
 embedding_1 (Embedding)        (None, 64, 50)       1000        ['input_2[0][0]']                
                                                                                                  
 attention_1 (Attention)        (None, 64, 50)       0           ['embedding_1[0][0]',            
                                                                  'embedding_1[0][0]']            
                                                                                                  
 multiply_1 (Multiply)          (None, 64, 50)       0           ['embedding_1[0][0]',        

<keras.callbacks.History at 0x1b8ba428af0>

In [2]:
import numpy as np

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Generate random data
input_shape = torch.tensor(np.random.randint(0, 20, size=(1000, 64)), dtype=torch.float32)
output_data = torch.tensor(np.random.randint(0, 3, size=(1000,)), dtype=torch.long)

# One-hot encode the output data
output_data_one_hot = torch.nn.functional.one_hot(output_data)

# Split the data into training and validation sets
input_train, input_val, output_train, output_val = train_test_split(input_shape, output_data_one_hot, test_size=0.2, random_state=42)

# Convert data to PyTorch DataLoader
train_dataset = TensorDataset(input_train, output_train)
val_dataset = TensorDataset(input_val, output_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Define the model
class AttentionModel(nn.Module):
    def __init__(self, input_dim, output_dim, attention_dim):
        super(AttentionModel, self).__init__()

        self.lstm = nn.LSTM(input_dim, 64, batch_first=True, bidirectional=True)
        self.attention = SelfAttention(128, attention_dim)
        self.fc = nn.Linear(128, output_dim)

    def forward(self, x):
        lstm_output, _ = self.lstm(x)
        attention_output = self.attention(lstm_output)
        output = self.fc(attention_output[:, -1, :])
        return output

# Define the SelfAttention module
class SelfAttention(nn.Module):
    def __init__(self, input_dim, attention_dim):
        super(SelfAttention, self).__init__()

        self.W_q = nn.Linear(input_dim, attention_dim)
        self.W_k = nn.Linear(input_dim, attention_dim)
        self.W_v = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        Q = self.W_q(x)
        K = self.W_k(x)
        V = self.W_v(x)

        attention_scores = torch.matmul(Q, K.transpose(1, 2))
        attention_scores = attention_scores / torch.sqrt(torch.tensor(attention_scores.shape[-1], dtype=torch.float32))

        attention_weights = nn.functional.softmax(attention_scores, dim=-1)
        attended_values = torch.matmul(attention_weights, V)

        return attended_values

# Instantiate the model
input_dim = 64
output_dim = 3
attention_dim = 32

model = AttentionModel(input_dim, output_dim, attention_dim)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, torch.argmax(labels, dim=1))
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in val_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, torch.argmax(labels, dim=1)).item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == torch.argmax(labels, dim=1)).sum().item()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Validation Accuracy: {(correct/total)*100:.2f}%')


IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

In [None]:
nn.MultiheadAttention