In [8]:
from keras.layers import InputLayer, Dense, Activation, Masking, Embedding, LSTM, Flatten, Bidirectional, GRU, SimpleRNN, RNN
from keras.models import Sequential
import numpy as np

In [11]:
model = Sequential()
input_layer = InputLayer(input_shape=(20,))
layer_1 = Embedding(input_dim=100, output_dim=512, input_length=20)
layer_2= SimpleRNN(4)
layer_3 = Dense(units = 256)
layer_4 = Dense(units = 128)
flatten = Flatten()
layer_5 = Dense(3, activation='softmax')

model.add(input_layer)
model.add(layer_1)
model.add(layer_2)
model.add(layer_3)
model.add(layer_4)
model.add(Flatten())
model.add(layer_5)

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy')

In [12]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 20, 512)           51200     
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 4)                 2068      
                                                                 
 dense_3 (Dense)             (None, 256)               1280      
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 flatten_3 (Flatten)         (None, 128)               0         
                                                                 
 dense_5 (Dense)             (None, 3)                 387       
                                                                 
Total params: 87,831
Trainable params: 87,831
Non-trai

In [4]:
input_shape = np.random.randint(0, 20, size=(1000, 20))
output_data = np.random.randint(0, 3, size=(1000,))

In [5]:
model.fit(input_shape, output_data)



<keras.callbacks.History at 0x2058eda9e20>

In [14]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Generate sample dataset
input_data = np.random.randint(0, 20, size=(1000, 20))
output_data = np.random.randint(0, 3, size=(1000,))

# Convert data to PyTorch tensors
x = torch.tensor(input_data, dtype=torch.float32)
y = torch.tensor(output_data, dtype=torch.long)

# Create DataLoader for batching
dataset = TensorDataset(x, y)
batch_size = 16
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Hyperparameters
input_size = 20
embedding_dim = 32
hidden_size = 50
output_size = 3  # 3 classes
learning_rate = 0.001
epochs = 10
weight_decay = 1e-5  # L2 regularization strength

# Define LSTM model with regularization for specific layers
class LSTMModel(nn.Module):
    def __init__(self, input_size, embedding_dim, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(input_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, output_size)

        # Apply L2 regularization to specific layers
        self.init_weights(self.fc1)
        self.init_weights(self.fc2)

        # Apply separate initialization for each layer
        self.init_weights(self.embedding)
        self.init_weights(self.rnn)
        self.init_weights(self.fc1)
        self.init_weights(self.fc2)

    def forward(self, x):
        out = self.embedding(x)
        out, _ = self.rnn(out)
        out = self.fc1(out[:, -1, :])
        out = self.relu(out)
        out = self.fc2(out)
        return out

    def init_weights(self, layer):
        if isinstance(layer, nn.Linear):
            nn.init.xavier_normal_(layer.weight)
            layer.bias.data.fill_(0.01)
        elif isinstance(layer, nn.Embedding):
            nn.init.xavier_normal_(layer.weight)
        elif isinstance(layer, nn.RNN):
            for name, param in layer.named_parameters():
                if 'weight' in name:
                    nn.init.xavier_normal_(param)
                elif 'bias' in name:
                    param.data.fill_(0.01)

# Create an instance of the model
model = LSTMModel(input_size, embedding_dim, hidden_size, output_size)

# Define loss function and optimizer with weight decay for regularization
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Training loop
for epoch in range(epochs):
    model.train()
    for inputs, labels in dataloader:
        # Forward pass
        outputs = model(inputs.long())

        # Calculate the loss
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Testing the model
model.eval()
with torch.no_grad():
    test_sequence = torch.randint(0, 20, (1, 20))  # Replace with your test sequence
    output = model(test_sequence.long())
    predicted_class = torch.argmax(output).item()
    print(f"Predicted class: {predicted_class}")


Epoch [1/10], Loss: 1.1279
Epoch [2/10], Loss: 1.0290
Epoch [3/10], Loss: 1.0444
Epoch [4/10], Loss: 1.1734
Epoch [5/10], Loss: 0.9746
Epoch [6/10], Loss: 0.8721
Epoch [7/10], Loss: 0.9003
Epoch [8/10], Loss: 0.9871
Epoch [9/10], Loss: 1.1772
Epoch [10/10], Loss: 0.9402
Predicted class: 2
