In [1]:
import numpy as np
import tensorflow as tf

# Example task sequences (vulnerable orderings) used as model inputs.
task_sequences = [
    "task1 task2 task3",
    "task3 task1 task2",
    "task2 task3 task4",
    "task1 task4 task3"
]

# Corrected (non-vulnerable) orderings; entry i is the target for task_sequences[i].
non_vulnerable_texts = [
    "task1 task2 task3",
    "task1 task2 task3",
    "task2 task3 task4",
    "task1 task2 task3"
]

# Inputs and targets are paired by position, so the two lists must line up.
assert len(task_sequences) == len(non_vulnerable_texts), \
    "every vulnerable sequence needs a corrected counterpart"

# Build one shared vocabulary over inputs and targets.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(task_sequences + non_vulnerable_texts)
# +1 because tokenizer indices start at 1; index 0 is left for padding.
vocab_size = len(tokenizer.word_index) + 1

# Convert the text sequences to lists of integer token ids.
vulnerable_token_ids = tokenizer.texts_to_sequences(task_sequences)
non_vulnerable_token_ids = tokenizer.texts_to_sequences(non_vulnerable_texts)

# Use the longest sequence across BOTH corpora: pad_sequences truncates anything
# longer than maxlen, so sizing from the inputs alone could clip a longer target.
max_sequence_length = max(
    len(seq) for seq in vulnerable_token_ids + non_vulnerable_token_ids
)

# Pad (with trailing zeros) so every sequence has the same length.
vulnerable_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    vulnerable_token_ids, padding='post', maxlen=max_sequence_length
)
non_vulnerable_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    non_vulnerable_token_ids, padding='post', maxlen=max_sequence_length
)

# Inputs and targets must share a shape for per-timestep training.
assert vulnerable_sequences.shape == non_vulnerable_sequences.shape

# Print some examples
print("Vulnerable Sequences:", vulnerable_sequences)
print("Non-vulnerable Sequences:", non_vulnerable_sequences)


Vulnerable Sequences: [[3 2 1]
 [1 3 2]
 [2 1 4]
 [3 4 1]]
Non-vulnerable Sequences: [[3 2 1]
 [3 2 1]
 [2 1 4]
 [3 2 1]]


In [2]:
from tensorflow.keras import layers, models

def create_autoencoder(input_dim, timesteps, latent_dim=64, embedding_dim=64):
    """Build and compile an LSTM encoder-decoder over token-id sequences.

    The encoder embeds the input ids and compresses the whole sequence into a
    single ``latent_dim`` vector; the decoder repeats that vector ``timesteps``
    times and emits a softmax over ``input_dim`` classes at every step.

    Args:
        input_dim: Vocabulary size; used both as the embedding's input
            dimension and as the number of output classes.
        timesteps: Fixed length of every input/output sequence.
        latent_dim: Number of units in the encoder and decoder LSTMs.
        embedding_dim: Width of the token embedding. Defaults to 64, the
            value that was previously hard-coded.

    Returns:
        A Keras model compiled with the Adam optimizer and
        ``sparse_categorical_crossentropy`` loss.
    """
    # Encoder: ids -> embeddings -> one summary vector per sequence.
    token_ids = layers.Input(shape=(timesteps,))
    embedded = layers.Embedding(input_dim=input_dim, output_dim=embedding_dim)(token_ids)
    encoded = layers.LSTM(latent_dim, return_sequences=False)(embedded)

    # Decoder: feed the summary vector at every step and unroll it again.
    repeated = layers.RepeatVector(timesteps)(encoded)
    decoded = layers.LSTM(latent_dim, return_sequences=True)(repeated)
    output_seq = layers.TimeDistributed(
        layers.Dense(input_dim, activation='softmax')
    )(decoded)

    # Autoencoder model
    autoencoder = models.Model(token_ids, output_seq)
    autoencoder.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    return autoencoder

# Instantiate the compiled autoencoder for the tokenizer's vocabulary size
# and the padded sequence length.
autoencoder = create_autoencoder(
    input_dim=vocab_size,
    timesteps=max_sequence_length,
)

# Show the layer-by-layer architecture.
autoencoder.summary()


In [3]:
# Fit the autoencoder to map each vulnerable sequence to its corrected version.
# The targets get a trailing axis of size 1 before being passed to fit().
autoencoder_targets = np.expand_dims(non_vulnerable_sequences, -1)
autoencoder.fit(
    x=vulnerable_sequences,
    y=autoencoder_targets,
    epochs=10,
    batch_size=4,
)

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 1.6100
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 1.6067
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - loss: 1.6034
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 1.6000
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 1.5963
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - loss: 1.5924
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - loss: 1.5881
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 1.5833
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 1.5779
Epoch 10/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 1.5720


<keras.src.callbacks.history.History at 0x1a760b1d6a0>

In [4]:
def create_rnn(input_dim, timesteps, latent_dim=64, embedding_dim=64):
    """Build and compile a two-layer LSTM that labels every timestep.

    The input ids are embedded, passed through two stacked LSTMs that both
    return full sequences, and projected to a softmax over ``input_dim``
    classes at each step.

    Args:
        input_dim: Vocabulary size; used both as the embedding's input
            dimension and as the number of output classes.
        timesteps: Fixed length of every input/output sequence.
        latent_dim: Number of units in each LSTM layer.
        embedding_dim: Width of the token embedding. Defaults to 64, the
            value that was previously hard-coded.

    Returns:
        A Keras model compiled with the Adam optimizer and
        ``sparse_categorical_crossentropy`` loss.
    """
    # Input layer and token embedding.
    token_ids = layers.Input(shape=(timesteps,))
    embedded = layers.Embedding(input_dim=input_dim, output_dim=embedding_dim)(token_ids)

    # Stacked recurrent layers; both keep the time axis so each step gets an output.
    hidden = layers.LSTM(latent_dim, return_sequences=True)(embedded)
    hidden = layers.LSTM(latent_dim, return_sequences=True)(hidden)

    # Per-timestep class distribution.
    output_seq = layers.TimeDistributed(
        layers.Dense(input_dim, activation='softmax')
    )(hidden)

    # RNN model
    rnn_model = models.Model(token_ids, output_seq)
    rnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    return rnn_model

# Instantiate the compiled RNN for the tokenizer's vocabulary size and the
# padded sequence length.
rnn_model = create_rnn(
    input_dim=vocab_size,
    timesteps=max_sequence_length,
)

# Show the layer-by-layer architecture.
rnn_model.summary()

In [5]:
# Fit the RNN with vulnerable sequences as inputs and the corrected sequences
# as per-timestep targets (a trailing axis of size 1 is added to the targets).
rnn_targets = np.expand_dims(non_vulnerable_sequences, -1)
rnn_model.fit(
    x=vulnerable_sequences,
    y=rnn_targets,
    epochs=10,
    batch_size=4,
)

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 1.6090
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 1.6058
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 1.6026
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 1.5992
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - loss: 1.5956
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 1.5917
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 1.5875
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 1.5830
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - loss: 1.5780
Epoch 10/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - loss: 1.5726


<keras.src.callbacks.history.History at 0x1a7635fe720>

In [6]:
# Probe the trained RNN with a vulnerable sequence.
# Note: this exact ordering also appears in the training inputs above.
test_texts = ["task3 task1 task2"]

# Apply the same tokenization and post-padding used for the training data.
test_token_ids = tokenizer.texts_to_sequences(test_texts)
test_sequence = tf.keras.preprocessing.sequence.pad_sequences(
    test_token_ids,
    padding='post',
    maxlen=max_sequence_length,
)

# The model returns one softmax distribution per timestep.
class_probabilities = rnn_model.predict(test_sequence)

# Take the most likely token id at every timestep.
predicted_sequence = np.argmax(class_probabilities, axis=-1)

# Map the predicted ids back to their task words.
predicted_words = tokenizer.sequences_to_texts(predicted_sequence)

# Output the predicted non-vulnerable sequence
print("Predicted corrected sequence:", predicted_words[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Predicted corrected sequence: task3 task3 task3
