# Beat the Enigma Machine by AI Hack - 1st Place Solution
    By: Mouafak Dakhlaoui

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm import tqdm
import os

In [2]:
print(f'Pandas version: {pd.__version__}')
print(f'Numpy version: {np.__version__}')
print(f'Tensorflow version: {tf.__version__}')

Pandas version: 1.3.5
Numpy version: 1.23.2
Tensorflow version: 2.9.1


In [3]:
# The path to the folder containing train.csv and test.csv
DATA_DIR = 'data'

# Create a folder for submissions
SUBMISSIONS_DIR = 'submissions'
if not os.path.isdir(SUBMISSIONS_DIR):
    os.mkdir(SUBMISSIONS_DIR)
    
# Set seed
SEED = 42

tf.keras.utils.set_random_seed(SEED)

In [4]:
# Read training and testing data
train_data = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
test_data = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))


print(f'Training data shape: {train_data.shape}')
print(f'Testing data shape: {test_data.shape}')

Training data shape: (56189, 4)
Testing data shape: (2495, 3)


In [5]:
def load_data(data):
    """
        A function that takes the training dataset as input and returns
        our three model inputs
    """
    plain_text = list(data['plain_text'].values)
    encryption_key = list(data['encryption_key'].values)
    encrypted_text = list(data['encrypted_text'].values)
    return encrypted_text, encryption_key, plain_text

def train_test_split(data, validation_split=0.1):
    encryption, key, decryption = load_data(train_data)
    n_val = int(len(encryption) * validation_split)
    return encryption[:-n_val], key[:-n_val], decryption[:-n_val], encryption[-n_val:], key[-n_val:], decryption[-n_val:]

In [6]:
validation_split = 0.01
train_encryption, train_key, train_decryption, val_encryption, val_key, val_decryption = train_test_split(train_data)

In [7]:
# Create training and validation tf datasets.
BUFFER_SIZE = len(train_encryption)
BATCH_SIZE = 64

train_dataset = tf.data.Dataset.from_tensor_slices((train_encryption, train_key, train_decryption)).shuffle(BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE)

val_dataset = tf.data.Dataset.from_tensor_slices((val_encryption, val_key, val_decryption))
val_dataset = val_dataset.batch(BATCH_SIZE)

In [8]:
# Use keras' TextVectorization layer to transform the plain (decrypted) text, the key and the encrypted text
# into tokens.
vocabulary = [chr(index) for index in range(ord('A'), ord('Z') + 1)]
output_sequence_length = int(max(train_data['encrypted_text'].map(len).max(), train_data['plain_text'].map(len).max()))

encryption_text_processor = tf.keras.layers.TextVectorization(
    standardize=None,
    split='character',
    output_sequence_length=output_sequence_length,
    vocabulary=vocabulary)

key_text_processor = tf.keras.layers.TextVectorization(
    standardize=None,
    split='character',
    output_sequence_length=3,
    vocabulary=vocabulary
)

decryption_text_processor = tf.keras.layers.TextVectorization(
    standardize=None,
    split='character',
    output_sequence_length=output_sequence_length,
    vocabulary=vocabulary)

In [9]:
AUTOTUNE = tf.data.AUTOTUNE

# Transform string to tokens.
train_ds = train_dataset.map(lambda encryption, key, decryption: ((encryption_text_processor(encryption), key_text_processor(key)), decryption_text_processor(decryption)))
val_ds = val_dataset.map(lambda encryption, key, decryption: ((encryption_text_processor(encryption), key_text_processor(key)), decryption_text_processor(decryption)))

# Optimize the datasets for performance.
train_ds = train_ds.cache().prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)

In [10]:
# Create a model

encryption_embedding_dim = 128
key_embedding_dim = 128

key_units = 256

decryption_units1 = 256
decryption_units2 = 128


dense_units1 = 128
dense_units2 = 64
dense_units3 = 32



def get_model():
    encrypted_input = tf.keras.layers.Input((output_sequence_length))
    key_input = tf.keras.layers.Input((3))
    
    embedded_encryption = tf.keras.layers.Embedding(encryption_text_processor.vocabulary_size()-1,
                                                   encryption_embedding_dim,
                                                   mask_zero=True)(encrypted_input)
    
    embedded_key = tf.keras.layers.Embedding(3,
                                            key_embedding_dim)(key_input)
    
    key1 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(key_units))(embedded_key)
    key2 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(key_units))(embedded_key)
    
    mapped_key1 = tf.keras.layers.Dense(decryption_units1, activation='relu')(key1)
    mapped_key2 = tf.keras.layers.Dense(decryption_units1, activation='relu')(key2)
    
    mapped_key3 = tf.keras.layers.Dense(decryption_units2, activation='relu')(mapped_key1)
    mapped_key4 = tf.keras.layers.Dense(decryption_units2, activation='relu')(mapped_key2)
    
    decryption1 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(decryption_units1, return_sequences=True))(embedded_encryption, initial_state=[mapped_key1, mapped_key2])
    decryption2 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(decryption_units2, return_sequences=True))(decryption1, initial_state=[mapped_key3, mapped_key4])
    
    pre_output1 = tf.keras.layers.Dense(dense_units1, activation='relu')(decryption2)
    pre_output2 = tf.keras.layers.Dense(dense_units2, activation='relu')(pre_output1)
    pre_output3 = tf.keras.layers.Dense(dense_units3, activation='relu')(pre_output2)
    
    output = tf.keras.layers.Dense(decryption_text_processor.vocabulary_size())(pre_output3)
    
    model = tf.keras.Model([encrypted_input, key_input], output)
    
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                                         reduction='none'),
                 optimizer='adam')
    
    return model


def get_final_model(model):
    """
        A function that takes a model as input and adds a sigmoid activation at the output layer
    """
    encrypted_input = tf.keras.layers.Input((output_sequence_length))
    key_input = tf.keras.layers.Input((3))
    
    model_output = model([encrypted_input, key_input])
    
    activation_output = tf.keras.layers.Activation('sigmoid')(model_output)
    
    final_model = tf.keras.Model([encrypted_input, key_input], activation_output)
    
    final_model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False, reduction='none'),
                       optimizer='adam')
    
    return final_model
    

    
def load_test_data(data):
    encryption_key = list(data['encryption_key'].values)
    encrypted_text = list(data['encrypted_text'].values)
    return encrypted_text, encryption_key

def get_test_ds(data):
    encrypted_text, encryption_key = load_test_data(data)
    test_dataset = tf.data.Dataset.from_tensor_slices((encrypted_text, encryption_key))
    test_dataset = test_dataset.batch(BATCH_SIZE)
    test_ds = test_dataset.map(lambda encryption, key: ((encryption_text_processor(encryption), key_text_processor(key)),))
    return test_ds


def get_submission(model, test_ds):
    """
        A function that takes a model and the test dataset as parameters and returns a submission
    """
    prediction = model.predict(test_ds)
    columns = ['ID']
    columns.extend([f'label_{chr(key)}' for key in range(ord('A'), ord('Z') + 1)])
    sub = pd.DataFrame(columns=columns)
    for index, text, pred in tqdm(zip(test_data['ID'].values, test_data['encrypted_text'], prediction)):
        for idx, letter_dist in zip(range(len(text)), pred):

            values_dict = {f'label_{chr(key)}':value for key, value in zip(range(ord('A'), ord('Z') + 1), letter_dist[2:])}
            values_dict['ID'] = f'{index}_{idx}'
            sub = sub.append(values_dict, ignore_index=True)
    return sub

test_ds = get_test_ds(test_data)

In [11]:
# Train the model and use early stopping
model = get_model()
model.summary()
es_callback = tf.keras.callbacks.EarlyStopping(patience=5)

epochs = 500
model.fit(train_ds.map(lambda x, y: (x, tf.one_hot(y, 28))),
          epochs=epochs,
          validation_data=val_ds.map(lambda x, y: (x, tf.one_hot(y, 28))),
         callbacks=[es_callback])

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 3)]          0           []                               
                                                                                                  
 embedding_1 (Embedding)        (None, 3, 128)       384         ['input_2[0][0]']                
                                                                                                  
 input_1 (InputLayer)           [(None, 135)]        0           []                               
                                                                                                  
 bidirectional (Bidirectional)  (None, 512)          788480      ['embedding_1[0][0]']            
                                                                                              

Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500


<keras.callbacks.History at 0x20714a75400>

In [12]:
# Used early stopping patience=10 and restore_best_weights=True to squeeze more performance out of the model
es_callback = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

epochs = 500
model.fit(train_ds.map(lambda x, y: (x, tf.one_hot(y, 28))),
          epochs=epochs,
          validation_data=val_ds.map(lambda x, y: (x, tf.one_hot(y, 28))),
         callbacks=[es_callback])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500


<keras.callbacks.History at 0x2077fa12df0>

In [13]:
# Get the final model (with the sigmoid activation at the output layer)
final_model = get_final_model(model)
final_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 135)]        0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 3)]          0           []                               
                                                                                                  
 model (Functional)             (None, 135, 28)      3039356     ['input_3[0][0]',                
                                                                  'input_4[0][0]']                
                                                                                                  
 activation (Activation)        (None, 135, 28)      0           ['model[0][0]']            

In [14]:
# Make a submission
sub = get_submission(final_model, test_ds)
sub.to_csv(os.path.join(SUBMISSIONS_DIR, 'model5(0.0066).csv'), index=False)



2495it [03:50, 10.84it/s]
