# Code Breaker Playground

I suggest you hide the cells in this section, as they are boilerplate imports, initialization, and some convenience functions.

In [1]:
# Imports

import os
import logging

# Disable some chatty warnings from Tensorflow. These environment variables are set
# here, before the `import tensorflow` further down, so they are already in place
# when the library is loaded.
# TF_CPP_MIN_LOG_LEVEL values:
# 0 = all messages are logged (default behavior)
# 1 = INFO messages are not printed
# 2 = INFO and WARNING messages are not printed
# 3 = INFO, WARNING, and ERROR messages are not printed
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
os.environ["GRPC_VERBOSITY"] = "ERROR"
os.environ["GLOG_minloglevel"] = "2"

# Still trying to disable warnings. It's harder than you'd think!
# This turns off the Python-side 'tensorflow' logger as well; some native messages
# (e.g. the cuFFT/cuDNN/cuBLAS registration errors) are still printed regardless.
logging.getLogger('tensorflow').disabled = True

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf
import keras_tuner as kt
import random
import pandas as pd
import sys
import time

from credentials import CONNECTION_INFO
from constants import *

import encoders
import db_connect
import helpers
import tf_helpers
import models
import crackers

2024-12-02 20:16:00.749969: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-02 20:16:00.773952: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-02 20:16:00.779202: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
%%time
# Load the two Caesar models from the paths defined in the models module.
# The SUBSTITUTION_* names are placeholders and stay None in this notebook.
CAESAR_KEY_MODEL = models.load_model(models.CAESAR_KEY_MODEL_PATH)
CAESAR_TEXT_MODEL = models.load_model(models.CAESAR_TEXT_MODEL_PATH)
SUBSTITUTION_KEY_MODEL = None
SUBSTITUTION_TEXT_MODEL = None

# The chunk size is read from index 2 of the text model's input shape, so it
# follows whichever model file was loaded instead of being hard-coded.
CAESAR_CHUNK_SIZE = CAESAR_TEXT_MODEL.input_shape[2]
SUBSTITUTION_CHUNK_SIZE = None

# Load the scaler file that helpers recommends for this encoder and chunk size.
# NOTE(review): presumably this is the scaler fitted at training time - confirm.
CAESAR_SCALER = helpers.load_scaler_from_file(helpers.get_recommended_scaler_path(encoders.ENCODER_CAESAR, CAESAR_CHUNK_SIZE, temp=False))
SUBSTITUTION_SCALER = None

# displayable() cuts strings so they are at most this many characters long.
MAX_DISPLAY_LENGTH = 100
# Largest key work_magic() accepts: one less than the size of the character set.
MAX_CAESAR_KEY = len(encoders.CHARSET)-1

# The cracker bundles the scaler and both models; work_magic() uses it for inference.
CAESAR_CRACKER = crackers.Caesar_Cracker(CAESAR_SCALER, CAESAR_KEY_MODEL, CAESAR_TEXT_MODEL, verbose=0)

I0000 00:00:1733199365.653616  168107 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1733199365.762191  168107 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1733199365.762323  168107 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1733199365.768831  168107 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1733199365.768987  168107 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

CPU times: user 1.05 s, sys: 699 ms, total: 1.75 s
Wall time: 2.86 s


In [3]:
# Return a string short enough to display, optionally with lines above and below if it is a multi-line string
SEPARATOR_LINE = "---------------------------------------------------------------------------"
def displayable(message: str, add_lines_to_multiline= True, max_length=None) -> str:
    """Shorten ``message`` for display and optionally frame multi-line text.

    Args:
        message: The text to display.
        add_lines_to_multiline: When True, a result that contains a newline is
            wrapped between two ``SEPARATOR_LINE`` rows (with a trailing newline).
        max_length: Longest text returned before any separator lines are added.
            Defaults to the notebook-wide ``MAX_DISPLAY_LENGTH`` when None.

    Returns:
        ``message`` unchanged when it fits within the limit; otherwise its
        leading characters followed by "..." so the total is exactly the limit.
        If the limit is shorter than the "..." marker, only the marker is returned.
    """
    limit = MAX_DISPLAY_LENGTH if max_length is None else max_length
    marker = "..."

    # A message of exactly `limit` characters already fits, so it is not truncated.
    if len(message) <= limit:
        result = message
    else:
        # Clamp at zero so a tiny limit cannot turn into a negative slice index.
        keep = max(limit - len(marker), 0)
        result = message[:keep] + marker

    # The newline check runs on the (possibly truncated) text, as that is what gets shown.
    if add_lines_to_multiline and "\n" in result:
        result = SEPARATOR_LINE + "\n" + result + "\n" + SEPARATOR_LINE + "\n"

    return result

def ms_per_char(elapsed_seconds, characters: str) -> float:
    """Convert an elapsed time into milliseconds spent per character.

    Args:
        elapsed_seconds: Duration in seconds.
        characters: The text that was processed; only its length is used.

    Returns:
        ``elapsed_seconds * 1000 / len(characters)`` as a float, or 0.0 when
        ``characters`` is empty (there is no per-character cost to report and
        dividing would raise ZeroDivisionError).
    """
    char_count = len(characters)
    if char_count == 0:
        return 0.0
    return float(elapsed_seconds * 1000) / float(char_count)

def work_magic(message: str, key: int):
    """Encrypt ``message`` with a Caesar key, then let the models try to crack it.

    The function prints a step-by-step report: the original message, the
    plaintext actually used, the ciphertext, the key inferred by the model
    (with timing), the text inferred by the model (with timing), and how much
    of the inferred text matches the plaintext compared with random guessing.

    Args:
        message: Non-empty text to encrypt.
        key: Caesar key, from 1 to ``MAX_CAESAR_KEY`` inclusive.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: If ``message`` is None or empty, if ``key`` is out of
            bounds, or if nothing is left of the message after simplification.
    """
    if (message is None) or (len(message) < 1):
        raise ValueError("Message must be a non-empty string")
    if (key < 1 or key > MAX_CAESAR_KEY):
        raise ValueError("Key out of bounds")

    # Simplify the string so it uses the right characterset
    simplified = encoders.encode_simple(message)
    if len(simplified) < 1:
        raise ValueError("Message became an empty string after simplification. It must have been all special characters.")

    if len(simplified) < CAESAR_CHUNK_SIZE // 2:
        print(f"Your string is only {len(simplified)} characters after simplification.")
        print("The model has a hard time with short strings. But so do people!")
        print()

    # Make the string long enough to be at least one chunk, and divisible by the chunk size.
    # Ceiling division gives the number of copies needed to reach one full chunk.
    copies = max(1, -(-CAESAR_CHUNK_SIZE // len(simplified)))
    padded = simplified * copies
    # Drop the tail that does not fill a complete chunk.
    plaintext = padded[0: len(padded) - len(padded) % CAESAR_CHUNK_SIZE]

    print("This is your original message:")
    print(displayable(message))
    print()
    print("This is the plaintext we will work with:")
    print(displayable(plaintext))
    print()

    ciphertext = encoders.encode_caesar(plaintext, key)
    print(f"After encrypting with key {key}, this is the ciphertext:")
    print(displayable(ciphertext))
    print()

    # perf_counter is a monotonic, high-resolution clock meant for measuring intervals.
    start_time = time.perf_counter()
    inferred_key = CAESAR_CRACKER.infer_key_with_model(ciphertext)
    elapsed_seconds = time.perf_counter() - start_time
    good_key = key == inferred_key

    print(f'The model predicted a key of {inferred_key}, which is {"correct." if good_key else "INCORRECT!"}')
    print(f"Finding the key took {elapsed_seconds:.2f} seconds, or {ms_per_char(elapsed_seconds, ciphertext):.2f} ms per character.")

    start_time = time.perf_counter()
    inferred_text = CAESAR_CRACKER.infer_text_with_model(ciphertext)
    elapsed_seconds = time.perf_counter() - start_time

    print()
    print("The model predicted this text:")
    print(displayable(inferred_text))
    print(f"Finding the text took {elapsed_seconds:.2f} seconds, or {ms_per_char(elapsed_seconds, ciphertext):.2f} ms per character.")

    # Only the match percentage (the fourth value returned) is reported.
    (_good, _bad, _total, good_pct) = helpers.good_bad_string_match(plaintext, inferred_text)
    # Baseline: one chance in len(CHARSET) of guessing any single character.
    random_pct = float(1) / float(len(encoders.CHARSET))
    print(f"The model's string is {good_pct:.2%} right.")
    print(f"Random guessing would tend to be about {random_pct:.2%} right.")
    


# The Fun Part

In [5]:
# The triple-quoted literal starts and ends with a newline, so the message is a
# multi-line string and displayable() frames it with separator lines when printed.
MY_MESSAGE = """
Set this to whatever you like. The model does best with standard English text, and longer strings are easier
"""
# random.randint includes both endpoints, so any key from 1 to MAX_CAESAR_KEY can be drawn.
# No seed is set, so the key (and therefore the output) differs from run to run.
MY_KEY = random.randint(1, MAX_CAESAR_KEY)

# Encrypt the message and print how well the models recover the key and the text.
work_magic(MY_MESSAGE, MY_KEY)

This is your original message:
---------------------------------------------------------------------------

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut lab...
---------------------------------------------------------------------------


This is the plaintext we will work with:
LOREM IPSUM DOLOR SIT AMET, CONSECTETUR ADIPISCING ELIT, SED DO EIUSMOD TEMPOR INCIDIDUNT UT LABO...

After encrypting with key 61, this is the ciphertext:
---------------------------------------------------------------------------
KNQDL?HORTL?CNKNQ?RHS?
LDS"?BNMRDBSDSTQ?
CHOHRBHMF?DKHS"?RDC?CN?DHTRLNC?SDLONQ?HMBHCHCTMS?TS?K
AN...
---------------------------------------------------------------------------


The model predicted a key of 61, which is correct.
Finding the key took 0.12 seconds, or 0.46 ms per character.

The model predicted this text:
FHKCIBJNOKDCHMIIKBIEO AMFT> LPOSDCSDRSP>?IGOGRBGLE?ILHS/ OED JO JHTRLND?REMPOR?KNCHDHDTMT?RS MAJN...
Finding the text 