# Decryption with Heuristics and Machine Learning

In [303]:
import pandas as pd
import re
from collections import Counter

# read processed data
cyphertext_path = "../data/processed/ezoo_23.csv"
df = pd.read_csv(cyphertext_path)
# remember how many rows came from the file: decrypt() trims the frame back to
# this many rows before returning it
num_artists = len(df)

# read reference artists (one name per line)
ref_artists_path = "../data/artists.txt"
# explicit encoding: without it open() falls back to the platform locale, so
# the same file could decode differently from one machine to the next
with open(ref_artists_path, "r", encoding="utf-8") as file:
    artists = file.read().splitlines()
df.head()

Unnamed: 0,cyphertext
0,IFQYO9PNY 62PG
1,I2W30O S0OFYN2IOF
2,I92WYO3
3,G2IOEY
4,9TY 6TIWO3D0EYN3 (3PO3Y9 3Y9)


In [304]:
# heuristic patterns: lineup annotations whose plaintext is known in advance.
# The backreferences require plaintext letters that repeat to be encrypted to
# the same cyphertext symbol.
DJ_SET_PATTERN = re.compile(r"\([A-Z0-9]{2} [A-Z0-9]{3}\)")  # (DJ SET)
SUNSET_PATTERN = re.compile(
    r"\(([A-Z0-9])([A-Z0-9])([A-Z0-9])(\1)([A-Z0-9])([A-Z0-9]) (\1)(\5)(\6)\)"
)  # (SUNSET SET): groups 1-6 spell SUNSET, groups 7-9 repeat S, E, T
B2B_PATTERN = re.compile(r"(\w+\s)([A-Z0-9])([A-Z0-9])(\2)(\s\w+)")  # B2B


# extract heuristic patterns
def extract_patterns(df, text):
    """Recover known-plaintext letters from a single cyphertext entry.

    Searches ``text`` for the encrypted annotations "(DJ SET)" and
    "(SUNSET SET)" and for the "B2B" separator. Each hit contributes
    cyphertext -> plaintext letter pairs, and the artist name(s) found next to
    the hit are appended to the frame as extra ``cyphertext`` rows.

    Args:
        df: DataFrame with a ``cyphertext`` column. It is not modified in
            place; a new frame is returned when rows are added.
        text: one cyphertext lineup entry.

    Returns:
        A ``(df, mappings)`` tuple. ``df`` is the input frame itself when
        nothing was extracted, otherwise a re-indexed copy with the extracted
        names appended. ``mappings`` maps cyphertext characters to plaintext
        characters.
    """
    mappings = {}
    extracted = []  # artist names pulled out of this entry, in discovery order

    # DJ SET: the pattern has no groups, so group(0) is the whole "(XX XXX)"
    for match in DJ_SET_PATTERN.finditer(text):
        cypher_dj_set = match.group(0)[1:-1].replace(" ", "")
        mappings.update(zip(cypher_dj_set, "DJSET"))
        # the artist is whatever precedes the annotation; slicing at the match
        # start stays correct when the annotation is not the very last thing
        # in the string
        extracted.append(text[: match.start()].strip())

    # SUNSET SET: the first six groups are the cyphertext for S, U, N, S, E, T
    for match in SUNSET_PATTERN.finditer(text):
        mappings.update(zip(match.groups()[:6], "SUNSET"))
        extracted.append(text[: match.start()].strip())

    # B2B: groups 2 and 3 are the cyphertext for "B" and "2"
    for match in B2B_PATTERN.finditer(text):
        mappings.update(zip(match.group(2, 3), "B2"))
        # create new rows for b2b artists
        # NOTE: groups 1 and 5 capture only the single word on either side of
        # the separator, so multi-word names are truncated to that word
        extracted.extend(name.strip() for name in match.group(1, 5))

    # an entry consisting solely of an annotation yields an empty name; a blank
    # row could never be matched, so it is not added
    extracted = [name for name in extracted if name]
    if extracted:
        # single concat per call instead of one per extracted name
        new_rows = pd.DataFrame({"cyphertext": extracted})
        df = pd.concat([df, new_rows], ignore_index=True)

    return df, mappings

In [305]:
# seed the cyphertext -> plaintext table from the heuristic patterns
def init_mappings(df):
    """Run extract_patterns over every ``cyphertext`` value and merge the results.

    Args:
        df: DataFrame with a ``cyphertext`` column.

    Returns:
        ``(df, mappings)``: the frame returned by the last extract_patterns
        call and the union of all letter mappings it reported (a later entry
        overwrites an earlier one for the same cyphertext character).
    """
    seeded = {}
    # the column is looked up once, when the loop starts; rebinding ``df`` in
    # the body does not change what is being iterated
    for cypher_name in df["cyphertext"]:
        df, discovered = extract_patterns(df, cypher_name)
        seeded.update(discovered)
    return df, seeded

# seed the module-level mappings from the heuristic patterns; the frame returned
# by init_mappings replaces df
df, mappings = init_mappings(df)
mappings  # bare expression: shown as the cell output

{'3': 'S', 'P': 'U', 'O': 'N', 'Y': 'E', '9': 'T', 'G': 'B', '4': '2'}

In [306]:
# build the initial, fully masked plaintext column
def create_plaintext(df):
    """Add a ``plaintext`` column in which every ASCII letter or digit is "-".

    Characters other than ``[A-Za-z0-9]`` (spaces, parentheses, ...) are copied
    through unchanged. The frame is modified in place; nothing is returned.
    """
    alnum = re.compile(r"[A-Za-z0-9]")

    def mask(cypher_name):
        return alnum.sub("-", cypher_name)

    df["plaintext"] = df["cyphertext"].apply(mask)
# add the masked "plaintext" column to df in place, then preview the first rows
create_plaintext(df)
df.head()

Unnamed: 0,cyphertext,plaintext
0,IFQYO9PNY 62PG,--------- ----
1,I2W30O S0OFYN2IOF,------ ----------
2,I92WYO3,-------
3,G2IOEY,------
4,9TY 6TIWO3D0EYN3 (3PO3Y9 3Y9),--- ------------ (------ ---)


In [307]:
# re-derive one row's plaintext from the current mappings
def replace_decoded(row):
    """Return the row's plaintext with every known cyphertext character decoded.

    Walks ``row["cyphertext"]`` and ``row["plaintext"]`` in lockstep: a
    cyphertext character that is a key of the module-level ``mappings`` is
    replaced by its mapped value, any other position keeps the character
    already present in the plaintext.
    """
    return "".join(
        mappings.get(cypher_char, current_char)
        for cypher_char, current_char in zip(row["cyphertext"], row["plaintext"])
    )


# refresh the plaintext column from the current mappings
def update_plaintext(df):
    """Recompute ``plaintext`` for every row by applying replace_decoded row-wise.

    The frame is modified in place; nothing is returned.
    """
    refreshed = df.apply(replace_decoded, axis=1)
    df["plaintext"] = refreshed
# apply the mappings known so far to every row, then preview the first rows
update_plaintext(df)
df.head()

Unnamed: 0,cyphertext,plaintext
0,IFQYO9PNY 62PG,---ENTU-E --UB
1,I2W30O S0OFYN2IOF,---S-N --N-E---N-
2,I92WYO3,-T--ENS
3,G2IOEY,B--N-E
4,9TY 6TIWO3D0EYN3 (3PO3Y9 3Y9),T-E ----NS---E-S (SUNSET SET)


In [308]:
# test whether an artist name fits a partially decoded pattern
def match_pattern(pattern, name):
    """Return True when ``name`` is compatible with ``pattern``.

    The two strings must have the same length. Position by position:
    a "-" in the pattern accepts any character except a space or a character
    that already appears among the values of the module-level ``mappings``;
    every other pattern character must equal the name's character.
    """
    if len(pattern) != len(name):
        return False

    def fits(slot, letter):
        if slot != "-":
            # decoded letters, spaces and punctuation must line up exactly
            return slot == letter
        # an undecoded slot cannot hide a letter that is already decoded
        return letter not in mappings.values() and letter != " "

    return all(fits(slot, letter) for slot, letter in zip(pattern, name))


# collect every reference artist that fits a pattern
def match_artist(pattern):
    """Return the fitting names from ``artists`` joined by ", ", or None if none fit.

    A name fits when match_pattern(pattern, name) is true.
    """
    candidates = list(filter(lambda name: match_pattern(pattern, name), artists))
    if not candidates:
        return None
    return ", ".join(candidates)


# recompute the candidate artists for every row
def match_artists():
    """Fill the ``matches`` column of the module-level ``df``.

    Each row's ``plaintext`` is passed to match_artist and the result is
    stored in ``matches``.
    """
    candidates = df["plaintext"].apply(match_artist)
    df["matches"] = candidates


# compute the candidate artists for every row of df, then display the whole frame
match_artists()
df

Unnamed: 0,cyphertext,plaintext,matches
0,IFQYO9PNY 62PG,---ENTU-E --UB,ADVENTURE CLUB
1,I2W30O S0OFYN2IOF,---S-N --N-E---N-,ALISON WONDERLAND
2,I92WYO3,-T--ENS,ATLIENS
3,G2IOEY,B--N-E,BLANKE
4,9TY 6TIWO3D0EYN3 (3PO3Y9 3Y9),T-E ----NS---E-S (SUNSET SET),
5,YR6W3W0O,E---S--N,EXCISION
6,7I2IO9W3,----NT-S,GALANTIS
7,7TYO7IN,--EN---,GHENGAR
8,7NW8,----,"GRIZ, KYGO"
9,7NMKKWO (3PO3Y9 3Y9),------N (SUNSET SET),


In [309]:
# accept an artist for one row and propagate the letters this reveals
def update_row(row_num, artist):
    """Set a row's plaintext to ``artist`` and extend ``mappings`` accordingly.

    Works on the module-level ``df`` and ``mappings``. After the new letter
    pairs are recorded, the plaintext of every row is recomputed with
    replace_decoded and the ``matches`` column is refreshed via match_artists.

    Args:
        row_num: index label of the row in ``df``.
        artist: plaintext name to assign to that row.
    """
    df.at[row_num, "plaintext"] = artist
    cypher_name = df.at[row_num, "cyphertext"]

    for cypher_char, plain_char in zip(cypher_name, artist):
        # spaces and punctuation carry no mapping
        if not cypher_char.isalnum():
            continue
        # never overwrite an existing pair, in either direction
        if cypher_char in mappings:
            continue
        if plain_char in mappings.values():
            continue
        mappings[cypher_char] = plain_char

    # the new pairs may decode characters in any row, so redo all of them
    df["plaintext"] = df.apply(replace_decoded, axis=1)

    # the candidate lists depend on the plaintext and on mappings
    match_artists()

In [310]:
# decrypt all artists in a dataframe
def decrypt(df):
    """Repeatedly accept unambiguous matches until no row yields a new one.

    Each pass scans the rows in order and accepts the first row that has
    exactly one candidate in ``matches``, has fewer than half of its plaintext
    characters still "-", and whose candidate is not already the plaintext of
    some row. Accepting goes through update_row (which acts on the
    module-level ``df``), after which the scan restarts from the top. The loop
    ends when a full pass accepts nothing.

    Returns:
        The first ``num_artists`` rows of ``df`` without the ``matches``
        column.
    """
    progressed = True
    while progressed:
        progressed = False
        for row_num, row in df.iterrows():
            candidates = row["matches"]
            if not candidates:
                continue

            names = candidates.split(", ")
            partial = row["plaintext"]
            unknown = partial.count("-")
            # only trust a unique candidate on a row that is mostly decoded
            if len(names) != 1 or unknown / len(partial) >= 0.5:
                continue

            artist = names[0]
            # this name is already the plaintext of a row
            if artist in df["plaintext"].tolist():
                continue

            update_row(row_num, artist)
            progressed = True
            # the frame's plaintext and matches were just rewritten: rescan
            break

    return df.head(num_artists).drop(columns=["matches"])
# run the solver; decrypt returns the first num_artists rows without the
# "matches" column, and that frame replaces df
df = decrypt(df)
df

Unnamed: 0,cyphertext,plaintext
0,IFQYO9PNY 62PG,ADVENTURE CLUB
1,I2W30O S0OFYN2IOF,ALISON WONDERLAND
2,I92WYO3,ATLIENS
3,G2IOEY,BLANKE
4,9TY 6TIWO3D0EYN3 (3PO3Y9 3Y9),THE CHAINSMOKERS (SUNSET SET)
5,YR6W3W0O,EXCISION
6,7I2IO9W3,GALANTIS
7,7TYO7IN,GHENGAR
8,7NW8,GRIZ
9,7NMKKWO (3PO3Y9 3Y9),GRYFFIN (SUNSET SET)


In [311]:
# order the mappings by their plaintext character
def sort_mappings(mappings):
    """Return a new dict with the same pairs, ordered by value.

    Pairs with equal values keep their original relative order; the input
    dict is left untouched.
    """
    ordered_keys = sorted(mappings, key=mappings.get)
    return {cypher_char: mappings[cypher_char] for cypher_char in ordered_keys}

# rebind mappings to the value-sorted copy and display it
mappings = sort_mappings(mappings)
mappings

{'4': '2',
 'I': 'A',
 'G': 'B',
 '6': 'C',
 'F': 'D',
 'Y': 'E',
 'K': 'F',
 '7': 'G',
 'T': 'H',
 'W': 'I',
 'V': 'J',
 'E': 'K',
 '2': 'L',
 'D': 'M',
 'O': 'N',
 '0': 'O',
 'L': 'P',
 'N': 'R',
 '3': 'S',
 '9': 'T',
 'P': 'U',
 'Q': 'V',
 'S': 'W',
 'R': 'X',
 'M': 'Y',
 '8': 'Z'}

In [312]:
# write final lineup
# plaintext_path = "../data/decoded_plaintext/countdown_23.txt"
# with open(plaintext_path, 'w') as f:
#     # write lineup
#     for index, row in df.iterrows():
#         f.write(f"{row['cyphertext']} -> {row['plaintext']}\n")
    
#     f.write("\n")
    
#     # write mappings
#     for key, value in mappings.items():
#         f.write(f"{key} -> {value}\n")