# Decryption with Heuristics and Pattern Matching

In [116]:
import pandas as pd
import re
from collections import Counter

# base name shared by the processed input CSV and the decoded output file
file_name = "countdown_24"
# when True, the final lineup and mappings are written to disk at the end
save = False

In [117]:
# load the processed cyphertext lineup and remember how many rows it has
cyphertext_path = f"../data/processed/{file_name}.csv"
df = pd.read_csv(cyphertext_path)
num_artists = df.shape[0]

# load the reference artist names, upper-cased and without surrounding spaces
ref_artists_path = "../data/artists.txt"
with open(ref_artists_path, "r") as file:
    artists = [line.upper().strip() for line in file.read().splitlines()]
df.head()

Unnamed: 0,cyphertext
0,ABC DBEFG HAIEGHJA
1,GJKLMBFA NEOHPJFAHE QR


In [118]:
# heuristic patterns: markers whose plaintext is known in advance, recognised
# by their shape and letter repetitions (via backreferences) rather than by
# their actual cyphertext characters
# "(XX XXX)": two characters, whitespace, three characters, in parentheses
DJ_SET_PATTERN = re.compile(r"\([A-Z0-9]{2}\s[A-Z0-9]{3}\)")  # DJ SET
# "(XXXXXX XXX)": characters 1 and 4 of the first word must be equal, and the
# second word must repeat characters 1, 5 and 6 of the first word
SUNSET_PATTERN = re.compile(
    r"\(([A-Z0-9])([A-Z0-9])([A-Z0-9])(\1)([A-Z0-9])([A-Z0-9])\s(\1)(\5)(\6)\)"
)  # SUNSET SET
# "(XXXXXXXXX XXX)": nine characters, whitespace, three characters, where the
# very first and the very last character must be equal
THROWBACK_PATTERN = re.compile(
    r"\(([A-Z0-9])([A-Z0-9]{8}\s[A-Z0-9]{2})(\1)\)"
)  # THROWBACK SET
# a three-character token whose first and third characters are equal, with a
# word on either side of it
B2B_PATTERN = re.compile(r"(\w+\s)([A-Z0-9])([A-Z0-9])(\2)(\s\w+)")  # B2B

In [119]:
# extract heuristic patterns
def extract_patterns(df, text):
    """Learn cypher-to-plain mappings from the known markers found in ``text``.

    Four markers are recognised through the module-level patterns: a
    parenthesised "DJ SET", "SUNSET SET" or "THROWBACK SET", and a "B2B"
    token standing between artist names.  Every marker found contributes its
    character substitutions, and the artist name(s) that remain once the
    marker is taken away are appended to ``df`` as extra ``cyphertext`` rows.

    Args:
        df: DataFrame with a ``cyphertext`` column.  It is not modified in
            place.
        text: One cyphertext lineup entry.

    Returns:
        A ``(df, mappings)`` tuple.  ``df`` is the input frame with the extra
        rows appended (re-indexed from 0) or the input frame itself when
        nothing was found; ``mappings`` maps cypher characters to plaintext
        characters.
    """
    mappings = {}
    new_names = []  # artist names to append, in discovery order
    marker_starts = []  # offsets where a parenthesised marker begins

    def learn_marker(found, cypher_chars, plain_chars):
        # The artist is whatever precedes the marker.  Using the match offset
        # instead of a fixed negative slice keeps this correct when the marker
        # is not the very last thing in the text or is not preceded by exactly
        # one space.
        mappings.update(zip(cypher_chars, plain_chars))
        marker_starts.append(found.start())
        new_names.append(text[: found.start()].strip())

    def clip(start, end):
        # Name between two offsets, cut short at a set-type marker so that a
        # trailing "(...)" suffix does not end up inside the artist name.
        cut = min((s for s in marker_starts if start <= s < end), default=end)
        return text[start:cut].strip()

    # DJ SET -- the pattern has no groups, so strip the parentheses and drop
    # whatever whitespace character ``\s`` matched between the two words
    for found in DJ_SET_PATTERN.finditer(text):
        learn_marker(found, "".join(found.group(0)[1:-1].split()), "DJSET")

    # SUNSET SET -- the first six groups spell the first word; the second
    # word only repeats characters that are already covered
    for found in SUNSET_PATTERN.finditer(text):
        learn_marker(found, found.groups()[:6], "SUNSET")

    # THROWBACK SET -- the groups joined without whitespace spell both words
    for found in THROWBACK_PATTERN.finditer(text):
        cypher = "".join("".join(found.groups()).split())
        learn_marker(found, cypher, "THROWBACKSET")

    # B2B -- search again right after each token (not after the word that
    # follows it) so chains such as "X B2B Y B2B Z" yield every token
    token_spans = []
    pos = 0
    while True:
        found = B2B_PATTERN.search(text, pos)
        if found is None:
            break
        span = (found.start(2), found.end(4))
        pos = span[1]
        token = text[span[0]:span[1]]
        # A substitution cypher encodes every "B2B" identically, so a later
        # candidate that differs from the first token is just a name word.
        if token_spans and token != text[slice(*token_spans[0])]:
            continue
        token_spans.append(span)
        mappings.update(zip(token, "B2"))

    if token_spans:
        # Everything between tokens is one artist, so multi-word names are
        # kept whole instead of only the word adjacent to the token.
        seg_start = 0
        for tok_start, tok_end in token_spans:
            new_names.append(clip(seg_start, tok_start))
            seg_start = tok_end
        new_names.append(clip(seg_start, len(text)))

    if new_names:
        extra_rows = pd.DataFrame({"cyphertext": new_names})
        df = pd.concat([df, extra_rows], ignore_index=True)

    return df, mappings

In [120]:
# cypher character -> plaintext character substitutions that are already
# known; init_mappings uses them as the starting point for the decryption
known_mappings = {
    "A": "N",
    "B": "E",
    "C": "W",
    "D": "Y",
    "E": "A",
    "F": "R",
    "G": "S",
    "H": "I",
    "I": "V",
    "J": "O",
    "K": "U",
    "L": "T",
    "M": "H",
    "N": "C",
    "O": "L",
    "P": "F",
    "Q": "1",
    "R": "8"
}

In [121]:
# initial cyphertext to plaintext mappings from heuristics
def init_mappings(df):
    """Build the starting cypher-to-plain mappings for a lineup.

    Starts from ``known_mappings`` and adds whatever ``extract_patterns``
    learns from each cyphertext entry; heuristic results overwrite known
    entries for the same cypher character.

    Args:
        df: DataFrame with a ``cyphertext`` column.

    Returns:
        A ``(df, mappings)`` tuple: the frame returned by the last
        ``extract_patterns`` call (original rows plus any rows it appended)
        and the combined mappings dict.
    """
    # Copy so the module-level known_mappings is not mutated here or by later
    # in-place updates of the returned dict; re-running starts from a clean
    # slate.
    mappings = dict(known_mappings)
    # The column is fetched once, so rows appended while looping are not
    # themselves scanned for patterns.
    for artist in df["cyphertext"]:
        df, new_mappings = extract_patterns(df, artist)
        mappings.update(new_mappings)
    return df, mappings


# seed the global mappings and pick up the helper rows found by the heuristics
df, mappings = init_mappings(df)
mappings

{'A': 'N',
 'B': 'E',
 'C': 'W',
 'D': 'Y',
 'E': 'A',
 'F': 'R',
 'G': 'S',
 'H': 'I',
 'I': 'V',
 'J': 'O',
 'K': 'U',
 'L': 'T',
 'M': 'H',
 'N': 'C',
 'O': 'L',
 'P': 'F',
 'Q': '1',
 'R': '8'}

In [122]:
# create initial plaintext
def create_plaintext(df):
    """Add a ``plaintext`` column in which every letter and digit is "-".

    Spaces and punctuation of the ``cyphertext`` column are kept as they
    are.  The frame is modified in place and nothing is returned.
    """
    def mask(text):
        return re.sub(r"[A-Za-z0-9]", "-", text)

    df["plaintext"] = df["cyphertext"].apply(mask)
# start with a fully masked plaintext column and preview the first rows
create_plaintext(df)
df.head()

Unnamed: 0,cyphertext,plaintext
0,ABC DBEFG HAIEGHJA,--- ----- --------
1,GJKLMBFA NEOHPJFAHE QR,-------- ---------- --


In [123]:
# update plaintext of row with new mappings
def replace_decoded(row):
    """Return the row's plaintext with every mapped cypher character decoded.

    Walks ``row["cyphertext"]`` and ``row["plaintext"]`` in lockstep: a
    cypher character found in the global ``mappings`` is replaced by its
    plaintext character, any other position keeps its current plaintext
    character.
    """
    pairs = zip(row["cyphertext"], row["plaintext"])
    return "".join(mappings.get(cypher, current) for cypher, current in pairs)


# update plaintext with new mappings
def update_plaintext(df):
    """Recompute the ``plaintext`` column of ``df`` row by row, in place.

    Each row is passed to ``replace_decoded``, which applies the current
    global mappings.
    """
    refreshed = df.apply(replace_decoded, axis=1)
    df["plaintext"] = refreshed
# apply the initial mappings to the masked plaintext and preview the result
update_plaintext(df)
df.head()

Unnamed: 0,cyphertext,plaintext
0,ABC DBEFG HAIEGHJA,NEW YEARS INVASION
1,GJKLMBFA NEOHPJFAHE QR,SOUTHERN CALIFORNIA 18


In [124]:
# match plaintext pattern with artist
def match_pattern(pattern, name):
    """Tell whether ``name`` is compatible with a partly decoded ``pattern``.

    Both strings must have the same length.  A "-" in the pattern is an
    undecoded position: there the name may hold neither a space nor a
    character that is already a value of the global ``mappings``.  Any other
    pattern character (spaces included) must equal the name's character.
    """
    if len(pattern) != len(name):
        return False

    taken = mappings.values()

    def fits(mask_char, name_char):
        if mask_char == "-":
            # an already decoded plaintext character would have been filled in
            return name_char != " " and name_char not in taken
        return mask_char == name_char

    return all(fits(p_char, n_char) for p_char, n_char in zip(pattern, name))


# match pattern with all artists
def match_artist(pattern):
    """Return the reference artists compatible with ``pattern``.

    The names are joined with ", " in the order they appear in the global
    ``artists`` list; ``None`` is returned when no artist fits.
    """
    candidates = []
    for artist in artists:
        if match_pattern(pattern, artist):
            candidates.append(artist)
    if not candidates:
        return None
    return ", ".join(candidates)


# match all plaintext patterns with artists
def match_artists():
    """Fill the ``matches`` column of the global ``df``.

    Every value of the ``plaintext`` column is passed to ``match_artist``.
    """
    candidates = df["plaintext"].apply(match_artist)
    df["matches"] = candidates


# look up candidate artists for every row and show the whole frame
match_artists()
df

Unnamed: 0,cyphertext,plaintext,matches
0,ABC DBEFG HAIEGHJA,NEW YEARS INVASION,
1,GJKLMBFA NEOHPJFAHE QR,SOUTHERN CALIFORNIA 18,


In [125]:
# set a row to be a specific artist and updates mappings
def update_row(row_num, artist):
    """Fix a row of the global ``df`` to ``artist`` and learn from it.

    The row's plaintext is set to ``artist``.  Each alphanumeric cypher
    character of that row that has no mapping yet is mapped to the artist
    character at the same position, unless that plaintext character is
    already the target of another mapping.  Afterwards the plaintext of
    every row and the ``matches`` column are recomputed.
    """
    df.at[row_num, "plaintext"] = artist
    cyphertext = df.at[row_num, "cyphertext"]

    # learn new substitutions without overriding existing ones
    for ct_char, pt_char in zip(cyphertext, artist):
        if not ct_char.isalnum():
            continue
        if ct_char in mappings or pt_char in mappings.values():
            continue
        mappings[ct_char] = pt_char

    # propagate the new substitutions to every row, then refresh candidates
    df["plaintext"] = df.apply(replace_decoded, axis=1)
    match_artists()

In [126]:
# decrypt all artists in a dataframe
def decrypt(df):
    """Repeatedly lock in unambiguous artists until nothing more resolves.

    A row is resolved when it has exactly one candidate in ``matches``, less
    than half of its plaintext is still "-", and the candidate is not
    already present in the ``plaintext`` column.  After each resolution the
    scan restarts from the first row, because ``update_row`` changes the
    plaintext and matches of all rows.

    ``update_row`` and ``match_artists`` work on the module-level ``df``, so
    the argument is expected to be that same frame.

    Returns:
        The frame without its ``matches`` column.
    """
    def resolve_next():
        # resolve the first eligible row; report whether one was found
        for row_num, row in df.iterrows():
            candidates = row["matches"]
            if not candidates:
                continue

            names = candidates.split(", ")
            if len(names) != 1:
                continue

            plaintext = row["plaintext"]
            if plaintext.count("-") / len(plaintext) >= 0.5:
                continue

            artist = names[0]
            if artist in df["plaintext"].tolist():
                continue

            update_row(row_num, artist)
            return True
        return False

    match_artists()
    progressed = True
    while progressed:
        progressed = resolve_next()
    return df.drop(columns=["matches"])
# decrypt, then keep only the first num_artists rows, i.e. drop the helper
# rows that extract_patterns appended after the original lineup
df = decrypt(df).head(num_artists)
df

Unnamed: 0,cyphertext,plaintext
0,ABC DBEFG HAIEGHJA,NEW YEARS INVASION
1,GJKLMBFA NEOHPJFAHE QR,SOUTHERN CALIFORNIA 18


In [127]:
# order mappings by their plaintext character
def sort_mappings(mappings):
    """Return a new dict with the items of ``mappings`` sorted by value.

    Items with equal values keep their original relative order; the input
    dict is left untouched.
    """
    pairs = list(mappings.items())
    pairs.sort(key=lambda pair: pair[1])
    return {cypher: plain for cypher, plain in pairs}

# replace the global mappings with the sorted copy and display it
mappings = sort_mappings(mappings)
mappings

{'Q': '1',
 'R': '8',
 'E': 'A',
 'N': 'C',
 'B': 'E',
 'P': 'F',
 'M': 'H',
 'H': 'I',
 'O': 'L',
 'A': 'N',
 'J': 'O',
 'F': 'R',
 'G': 'S',
 'L': 'T',
 'K': 'U',
 'I': 'V',
 'C': 'W',
 'D': 'Y'}

In [128]:
# write the decoded lineup followed by the mappings (only when save is set)
plaintext_path = f"../data/decoded_plaintext/{file_name}.txt"
if save:
    with open(plaintext_path, "w") as f:
        # one "cyphertext -> plaintext" line per lineup entry
        f.writelines(
            f"{cypher} -> {plain}\n"
            for cypher, plain in zip(df["cyphertext"], df["plaintext"])
        )

        # a blank line separates the lineup from the mappings
        f.write("\n")

        # one "cypher character -> plaintext character" line per mapping
        f.writelines(f"{key} -> {value}\n" for key, value in mappings.items())