In [279]:
from pathlib import Path
from collections import Counter

In [280]:
# Lowercase Latin alphabet; the decryption functions index into it to turn a
# letter difference back into a plaintext letter.
alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Directory whose files are decrypted one by one in the batch cell.
cipher_texts_dir = Path("../textos-cifrados-20230312")
# Sample ciphertexts used by the two single-file test cells.
test_english = "../test/cipher_text_english.txt"
test_portuguese = "../test/texto_cifrado_portugues.txt"

# Portuguese: the 5 most frequent letters, kept in alphabetical order because
# they are compared against a *sorted* top-5 of each candidate plaintext.
pt_fmfl = "aeors"

# English: the 5 most frequent letters, in alphabetical order for the same reason.
en_fmfl = "aeiot"

In [281]:
def index_of_coincidence(word):
    """Return the index of coincidence of ``word``, rounded to 3 decimals.

    The index of coincidence is the probability that two symbols drawn from
    ``word`` without replacement are equal:
    ``sum(c * (c - 1)) / (n * (n - 1))`` over the symbol counts ``c``,
    with ``n = len(word)``.

    Args:
        word: Any sequence of hashable symbols (here: a string).

    Returns:
        The index rounded to three decimal places. Inputs with fewer than
        two symbols contain no pair to compare and yield ``0.0`` instead of
        dividing by zero.
    """
    n = len(word)
    if n < 2:
        # n == 1 would divide by (n - 1) == 0; n == 0 has nothing to count.
        return 0.0

    # Keep numerator and denominator as exact integers and divide once, so
    # no floating-point error accumulates across the per-letter terms.
    coincident_pairs = sum(count * (count - 1) for count in Counter(word).values())
    return round(coincident_pairs / (n * (n - 1)), 3)

In [282]:
def find_key_length(cipher_text, targets=((0.067, 0.002), (0.081, 0.005))):
    """Estimate the key length of a periodic cipher via index of coincidence.

    For each candidate length ``m`` (starting at 1) the text is split into
    ``m`` interleaved columns, the index of coincidence of every column is
    printed as one table row, and the search stops at the first ``m`` whose
    mean column index is close enough to one of ``targets``.

    Args:
        cipher_text: The ciphertext to analyse.
        targets: Pairs ``(expected_ic, tolerance)``. A candidate is accepted
            when ``round(abs(mean_ic - expected_ic), 3) <= tolerance`` for
            any pair. The defaults are the values this notebook has always
            used: 0.067 +/- 0.002 and 0.081 +/- 0.005 (the recorded runs hit
            0.067 on the English sample and 0.081 on the Portuguese texts).

    Returns:
        The first accepted candidate length. If no candidate is accepted,
        the loop runs out and the value returned is simply the first integer
        that is not below ``len(cipher_text) / 2`` -- it is NOT a detected
        key length, so callers working on short or unusual texts should
        treat a result near half the text length with suspicion.
    """
    print("| m | Ic")
    print("|-------")

    key_length = 1
    while key_length < (len(cipher_text) / 2):
        print(f"| {key_length} |", end=" ")

        # Column i holds every key_length-th character starting at offset i,
        # i.e. all characters enciphered with the same key letter.
        columns = [cipher_text[offset::key_length] for offset in range(key_length)]
        indexes = [index_of_coincidence(column) for column in columns]
        print(indexes)

        # Computed once and compared against every target.
        mean_ic = sum(indexes) / len(indexes)
        if any(round(abs(mean_ic - expected), 3) <= tolerance
               for expected, tolerance in targets):
            break

        key_length += 1
    print()

    return key_length

In [283]:
def generate_possible_keys(cipher_text, key_length, max_keys=5):
    """Build candidate keys from per-column letter-frequency rankings.

    The ciphertext is split into ``key_length`` interleaved columns and the
    letters of each column are ranked from most to least frequent. The
    candidate for rank ``r`` is the string made of every column's ``r``-th
    most frequent letter. Because decryption subtracts the key letter from
    the cipher letter, such a candidate equals the real key whenever the
    plaintext letter sitting at that rank in every column is ``'a'``.

    Args:
        cipher_text: The ciphertext to analyse.
        key_length: Number of columns / length of the key.
        max_keys: Maximum number of candidates to return (default 5, the
            number previously hard-coded).

    Returns:
        Up to ``max_keys`` candidates, best-ranked first. Only ranks present
        in *every* column are used, so each returned key has exactly
        ``key_length`` letters; previously a column with too few distinct
        letters was skipped, producing misaligned short keys or empty
        strings. An empty list is returned when ``key_length < 1``.
    """
    if key_length < 1:
        return []

    columns = [cipher_text[offset::key_length] for offset in range(key_length)]
    rankings = [[letter for letter, _ in Counter(column).most_common()]
                for column in columns]

    # A rank is usable only if every column has a letter at that rank.
    usable_ranks = min(max_keys, min(len(ranking) for ranking in rankings))
    return ["".join(ranking[rank] for ranking in rankings)
            for rank in range(usable_ranks)]

In [284]:
def decrypt(cipher_text, possible_keys):
    """Try each candidate key and return the first plausible plaintext.

    Every character is shifted back by the key letter at the same position
    modulo the key length. A decryption is accepted when its five most
    frequent letters, sorted alphabetically, equal ``pt_fmfl`` or
    ``en_fmfl``; the accepted key is printed.

    Args:
        cipher_text: Ciphertext made of letters from ``alphabet``.
        possible_keys: Candidate keys, tried in order. Empty candidates are
            skipped (they previously raised ``ValueError`` because the key
            length was used as a ``range`` step).

    Returns:
        The plaintext for the first accepted key, or ``""`` if no candidate
        is accepted.
    """
    for key in possible_keys:
        if not key:
            continue

        # For lowercase letters the difference lies in -25..25; a negative
        # index counts from the end of the 26-letter alphabet, which is
        # exactly the wrap-around a modulo-26 subtraction needs.
        plaintext = "".join(
            alphabet[ord(char) - ord(key[position % len(key)])]
            for position, char in enumerate(cipher_text)
        )

        most_freq_chars = "".join(
            sorted(letter for letter, _ in Counter(plaintext).most_common(5))
        )

        if most_freq_chars in (pt_fmfl, en_fmfl):
            print(key, "\n")
            return plaintext

    return ""

In [302]:
def decrypt2(cipher_text, possible_keys, key_length):
    """Decrypt with the first candidate key that yields a plausible text.

    The ciphertext is cut into consecutive blocks of ``key_length``
    characters and each block is paired letter-by-letter with the candidate
    key. A candidate is accepted when the five most frequent letters of the
    result, sorted alphabetically, equal ``pt_fmfl`` or ``en_fmfl``; the
    accepted key is printed.

    Returns the accepted plaintext, or ``""`` when no candidate qualifies.
    """
    blocks = [cipher_text[start : start + key_length]
              for start in range(0, len(cipher_text), key_length)]

    for candidate in possible_keys:
        # A negative difference indexes from the end of the alphabet, which
        # gives the wrap-around for lowercase letters.
        decoded = "".join(
            alphabet[ord(symbol) - ord(shift)]
            for block in blocks
            for symbol, shift in zip(block, candidate)
        )

        top_five = "".join(sorted(letter for letter, _ in Counter(decoded).most_common(5)))

        if top_five in (pt_fmfl, en_fmfl):
            print(candidate, "\n")
            return decoded

    return ""

In [303]:
# End-to-end check on the English sample: estimate the key length, build the
# candidate keys, decrypt, and save the recovered plaintext.
with open(test_english, "r") as file:
    cipher_text = file.read()

key_length = find_key_length(cipher_text)

possible_keys = generate_possible_keys(cipher_text, key_length)
print(possible_keys)
print()

plaintext = decrypt2(cipher_text, possible_keys, key_length)
print("plaintext:", plaintext[:50])

# Take the file name from the path itself rather than slicing at a fixed
# offset, which silently breaks as soon as the directory prefix changes.
with open(f"../out/{Path(test_english).name}", "w") as file:
    file.write(plaintext)


| m | Ic
|-------
| 1 | [0.042]
| 2 | [0.042, 0.042]
| 3 | [0.042, 0.042, 0.042]
| 4 | [0.042, 0.042, 0.042, 0.042]
| 5 | [0.042, 0.042, 0.042, 0.042, 0.042]
| 6 | [0.042, 0.042, 0.042, 0.043, 0.042, 0.042]
| 7 | [0.067, 0.067, 0.067, 0.067, 0.067, 0.067, 0.067]

['qiyrsqi', 'fxnghfx', 'meunome', 'asibcas', 'umcvwum']

meunome 

plaintext: thisebookisfortheuseofanyoneanywhereatnocostandwit


In [304]:
# End-to-end check on the Portuguese sample.
with open(test_portuguese, "r") as file:
    cipher_text = file.read()

key_length = find_key_length(cipher_text)

possible_keys = generate_possible_keys(cipher_text, key_length)
print(possible_keys)
print()

# NOTE(review): the generated candidates are only printed; decryption uses a
# fixed key. In the recorded output the candidate list does not contain this
# key, which is presumably why it is hard-coded -- confirm before relying on it.
plaintext = decrypt(cipher_text, ["meunome"])
print("plaintext:", plaintext[:50])

# Take the file name from the path itself rather than slicing at a fixed
# offset, which silently breaks as soon as the directory prefix changes.
with open(f"../out/{Path(test_portuguese).name}", "w") as file:
    file.write(plaintext)

| m | Ic
|-------
| 1 | [0.046]
| 2 | [0.045, 0.046]
| 3 | [0.046, 0.046, 0.046]
| 4 | [0.045, 0.046, 0.046, 0.047]
| 5 | [0.046, 0.046, 0.046, 0.046, 0.046]
| 6 | [0.045, 0.046, 0.045, 0.047, 0.045, 0.046]
| 7 | [0.077, 0.074, 0.077, 0.076, 0.077, 0.076, 0.077]

['miyrsqe', 'qeunomi', 'asibcas', 'ewlfgew', 'uvmewuv']

meunome 

plaintext: quemhacincoentaannostivesseacoragemdepublicarumliv


In [287]:
# Batch run: decrypt every file in the ciphertext directory and write each
# recovered plaintext to ../out/ under the same file name.
# Path.iterdir() yields entries in arbitrary order, so sort them to make the
# printed report reproducible between runs and machines.
for cipher_text_file in sorted(cipher_texts_dir.iterdir()):
    if not cipher_text_file.is_file():
        # Sub-directories cannot be opened as ciphertexts.
        continue

    print(cipher_text_file.name, "\n")
    with open(cipher_text_file, "r") as file:
        cipher_text = file.read()

    key_length = find_key_length(cipher_text)

    possible_keys = generate_possible_keys(cipher_text, key_length)
    print(possible_keys)
    print()

    plaintext = decrypt(cipher_text, possible_keys)
    print("plaintext:", plaintext[:50])
    print()

    with open(f"../out/{cipher_text_file.name}", "w") as file:
        file.write(plaintext)


cipher1.txt 

| m | Ic
|-------
| 1 | [0.042]
| 2 | [0.045, 0.047]
| 3 | [0.042, 0.042, 0.042]
| 4 | [0.058, 0.058, 0.059, 0.054]
| 5 | [0.042, 0.042, 0.042, 0.042, 0.042]
| 6 | [0.045, 0.048, 0.045, 0.047, 0.045, 0.047]
| 7 | [0.042, 0.042, 0.042, 0.042, 0.042, 0.042, 0.042]
| 8 | [0.081, 0.081, 0.081, 0.081, 0.082, 0.081, 0.081, 0.082]

['gvmwxmer', 'cristian', 'qfwghwob', 'ujaklasf', 'tizjkzre']

cristian 

plaintext: bibliasagradatraducaojoaoferreiradealmeidaedicaore

cipher10.txt 

| m | Ic
|-------
| 1 | [0.043]
| 2 | [0.055, 0.048]
| 3 | [0.059, 0.065, 0.056]
| 4 | [0.055, 0.048, 0.055, 0.048]
| 5 | [0.043, 0.043, 0.043, 0.043, 0.043]
| 6 | [0.081, 0.082, 0.081, 0.081, 0.081, 0.081]

['verkip', 'rangel', 'fobusz', 'jsfywd', 'irexvc']

rangel 

plaintext: bibliasagradatraducaojoaoferreiradealmeidaedicaore

cipher11.txt 

| m | Ic
|-------
| 1 | [0.043]
| 2 | [0.043, 0.043]
| 3 | [0.05, 0.068, 0.052]
| 4 | [0.043, 0.043, 0.043, 0.043]
| 5 | [0.043, 0.043, 0.043, 0.043, 0.043]
| 6 