## This notebook contains the code used for:
- Sections 3.2 (Vigenère Cipher) and 3.4 (Other Ciphers). In every cell the ciphertext for 3.2 is commented out and the ciphertext for 3.4 is the active one.

In [1]:
import pandas as pd
import re
import numpy as np
from collections import Counter

def clean_text(text):
    """Return ``text`` upper-cased with every character outside A-Z dropped."""
    uppercased = text.upper()
    return ''.join(ch for ch in uppercased if 'A' <= ch <= 'Z')

def indice_coincidencia(text):
    """
    Return the Index of Coincidence (IC) of ``text``.

    The text is first normalised with ``clean_text``; with f_i the number
    of occurrences of each letter and N the number of letters kept,

        IC = sum(f_i * (f_i - 1)) / (N * (N - 1))

    A text with fewer than two letters has no letter pairs, so 0.0 is
    returned for it.
    """
    letters = clean_text(text)
    total = len(letters)
    if total < 2:
        return 0.0

    # Letters that never occur would add 0 * (0 - 1) = 0 to the sum, so
    # tallying only the letters actually present gives the same result.
    coincidences = 0
    for count in Counter(letters).values():
        coincidences += count * (count - 1)

    return coincidences / (total * (total - 1))

def calcular_ic_por_m(text, max_m=8):
    """
    Tabulate the Index of Coincidence (IC) for candidate key lengths.

    For every key length m from 2 to ``max_m`` the cleaned text is split
    into its m columns (letters at positions i, i+m, i+2m, ...) and the IC
    of each column is computed with ``indice_coincidencia``.

    Parameters:
        text: ciphertext; it is normalised with ``clean_text`` first.
        max_m: largest key length to try (inclusive).

    Returns:
        A pandas DataFrame with one column per key length m and one row per
        text column index i. A key length m only has m text columns, so the
        remaining rows of that DataFrame column are NaN. When ``max_m`` is
        below 2 there is nothing to try and an empty DataFrame is returned.
    """
    text = clean_text(text)
    resultados = {
        m: [indice_coincidencia(text[i::m]) for i in range(m)]
        for m in range(2, max_m + 1)
    }

    # default=0 keeps max() from raising ValueError when max_m < 2 leaves
    # no key lengths to compare.
    max_rows = max((len(ics) for ics in resultados.values()), default=0)

    # The DataFrame constructor needs equally long lists: pad with NaN.
    for ics in resultados.values():
        ics.extend([np.nan] * (max_rows - len(ics)))

    return pd.DataFrame(resultados)

# Active ciphertext analysed by this cell (the section 3.4 exercise, per the notebook header).
texto_cifrado = """BNVSNSIHQCEELSSKKYERIFJKXUMBGYKAMQLJTYAVFBKVTDVBPVVRJYYLAOKYMPQSCGDLFSRLLPROYGESEBUUALRWXMMASAZLGLEDFJBZAVVPXWICGJXASCBYEHOSNMULKCEAHTQOKMFLEBKFXLRRFDTZXCIWBJSICBGAWDVYDHAVFJXZIBKCGJIWEAHTTOEWTUHKRQVVRGZBXYIREMMASCSPBNLHJMBLRFFJELHWEYLWISTFVVYFJCMHYUYRUFSFMGESIGRLWALSWMNUHSIMYYITCCQPZSICEHBCCMZFEGVJYOCDEMMPGHVAAUMELCMOEHVLTIPSUYILVGFLMVWDVYDBTHFRAYISYSGKVSUUHYHGGCKTMBLRX"""
# Alternative ciphertext (section 3.2, Vigenère Cipher); uncomment to analyse it instead.
# texto_cifrado = """KCCPKBGUFDPHQTYAVINRRTMVGRKDNBVFDETDGILTXRGUDDKOTFMBPVGEGLTGCKQRACQCWDNAWCRXIZAKFTLEWRPTYCQKYVXCHKFTPONCQQRHJVA
# JUWETMCMSPKQDYHJVDAHCTRLSVSKCGCZQQDZXGSFRLSWCWSJTBHAFSIASPRJAHKJRJUMVGKMITZHFPDISPZLVLGWTFPLKKEBDPGCEBSHCTJRWXBAFSPEZQNRWXCVYCGA
# ONWDDKACKAWBBIKFTIOVKCGGHJVLNHIFFSQESVYCLACNVRWBBIREPBBVFEXOSCDYGZWPFDTKFQIYCWHJVLNHIQIBTKHJVNPIST"""
# Compute the IC of every text column for each candidate key length m = 2..8.
# In the resulting table each DataFrame column is a key length and each row
# is a text column index; unused cells are NaN.
df_ic = calcular_ic_por_m(texto_cifrado, max_m=8)

# Bare expression so that the notebook renders the table as the cell output.
df_ic


Unnamed: 0,2,3,4,5,6,7,8
0,0.044333,0.044258,0.043468,0.045045,0.051203,0.039832,0.058279
1,0.046207,0.047863,0.056335,0.042523,0.061343,0.044724,0.055504
2,,0.048387,0.046517,0.03964,0.054997,0.044267,0.050879
3,,,0.04792,0.043317,0.070862,0.037736,0.046253
4,,,,0.037023,0.055526,0.046444,0.037928
5,,,,,0.069804,0.033382,0.064734
6,,,,,,0.046444,0.046377
7,,,,,,,0.050242


In [2]:
import math
from collections import defaultdict

def clean_text(text):
    """
    Return the text in upper case, keeping only the letters A-Z.

    Digits, whitespace, punctuation and letters outside the basic Latin
    alphabet (for example 'Ñ' or 'É') are all dropped. ``str.isalpha`` is
    deliberately not used because it accepts those non-ASCII letters, which
    would break the "only A-Z" contract that the rest of the analysis
    relies on.
    """
    return ''.join(c for c in text.upper() if 'A' <= c <= 'Z')

def find_trigrams(text):
    """
    Locate the repeated trigrams of ``text``.

    Returns a dictionary mapping each three-character substring that occurs
    at least twice to the ascending list of indices where it starts.
    Trigrams appear in the dictionary in order of first occurrence.
    """
    occurrences = {}
    for start in range(len(text) - 2):
        occurrences.setdefault(text[start:start + 3], []).append(start)

    # A trigram seen only once says nothing about the key period: drop it.
    repeated = {}
    for tri, where in occurrences.items():
        if len(where) > 1:
            repeated[tri] = where
    return repeated

def gcd_list(numbers):
    """
    Return the greatest common divisor (GCD) of a collection of integers.

    Parameters:
        numbers: iterable of integers (it may be empty).

    Returns:
        The non-negative GCD of all the values, or 0 when ``numbers`` is
        empty (0 is the identity of gcd, since gcd(0, n) == |n|).
    """
    # Folding from 0 handles the empty and single-element cases without
    # indexing into the sequence.
    g = 0
    for num in numbers:
        g = math.gcd(g, num)
    return g

def most_repeated_trigram(ciphertext):
    """
    Analyse the most frequently repeated trigram of the cleaned ciphertext.

    Returns a 4-tuple ``(trigram, positions, distances, gcd)`` where
    ``positions`` lists the start indices of the trigram, ``distances``
    holds the gaps between consecutive occurrences and ``gcd`` is the
    greatest common divisor of those gaps. When no trigram repeats, the
    tuple is ``(None, None, None, None)``.
    """
    repeated = find_trigrams(clean_text(ciphertext))
    if not repeated:
        return None, None, None, None

    # max() returns the first maximum, so on a tie the trigram that was
    # found earliest wins.
    trigram = max(repeated, key=lambda tg: len(repeated[tg]))
    pos_list = repeated[trigram]

    # Gaps between each occurrence and the next one.
    distances = [later - earlier for earlier, later in zip(pos_list, pos_list[1:])]

    if distances:
        gcd_val = gcd_list(distances)
    else:
        gcd_val = 0

    return trigram, pos_list, distances, gcd_val

if __name__ == "__main__":
    # Active ciphertext. The line breaks and indentation inside the literal
    # are harmless: most_repeated_trigram cleans the text before analysing it.
    ciphertext= """BNVSNSIHQCEELSSKKYERIFJKXUMBGYKAMQLJTYAVFBKVTDVBPVVRJYYLAOKYMPQSCGDLFSRLLPROYGESEBUUALRWXMMASAZLGLEDFJBZ
    AVVPXWICGJXASCBYEHOSNMULKCEAHTQOKMFLEBKFXLRRFDTZXCIWBJSICBGAWDVYDHAVFJXZIBKCGJIWEAHTTOEWTUHKRQVVRGZBXYIREMMASCSPBNLHJMBL
    RFFJELHWEYLWISTFVVYFJCMHYUYRUFSFMGESIGRLWALSWMNUHSIMYYITCCQPZSICEHBCCMZFEGVJYOCDEMMPGHVAAUMELCMOEHVLTIPSUYILVGFLMVWDVYDB
    THFRAYISYSGKVSUUHYHGGCKTMBLRX"""
    # Alternative ciphertext (section 3.2); uncomment to analyse it instead.
    # ciphertext = """KCCPKBGUFDPHQTYAVINRRTMVGRKDNBVFDETDGILTXRGUDDKOTFMBPVGEGLTGCKQRACQCWDNAWCRXIZAKFTLEWRPTYCQKYVXCHKFTPONCQQRHJVA
    # JUWETMCMSPKQDYHJVDAHCTRLSVSKCGCZQQDZXGSFRLSWCWSJTBHAFSIASPRJAHKJRJUMVGKMITZHFPDISPZLVLGWTFPLKKEBDPGCEBSHCTJRWXBAFSPEZQNRWXCVYCGA
    # ONWDDKACKAWBBIKFTIOVKCGGHJVLNHIFFSQESVYCLACNVRWBBIREPBBVFEXOSCDYGZWPFDTKFQIYCWHJVLNHIQIBTKHJVNPIST"""

    # Report the most repeated trigram, where it occurs, the gaps between
    # occurrences and the GCD of those gaps.
    trigram, positions, distances, gcd_val = most_repeated_trigram(ciphertext)
    if trigram is not None:
        print("Trigrama más repetido:", trigram)
        print("Posiciones:", positions)
        print("Distancias entre apariciones:", distances)
        print("GCD de las distancias:", gcd_val)
    else:
        # most_repeated_trigram returns None values when no trigram repeats.
        print("No se encontraron trigramas repetidos.")


Trigrama más repetido: AVF
Posiciones: [38, 170]
Distancias entre apariciones: [132]
GCD de las distancias: 132


In [3]:

# Relative frequency of each letter in English text, indexed A=0 ... Z=25.
# These are the probabilities p_i used by the correlation formula M_g in
# calcula_Mg_subcadena.
english_freq = [
    0.0820,  # A
    0.0150,  # B
    0.0280,  # C
    0.0430,  # D
    0.1270,  # E
    0.0220,  # F
    0.0200,  # G
    0.0610,  # H
    0.0700,  # I
    0.0020,  # J
    0.0080,  # K
    0.0400,  # L
    0.0240,  # M
    0.0670,  # N
    0.0750,  # O
    0.0190,  # P
    0.0010,  # Q
    0.0600,  # R
    0.0630,  # S
    0.0910,  # T
    0.0280,  # U
    0.0100,  # V
    0.0230,  # W
    0.0010,  # X
    0.0200,  # Y
    0.0010   # Z
]


def subcadena_por_columna(texto, i, m):
    """
    Return the substring y_i of ``texto``: the letters found at positions
    i, i+m, i+2m, ... once the text has been upper-cased and reduced to the
    letters A-Z.
    """
    letras = [ch for ch in texto.upper() if 'A' <= ch <= 'Z']
    columna = letras[i::m]
    return ''.join(columna)


def frecuencias_letras(cadena):
    """
    Count how often each letter occurs in ``cadena``.

    Returns a list f of 26 integers where f[j] is the number of occurrences
    of the j-th letter (A=0, B=1, ..., Z=25). The count is case-insensitive
    and characters outside A-Z are ignored.
    """
    f = [0] * 26
    base = ord('A')
    for c in cadena.upper():
        # Without this guard a digit such as '5' gives a negative offset,
        # which Python accepts as an index from the end of the list and
        # would silently count as the wrong letter; other characters would
        # raise IndexError.
        if 'A' <= c <= 'Z':
            f[ord(c) - base] += 1
    return f


def calcula_Mg_subcadena(freq_sub, n_sub, english_freq):
    """
    Compute the correlation values M_0 .. M_25 for one substring.

    ``freq_sub`` is the 26-entry letter count of the substring, ``n_sub``
    its length and ``english_freq`` the reference probabilities p_i:

        M_g = sum_{i=0..25} p_i * (freq_sub[(i + g) mod 26] / n_sub)

    Returns the list [M_0, M_1, ..., M_25].
    """
    # Relative frequency of every letter in the substring, computed once.
    relative = [count / n_sub for count in freq_sub]

    correlations = []
    for shift in range(26):
        acc = 0.0
        for letter in range(26):
            # p_letter times the relative frequency of the shifted letter
            acc += english_freq[letter] * relative[(letter + shift) % 26]
        correlations.append(acc)
    return correlations

# Find MG
def analizar_vigenere_correlacion(ciphertext, m):
    """
    Recover a Vigenère key of length ``m`` by frequency correlation.

    For each of the m substrings y_i (i = 0..m-1) of the cleaned ciphertext:
      1) compute the 26 correlation values M_g,
      2) print them as a table,
      3) take the g with the largest M_g as the shift k_i.
    An empty substring is reported and assigned shift 0.

    Returns a tuple ``(shifts, keyword)``: the list of shifts and the
    keyword obtained by mapping each shift to a letter (0 -> 'A').
    """
    # Normalise the text: upper case, letters A-Z only.
    letters = ''.join(filter(lambda ch: 'A' <= ch <= 'Z', ciphertext.upper()))
    shifts_found = []

    print(f"Analizando texto con m = {m} (longitud de clave)")
    print("=======================================================================")

    for col in range(m):
        column = subcadena_por_columna(letters, col, m)
        size = len(column)

        if size > 0:
            # Letter counts of the substring and its correlation table.
            table = calcula_Mg_subcadena(frecuencias_letras(column), size, english_freq)

            print(f"\nSubcadena i={col} (tamaño={size}):")
            print("  g   M_g")
            for g, value in enumerate(table):
                print(f" {g:2d}  {value:.4f}")

            # The shift with the highest correlation (first one on ties).
            best = max(range(26), key=table.__getitem__)
            shifts_found.append(best)

            print(f" -> Mejor g para i={col} es {best}, con M_g={table[best]:.4f}")
        else:
            # Nothing to correlate: record a neutral shift and move on.
            shifts_found.append(0)
            print(f"Subcadena i={col}: (vacía)")

    # Map every shift to its letter to spell the keyword.
    word = ''.join(chr(ord('A') + shift) for shift in shifts_found)

    print("\n=======================================================================")
    print("Shifts finales (k_i) =", shifts_found)
    print("Keyword reconstruida =", word)

    return shifts_found, word


if __name__ == "__main__":
    # Example ciphertext (replace it with your own text). Line breaks and
    # indentation inside the literal are removed by the analysis function.
    ciphertext= """BNVSNSIHQCEELSSKKYERIFJKXUMBGYKAMQLJTYAVFBKVTDVBPVVRJYYLAOKYMPQSCGDLFSRLLPROYGESEBUUALRWXMMASAZLGLEDFJBZ
    AVVPXWICGJXASCBYEHOSNMULKCEAHTQOKMFLEBKFXLRRFDTZXCIWBJSICBGAWDVYDHAVFJXZIBKCGJIWEAHTTOEWTUHKRQVVRGZBXYIREMMASCSPBNLHJMBL
    RFFJELHWEYLWISTFVVYFJCMHYUYRUFSFMGESIGRLWALSWMNUHSIMYYITCCQPZSICEHBCCMZFEGVJYOCDEMMPGHVAAUMELCMOEHVLTIPSUYILVGFLMVWDVYDB
    THFRAYISYSGKVSUUHYHGGCKTMBLRX"""
# Alternative ciphertext (section 3.2); uncomment to analyse it instead.
#     ciphertext = """
# KCCPKBGUFDPHQTYAVINRRTMVGRKDNBVFDETDGILTXRGUDDKOTFMBPVGEGLTGCKQRACQCWDNAWCRXIZAKFTLEWRPTYCQKYVXCHKFTPONCQQRHJVA
# JUWETMCMSPKQDYHJVDAHCTRLSVSKCGCZQQDZXGSFRLSWCWSJTBHAFSIASPRJAHKJRJUMVGKMITZHFPDISPZLVLGWTFPLKKEBDPGCEBSHCTJRWXBAFSPEZQNRWXCVYCGA
# ONWDDKACKAWBBIKFTIOVKCGGHJVLNHIFFSQESVYCLACNVRWBBIREPBBVFEXOSCDYGZWPFDTKFQIYCWHJVLNHIQIBTKHJVNPIST
#     """
    
    # Key length assumed for this ciphertext: m = 6. In the IC table of the
    # first cell the m = 6 column has the highest values, and 6 divides the
    # trigram distance 132 reported by the second cell.
    m = 6
    
    # Run the whole correlation analysis.
    shifts, keyword = analizar_vigenere_correlacion(ciphertext, m)
    
    # 'shifts' now holds the list of shifts
    # and 'keyword' the estimated keyword.


Analizando texto con m = 6 (longitud de clave)

Subcadena i=0 (tamaño=63):
  g   M_g
  0  0.0407
  1  0.0359
  2  0.0334
  3  0.0343
  4  0.0456
  5  0.0371
  6  0.0397
  7  0.0348
  8  0.0465
  9  0.0345
 10  0.0365
 11  0.0357
 12  0.0443
 13  0.0355
 14  0.0280
 15  0.0408
 16  0.0427
 17  0.0338
 18  0.0350
 19  0.0610
 20  0.0410
 21  0.0326
 22  0.0337
 23  0.0455
 24  0.0343
 25  0.0377
 -> Mejor g para i=0 es 19, con M_g=0.0610

Subcadena i=1 (tamaño=62):
  g   M_g
  0  0.0408
  1  0.0349
  2  0.0327
  3  0.0439
  4  0.0373
  5  0.0366
  6  0.0368
  7  0.0681
  8  0.0369
  9  0.0326
 10  0.0284
 11  0.0468
 12  0.0307
 13  0.0392
 14  0.0366
 15  0.0358
 16  0.0337
 17  0.0393
 18  0.0479
 19  0.0397
 20  0.0454
 21  0.0335
 22  0.0423
 23  0.0361
 24  0.0355
 25  0.0295
 -> Mejor g para i=1 es 7, con M_g=0.0681

Subcadena i=2 (tamaño=62):
  g   M_g
  0  0.0529
  1  0.0355
  2  0.0310
  3  0.0379
  4  0.0611
  5  0.0362
  6  0.0304
  7  0.0369
  8  0.0458
  9  0.0283
 10  0.033

In [4]:
def vigenere_decrypt(ciphertext, key):
    """
    Decrypt ``ciphertext`` with the Vigenère cipher using ``key``.

    Parameters:
        ciphertext: text to decrypt. Letters A-Z and a-z are decrypted and
            keep their case; every other character (spaces, line breaks,
            digits, punctuation, non-ASCII letters) is copied unchanged.
        key: keyword made of the letters A-Z (case-insensitive).

    Returns:
        The decrypted text, with the same length as ``ciphertext``.

    Raises:
        ValueError: if ``key`` is empty or contains a non A-Z character.

    The key position advances only when a letter is decrypted, so spaces or
    line breaks inside the ciphertext do not shift the key out of step.
    """
    if not key or not all('A' <= k <= 'Z' or 'a' <= k <= 'z' for k in key):
        raise ValueError("key must be a non-empty string of letters A-Z")

    base_upper = ord('A')
    base_lower = ord('a')
    shifts = [ord(k) - base_upper for k in key.upper()]
    key_length = len(shifts)

    pieces = []
    letters_seen = 0  # number of letters decrypted so far = key position
    for char in ciphertext:
        if 'A' <= char <= 'Z':
            base = base_upper
        elif 'a' <= char <= 'z':
            base = base_lower
        else:
            # Not a letter of the cipher alphabet: keep it and do not
            # consume a key letter.
            pieces.append(char)
            continue

        shift = shifts[letters_seen % key_length]
        # Subtract the key shift modulo 26 within the letter's own case.
        pieces.append(chr((ord(char) - base - shift) % 26 + base))
        letters_seen += 1

    return ''.join(pieces)

# Ciphertext to decrypt. The section 3.2 text below is kept commented out
# for reference; the active text follows it.
# ciphertext = """
# KCCPKBGUFDPHQTYAVINRRTMVGRKDNBVFDETDGILTXRGUD
# DKOTFMBPVGEGLTGCKQRACQCWDNAWCRXIZAKFTLEWRPTYC
# QKYVXCHKFTPONCQQRHJVAJUWETMCMSPKQDYHJVDAHCTRL
# SVSKCGCZQQDZXGSFRLSWCWSJTBHAFSIASPRJAHKJRJUMV
# GKMITZHFPDISPZLVLGWTFPLKKEBDPGCEBSHCTJRWXBAFS
# PEZQNRWXCVYCGAONWDDKACKAWBBIKFTIOVKCGGHJVLNHI
# FFSQESVYCLACNVRWBBIREPBBVFEXOSCDYGZWPFDTKFQIY
# CWHJVLNHIQIBTKHJVNPIST
# """
ciphertext= """BNVSNSIHQCEELSSKKYERIFJKXUMBGYKAMQLJTYAVFBKVTDVBPVVRJYYLAOKYMPQSCGDLFSRLLPROYGESEBUUALRWXMMASAZLGLEDFJBZ
    AVVPXWICGJXASCBYEHOSNMULKCEAHTQOKMFLEBKFXLRRFDTZXCIWBJSICBGAWDVYDHAVFJXZIBKCGJIWEAHTTOEWTUHKRQVVRGZBXYIREMMASCSPBNLHJMBL
    RFFJELHWEYLWISTFVVYFJCMHYUYRUFSFMGESIGRLWALSWMNUHSIMYYITCCQPZSICEHBCCMZFEGVJYOCDEMMPGHVAAUMELCMOEHVLTIPSUYILVGFLMVWDVYDB
    THFRAYISYSGKVSUUHYHGGCKTMBLRX"""

# Remove the line breaks and indentation spaces that the triple-quoted
# literal introduces, leaving one continuous run of letters.
ciphertext = ciphertext.replace("\n", "").replace(" ", "")

# Key used for decryption.
# NOTE(review): presumably the keyword recovered by the correlation analysis
# in the previous cell (its first shifts, 19 and 7, are 'T' and 'H') - confirm.
key = "THEORY"

# Decrypt the ciphertext and show the result.
plaintext = vigenere_decrypt(ciphertext, key)
print("Decrypted Plaintext:")
print(plaintext)


Decrypted Plaintext:
IGREWUPAMONGSLOWTALKERSMENINPARTICULARWHODROPPEDWORDSAFEWATATIMELIKEBEANSINAHILLANDWHENIGOTTOMINNEAPOLISWHEREPEOPLETOOKALAKEWOBEGONCOMMATOMEANTHEENDOFASTORYICOULDNTSPEAKAWHOLESENTENCEINCOMPANYANDWASCONSIDEREDNOTTOOBRIGHTSOIENROLLEDINASPEECHCOURSETAUGHTBYORVILLESANDTHEFOUNDEROFREFLEXIVERELAXOLOGYASELFHYPNOTICTECHNIQUETHATENABLEDAPERSONTOSPEAKUPTOTHREEHUNDREDWORDSPERMINUTE
