In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Ciphertext to analyse, split into a NumPy array of single characters.
# (Originally labelled "encrypted key"; the helpers below treat it as the encrypted message.)
c = np.array(list("""VVHQWVVRHMUSGJGTHKIHTSSEJCHLSFCBGVWCRLRYQTFSVGAHWKCUHWAUGLQHNSLRLJSHBLTSPISPRDXLJSVEEGHLQWKASSKUWEPWQTWVSPGOELKCQYFNSVWLJSNIQKGNRGYBWLWGOVIOKHKAZKQKXZGYHCECMEIUJOQKWFWVEFQHKIJRCLRLKBIENQFRJLJSDHGRHLSFQTWLAUQRHWDMWLGUSGIKKFLRYVCWVSPGPMLKASSJVOQXEGGVEYGGZMLJCXXLJSVPAIVWIKVRDRYGFRJLJSLVEGGVEYGGEIAPUUISFPBTGNWWMUCZRVTWGLRWUGUMNCZVILE"""))
# The 26 uppercase letters; used as the row index of the reference letter-frequency table.
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

def create_df_freq(c):
    """Count how often each distinct symbol occurs in ``c``.

    Returns a one-row DataFrame with one column per distinct symbol (in the
    sorted order produced by ``np.unique``); each cell holds the number of
    occurrences of that symbol.
    """
    symbols, occurrences = np.unique(c, return_counts=True)
    # Each count is wrapped in a one-element list so pandas builds a single row.
    single_row = {symbol: [total] for symbol, total in zip(symbols, occurrences)}
    return pd.DataFrame(data=single_row)

# Display the occurrence count of each letter in the ciphertext `c`.
create_df_freq(c)

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,...,Q,R,S,T,U,V,W,X,Y,Z
0,8,5,12,4,15,10,27,16,13,14,...,14,17,24,8,12,22,22,5,8,5


In [3]:
def shift(c):
    """Build the table of all cyclic left-rotations of ``c``.

    Parameters
    ----------
    c : sequence
        The symbols to rotate (e.g. a NumPy array of single characters).

    Returns
    -------
    pandas.DataFrame
        ``len(c)`` rows by ``len(c)`` columns. Row ``i`` is ``c`` rotated left
        by ``i`` positions, so row 0 is ``c`` itself.

    Notes
    -----
    The whole table is built in one indexing operation instead of appending
    one row at a time: ``DataFrame.append`` was removed in pandas 2.0, and
    growing a frame row by row is quadratic.
    """
    symbols = np.asarray(c)
    n = len(symbols)
    offsets = np.arange(n)
    # Entry (i, j) of the table is symbols[(i + j) % n], i.e. rotation i at position j.
    positions = (offsets[:, None] + offsets[None, :]) % n
    return pd.DataFrame(data=symbols[positions])
# Table of the ciphertext and its shifted copies (row i = ciphertext shifted by i).
df_shifted = shift(c)

In [4]:
# Inspect the table of shifted ciphertexts.
df_shifted

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,321,322,323,324,325,326,327,328,329,330
0,V,V,H,Q,W,V,V,R,H,M,...,G,U,M,N,C,Z,V,I,L,E
1,V,H,Q,W,V,V,R,H,M,U,...,U,M,N,C,Z,V,I,L,E,V
2,H,Q,W,V,V,R,H,M,U,S,...,M,N,C,Z,V,I,L,E,V,V
3,Q,W,V,V,R,H,M,U,S,G,...,N,C,Z,V,I,L,E,V,V,H
4,W,V,V,R,H,M,U,S,G,J,...,C,Z,V,I,L,E,V,V,H,Q
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,Z,V,I,L,E,V,V,H,Q,W,...,G,L,R,W,U,G,U,M,N,C
327,V,I,L,E,V,V,H,Q,W,V,...,L,R,W,U,G,U,M,N,C,Z
328,I,L,E,V,V,H,Q,W,V,V,...,R,W,U,G,U,M,N,C,Z,V
329,L,E,V,V,H,Q,W,V,V,R,...,W,U,G,U,M,N,C,Z,V,I


In [5]:
def get_coinc(df_shifted):
    """Count, for every shift, the positions where the shifted row matches row 0.

    Parameters
    ----------
    df_shifted : pandas.DataFrame
        Table whose row 0 is the reference text and whose row ``i`` is the
        text shifted by ``i``.

    Returns
    -------
    pandas.DataFrame
        One row; column ``i`` (for ``i`` from 1 to ``len(df_shifted) - 1``)
        holds the number of positions at which row ``i`` equals row 0. This is
        the raw number of coincidences, not a normalised index. An empty
        DataFrame is returned when there is no shifted row to compare.
    """
    n_rows = len(df_shifted)
    if n_rows < 2:
        # Nothing to compare against row 0.
        return pd.DataFrame()
    rows = df_shifted.to_numpy()
    # Compare every shifted row with row 0 at once, rather than inserting one
    # column per shift (which fragments the frame and triggers a PerformanceWarning).
    matches = (rows[1:] == rows[0]).sum(axis=1).astype(np.int64)
    return pd.DataFrame([matches], columns=range(1, n_rows))
# Number of positions where the ciphertext matches each of its shifted copies (one column per shift).
df_coinc = get_coinc(df_shifted)
df_coinc

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,321,322,323,324,325,326,327,328,329,330
0,14,14,16,15,25,12,13,13,8,15,...,15,8,13,13,12,25,15,16,14,14


In [6]:
# Left panel: histogram of the coincidence counts over all shifts.
# Right panel: coincidence count for each individual shift.
fig = (
    make_subplots(
        rows=1,
        cols=2,
        subplot_titles=["Number of shifts per index of coincidence", "Index of coincidence per shift"],
    )
    .add_trace(go.Histogram(x=df_coinc.iloc[0]), row=1, col=1)
    .add_trace(go.Bar(x=df_coinc.columns, y=df_coinc.iloc[0]), row=1, col=2)
)

# Kept as the final expression so the notebook renders the returned figure.
fig.update_layout(height=400, width=1200)

In [7]:
# Rank the shifts by their number of coincidences, highest first.
# (to be completed)
# np.argsort gives positions within the row in ascending order of value;
# reversing that order puts the shifts with the most coincidences first.
argmax = np.array(np.argsort(df_coinc.iloc[0])[::-1])
# Select by position so the result keeps the real shift labels as its index,
# without assuming the column labels are exactly "position + 1".
df_coinc.iloc[0].iloc[argmax[:10]]

261    30
70     30
276    27
55     27
281    26
50     26
15     26
316    26
195    25
30     25
Name: 0, dtype: int64

In [8]:
def create_df_freq_letter():
    """Return a table of reference letter frequencies.

    Returns
    -------
    pandas.DataFrame
        Indexed by the letters of the module-level ``alphabet`` (A..Z), with
        two float columns, ``freq_eng`` and ``freq_fra`` (the names suggest
        English and French; the source of the figures is not recorded here).

    Notes
    -----
    The frequencies are stored as floats rather than strings, so the columns
    are numeric and can be used directly in arithmetic.
    """
    freq_eng = [
        0.082, 0.015, 0.028, 0.043, 0.127, 0.022, 0.020, 0.061, 0.070,
        0.002, 0.008, 0.040, 0.024, 0.067, 0.075, 0.019, 0.001, 0.060,
        0.060, 0.091, 0.028, 0.010, 0.023, 0.001, 0.020, 0.001,
    ]
    freq_fra = [
        0.0815, 0.0097, 0.0315, 0.0373, 0.1739, 0.0112, 0.0097, 0.0085, 0.0731,
        0.0045, 0.0002, 0.0569, 0.0287, 0.0712, 0.0528, 0.0280, 0.0121, 0.0664,
        0.0814, 0.0722, 0.0638, 0.0164, 0.0003, 0.0041, 0.0028, 0.0015,
    ]
    dict_freq_letter = {"freq_eng": freq_eng, "freq_fra": freq_fra}
    # One row per letter, in alphabet order.
    df_freq_letter = pd.DataFrame(data=dict_freq_letter, index=list(alphabet))
    return df_freq_letter
# Display the reference letter-frequency table.
create_df_freq_letter()

Unnamed: 0,freq_eng,freq_fra
A,0.082,0.0815
B,0.015,0.0097
C,0.028,0.0315
D,0.043,0.0373
E,0.127,0.1739
F,0.022,0.0112
G,0.02,0.0097
H,0.061,0.0085
I,0.07,0.0731
J,0.002,0.0045
