In [1]:
## Run this if privcomp is not installed
# !pip3 install -e ../.

In [1]:
from privcomp import OneTimePad, VigenereCipher, letter_count, Colours
import altair as alt
import pandas as pd

In [2]:
def plot_letter_count(title: str, text: str, x="letter", y="occurences", **kwargs) -> alt.Chart:
    """Build a bar chart of the letter counts found in ``text``.

    The result of ``letter_count(text)`` is loaded into a two-column frame
    (``letter`` / ``occurences``) and drawn as bars.

    Args:
        title: Main chart title.
        text: Text whose letters are counted.
        x: Encoding for the x axis; the default matches the frame's letter column.
        y: Encoding for the y axis; the default matches the frame's count column.
        **kwargs: Only ``subtitle`` is read (defaults to an empty string);
            any other keyword is ignored.

    Returns:
        The Altair chart, not yet configured with a mark colour.
    """
    # Title block: left-anchored and nudged upwards, with an optional subtitle.
    title_spec = {
        "text": title,
        "subtitle": kwargs.get("subtitle", ""),
        "anchor": "start",
        "dy": -15,
        "fontSize": 15,
        "subtitleFontSize": 13,
    }

    frequencies = pd.DataFrame(letter_count(text), columns=["letter", "occurences"])

    bars = alt.Chart(frequencies).mark_bar()
    # sort="-y" orders the x axis by descending y value (most common letter first).
    encoded = bars.encode(x=alt.X(x, sort="-y"), y=y)
    return encoded.properties(title=title_spec)

## 1984 Letter Distribution

In [3]:
# Read the raw novel text from the data directory next to this notebook's folder.
with open("../data/1984.txt") as book_file:
    raw_book = book_file.read()

# Run the text through the cipher's own cleaner before counting letters
# (presumably drops characters the cipher cannot encrypt; confirm in privcomp).
book = VigenereCipher.clean_text(raw_book)

book_chart = plot_letter_count("1984 Letter Distribution", book)
book_counts = book_chart.configure_mark(color=Colours.red)
book_counts

## Letter Distribution with small key (size=1)

We saw with the `MonoAlphabeticCipherAttack` that you can reconstruct part of the information in an encrypted message by counting letter frequencies and matching them to known letter distributions. So how can we protect against this? For this we can use `OneTimePad`. First, let's look at the letter count of the original text and compare it with that of the message encrypted with a short key.

In [4]:
# Encrypt the whole book with a one-letter key, then chart the ciphertext's
# letter counts for comparison with the plaintext distribution.
short_key_cipher = VigenereCipher(key="f")
encrypted = short_key_cipher.encrypt(book)

k1_counts = plot_letter_count(
    title="Small Key Letter Distribution",
    text=encrypted,
).configure_mark(color=Colours.blue)
k1_counts

It's easy to map our encrypted letters back to the original letters by comparing the two distributions. We know `e` is the most frequent letter in the book, and `j` is the most frequent letter in the ciphertext, so the cipher must have substituted `j` for `e`. Below we can check this directly: a `j` in the encrypted string sits at the same position as an `e` in the book text.

In [5]:
# Show the character at index 4 of the ciphertext next to the same position in the plaintext.
position = slice(4, 5)
(encrypted[position], book[position])

('j', 'e')

What happens when we use a large key size, i.e. a key as long as the message itself?

In [9]:
# Generate a key with one key character per character of the cleaned book.
key = OneTimePad.generate(size=len(book))
long_key_cipher = OneTimePad(key=key)

# Keep the one-time-pad ciphertext under its own name. Rebinding `encrypted`
# here would silently change what the earlier `encrypted[4:5], book[4:5]`
# cell shows if that cell is re-run after this one.
encrypted_otp = long_key_cipher.encrypt(book)

klarge_counts = plot_letter_count(
    title="Large Key Letter Distribution",
    subtitle="Count of the encrypted text",
    text=encrypted_otp,
).configure_mark(color=Colours.purple)
klarge_counts

In [7]:
# Chart the letter counts of the generated key itself, for comparison with
# the ciphertext distribution.
key_counts = plot_letter_count(
    title="Large Key Letter Distribution",
    subtitle="Count of Encryption Key itself",
    text=key,
).configure_mark(color=Colours.orange)
key_counts

# OneTimePad is impractical because...
- The key has to be at least as long as the message one wants to transmit
- For perfect secrecy one has to use a new key every time.
- Alice and Bob have to make sure that they are the only ones who know the key. They cannot establish a common key by communicating over an insecure channel.