In [1]:
ciphertext = "OBRGXIMYAZZAWCATBNMUYYHAZNVGFCXPVVSIJSVLKIFAVGBIECAZSBWGRGRQWUCHMMOCYEFLGQQNKFSHQMGYALNKCIJQVEKVWXNFOYFYQBESGOYTXMAYTXSISNBPMSGOJBKFWRUTTMLSBNQMLLRGFNZUAWHZLBRVZGHUVZMCKJEHSLSWGXCNZYEXRIMLPXRIXNUXRNSNRPHFDHBMAYWKHTKNGNUXRNJUVGMYNYEYNLYYGPGYFSBNQQWUCHMMSLRWTFDYQRNOJUEWNLVUZIDHWXLHTLKNEXMALBRQGUMMGXCUFXLHTLLLRTROJYFIDHLIGADLUBVXENSCALVCDFFIQCFAHISILUXXZXNUAMZAWISRNOJYKMQYECGRSBWHAHLUFBBPDPWLJBRYOCYEAYSVYXSISPRKSNZYPHMMWKHXMWWMGAZNEOFMDHKORMGOKNUHTAZQRAZPWBRTQXGZFMTJAXUTRNWCAPZLUFRODLFYFLGUKHRODLTYRGRYWHNLRIUCNMDXOCGAKIFAQXKUQMVGZFDBVLSIJSGADLWCFGNCFMGTMWWISTBIMHGKXBSPVGFVWHRYHNWXSKNGHLBENHYYQPZLXUEXNHDSBGDQZIXGNQKNUXCCKUFMQIMMRYEYUNFHEUDIAZVUJWNGQYSFVSDNZYFNOLWGRBLJGLGTMWWISKZJAXVMXCFVEBMAAHTBSNGUPENMWCGBRIFFLHMYOBBBRNZIEHTAZFLTBKMUVGSYVQVMGNZYROHFKISPZLOBBVZHLBBKNOYBYRTHVYELSUFXGADJJISBSUTFRPZSGZPTQLQCAZHNGHGADMCCYEEODARGDLSFQHDMFIGKZCKYNLDWGHQEDPQHRBSBWLNKDBAMFNOJDSJTFIFMYHZXWXZHQYLBNGSQAWRHMWWQNKHMVYPEZLWXUXVCDFAHSQSMGXOLWWVHTMLCZXHHOUVMHHYZBKQYAHSHQWWGRGSMFIEPHFDBRMTLFBVLZLESOTBEXIEYQYKBFNOJDCRLAOLWEHRMWMGADYFYZRRZJIAMHYJQVMGIMNQXKUQNUXUUDORHENAGRMGULCFUDCFANEHNLFRTGYSXBYXIMLBIOIFYAMGUKWBNMNWXSHQGGLRMGUFYVMGYJHHFDLAWNEROHYEBNLANLHQNZYABBYKNPTKWMFNMHIFMJBSBJYTTQXLIPHLGAMFTQCSN"


In [2]:
# Shortest / longest substring length considered when looking for repeats.
MIN_LENGTH = 3
MAX_LENGTH = 10


def FreqSubstring(ciphertext, min_length=None, max_length=None, top=5):
    """Return the most frequently repeated substrings of ``ciphertext``.

    Every substring whose length lies between ``min_length`` and
    ``max_length`` (inclusive) is counted, overlapping occurrences included.

    Args:
        ciphertext: The text to scan.
        min_length: Smallest substring length to count. Defaults to the
            module-level ``MIN_LENGTH`` (looked up at call time).
        max_length: Largest substring length to count. Defaults to the
            module-level ``MAX_LENGTH`` (looked up at call time).
        top: How many entries to return. Defaults to 5.

    Returns:
        A list of ``(substring, count)`` tuples sorted by descending count.
        Substrings with equal counts keep the order in which they were first
        seen (scanning start positions left to right, shorter lengths first).
    """
    if min_length is None:
        min_length = MIN_LENGTH
    if max_length is None:
        max_length = MAX_LENGTH
    # A substring always has at least one character.
    shortest = max(min_length, 1)

    size = len(ciphertext)
    freq = dict()

    for start in range(size):
        # Only lengths up to max_length (and up to the end of the text) can
        # ever be counted, so there is no point in scanning any further.
        longest = min(max_length, size - start)
        for length in range(shortest, longest + 1):
            piece = ciphertext[start:start + length]
            freq[piece] = freq.get(piece, 0) + 1

    # sorted() is stable, so ties keep first-seen (insertion) order.
    ranked = sorted(freq.items(), key=lambda d: d[1], reverse=True)
    return ranked[0:top]


# Show the five most frequent substrings (lengths MIN_LENGTH..MAX_LENGTH) of the ciphertext.
FreqSubstring(ciphertext)


[('NZY', 5), ('GAD', 5), ('NUX', 4), ('NOJ', 4), ('MWW', 4)]

The most frequent substrings are NZY (5 times) and GAD (5 times).


In [3]:
def SubLocation(string, substring):
    """Return every start index at which ``substring`` occurs in ``string``.

    Overlapping occurrences are reported, because each search resumes one
    character after the previous hit. The indices come back in ascending
    order; the list is empty when there is no occurrence.
    """
    found = []
    cursor = string.find(substring)
    while cursor != -1:
        found.append(cursor)
        # Resume just past the start of this hit so overlaps are not missed.
        cursor = string.find(substring, cursor + 1)
    return found


# Print the start indices of the two most frequent trigrams in the ciphertext.
print(SubLocation(ciphertext, "NZY"))
print(SubLocation(ciphertext, "GAD"))


[179, 413, 659, 749, 1151]
[320, 542, 788, 818, 1022]


The locations of NZY in the original string are [179, 413, 659, 749, 1151].

The locations of GAD in the original string are [320, 542, 788, 818, 1022].


In [4]:
def LocationDiff(locations):
    """Return the gaps between consecutive entries of ``locations``.

    For ``n`` locations the result has ``n - 1`` items, each being
    ``locations[k + 1] - locations[k]``. Fewer than two locations give an
    empty list.
    """
    # Pair every element with its successor and subtract.
    return [later - earlier for earlier, later in zip(locations, locations[1:])]


# Compute the gaps from the located positions rather than from re-typed
# lists, so this cell stays correct if the ciphertext or trigrams change.
print(LocationDiff(SubLocation(ciphertext, "NZY")))
print(LocationDiff(SubLocation(ciphertext, "GAD")))


[234, 246, 90, 402]
[222, 246, 30, 204]


The differences between consecutive locations of NZY are [234, 246, 90, 402].

The differences between consecutive locations of GAD are [222, 246, 30, 204].


In [5]:
def FreqFactor(diffs):
    """Return the five factors that divide the most entries of ``diffs``.

    For every value in ``diffs`` each integer from 2 up to the value itself
    is tried as a divisor. The result is a list of ``(factor, count)`` tuples
    sorted by descending count; factors with equal counts keep the order in
    which they were first encountered.
    """
    tally = {}

    for gap in diffs:
        for candidate in range(2, gap + 1):
            if gap % candidate:
                continue  # not a divisor of this gap
            tally[candidate] = tally.get(candidate, 0) + 1

    ranked = list(tally.items())
    # list.sort is stable, so ties stay in first-seen order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:5]


# Build the list of gaps from the helpers instead of pasting earlier output,
# so the factor count always reflects the current ciphertext.
nzy_diffs = LocationDiff(SubLocation(ciphertext, "NZY"))
gad_diffs = LocationDiff(SubLocation(ciphertext, "GAD"))
FreqFactor(nzy_diffs + gad_diffs)


[(2, 8), (3, 8), (6, 8), (9, 2), (18, 2)]

The most frequent factors are 2, 3 and 6; each of them divides all 8 differences.

Since 2 and 3 are themselves factors of 6, the largest of the three, 6, is the most likely key length, so I try a key length of 6 first.


In [6]:
def DevideString(string, piece):
    """Split ``string`` into ``piece`` interleaved strings.

    The character at index ``i`` is appended to bucket ``i % piece``, so
    bucket ``k`` holds characters ``k``, ``k + piece``, ``k + 2 * piece``, ...
    Returns the list of ``piece`` buckets in order.
    """
    buckets = [""] * piece
    for index, char in enumerate(string):
        buckets[index % piece] += char

    return buckets


In [7]:
def CaesarCipherDecode(cipher, keys):
    """Decrypt ``cipher`` by shifting each letter back by a repeating key.

    The character at index ``i`` is shifted back by the alphabet position
    (A=0 ... Z=25) of ``keys[i % len(keys)]``. A one-letter key is therefore
    a plain Caesar shift, and a longer key a Vigenere-style decryption.

    Args:
        cipher: Ciphertext; it is upper-cased before decoding. Characters
            other than A-Z are copied through unchanged but still occupy a
            key position, so key alignment depends only on the index.
        keys: Key letters; upper-cased before use.

    Returns:
        The decoded, upper-case text ("" for an empty ``cipher``).

    Raises:
        ValueError: If ``cipher`` is non-empty and ``keys`` is empty or
            contains anything other than the letters A-Z.
    """
    cipher = cipher.upper()
    keys = keys.upper()
    if not cipher:
        return ""
    if not keys:
        raise ValueError("keys must contain at least one letter")
    if not all("A" <= key <= "Z" for key in keys):
        raise ValueError("keys must contain only the letters A-Z")

    base = ord("A")
    offsets = [ord(key) - base for key in keys]
    plains = []
    for i, char in enumerate(cipher):
        if "A" <= char <= "Z":
            # Modular arithmetic wraps shifts that fall below "A" back to "Z".
            shifted = (ord(char) - base - offsets[i % len(offsets)]) % 26
            plains.append(chr(base + shifted))
        else:
            # Leave spaces, digits, punctuation, etc. untouched.
            plains.append(char)
    return "".join(plains)


In [8]:
def StringFrequency(string):
    """Rank all 26 single-letter Caesar shifts of ``string`` by letter frequency.

    For each key letter A..Z the text is decoded with ``CaesarCipherDecode``
    and scored as the average table frequency of its characters. Characters
    that are not in the table contribute 0, and an empty ``string`` scores
    0.0 for every key instead of dividing by zero.

    Args:
        string: The text to try all 26 shifts on.

    Returns:
        The three best candidates, highest score first, each as a list
        ``[decoded_text, key_letter, score]``. Equal scores keep key order
        (A before B, ...).
    """
    # Letter-frequency table; the values match the commonly cited relative
    # frequencies of letters in English text.
    switcher = {"A": .08167, "B": .01492, "C": .02782, "D": .04253, "E": .12702, "F": .02228, "G": .02015, "H": .06094, "I": .06966, "J": .00153, "K": .00772, "L": .04025, "M": .02406,
                "N": .06749, "O": .07507, "P": .01929, "Q": .00095, "R": .05987, "S": .06327, "T": .09056, "U": .02758, "V": .00978, "W": .02360, "X": .00150, "Y": .01974, "Z": .00074}

    length = len(string)
    plains = []
    for i in range(26):
        key = chr(ord("A") + i)
        plain = CaesarCipherDecode(string, key)
        # Accumulate left to right (not via sum()) so scores stay identical
        # to the plain running total on every Python version.
        frequency = 0
        for char in plain:
            frequency += switcher.get(char, 0)
        score = frequency / length if length else 0.0
        plains.append([plain, key, score])

    plains = sorted(plains, key=lambda d: d[2], reverse=True)

    return plains[0:3]


In [9]:
# Split the ciphertext into 6 interleaved columns (6 is the candidate key
# length) and print the best-scoring single-letter shift for each column.
devidedStrings = DevideString(ciphertext, 6)
for devidedString in devidedStrings:
    print(StringFrequency(devidedString)[0]
          )


['WUEUHFRNMEEWOAIRENOIAOETTHTPSAHUFALEORVGNEALRDESTUNTRTCALNTVERMENEWAAHEEMSSHEOIECNSBECWNYLRENEUAEEPGFLHSSUCLRAHEOEINIOENWHHUDHSWTGDNRBOYOKLANSOYEILNETIEDELAETWGIENLNMFSLTENRRUYLITNTAUNSEONRIPIHSUNRTIA', 'S', 0.06650510000000004]
['HEIATVYGIMAIWNRWDEUETUXYRAHAPCERTTNQTAEEYAREAADTHSDREOHILGAAOEINHRIBVEQSUUTWHFDILENENIIGSHYISONVNDREAYOTASTOCLEMRODBNAILHOLAWEOHHHEDOLFINIGLOENNRSYSDHCWEDLSCIAFNMOHHYOHICSEOWTTUMIGLDRECDRENCETETLSEOST', 'U', 0.06340685000000003]
['ENNLIIIINEPLDDAIADLGOWHOENEIRTRCHEOUHIRTOPEDRMYRETYESTINSUKNFXTNOWLLECUTSEHEESHNESEEAATDIITSTFTIEFODROKHSESNAISETFIRGCTYORYIIEFOOLRTFECNTLEDTADEASWLMAEACHNTIMIOFEREIBRSERTMNIDHEESAEOONOFEIUARYNCAWGCS', 'N', 0.06741628140703525]
['NGAFNCSNGNOLUTRLMIAEWIAUNDCGOEEEEWTAENFWUODYDPOEXEOAPHESPPETYTYOWILEROEHTTOHATAWVSVNSTHENSHNAARNFRLWENNETFOGNZUIARTIILIOIOANTVWCRYSHIWAGHLNORSLIRUAOONORLEOEOETRONWTSAFUSOHYTTEEOTBLAFPTUOTTMLIOIATIAOA', 'T', 0.06432180904522622]
['YACIGTLCTSNGLHDLPFYTNLURGIANTDSSSIBLSNOHRNUOOEUNHDUSEEFWRTAAORTMESBTTNNAEHUA

Reading off the best key letter for each of the six columns gives the keyword "SUNTZU".


In [10]:
# Decrypt the full ciphertext with the recovered keyword.
CaesarCipherDecode(ciphertext, "SUNTZU")


'WHENYOUENGAGEINACTUALFIGHTINGIFVICTORYISLONGINCOMINGTHEMENSWEAPONSWILLGROWDULLANDTHEIRARDORWILLBEDAMPENEDIFYOULAYSIEGETOATOWNYOUWILLEXHAUSTYOURSTRENGTHANDIFTHECAMPAIGNISPROTRACTEDTHERESOURCESOFTHESTATEWILLNOTBEEQUALTOTHESTRAINNEVERFORGETWHENYOURWEAPONSAREDULLEDYOURARDORDAMPENEDYOURSTRENGTHEXHAUSTEDANDYOURTREASURESPENTOTHERCHIEFTAINSWILLSPRINGUPTOTAKEADVANTAGEOFYOUREXTREMITYTHENNOMANHOWEVERWISEWILLBEABLETOAVERTTHECONSEQUENCESTHATMUSTENSUETHUSTHOUGHWEHAVEHEARDOFSTUPIDHASTEINWARCLEVERNESSHASNEVERBEENSEENASSOCIATEDWITHLONGDELAYSINALLHISTORYTHEREISNOINSTANCEOFACOUNTRYHAVINGBENEFITEDFROMPROLONGEDWARFAREONLYONEWHOKNOWSTHEDISASTROUSEFFECTSOFALONGWARCANREALIZETHESUPREMEIMPORTANCEOFRAPIDITYINBRINGINGITTOACLOSEITISONLYONEWHOISTHOROUGHLYACQUAINTEDWITHTHEEVILSOFWARWHOCANTHOROUGHLYUNDERSTANDTHEPROFITABLEWAYOFCARRYINGITONTHESKILLFULGENERALDOESNOTRAISEASECONDLEVYNEITHERAREHISSUPPLYWAGONSLOADEDMORETHANTWICEONCEWARISDECLAREDHEWILLNOTWASTEPRECIOUSTIMEINWAITINGFORREINFORCEMENTSNORWILLHETURNHISARMYBACKFOR