# Recreating the Results in "The Phaistos Disk: A New Way of Viewing the Language Behind the Script" (Davis 2018)

I use the Linear A corpus at https://lineara.xyz to recreate the results from Brent Davis' paper showing a 
statistically significant relationship between the bigrams in the Phaistos Disc and Linear A.

I find that one bigram identified as common between the two is doubtful. '𐘠𐘚' (TI-I) does not actually appear in Linear A. It may be that the bigram is with a variation of 𐘚 (I), namely 𐘛 (*28B). We find a single instance of '𐘠𐘛' (TI-*28B) in the Linear A corpus, in ZA6b. It is not clear to me if it is valid to treat (TI-*28B) as the equivalent of (TI-I). If it is not, then the number of matching bigrams between the Phaistos Disc and the Linear A corpus must be revised down to 16. This no longer falls within the region of statistical significance, which Davis identifies as 16.4 or above.


## Recreating the Results of Davis 2018
First we import the Phaistos Disc inscription. We also initialize a list of symbols from the Phaistos Disc and all known symbols from Linear A.

In [355]:
import json
import itertools as it
import pandas as pd
from IPython.display import display

# Table styles shared by every styled DataFrame in this notebook: the caption
# is centred, enlarged to 120% and drawn in black.
styles = [
    {
        "selector": "caption",
        "props": [
            ("text-align", "center"),
            ("font-size", "120%"),
            ("color", 'black'),
        ],
    }
]

# First part of the Phaistos Disc text. Words are separated by '|' and the
# string ends with '|', so splitting on '|' leaves a final empty string.
pd_inscription_a = ("𐇑𐇛𐇜𐇐𐇡𐇽|𐇧𐇷𐇛|𐇬𐇼𐇖𐇽|𐇬𐇬𐇱|𐇑𐇛𐇓𐇷𐇰|𐇪𐇼𐇖𐇛|𐇪𐇻𐇗|𐇑𐇛𐇕𐇡|𐇮𐇩𐇲|"
                "𐇑𐇛𐇸𐇢𐇲|𐇐𐇸𐇷𐇖|𐇑𐇛𐇯𐇦𐇵𐇽|𐇶𐇚|𐇑𐇪𐇨𐇙𐇦𐇡|𐇫𐇐𐇽|𐇑𐇛𐇮𐇩𐇽|𐇑𐇛𐇪𐇪𐇲𐇴𐇤|𐇰𐇦|"
                "𐇑𐇛𐇮𐇩𐇽|𐇑𐇪𐇨𐇙𐇦𐇡|𐇫𐇐𐇽|𐇑𐇛𐇮𐇩𐇽|𐇑𐇛𐇪𐇝𐇯𐇡𐇪|𐇕𐇡𐇠𐇢|𐇮𐇩𐇛|𐇑𐇛𐇜𐇐|𐇦𐇢𐇲𐇽|𐇙𐇒𐇵|𐇑𐇛𐇪𐇪𐇲𐇴𐇤|𐇜𐇐|𐇙𐇒𐇵|")
pd_words_a = pd_inscription_a.split('|')

# Second part of the text, in the same format. The second string literal ends
# without a '|', so implicit concatenation continues that word into the third
# literal.
pd_inscription_b = ("𐇑𐇛𐇥𐇷𐇖|𐇪𐇼𐇖𐇲|𐇑𐇴𐇦𐇔𐇽|𐇥𐇨𐇪|𐇰𐇧𐇣𐇛|𐇟𐇦𐇡𐇺𐇽|𐇜𐇐𐇶𐇰|𐇞𐇖𐇜𐇐𐇡|𐇥𐇴𐇹𐇨|"
                    "𐇖𐇧𐇷𐇲|𐇑𐇩𐇳𐇷|𐇪𐇨𐇵𐇐|𐇬𐇧𐇧𐇣𐇲|𐇟𐇝𐇡|𐇬𐇰𐇐|𐇕𐇲𐇯𐇶𐇰|𐇑𐇘𐇪𐇐|𐇬𐇳"
                    "𐇖𐇗𐇽|𐇬𐇗𐇜|𐇬𐇼𐇖𐇽|𐇥𐇬𐇳𐇖𐇗𐇽|𐇪𐇱𐇦𐇨|𐇖𐇡𐇲|𐇖𐇼𐇖𐇽|𐇖𐇦𐇡𐇧|𐇥𐇬𐇳𐇖𐇗𐇽|𐇘𐇭𐇶𐇡𐇖|𐇑𐇕𐇲𐇦𐇖|𐇬𐇱𐇦𐇨|𐇼𐇖𐇽|")
pd_words_b = pd_inscription_b.split('|')

# Whole inscription, still containing the '𐇽' marks.
pd_inscription = pd_inscription_a + pd_inscription_b
# Word list used for the n-gram and word-edge statistics: every '𐇽' is removed
# before splitting (presumably it is a mark rather than a syllabogram - TODO
# confirm). The last element is an empty string because of the trailing '|'.
pd_words = pd_inscription.replace('𐇽','').split('|')

# Inventory of Phaistos Disc signs (45 entries). Not referenced by any other
# cell visible in this notebook.
pd_symbols = ["𐇐", "𐇑", "𐇒", "𐇓", "𐇔", "𐇕", "𐇖", "𐇗", "𐇘", "𐇙", "𐇚", 
    "𐇛", "𐇜", "𐇝", "𐇞", "𐇟", "𐇠", "𐇡", "𐇢", "𐇣", "𐇤", "𐇥", "𐇦", "𐇧", "𐇨", "𐇩", "𐇪", "𐇫", "𐇬", "𐇭", "𐇮", "𐇯"
    , "𐇰", "𐇱", "𐇲", "𐇳", "𐇴", "𐇵", "𐇶", "𐇷", "𐇸", "𐇹", "𐇺", "𐇻", "𐇼"]

# Inventory of symbols that may occur in the Linear A corpus. Not referenced
# by any other cell visible in this notebook.
la_symbols = ["𐄂", "𐘀", "𐘁", "𐘂", "𐘃", "𐘄", "𐘅", "𐘆", "𐘇", "𐘈", "𐘉", "𐘊", "𐘋", "𐘌", "𐘍", "𐘎", 
    "𐘏", "𐘐", "𐘑", "𐘒", "𐘓", "𐘔", "𐘕", "𐘖", "𐘗", "𐘘", "𐘙", "𐘚", "𐘛", "𐘜", "𐘝", "𐘞",
    "𐘟", "𐘠", "𐘡", "𐘢", "𐘣", "𐘤", "𐘥", "𐘦", "𐘧", "𐘨", "𐘩", "𐘪", "𐘫", "𐘬", "𐘭", "𐘮",
    "𐘯", "𐘰", "𐘱", "𐘲", "𐘳", "𐘴", "𐘵", "𐘶", "𐘷", "𐘸", "𐘹", "𐘺", "𐘻", "𐘼", "𐘽", "𐘾",
    "𐘿", "𐙀", "𐙁", "𐙂", "𐙃", "𐙄", "𐙅", "𐙆", "𐙇", "𐙈", "𐙉", "𐙊", "𐙋", "𐙌", "𐙍", 
    "𐙎", "𐙏", "𐙐", "𐙑", "𐙒", "𐙓", "𐙔", "𐙕", "𐙖", "𐙗", "𐙘", "𐙙", "𐙚", "𐙛", "𐙜", "𐙝", 
    "𐙞", "𐙟", "𐙠", "𐙡", "𐙢", "𐙣", "𐙤", "𐙥", "𐙦", "𐙧", "𐙨", "𐙩", "𐙪", "𐙫", "𐙬", "𐙭",
    "𐙮", "𐙯", "𐙰", "𐙱", "𐙲", "𐙳", "𐙴", "𐙵", "𐙶", "𐙷", "𐙸", "𐙹", "𐙺", "𐙻", "𐙼", "𐙽",
    "𐙾", "𐙿", "𐚀", "𐚁", "𐚂", "𐚃", "𐚄", "𐚅", "𐚆", "𐚇", "𐚈", "𐚉", "𐚊", "𐚋", "𐚌", "𐚍",
    "𐚎", "𐚏", "𐚐", "𐚑", "𐚒", "𐚓", "𐚔", "𐚕", "𐚖", "𐚗", "𐚘", "𐚙", "𐚚", "𐚛", "𐚜", 
    "𐚝", "𐚞", "𐚟", "𐚠", "𐚡", "𐚢", "𐚣", "𐚤", "𐚥", "𐚦", "𐚧", "𐚨", "𐚩", "𐚪", "𐚫", "𐚬", 
    "𐚭", "𐚮", "𐚯", "𐚰", "𐚱", "𐚲", "𐚳", "𐚴", "𐚵", "𐚶", "𐚷", "𐚸", "𐚹", "𐚺", "𐚻", "𐚼",
    "𐚽", "𐚾", "𐚿", "𐛀", "𐛁", "𐛂", "𐛃", "𐛄", "𐛅", "𐛆", "𐛇", "𐛈", "𐛉", "𐛊", "𐛋", "𐛌",
    "𐛍", "𐛎", "𐛏", "𐛐", "𐛑", "𐛒", "𐛓", "𐛔", "𐛕", "𐛖", "𐛗", "𐛘", "𐛙", "𐛚", "𐛛", "𐛜",
    "𐛝", "𐛞", "𐛟", "𐛠", "𐛡", "𐛢", "𐛣", "𐛤", "𐛥", "𐛦", "𐛧", "𐛨", "𐛩", "𐛪", "𐛫", 
    "𐛬", "𐛭", "𐛮", "𐛯", "𐛰", "𐛱", "𐛲", "𐛳", "𐛴", "𐛵", "𐛶", "𐛷", "𐛸", "𐛹", "𐛺", "𐛻", 
    "𐛼", "𐛽", "𐛾", "𐛿", "𐜀", "𐜁", "𐜂", "𐜃", "𐜄", "𐜅", "𐜆", "𐜇", "𐜈", "𐜉", "𐜊", "𐜋",
    "𐜌", "𐜍", "𐜎", "𐜏", "𐜐", "𐜑", "𐜒", "𐜓", "𐜔", "𐜕", "𐜖", "𐜗", "𐜘", "𐜙", "𐜚", "𐜛",
    "𐜜", "𐜝", "𐜞", "𐜟", "𐜠", "𐜡", "𐜢", "𐜣", "𐜤", "𐜥", "𐜦", "𐜧", "𐜨", "𐜩", "𐜪", "𐜫",
    "𐜬", "𐜭", "𐜮", "𐜯", "𐜰", "𐜱", "𐜲", "𐜳", "𐜴", "𐜵", "𐜶", "𐝀", "𐝁", "𐝂", "𐝃", 
    "𐝄", "𐝅", "𐝆", "𐝇", "𐝈", "𐝉", "𐝊", "𐝋", "𐝌", "𐝍", "𐝎", "𐝏", "𐝐", "𐝑", "𐝒", "𐝓", 
    "𐝔", "𐝕", "𐝠", "𐝡", "𐝢", "𐝣", "𐝤", "𐝥", "𐝦", "𐝧", "𐝬", "𐝭", "𐝮", "𐝯"]

Next we import all known words from Linear A into a list called `la_words`.

In [7]:
# Load the Linear A corpus export and collect its distinct multi-sign words.
# The context manager closes the file even if parsing fails, and the explicit
# encoding keeps the reading of the JSON independent of the platform default.
with open('../Data/LinearAWords.json', encoding='utf-8') as json_file:
    inscriptions = json.load(json_file)

la_words = []
for inscription in inscriptions:
    for word_tag in inscription["tagsForWords"]:
        # Entries without the "word" tag are skipped.
        if "word" not in word_tag["tags"]:
            continue
        # U+1076B is stripped before the length check, so it never counts as
        # a sign of the word.
        word = word_tag["word"].replace('\U0001076b', '')
        # Single signs cannot form a bigram; an entry left empty by the
        # stripping above is not a word either.
        if len(word) < 2:
            continue
        la_words.append(word)
# Keep each distinct word once (the resulting order is arbitrary).
la_words = list(set(la_words))


Now we can create lists of unique bigrams in Linear A and the Phaistos disc.

In [152]:

def getNgrams(words, n):
    """Return every contiguous run of ``n`` signs from every word, in order.

    Words shorter than ``n`` contribute nothing. Repeated n-grams are kept, so
    the result can be used for occurrence counts as well as for sets.
    """
    return [
        word[start:start + n]
        for word in words
        for start in range(len(word) - n + 1)
    ]

# Trigram lists are not computed in this notebook; the names are kept as empty
# placeholders so any cell that refers to them still finds them.
pd_trigrams, la_trigrams = [], []

# Compute each bigram list once. Previously the reporting loop rebound its own
# loop variable, so the lists stored in ngram_infos stayed empty and the
# bigrams had to be computed a second time afterwards.
la_bigrams = getNgrams(la_words, 2)
pd_bigrams = getNgrams(pd_words, 2)

# One row per corpus: [n-grams, name prefix, n, source words, display name].
ngram_infos = [
    [la_bigrams, "bi", 2, la_words, "Linear A"],
    [pd_bigrams, "bi", 2, pd_words, "Phaistos Disc"],
]

# Print summary statistics for each corpus.
for (ngram, prefix, n, words, name) in ngram_infos:
    print("\n" + name + ":")
    print("Unique " + prefix + "grams", len(set(ngram)),
          "Total " + prefix + "grams", len(ngram))
    # Flatten the n-grams into single signs to count the distinct signs used.
    print("Unique symbols in " + prefix + "grams",
          len(set(it.chain.from_iterable(ngram))))




Linear A:
Unique bigrams 1170 Total bigrams 2036
Unique symbols in bigrams 168

Phaistos Disc:
Unique bigrams 115 Total bigrams 180
Unique symbols in bigrams 45


With these we now have what we need to rerun Davis' analysis comparing the bigrams that appear in both Linear A and the disc.

Davis gives the homomorphs used for his analysis as follows:
![alt text](14homomorphs.png "Title")

We implement the same here:

In [153]:
# Brent Davis 2018 mapping: Phaistos Disc sign -> the Linear A sign treated as
# its homomorph. The trailing comments give the Linear A value according to the
# `syllables` table defined later in this notebook; a sign with no entry in
# that table is left unlabelled.
pd_la_davis_map = {
"𐇛": "𐘿",  # QE
"𐇬": "𐙁",  # MA
"𐇼": "𐘽",  # RA₂
"𐇖": "𐘠",  # TI
"𐇱": "𐘢",  # PI
"𐇗": "𐘚",  # I
"𐇮": "𐙂",  # KU
"𐇲": "𐘃",  # TE
"𐇢": "𐘀",  # DA
"𐇦": "𐘅",  # NA
"𐇨": "𐙅",  
"𐇥": "𐘞",  # SA
"𐇟": "𐘸",  # KI
"𐇳": "𐘝",  # NI
}


Now we see if we can get the same number of bigrams as Davis in the disc:

In [362]:


# Transliterate the disc through Davis' mapping and list every bigram that is
# made up solely of mapped Linear A signs.
# Signs without a mapping (and the '|' separators) pass through unchanged.
pd_inscription_as_la = [pd_la_davis_map.get(sign, sign) for sign in pd_inscription]
pd_inscription_as_la_words = ''.join(pd_inscription_as_la).split('|')
pd_la_bigrams = getNgrams(pd_inscription_as_la_words, 2)

# A bigram qualifies only when both of its signs are targets of the mapping.
pd_bigrams_both = {
    bigram
    for bigram in pd_la_bigrams
    if all(sign in pd_la_davis_map.values() for sign in bigram)
}
#print(str(len(pd_bigrams_both)) + " bigrams", sorted(pd_bigrams_both))

# Reverse lookup (Linear A sign -> disc sign) to show the original disc bigram.
pd_la_davis_map_r = {la_sign: pd_sign for pd_sign, la_sign in pd_la_davis_map.items()}
df = pd.DataFrame(
    [
        pd_bigrams_both,
        [pd_la_davis_map_r[bigram[:1]] + pd_la_davis_map_r[bigram[-1:]]
         for bigram in pd_bigrams_both],
    ],
    columns=list(range(1, len(pd_bigrams_both) + 1)),
)
df = df.set_axis(['Linear A Bigrams', 'Disc Bigrams'], axis='index')
df.style.set_caption("Linear A and Phaistos Disc Bigrams").set_table_styles(styles)


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
Linear A Bigrams,𐘅𐘀,𐘿𐙂,𐘢𐘅,𐘠𐘿,𐘠𐘽,𐙁𐙁,𐘃𐘅,𐘠𐘚,𐘸𐘅,𐙁𐘚,𐘞𐙁,𐙁𐘝,𐘽𐘠,𐘠𐘃,𐙁𐘢,𐘅𐙅,𐘿𐘞,𐘞𐙅,𐘝𐘠,𐘀𐘃,𐘅𐘠,𐘠𐘅,𐙁𐘽
Disc Bigrams,𐇦𐇢,𐇛𐇮,𐇱𐇦,𐇖𐇛,𐇖𐇼,𐇬𐇬,𐇲𐇦,𐇖𐇗,𐇟𐇦,𐇬𐇗,𐇥𐇬,𐇬𐇳,𐇼𐇖,𐇖𐇲,𐇬𐇱,𐇦𐇨,𐇛𐇥,𐇥𐇨,𐇳𐇖,𐇢𐇲,𐇦𐇖,𐇖𐇦,𐇬𐇼


This matches the 23 bigrams given in Table 40 by Davis:
    
![alt text](23pairs.png "Title")


Now we can count the number of pairs that match between Linear A and the disc.

In [368]:

# Mapped bigrams that really occur in Linear A, as
# (bigram, occurrences in Linear A, occurrences on the disc), sorted by bigram.
bg_both = [
    (bigram, la_bigrams.count(bigram), pd_la_bigrams.count(bigram))
    for bigram in sorted(pd_bigrams_both.intersection(la_bigrams))
]

# One column per bigram, one row per corpus.
df = pd.DataFrame(
    [[la_count for _, la_count, _ in bg_both],
     [disc_count for _, _, disc_count in bg_both]],
    columns=[bigram for bigram, _, _ in bg_both],
)
df = df.set_axis(['Occurences in Linear A', 'Occurences in Disc'], axis='index')
df.style.set_table_styles(styles).set_caption("Matching Pairs of Linear A and Phaistos Disc Bigrams")


Unnamed: 0,𐘀𐘃,𐘅𐘀,𐘅𐘠,𐘝𐘠,𐘞𐙁,𐘠𐘃,𐘠𐘅,𐘠𐘽,𐘢𐘅,𐘸𐘅,𐘽𐘠,𐘿𐙂,𐙁𐘚,𐙁𐘢,𐙁𐘽,𐙁𐙁
Occurences in Linear A,2,1,4,1,3,1,2,2,2,2,1,1,5,1,1,1
Occurences in Disc,2,1,1,3,2,1,1,1,2,1,6,3,1,2,2,1


Our output also gives the number of occurrences of the bigrams in each of Linear A and the Disc, both as a total for all bigrams and for each bigram individually. So for '𐘸𐘅' we find that it occurs twice in Linear A and once on the Phaistos Disc, i.e.: ('𐘸𐘅', 2, 1).




## Reviewing the Results
We find only 16 instances of Disc bigrams appearing in Linear A. This is one fewer than found by Davis. 

Let's take a look at the ones we think are missing according to our own analysis:

In [369]:
# Mapped bigrams that never occur in Linear A, as (bigram, occurrences on the
# disc), sorted by bigram.
bg_pd_only = [
    (bigram, pd_la_bigrams.count(bigram))
    for bigram in sorted(pd_bigrams_both.difference(la_bigrams))
]

df = pd.DataFrame(
    [[disc_count for _, disc_count in bg_pd_only]],
    columns=[bigram for bigram, _ in bg_pd_only],
)
df = df.set_axis(["Occurences"], axis='index')
df.style.set_table_styles(styles).set_caption("Mapped bigrams that don't appear in Linear A")



Unnamed: 0,𐘃𐘅,𐘅𐙅,𐘞𐙅,𐘠𐘚,𐘠𐘿,𐘿𐘞,𐙁𐘝
Occurences,1,2,1,3,1,1,3


We can compare this with the table from (Davis 2018):

![alt text](17bigrams.png "Title")

The difference is the bigram: '𐘠𐘚', (when transliterated: TI-I). '𐘠𐘚' (TI-I) does not actually appear in Linear A. Where the two syllabograms are adjacent they are not word-internal, i.e. they are in adjacent words rather than the same word:

![alt text](PKZa11.png "Title")
![alt text](PYRWc4.png "Title")


Another potential source for the identification is a variation of 𐘚 (I), namely 𐘛 (*28B). We find a single instance of '𐘠𐘛' (TI-*28B) in the Linear A corpus, in ZA6b:

![alt text](ZA6b.png "Title")

It is not clear to me if it is valid to treat (TI-*28B) as the equivalent of (TI-I). If it is not, then the number of matching bi-grams between the Phaistos disc and the Linear A corpus must be revised down to 16. This no longer falls within the region of statistical significance, which Davis identifies as 16.4 or above.



## Experimenting with Different Mappings
In this section we'll experiment with an expanded set of homomorphic mappings in the syllabograms of Linear A and the Phaistos disc and see if it improves or changes the result of 17/23 observed by Davis. 


In [376]:

def runExperimentalMapping(mapping=None):
    """Display three tables evaluating a Disc-to-Linear-A sign mapping.

    The disc inscription is transliterated sign by sign through ``mapping``,
    split into words on ``'|'`` and broken into bigrams. The tables show:

    1. every bigram made up solely of mapped Linear A signs, next to the disc
       bigram it came from;
    2. the subset found in ``la_bigrams``, with occurrence counts in Linear A
       and on the disc plus a "Total" column;
    3. the remaining mapped bigrams, which do not occur in ``la_bigrams``.

    Args:
        mapping: dict from a Phaistos Disc sign to a Linear A sign. When
            omitted, the module-level ``pd_la_hogan_map`` is used as it stands
            at call time.

    Reads the globals ``pd_inscription``, ``la_bigrams`` and ``styles``.
    Returns nothing; the tables are shown with ``display``.
    """
    if mapping is None:
        mapping = pd_la_hogan_map

    # Built once instead of being rescanned for every sign and bigram.
    mapped_signs = set(mapping.values())
    known_la_bigrams = set(la_bigrams)
    la_to_disc = {la_sign: disc_sign for disc_sign, la_sign in mapping.items()}

    # Unmapped signs and the '|' separators pass through unchanged.
    transliterated = ''.join(mapping.get(sign, sign) for sign in pd_inscription)
    pd_la_bigrams = getNgrams(transliterated.split('|'), 2)

    # Table 1: bigrams whose signs are both targets of the mapping.
    pd_bigrams_both = {bg for bg in pd_la_bigrams
                       if all(sign in mapped_signs for sign in bg)}
    # Freeze one iteration order so both rows line up column by column.
    ordered = list(pd_bigrams_both)
    df = pd.DataFrame([ordered,
                       [la_to_disc[bg[:1]] + la_to_disc[bg[-1:]] for bg in ordered]],
                      columns=[i + 1 for i in range(len(ordered))])
    df = df.set_axis(['Linear A Bigrams', 'Disc Bigrams'], axis='index')
    df = (df.style.set_caption("Mapping gives %d Linear A and Phaistos Disc Bigrams" % len(pd_bigrams_both))
            .set_table_styles(styles))
    display(df)

    # Table 2: mapped bigrams attested in Linear A, with counts and totals.
    bg_both = sorted((bg, la_bigrams.count(bg), pd_la_bigrams.count(bg))
                     for bg in pd_bigrams_both & known_la_bigrams)
    la_counts = [la_count for _, la_count, _ in bg_both]
    disc_counts = [disc_count for _, _, disc_count in bg_both]
    df = pd.DataFrame([la_counts + [sum(la_counts)],
                       disc_counts + [sum(disc_counts)]],
                      columns=[bg for bg, _, _ in bg_both] + ["Total"])
    df = df.set_axis(['Occurences in Linear A', 'Occurences in Disc'], axis='index')
    df = (df.style.set_caption("The %d mapped bigrams that actually appear in Linear A" % len(bg_both))
            .set_table_styles(styles))
    display(df)

    # Table 3: mapped bigrams that are absent from Linear A.
    bg_pd_only = sorted((bg, pd_la_bigrams.count(bg))
                        for bg in pd_bigrams_both - known_la_bigrams)
    df = pd.DataFrame([[disc_count for _, disc_count in bg_pd_only]],
                      columns=[bg for bg, _ in bg_pd_only])
    df = df.set_axis(["Occurences"], axis='index')
    df = (df.style.set_caption("The %d mapped bigrams that don't appear in Linear A" % len(bg_pd_only))
            .set_table_styles(styles))
    display(df)
 

In the first instance we'll expand our mapping to include some additional glyphs and alter some others. The differences are given in the table below.


In [461]:
# Experimental mapping: Phaistos Disc sign -> proposed Linear A counterpart.
# This dict is read by runExperimentalMapping() and
# showDifferencesBetweenMappings(), and a later cell removes entries from it
# in place.
pd_la_hogan_map = {
"𐇑": "𐘚",
"𐇛": "𐘾",
"𐇬": "𐙁",
"𐇼": "𐘽",
"𐇖": "𐘠",  
"𐇱": "𐘢",
"𐇮": "𐙂",
"𐇲": "𐘃",
"𐇢": "𐘀",
"𐇦": "𐘅",
"𐇥": "𐘞",
"𐇟": "𐘸",
"𐇳": "𐘝",
"𐇶": "𐘙",
"𐇭": "𐘏", 
"𐇝": "𐘳",
"𐇨": "𐙅",  
"𐇪": "𐙒",
"𐇤": "𐘱",
"𐇫": "𐙆",
"𐇧": "𐘦",  
}

def showDifferencesBetweenMappings():
    """Display the disc signs on which the Davis and experimental maps differ.

    A sign is listed when it is missing from either ``pd_la_davis_map`` or
    ``pd_la_hogan_map``, or when the two assign it different Linear A signs.
    The table has one column per such sign and two rows, "Davis" and "Ours";
    a map with no entry for a sign shows the string "None".

    Reads both maps as they stand at call time and returns nothing.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # columns come out the same on every run (Davis' signs first, then ours).
    # Iterating a set here made the column order vary between runs.
    candidates = dict.fromkeys(list(pd_la_davis_map) + list(pd_la_hogan_map))
    row_index = [a for a in candidates
                 if a not in pd_la_hogan_map or a not in pd_la_davis_map or
                           pd_la_hogan_map[a] != pd_la_davis_map[a]]

    df = pd.DataFrame([[pd_la_davis_map.get(a, "None") for a in row_index],
                       [pd_la_hogan_map.get(a, "None") for a in row_index]],
                      columns=row_index)
    df = df.set_axis(["Davis", "Ours"], axis='index')
    df = df.style.set_caption("Differences between Davis and our mapping").set_table_styles(styles)
    display(df)
showDifferencesBetweenMappings()

Unnamed: 0,𐇗,𐇶,𐇤,𐇑,𐇝,𐇭,𐇛,𐇧,𐇪,𐇫
Davis,𐘚,,,,,,𐘿,,,
Ours,,𐘙,𐘱,𐘚,𐘳,𐘏,𐘾,𐘦,𐙒,𐙆


Let's try this mapping:

In [462]:
# Evaluate pd_la_hogan_map in its current state and display the result tables.
runExperimentalMapping()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36
Linear A Bigrams,𐘅𐘀,𐘾𐙂,𐘠𐘦,𐙅𐙒,𐘏𐘙,𐙒𐙅,𐘚𐘾,𐘢𐘅,𐘠𐘽,𐙁𐙁,𐘃𐘅,𐘸𐘅,𐘞𐙁,𐙒𐘢,𐘾𐘞,𐙁𐘝,𐘽𐘠,𐙒𐙒,𐘠𐘃,𐙒𐘳,𐘠𐘾,𐙁𐘢,𐘚𐙒,𐘅𐙅,𐙒𐘃,𐘞𐙅,𐘾𐙒,𐘸𐘳,𐘝𐘠,𐙒𐘽,𐘀𐘃,𐙁𐘦,𐘦𐘦,𐘅𐘠,𐘠𐘅,𐙁𐘽
Disc Bigrams,𐇦𐇢,𐇛𐇮,𐇖𐇧,𐇨𐇪,𐇭𐇶,𐇪𐇨,𐇑𐇛,𐇱𐇦,𐇖𐇼,𐇬𐇬,𐇲𐇦,𐇟𐇦,𐇥𐇬,𐇪𐇱,𐇛𐇥,𐇬𐇳,𐇼𐇖,𐇪𐇪,𐇖𐇲,𐇪𐇝,𐇖𐇛,𐇬𐇱,𐇑𐇪,𐇦𐇨,𐇪𐇲,𐇥𐇨,𐇛𐇪,𐇟𐇝,𐇳𐇖,𐇪𐇼,𐇢𐇲,𐇬𐇧,𐇧𐇧,𐇦𐇖,𐇖𐇦,𐇬𐇼


Unnamed: 0,𐘀𐘃,𐘅𐘀,𐘅𐘠,𐘚𐘾,𐘝𐘠,𐘞𐙁,𐘠𐘃,𐘠𐘅,𐘠𐘽,𐘠𐘾,𐘢𐘅,𐘸𐘅,𐘸𐘳,𐘽𐘠,𐘾𐘞,𐘾𐙂,𐙁𐘢,𐙁𐘽,𐙁𐙁,Total
Occurences in Linear A,2,1,4,2,1,3,1,2,2,2,2,2,6,1,2,3,1,1,1,39
Occurences in Disc,2,1,1,13,3,2,1,1,1,1,2,1,1,6,1,3,2,2,1,45


Unnamed: 0,𐘃𐘅,𐘅𐙅,𐘏𐘙,𐘚𐙒,𐘞𐙅,𐘠𐘦,𐘦𐘦,𐘾𐙒,𐙁𐘝,𐙁𐘦,𐙅𐙒,𐙒𐘃,𐙒𐘢,𐙒𐘳,𐙒𐘽,𐙒𐙅,𐙒𐙒
Occurences,1,2,1,2,1,1,1,3,3,1,1,2,1,1,2,3,2


We get 36 possible bigrams, of which 19 actually appear in Linear A. A poor result. When we inspect the ones that don't appear in Linear A we can see that 5 in particular don't produce any result at all. If we remove these as a bad lot and rerun the analysis again we get a much better result:

In [463]:
# Remove five disc signs from the experimental mapping, in place.
# pop(..., None) is used instead of del so that re-running this cell after the
# entries are already gone does not raise KeyError.
for rejected_sign in ("𐇪", "𐇨", "𐇭", "𐇤", "𐇧"):
    pd_la_hogan_map.pop(rejected_sign, None)

# Show how the pruned mapping now differs from Davis', then re-evaluate it.
showDifferencesBetweenMappings()
runExperimentalMapping()

Unnamed: 0,𐇛,𐇶,𐇑,𐇗,𐇨,𐇫,𐇝
Davis,𐘿,,,𐘚,𐙅,,
Ours,𐘾,𐘙,𐘚,,,𐙆,𐘳


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
Linear A Bigrams,𐘅𐘀,𐘾𐙂,𐘚𐘾,𐘢𐘅,𐘠𐘽,𐙁𐙁,𐘃𐘅,𐘸𐘅,𐘞𐙁,𐘾𐘞,𐙁𐘝,𐘽𐘠,𐘠𐘃,𐘠𐘾,𐙁𐘢,𐘸𐘳,𐘝𐘠,𐘀𐘃,𐘅𐘠,𐘠𐘅,𐙁𐘽
Disc Bigrams,𐇦𐇢,𐇛𐇮,𐇑𐇛,𐇱𐇦,𐇖𐇼,𐇬𐇬,𐇲𐇦,𐇟𐇦,𐇥𐇬,𐇛𐇥,𐇬𐇳,𐇼𐇖,𐇖𐇲,𐇖𐇛,𐇬𐇱,𐇟𐇝,𐇳𐇖,𐇢𐇲,𐇦𐇖,𐇖𐇦,𐇬𐇼


Unnamed: 0,𐘀𐘃,𐘅𐘀,𐘅𐘠,𐘚𐘾,𐘝𐘠,𐘞𐙁,𐘠𐘃,𐘠𐘅,𐘠𐘽,𐘠𐘾,𐘢𐘅,𐘸𐘅,𐘸𐘳,𐘽𐘠,𐘾𐘞,𐘾𐙂,𐙁𐘢,𐙁𐘽,𐙁𐙁,Total
Occurences in Linear A,2,1,4,2,1,3,1,2,2,2,2,2,6,1,2,3,1,1,1,39
Occurences in Disc,2,1,1,13,3,2,1,1,1,1,2,1,1,6,1,3,2,2,1,45


Unnamed: 0,𐘃𐘅,𐙁𐘝
Occurences,1,3


Now we have 19 mappings found in Linear A of a possible 21. 

## Comparing Word-End Syllabograms
Let's compare glyphs that appear at the end of words in Linear A and the Disc.

In [29]:
# Linear A syllabogram -> transliteration. The word-edge statistics below only
# count signs that appear as keys here.
syllables = {
'𐘀': 'DA', '𐘁': 'RO', '𐘂': 'PA', '𐘃': 'TE', '𐘄': 'TO', '𐘅': 'NA', 
'𐘆': 'DI', '𐘇': 'A', '𐘈': 'SE', '𐘉': 'U', '𐘊': 'PO', '𐘋': 'ME', 
'𐘌': 'QA', '𐘍': 'ZA', '𐘎': 'ZO', '𐘏': 'QI', '𐘕': 'MU', '𐘗': 'NE',
'𐘘': 'RU', '𐘙': 'RE', '𐘚': 'I', '𐘜': 'PU₂', '𐘝': 'NI', '𐘞': 'SA', 
'𐘠': 'TI', '𐘡': 'E', '𐘢': 'PI', '𐘣': 'WI', '𐘤': 'SI', '𐘥': 'KE',
'𐘦': 'DE', '𐘧': 'JE', '𐘩': 'NWA', '𐘫': 'PU', '𐘬': 'DU', '𐘭': 'RI',
'𐘮': 'WA', '𐘯': 'NU', '𐘰': 'PA₂', '𐘱': 'JA', '𐘲': 'SU', '𐘳': 'TA', 
'𐘴': 'RA', '𐘵': 'O', '𐘶': 'JU', '𐘷': 'TA₂', '𐘸': 'KI', '𐘹': 'TU', 
'𐘺': 'KO', '𐘻': 'MI', '𐘼': 'ZE', '𐘽': 'RA₂', '𐘾': 'KA', '𐘿': 'QE', 
'𐙁': 'MA', '𐙂': 'KU', '𐙄': 'AU', '𐙆': 'TWE', '𐙀': 'ZU'
}

# Signs whose transliteration is a vowel on its own (A, E, I, O, U) or AU.
vowels = {
'𐘇': 'A',
'𐘡': 'E',
'𐘚': 'I',
'𐘵': 'O',
'𐘉': 'U', 
'𐙄': 'AU',
}


Let's find the most common last syllabograms in Linear A words:

In [395]:
import collections

# How many Linear A words end in each sign; only signs listed in `syllables`
# that close a word of more than one sign are reported.
la_final_counts = collections.Counter(word[-1:] for word in la_words)
la_last_letters = {
    word[-1:]: la_final_counts[word[-1:]]
    for word in la_words
    if len(word) > 1 and word[-1:] in syllables
}
# Most frequent first; equal counts keep their first-seen order.
la_last_letters = collections.OrderedDict(
    sorted(la_last_letters.items(), key=lambda item: item[1], reverse=True)
)

# Dense ranking: the highest count gets rank 1 and equal counts share a rank.
r = {
    count: position
    for position, count in enumerate(sorted(set(la_last_letters.values()), reverse=True), 1)
}
la_last_letters_ranked = {sign: r[count] for sign, count in la_last_letters.items()}

df = pd.DataFrame(
    [list(la_last_letters.values()), list(la_last_letters_ranked.values())],
    columns=list(la_last_letters),
)
df = df.set_axis(['Occurrences', 'Ranking'], axis='index')
df = df.style.set_caption("Most Common Word-End Syllabograms in Linear A").set_table_styles(styles)
display(df)




Unnamed: 0,𐘱,𐘅,𐘃,𐘙,𐘠,𐘴,𐘳,𐘤,𐘗,𐘻,𐘈,𐘾,𐙁,𐘘,𐘝,𐘸,𐘁,𐘍,𐘚,𐘀,𐘆,𐘶,𐘭,𐘹,𐘞,𐘂,𐘬,𐘯,𐙂,𐘋,𐘢,𐘽,𐘰,𐘲,𐘦,𐘮,𐘡,𐘇,𐘫,𐘣,𐘷,𐘥,𐘿,𐙀,𐘕,𐘧,𐘉,𐘌,𐘜,𐘄,𐘺,𐘵,𐘊
Occurrences,48,45,44,43,41,37,32,28,27,27,23,22,21,20,20,19,18,17,17,16,16,16,15,15,14,14,14,14,13,13,13,12,12,11,10,9,9,8,8,8,8,7,7,6,6,6,5,5,4,3,2,2,1
Ranking,1,2,3,4,5,6,7,8,9,9,10,11,12,13,13,14,15,16,16,17,17,17,18,18,19,19,19,19,20,20,20,21,21,22,23,24,24,25,25,25,25,26,26,27,27,27,28,28,29,30,31,31,32


And do the same for the disc:

In [396]:
# How many Phaistos Disc words end in each sign (words of a single sign and
# the empty trailing entry of pd_words do not introduce a sign).
pd_final_counts = collections.Counter(word[-1:] for word in pd_words)
pd_last_letters = {
    word[-1:]: pd_final_counts[word[-1:]]
    for word in pd_words
    if len(word) > 1
}
# Sort highest to top; equal counts keep their first-seen order.
pd_last_letters = sorted(pd_last_letters.items(), key=lambda x: x[1], reverse=True)
pd_last_letters = collections.OrderedDict(pd_last_letters)

# Dense ranking: the highest count gets rank 1 and equal counts share a rank.
r = {key: rank for rank, key in enumerate(sorted(set(pd_last_letters.values()), reverse=True), 1)}
pd_last_letters_ranked = {k: r[v] for k, v in pd_last_letters.items()}

df = pd.DataFrame([list(pd_last_letters.values()), list(pd_last_letters_ranked.values())],
                  columns=list(pd_last_letters))
df = df.set_axis(['Occurrences', 'Ranking'], axis='index')
# This table holds word-END signs; the caption previously said "Word-Initial".
df = df.style.set_caption("Most Common Word-End Syllabograms in PD (By Occurrence)").set_table_styles(styles)
display(df)


Unnamed: 0,𐇖,𐇲,𐇐,𐇡,𐇛,𐇗,𐇰,𐇵,𐇩,𐇨,𐇤,𐇪,𐇱,𐇚,𐇦,𐇢,𐇔,𐇺,𐇷,𐇜,𐇧
Occurrences,8,7,7,6,4,4,3,3,3,3,2,2,1,1,1,1,1,1,1,1,1
Ranking,1,2,2,3,4,4,5,5,5,5,6,6,7,7,7,7,7,7,7,7,7


In [413]:
# Full experimental mapping (Phaistos Disc sign -> Linear A sign) used by the
# ranking comparisons below. It is a separate dict from pd_la_hogan_map, so
# the in-place deletions made to that map earlier do not affect it.
pd_la_full_map = {
"𐇑": "𐘚",
"𐇛": "𐘾",
"𐇬": "𐙁",
"𐇼": "𐘽",
"𐇖": "𐘠",  
"𐇱": "𐘢",
"𐇮": "𐙂",
"𐇲": "𐘃",
"𐇢": "𐘀",
"𐇦": "𐘅",
"𐇥": "𐘞",
"𐇟": "𐘸",
"𐇳": "𐘝",
"𐇶": "𐘙",
"𐇭": "𐘏", 
"𐇝": "𐘳",
"𐇨": "𐙅",  
"𐇪": "𐙒",
"𐇤": "𐘱",
"𐇫": "𐙆",
"𐇧": "𐘦",  
}

"""
ranking_comp = sorted(
    [[k, pd_last_letters_ranked[k], pd_la_full_map[k], la_last_letters_ranked[pd_la_full_map[k]]]
    for k,v in pd_la_full_map.items() if k in pd_last_letters and v in la_last_letters]
    , key=lambda x: abs(x[1] - x[3]))

df = pd.DataFrame(ranking_comp,
                columns=["PD Glyph", "PD Ranking", "LA Glyph", "LA Ranking"])
df = df.style.hide_index().set_caption("Raw Ranking").set_table_styles(styles)
display(df)
"""

# Compare word-end ranks of each mapped sign pair. Each row is
# (disc sign, disc rank, Linear A sign, rescaled Linear A rank); rows are
# ordered by how closely the two ranks agree.
# The Linear A rank is rescaled as rank / (number of ranked Linear A signs)
# * (largest disc rank), truncated to an int and floored at 1.
# NOTE(review): the divisor is the number of ranked signs, not the largest
# Linear A rank, so the rescaled value cannot reach the top of the disc scale
# when ranks are tied - confirm this is intended.
n_ranking_comp = sorted([
                     (k,
                      pd_last_letters_ranked[k],
                      v,
                      max(1, int((la_last_letters_ranked[v]
                                  / len(la_last_letters_ranked))
                                 * max(pd_last_letters_ranked.values())))
                     )
                     for k, v in pd_la_full_map.items() if k in pd_last_letters and v in la_last_letters
                 ], key=lambda x: abs(x[1] - x[3]))

df = pd.DataFrame(n_ranking_comp,
                columns=["PD Glyph", "PD Ranking", "LA Glyph", "LA Ranking"])
df = df.style
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0. Use
# hide(axis="index") when it exists and fall back for older pandas.
df = df.hide(axis="index") if hasattr(df, "hide") else df.hide_index()
df = df.set_caption("Normalized Ranking").set_table_styles(styles)
display(df)


PD Glyph,PD Ranking,LA Glyph,LA Ranking
𐇖,1,𐘠,1
𐇲,2,𐘃,1
𐇛,4,𐘾,1
𐇧,7,𐘦,3
𐇱,7,𐘢,2
𐇢,7,𐘀,2
𐇤,6,𐘱,1
𐇦,7,𐘅,1


## Compare Word-Initial Syllabograms

Let's find the most common first syllabograms in Linear A words:

In [408]:
import collections

# How many Linear A words start with each sign; only signs listed in
# `syllables` that open a word of more than one sign are reported.
la_initial_counts = collections.Counter(word[:1] for word in la_words)
la_first_letters = {
    word[:1]: la_initial_counts[word[:1]]
    for word in la_words
    if len(word) > 1 and word[:1] in syllables
}
# Most frequent first; equal counts keep their first-seen order.
la_first_letters = collections.OrderedDict(
    sorted(la_first_letters.items(), key=lambda item: item[1], reverse=True)
)

df = pd.DataFrame([list(la_first_letters.values())], columns=list(la_first_letters))
df = df.set_axis(['Occurrences'], axis='index')
df = df.style.set_caption("Most Common Word-Initial Syllabograms in Linear A").set_table_styles(styles)
display(df)

# Dense ranking: the highest count gets rank 1 and equal counts share a rank.
r = {
    count: position
    for position, count in enumerate(sorted(set(la_first_letters.values()), reverse=True), 1)
}
la_first_letters_ranked = {sign: r[count] for sign, count in la_first_letters.items()}

df = pd.DataFrame([list(la_first_letters_ranked.values())], columns=list(la_first_letters_ranked))
df = df.set_axis(['Ranking'], axis='index')
df = df.style.set_caption("Most Common Word-Initial Syllabograms in Linear A (Ranked)").set_table_styles(styles)
display(df)


Unnamed: 0,𐘇,𐘱,𐘚,𐘀,𐘾,𐙂,𐘳,𐘉,𐘤,𐘂,𐘞,𐙁,𐘸,𐘌,𐘬,𐘆,𐘻,𐘹,𐘢,𐘃,𐘴,𐘿,𐘘,𐘙,𐘠,𐘣,𐘭,𐘈,𐘲,𐘅,𐘮,𐘰,𐘵,𐘡,𐘯,𐘫,𐙀,𐘁,𐘺,𐘝,𐘦,𐘗,𐘽,𐘋,𐘄,𐘥,𐘧,𐘜,𐘍,𐘷,𐘊,𐙄,𐘕,𐘶,𐘏
Occurrences,115,51,51,36,35,33,29,29,28,26,25,24,24,23,22,20,18,18,17,15,14,14,14,12,12,11,11,10,9,9,8,8,8,7,7,7,7,6,6,6,6,4,3,3,3,3,2,2,2,2,2,2,2,2,1


Unnamed: 0,𐘇,𐘱,𐘚,𐘀,𐘾,𐙂,𐘳,𐘉,𐘤,𐘂,𐘞,𐙁,𐘸,𐘌,𐘬,𐘆,𐘻,𐘹,𐘢,𐘃,𐘴,𐘿,𐘘,𐘙,𐘠,𐘣,𐘭,𐘈,𐘲,𐘅,𐘮,𐘰,𐘵,𐘡,𐘯,𐘫,𐙀,𐘁,𐘺,𐘝,𐘦,𐘗,𐘽,𐘋,𐘄,𐘥,𐘧,𐘜,𐘍,𐘷,𐘊,𐙄,𐘕,𐘶,𐘏
Ranking,1,2,2,3,4,5,6,6,7,8,9,10,10,11,12,13,14,14,15,16,17,17,17,18,18,19,19,20,21,21,22,22,22,23,23,23,23,24,24,24,24,25,26,26,26,26,27,27,27,27,27,27,27,27,28


And do the same for the disc:

In [409]:
# How many Phaistos Disc words start with each sign (words of a single sign
# and the empty trailing entry of pd_words do not introduce a sign).
pd_initial_counts = collections.Counter(word[:1] for word in pd_words)
pd_first_letters = {
    word[:1]: pd_initial_counts[word[:1]]
    for word in pd_words
    if len(word) > 1
}
# Most frequent first; equal counts keep their first-seen order.
pd_first_letters = collections.OrderedDict(
    sorted(pd_first_letters.items(), key=lambda item: item[1], reverse=True)
)

df = pd.DataFrame([list(pd_first_letters.values())], columns=list(pd_first_letters))
df = df.set_axis(['Occurrences'], axis='index')
df = df.style.set_caption("Most Common Word-Initial Syllabograms in Phaistos Disc").set_table_styles(styles)
display(df)

# Dense ranking: the highest count gets rank 1 and equal counts share a rank.
r = {
    count: position
    for position, count in enumerate(sorted(set(pd_first_letters.values()), reverse=True), 1)
}
pd_first_letters_ranked = {sign: r[count] for sign, count in pd_first_letters.items()}

df = pd.DataFrame([list(pd_first_letters_ranked.values())], columns=list(pd_first_letters_ranked))
df = df.set_axis(['Ranking'], axis='index')
df = df.style.set_caption("Most Common Word-Initial Syllabograms in Phaistos Disc (Ranked)").set_table_styles(styles)
display(df)


Unnamed: 0,𐇑,𐇬,𐇪,𐇥,𐇖,𐇮,𐇫,𐇰,𐇕,𐇙,𐇜,𐇟,𐇧,𐇐,𐇶,𐇦,𐇞,𐇘,𐇼
Occurrences,19,8,5,4,4,2,2,2,2,2,2,2,1,1,1,1,1,1,1


Unnamed: 0,𐇑,𐇬,𐇪,𐇥,𐇖,𐇮,𐇫,𐇰,𐇕,𐇙,𐇜,𐇟,𐇧,𐇐,𐇶,𐇦,𐇞,𐇘,𐇼
Ranking,1,2,3,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6


In [412]:
"""
ranking_comp = [(k, pd_first_letters_ranked[k], pd_la_full_map[k], la_first_letters_ranked[pd_la_full_map[k]])
                 for k,v in pd_la_full_map.items() if k in pd_first_letters and v in la_first_letters]

df = pd.DataFrame(ranking_comp,
                columns=["PD Glyph", "PD Ranking", "LA Glyph", "LA Ranking"])
df = df.style.hide_index().set_caption("Raw Ranking").set_table_styles(styles)
display(df)
"""

# Compare word-initial ranks of each mapped sign pair. Each row is
# (disc sign, disc rank, Linear A sign, rescaled Linear A rank); rows are
# ordered by how closely the two ranks agree.
# The Linear A rank is rescaled as rank / (number of ranked Linear A signs)
# * (largest disc rank), truncated to an int and floored at 1.
# NOTE(review): the divisor is the number of ranked signs, not the largest
# Linear A rank, so the rescaled value cannot reach the top of the disc scale
# when ranks are tied - confirm this is intended.
n_ranking_comp = sorted([
                     (k,
                      pd_first_letters_ranked[k],
                      v,
                      max(1, int((la_first_letters_ranked[v]
                                  / len(la_first_letters_ranked))
                                 * max(pd_first_letters_ranked.values())))
                     )
                     for k, v in pd_la_full_map.items() if k in pd_first_letters and v in la_first_letters
                 ], key=lambda x: abs(x[1] - x[3]))

df = pd.DataFrame(n_ranking_comp,
                columns=["PD Glyph", "PD Ranking", "LA Glyph", "LA Ranking"])
df = df.style
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0. Use
# hide(axis="index") when it exists and fall back for older pandas.
df = df.hide(axis="index") if hasattr(df, "hide") else df.hide_index()
df = df.set_caption("Normalized Ranking").set_table_styles(styles)
display(df)


PD Glyph,PD Ranking,LA Glyph,LA Ranking
𐇑,1,𐘚,1
𐇬,2,𐙁,1
𐇖,4,𐘠,1
𐇥,4,𐘞,1
𐇼,6,𐘽,2
𐇮,5,𐙂,1
𐇦,6,𐘅,2
𐇟,5,𐘸,1
𐇧,6,𐘦,2
𐇶,6,𐘙,1


## Examining Potential Illegal Combinations

In [29]:
# Linear A syllabogram -> transliteration. This cell repeats a definition made
# earlier in the notebook, so this section can be run without that one.
syllables = {
'𐘀': 'DA', '𐘁': 'RO', '𐘂': 'PA', '𐘃': 'TE', '𐘄': 'TO', '𐘅': 'NA', 
'𐘆': 'DI', '𐘇': 'A', '𐘈': 'SE', '𐘉': 'U', '𐘊': 'PO', '𐘋': 'ME', 
'𐘌': 'QA', '𐘍': 'ZA', '𐘎': 'ZO', '𐘏': 'QI', '𐘕': 'MU', '𐘗': 'NE',
'𐘘': 'RU', '𐘙': 'RE', '𐘚': 'I', '𐘜': 'PU₂', '𐘝': 'NI', '𐘞': 'SA', 
'𐘠': 'TI', '𐘡': 'E', '𐘢': 'PI', '𐘣': 'WI', '𐘤': 'SI', '𐘥': 'KE',
'𐘦': 'DE', '𐘧': 'JE', '𐘩': 'NWA', '𐘫': 'PU', '𐘬': 'DU', '𐘭': 'RI',
'𐘮': 'WA', '𐘯': 'NU', '𐘰': 'PA₂', '𐘱': 'JA', '𐘲': 'SU', '𐘳': 'TA', 
'𐘴': 'RA', '𐘵': 'O', '𐘶': 'JU', '𐘷': 'TA₂', '𐘸': 'KI', '𐘹': 'TU', 
'𐘺': 'KO', '𐘻': 'MI', '𐘼': 'ZE', '𐘽': 'RA₂', '𐘾': 'KA', '𐘿': 'QE', 
'𐙁': 'MA', '𐙂': 'KU', '𐙄': 'AU', '𐙆': 'TWE', '𐙀': 'ZU'
}

# Signs whose transliteration is a vowel on its own (A, E, I, O, U) or AU;
# the next cell looks for bigrams whose second sign is one of these.
vowels = {
'𐘇': 'A',
'𐘡': 'E',
'𐘚': 'I',
'𐘵': 'O',
'𐘉': 'U', 
'𐙄': 'AU',
}


In [46]:
# Distinct Linear A bigrams whose second sign is one of the vowel signs.
print(list(filter(lambda bigram: bigram[1:2] in vowels, set(la_bigrams))))

['𐘴𐘉', '𐙁𐘚', '𐘅𐘇', '𐙀𐘡', '𐘗𐘇', '𐘁𐘡', '𐘾𐘚', '𐘀𐘉', '𐘙𐘚', '𐘞𐘉', '𐘀𐘚', '𐘘𐘇', '𐘃𐘉', '𐙕𐘉', '𐘱𐘚', '𐘅𐘚', '𐘠𐘡', '𐘈𐘚', '𐚏𐘡', '𐘱𐘉', '𐘯𐘉', '𐙴𐘡', '𐘴𐘇', '𐘠𐘇', '𐘇𐘚', '𐙌𐘇', '𐘳𐘉', '𐘘𐘚', '𐘊𐘇', '𐘤𐘇', '𐘋𐘚', '𐘰𐘚', '𐘁𐙄', '𐙠𐘚', '𐘽𐘉', '𐘺𐘇', '𐘾𐘇', '𐘀𐘡', '𐘮𐘡', '𐘭𐘇', '𐘙𐘇', '𐘀𐘇', '𐘞𐘚', '𐙇𐘚', '𐘳𐘇', '𐙁𐘉', '𐘥𐘚', '𐘄𐘚', '𐄂𐘚', '𐘉𐘚', '𐘃𐘚', '𐘍𐘚', '𐘢𐘵', '𐚂𐘚', '𐘤𐘡', '𐘺𐘡', '𐘀𐘵', '𐘤𐘚', '𐘂𐘚', '𐘶𐘉', '𐘳𐘚', '𐘘𐘡', '𐘕𐘇', '𐘅𐘉', '𐘮𐘵', '𐙠𐘇', '𐘮𐙄', '𐘰𐘡', '𐘢𐘡', '𐘴𐘵', '𐘾𐘉']
