# Lexical Ordering of Names

Look at the lexical ordering entities assigned numbers to see if there's any pattern in the ordering.

First create a list of all inscriptions where entities have quantities assigned to them. 
We exclude things like commodities, so this should be just a list of words that have both
the 'word' tag and the 'assigned number' tag.

In [28]:
import os
import re
import json
from collections import Counter

files = ["060-transaction-words.txt", "160-transaction-signs.txt"]
words_to_ignore = []
for file in files:
    input_file = open("../" + file, 'r')
    while True:
        line = input_file.readline()
        if not line:
            break
        words_to_ignore.extend(line.strip().split('\t'))

json_file = open('../210-wordtags.js')
inscriptions = json.load(json_file)

words_before = {}
words_after = {}

def atStartOfList(word_tags, index):
    if index < 2:
        return False
    word_tag = word_tags[index - 3]
    if "word" in word_tag["tags"] and "assigned number" in word_tag["tags"]:
        return False
    if "number" in word_tag["tags"]:
        return False
    return True

assignment_sequences = {}
for inscription in inscriptions:
    word_tags = inscription["tagsForWords"]

    sequences = []
    sequence = []
    for index, word_tag in enumerate(word_tags):
        tags = word_tag["tags"]
        if "word" not in tags or "assigned number" not in tags:
            continue
        word = word_tag["transliteratedWord"]
        if word in words_to_ignore:
            continue
        if atStartOfList(word_tags, index):
            if sequence and len(sequence) > 1:
                sequences.append(sequence)
            sequence = []
        sequence.append(word)
    if sequence and len(sequence) > 1:
        sequences.append(sequence)
    if sequences:
        assignment_sequences[inscription["name"]] = sequences

assignment_sequences


{'ARKH2': [['*131B',
   'A-SI-DA-TO-I',
   '*79-*301-SE-DE-*21F-*118',
   'A-SU-PU-WA']],
 'HT1': [['*79-SU', 'DI-DI-ZA-KE', 'KU-PA₃-NU', 'A-RA-NA-RE']],
 'HT102': [['VIR+[?]-GRA+PA', 'DI-RI-NA', 'MA-*79']],
 'HT103': [['DA-KU-NA', 'DA-KU-SE-NE']],
 'HT104': [['DA-KU-SE-NE-TI', 'I-DU-TI', 'PA-DA-SU-TI']],
 'HT108': [['DI-NA-RO', 'RA₂-TI']],
 'HT10a': [['DA-RE', 'U-*325-ZA'], ['*305-RU', 'DA-RI-DA', 'ME-ZA']],
 'HT10b': [['U-TI', 'DA-RE', 'TA-RI-NA', '*312-TA', 'KA-SA-RU', 'TA-NA-TI']],
 'HT115a': [['NA-*21F-NE-MI-NA',
   'SE-KU-TU',
   'PA-RA-NE',
   'A-SE-JA',
   'KA-PO-RU']],
 'HT117a': [['U-SU',
   'MI-TU',
   'KU-RA-MU',
   'MA-RU',
   'KU-PA₃-NU',
   'TU-JU-MA',
   'U-DI-MI',
   'MI-RU-TA-RA-RE',
   'TE-JA-RE',
   'NA-DA-RE'],
  ['KU-KU-DA-RA', 'KO-SA-I-TI', 'DA-MI-NU', 'DA-NE-KU-TI', 'KI-DA-RO']],
 'HT117b': [['KU-RE-JU', 'DI-KI-SE']],
 'HT119': [['RI-MI-SI', 'KO-JA', 'KU-PA₃-NA-TU'],
  ['JA-*345', '*306-TU', '*327-JU']],
 'HT11a': [['KA-RO-NA', '*322-RI', 'A-SU-JA', 'VIR+[?]-I']

# Count the instances where a letter sees other ordered before and after it

Now we'll try to see if there's any evidence of the assignees appearing in a consistent lexical order. We do this by seeing the initial letters of each entry show always appear before or after each other.

In [27]:
words_before = {}
words_after = {}

# For every assignee record the assignees that appear before and after it in the list.
for i, (inscription, sequences) in enumerate(assignment_sequences.items()):
    for sequence in sequences:
        if len(sequence) == 1:
            continue
        prev_words = []
        for word in sequence:
            if word in words_before:
                words_before[word] = words_before[word] + prev_words
            else:
                words_before[word] = prev_words
            for prev_word in prev_words:
                if prev_word in words_after:
                    words_after[prev_word].append(word)
                else:
                    words_after[prev_word] = [word]
            prev_words.append(word)

def reduce(original):
    d = {}
    for i, (k, v) in enumerate(original.items()):
        j = k.split('-')[0]
        if j in d:
            d[j].extend([x.split('-')[0] for x in v])
        else:
            d[j] = [x.split('-')[0] for x in v]
    return d

letter_counts = {}
for words in [words_before, words_after]:
    print("--------------")
    initials = reduce(words)
    
    for i, (word, prev_words) in enumerate(initials.items()):
        A = Counter([x for x in prev_words])
        counted = {x : A[x] for x in A if A[x] > 1}
        if not counted:
            continue
        if word in letter_counts:
            letter_counts[word].append(counted)
        else:
            letter_counts[word] = [counted]

print("letters in common")
print("--------------")
for i, (letter, counts) in enumerate(letter_counts.items()):
    if len(counts) < 2:
        print(letter, "not enough info", counts)
        continue
    common_letters = list(set([x for x in counts[0]]) & set([x for x in counts[1]]))
    print(letter, common_letters)
    print(letter, "prev_words", {x : letter_counts[letter][0][x] for x in common_letters})
    print(letter, "after_words", {x : letter_counts[letter][1][x] for x in common_letters})

print("\n\nletters not in common")
print("--------------")
for i, (letter, counts) in enumerate(letter_counts.items()):
    if len(counts) < 2:
        print(letter, "not enough info", counts)
        continue
    print(letter)
    unique_letters = [x for x in counts[0] if x not in counts[1]]
    print(letter, "prev_words", {x : letter_counts[letter][0][x] for x in unique_letters})
    unique_letters = [x for x in counts[1] if x not in counts[0]]
    print(letter, "after_words", {x : letter_counts[letter][1][x] for x in unique_letters})


--------------
--------------
letters in common
--------------
*79 ['A']
*79 prev_words {'A': 3}
*79 after_words {'A': 2}
DI ['A', 'KU', 'QE']
DI prev_words {'A': 2, 'KU': 5, 'QE': 3}
DI after_words {'A': 2, 'KU': 2, 'QE': 4}
KU ['KU', 'QE', 'MI', 'DI', 'TE', 'U', 'PA', 'SA', 'DA']
KU prev_words {'KU': 17, 'QE': 2, 'MI': 5, 'DI': 4, 'TE': 2, 'U': 4, 'PA': 2, 'SA': 3, 'DA': 10}
KU after_words {'KU': 3, 'QE': 2, 'MI': 2, 'DI': 3, 'TE': 3, 'U': 2, 'PA': 3, 'SA': 3, 'DA': 7}
A ['KA', 'TO', 'A', 'RA₂', 'QA', 'DU', 'PA', 'DA']
A prev_words {'KA': 5, 'TO': 4, 'A': 39, 'RA₂': 2, 'QA': 3, 'DU': 4, 'PA': 4, 'DA': 5}
A after_words {'KA': 4, 'TO': 4, 'A': 9, 'RA₂': 2, 'QA': 3, 'DU': 4, 'PA': 2, 'DA': 5}
MA ['KA', 'A', 'RA₂', 'PA', 'SA', 'MA']
MA prev_words {'KA': 4, 'A': 4, 'RA₂': 2, 'PA': 3, 'SA': 2, 'MA': 15}
MA after_words {'KA': 3, 'A': 4, 'RA₂': 2, 'PA': 2, 'SA': 2, 'MA': 2}
O not enough info [{'O': 3}]
KA ['PA']
KA prev_words {'PA': 4}
KA after_words {'PA': 2}
RA not enough info [{'RA': 3}]


# Look for cases where the same assignees appear together more than once