In [33]:
import csv                                                                                      
import json
import os
import re
from collections import Counter

def isNumber(word):
    if word >= u'\U00010100' and word <= u'\U0001013f':
        return True
    return False

place_names = {}
transaction_words = {}
transaction_signs = {}
numbers = {}
commodities = {}
from_suffix = {}
adjectives = {}
to_prefix = {}
fractions = {}
weights = {}
logograms = {}
words_in_linearb = {}
word_types = [(place_names, "060-place-names.txt", "place name", 0, 1),
        (transaction_words, "060-transaction-words.txt", "transaction term", 0, 1),
        (numbers, "050-numbers.csv", "number", 0, 1),
        (fractions, "065-fractions.txt", "fraction", 0, 0),
        (numbers, "065-fractions.txt", "number", 0, 0),
        (weights, "065-weights.txt", "weight", 0, 0),
        (logograms, "065-logograms.txt", "logogram", 0, 0),
        (commodities, "060-commodities.txt", "commodity", 0, 1),
        (from_suffix, "060-from-suffix.txt", "uses from suffix (-TE/-TI)", 0, 1),
        (adjectives, "060-adjectives.txt", "adjective for placename", 0, 1),
        (to_prefix, "060-to-prefix.txt", "uses to prefix (I-/J-)", 0, 1),
        (words_in_linearb, "135-identical-words-in-linearb.txt", "word also in linear b", 0, 1),
        (transaction_signs, "160-transaction-signs.txt", "transaction sign", 0, 1),
        ]
for word_type in word_types:
    dictionary = word_type[0]
    input_file = open("../../../LinearA-Original/" + word_type[1], 'r')
    while True:
        line = input_file.readline()
        if not line:
            break
        line_array = line.strip().split('\t')
        dictionary[line_array[word_type[3]]] = line_array[word_type[4]]

json_file = open('../../../LinearA-Original/150-metadata-template.js')
data = json.load(json_file)
inscriptions = data["metadata"]

def isHeadWord(word, original_word, no_of_words):
    if no_of_words == 1:
        return False
    if index:
        return False
    if original_word.startswith(u'\U0001076b'):
        return False
    if original_word == u'\U00010101':
        return False
    if word == "—":
        return False
    if word.isnumeric():
        return False
    return True;

def assignNumberToPreviousWord(word, word_tags, index, prev_word_tag, prev_original_word):
    if word not in numbers:
        return False
    if not index:
        return False
    if word == u'\U00010101':
        return False
    if word.startswith(u'\U0001076b'):
        return False
    cleaned_prev_original_word = prev_original_word.replace(u'\U0001076b', "")
    if cleaned_prev_original_word in numbers:
        return False
    if prev_original_word.endswith(u'\U0001076b'):
        return False
    if prev_original_word == "\n":
        return False
    return True

def shouldIncludeWord(word):
    if word in numbers:
        return False
    if u'\U00010101' in word:
        return False
    if word == "—":
        return False
    if word == "":
        return False
    return True

def wordRepeatedInInscription(word_tags, word):
    if not shouldIncludeWord(word):
        return False

    words = list(map(lambda x: x["word"].replace(u'\U0001076b', ""), word_tags))
    if words.count(word) > 1:
        return True
    return False

word_find_spots = {}
def addFindSpot(name, word):
    if not shouldIncludeWord(word):
        return
    find_spot = name[:2]
    if word not in word_find_spots:
        word_find_spots[word] = [find_spot]
        return
    word_find_spots[word].append(find_spot)

for inscription in inscriptions:
    for tags in inscription["tagsForWords"]:
        word = tags["word"]
        word = word.replace(u'\U0001076b', "")
        addFindSpot(inscription["name"], word)

locations = {}
locations["ZA"] ="Zakros"          
locations["PK"] ="Palaikastro"  
locations["PE"] ="Petras"  
locations["SY"] ="Syme" 
locations["PS"] ="Pseira" 
locations["MA" ] ="Malia" 
locations["AR"] ="Arkhalkhori" 
locations["IO"] ="Iouktas" 
locations["KN"] ="Knossos" 
locations["TY"] ="Tylissos"  
locations["PH"] ="Phaistos"  
locations["HT"] ="Haghia Triada"  
locations["AP"] ="Apodoulou"  
locations["KH"] ="Kharnia" 



# Classify the disbursement transactions where commodoties are relatively ordered.

In [34]:
# Main transaction tablets where commodities are ordered

transaction_tablets = ["ARKH3a", "ARKH3b", "HT2", "HT100", "HT101", 
        "HT114a", "HT116a", "HT116b", "HT12", "HT121", "HT125a", "HT125b",
        "HT129", "HT131a", "HT131b", "HT137", "HT139", "HT14", "HT18", "HT21",
        "HT23a", "HT23b", "HT27a", "HT27b", "HT28a", "HT28b", "HT30", "HT30",
        "HT32", "HT33", "HT34", "HT35", "HT44a", "HT50a", "HT58", "HT90",
        "HT91", "HT96b", "HT99a", "KH5", "KH8", "KH9", "KH9", "KH11", "KH21",
        "KH55", "KH61", "KNZb35", "TY3a", "ZA18a", "ZA6a", "ZA6b", "ZA11a"]

new_inscriptions = []
for old_inscription in inscriptions:
    inscription = old_inscription.copy()
    word_tags = inscription["tagsForWords"]
    if inscription["name"] not in transaction_tablets:
        continue

    inscription["transactions"] = []
    commodityID = 0
    transactionID = 0
    for index, word_tag in enumerate(word_tags):
        word = word_tag["transliteratedWord"]
        original_word = word_tag["word"]
        if "tags" in word_tag:
            del word_tag["tags"]

        tags = []
        if word == "\n":
            continue

        if original_word == u'\U0001076b':
            tags.append("lacuna")
        if original_word.startswith(u'\U0001076b'):
            tags.append("lacuna at start")
        if original_word.endswith(u'\U0001076b'):
            tags.append("lacuna at end")
        if u'\U00010101' in original_word:
            tags.append("word separator")
        if original_word == "—":
            tags.append("dividing line")

        cleaned_word = word.replace(u'\U0001076b', "")
        cleaned_original_word = original_word.replace(u'\U0001076b', "")
        for word_type in word_types:
            dictionary = word_type[0]
            annotation = word_type[2]
            if cleaned_word in dictionary or cleaned_original_word in dictionary:
                if not annotation in tags:
                    tags.append(annotation)

        if (len(cleaned_word) > 1 and not "word separator" in tags
            and word != '—' and word !=  u'\U0001076b'
            and not 'logogram' in tags
            and not 'commodity' in tags
            and not "number" in tags and not "fraction" in tags):
            tags.append("word")

        prev_word_tag = word_tags[index - 1]
        prev_original_word = prev_word_tag["word"]
        if assignNumberToPreviousWord(cleaned_original_word, word_tags, index,
                prev_word_tag, prev_original_word):
            commodityID += 1
            prev_word_tag["description"] = "commodity"
            prev_word_tag["commodityID"] = commodityID
        if "number" in tags:
            word_tag["commodityID"] = commodityID
            word_tag["description"] = "quantity"

        if "word" in tags:
            word_tag["description"] = "recipient"
            commodityID = 0
            transactionID += 1
        full_transaction_id = inscription["name"] + '-' + str(transactionID)
        word_tag["transactionID"] = full_transaction_id
        if full_transaction_id not in inscription["transactions"]:
            transaction_entry = {"description" : "sender",
                                 "transliteratedWord" : locations[inscription["name"][:2]] + " Magazine",
                                "transactionID" : full_transaction_id}
            if transaction_entry not in inscription["transactions"]:
                inscription["transactions"].append(transaction_entry)

    inscription["words"] = inscription.pop("tagsForWords")
    output_file = open(inscription["name"] + ".js", "w")
    output_file.write(json.dumps(inscription, sort_keys=True, indent=4, ensure_ascii=False))
    new_inscriptions.append(inscription)

transactions = {}
transactions["disbursement-transactions"] =  new_inscriptions

#output_file = open("040-disbursement-transactions.js", "w")
#output_file.write(json.dumps(transactions, sort_keys=True, indent=4, ensure_ascii=False))
#print(json.dumps(transactions, sort_keys=True, indent=4, ensure_ascii=False))


# Find some candidate tablets meeting certain patterns

Here we're looking for patterns where the quantity is assigned to the recipient rather than the commodity.


In [5]:
# Find some candidate tablets meeting certain patterns

transaction_tablets = ["ARKH3a", "ARKH3b", "HT2", "HT100", "HT101", 
        "HT114a", "HT116a", "HT116b", "HT12", "HT121", "HT125a", "HT125b",
        "HT129", "HT131a", "HT131b", "HT137", "HT139", "HT14", "HT18", "HT21",
        "HT23a", "HT23b", "HT27a", "HT27b", "HT28a", "HT28b", "HT30", "HT30",
        "HT32", "HT33", "HT34", "HT35", "HT44a", "HT50a", "HT58", "HT90",
        "HT91", "HT96b", "HT99a", "KH5", "KH8", "KH9", "KH9", "KH11", "KH21",
        "KH55", "KH61", "KNZb35", "TY3a", "ZA18a", "ZA6a", "ZA6b", "ZA11a"]


def wordsContainPattern(current_pattern):
    patterns = [
                ["head word", "commodity", "word", "number", "word", "number"],
                ["transaction term", "commodity", "word", "number", "word", "number"],
                ["word", "word", "word", "number", "word", "number"],
                ["word", "word", "commodity", "number"],
                ["word", "logogram", "word", "number"],
                ["word", "logogram", "number", "logogram", "number"],
                ["word", "number", "word", "number", "word", "number"],
                ["word", "number", "word", "number", "number", "word", "number"],
                ["word", "number", "word", "number", "number", "word", "number"],
                ["logogram", "number", "logogram", "number"],
               ]
    to_skip = ["word separator"]
    for pattern in patterns:
        matching_pattern = []
        for tags in current_pattern:
            if "word separator" in tags:
                continue
            index = len(matching_pattern)
            if pattern[index] in tags:
                matching_pattern.append(pattern[index])
            else:
                matching_pattern = []
            if len(matching_pattern) == len(pattern):
                return True
    return False

new_inscriptions = []
for old_inscription in inscriptions:
    inscription = old_inscription.copy()
    word_tags = inscription["tagsForWords"]
    
    # ignore tablets we've already covered
    if inscription["name"] in transaction_tablets:
        continue
        
    inscription["transactions"] = {}
    commodityID = 0
    transactionID = 0
    current_pattern = []
    
    for index, word_tag in enumerate(word_tags):
        word = word_tag["transliteratedWord"]
        original_word = word_tag["word"]
        if "tags" in word_tag:
            del word_tag["tags"]

        tags = []
        if word == "\n":
            continue

        if original_word == u'\U0001076b':
            tags.append("lacuna")
        if original_word.startswith(u'\U0001076b'):
            tags.append("lacuna at start")
        if original_word.endswith(u'\U0001076b'):
            tags.append("lacuna at end")
        if u'\U00010101' in original_word:
            tags.append("word separator")
        if original_word == "—":
            tags.append("dividing line")

        cleaned_word = word.replace(u'\U0001076b', "")
        cleaned_original_word = original_word.replace(u'\U0001076b', "")
        for word_type in word_types:
            dictionary = word_type[0]
            annotation = word_type[2]
            if cleaned_word in dictionary or cleaned_original_word in dictionary:
                if not annotation in tags:
                    tags.append(annotation)

        if (len(cleaned_word) > 1 and not "word separator" in tags
            and word != '—' and word !=  u'\U0001076b'
            and not 'logogram' in tags
            and not 'commodity' in tags
            and not "number" in tags and not "fraction" in tags):
            tags.append("word")

        prev_word_tag = word_tags[index - 1]
        prev_original_word = prev_word_tag["word"]
        if assignNumberToPreviousWord(cleaned_original_word, word_tags, index,
                prev_word_tag, prev_original_word):
            prev_word_tag["description"] = "recipient"
            prev_word_tag["commodityID"] = commodityID
        if "number" in tags:
            word_tag["commodityID"] = commodityID
            word_tag["description"] = "quantity"

        if "commodity" in tags:
            commodityID += 1
            word_tag["commodityID"] = commodityID
            word_tag["description"] = "commodity"

        if "word" in tags:
            word_tag["description"] = "sender"
            transactionID += 1
        full_transaction_id = inscription["name"] + '-' + str(transactionID)
        word_tag["transactionID"] = full_transaction_id
        current_pattern += [tags]
  
    inscription["words"] = inscription.pop("tagsForWords")
    
    if not wordsContainPattern(current_pattern):
        continue
    #print(json.dumps(inscription, sort_keys=True, indent=4, ensure_ascii=False))
    if os.path.isfile("./final/" + inscription["name"] + '.js'):
        print(inscription["name"], "matches but file already exists")
        continue
    if os.path.isfile("./ignore/" + inscription["name"] + '.js'):
        print(inscription["name"], "matches but file already exists")
        continue
    print(inscription["name"], "written")

    output_file = open(inscription["name"] + ".js", "w")
    output_file.write(json.dumps(inscription, sort_keys=True, indent=4, ensure_ascii=False))
    output_file.close()
    


HT1 matches but file already exists
HT3 matches but file already exists
HT6a matches but file already exists
HT6b matches but file already exists
HT7a matches but file already exists
HT8a matches but file already exists
HT8b matches but file already exists
HT9a matches but file already exists
HT9b matches but file already exists
HT10a matches but file already exists
HT10b matches but file already exists
HT11a written
HT11b written
HT13 matches but file already exists
HT15 matches but file already exists
HT16 written
HT17 matches but file already exists
HT19 matches but file already exists
HT20 written
HT24a written
HT25a matches but file already exists
HT25b written
HT29 matches but file already exists
HT31 matches but file already exists
HT38 written
HT39 written
HT40 matches but file already exists
HT42+59 matches but file already exists
HT45b written
HT49a written
HT51a matches but file already exists
HT60 written
HT69 matches but file already exists
HT82 written
HT85a matches but f

## Create templates for some missing tablets

In [44]:
# Find some candidate tablets meeting certain patterns

transaction_tablets = ['ZA14', 'HT127b', 'HT146', 'HT25a', 'HT3', 'HT39', 'HT63',
                       'HT98a', 'ZA20', 'ZA7a', 'ZA10a', 'ZA10b', 'PE2', 'HT130',
                       'HT24b', 'TY2', 'HT105', 'HT123+124a', 'HT132', 'HT20', 'ZA1a',
                       'ZA9', 'ZA15b', 'KH6', 'KH7a', 'KH7b', 'TY3b', 'ARKH5', 'KH4',
                       'HT31', 'HT43', 'PE1', 'ZA11b', 'HT16', 'HT24a', 'HT123+124b', 
                       'HT7b', 'HT26a', 'HT11b', 'HT103', 'HT110a', 'HT26b', 'HT36',
                       'HT62+73', 'HT89', 'HT93a', 'HT140', 'HT86b']


new_inscriptions = []
for old_inscription in inscriptions:
    inscription = old_inscription.copy()
    word_tags = inscription["tagsForWords"]
    
    # ignore tablets we're not interested in
    if inscription["name"] not in transaction_tablets:
        continue
        
    inscription["transactions"] = []
    commodityID = 0
    transactionID = 0
    current_pattern = []
    
    for index, word_tag in enumerate(word_tags):
        word = word_tag["transliteratedWord"]
        original_word = word_tag["word"]
        if "tags" in word_tag:
            del word_tag["tags"]

        tags = []
        if word == "\n":
            continue

        if original_word == u'\U0001076b':
            tags.append("lacuna")
        if original_word.startswith(u'\U0001076b'):
            tags.append("lacuna at start")
        if original_word.endswith(u'\U0001076b'):
            tags.append("lacuna at end")
        if u'\U00010101' in original_word:
            tags.append("word separator")
        if original_word == "—":
            tags.append("dividing line")

        cleaned_word = word.replace(u'\U0001076b', "")
        cleaned_original_word = original_word.replace(u'\U0001076b', "")
        for word_type in word_types:
            dictionary = word_type[0]
            annotation = word_type[2]
            if cleaned_word in dictionary or cleaned_original_word in dictionary:
                if not annotation in tags:
                    tags.append(annotation)

        if (len(cleaned_word) > 1 and not "word separator" in tags
            and word != '—' and word !=  u'\U0001076b'
            and not 'logogram' in tags
            and not 'commodity' in tags
            and not "number" in tags and not "fraction" in tags):
            tags.append("word")

        prev_word_tag = word_tags[index - 1]
        prev_original_word = prev_word_tag["word"]
        if assignNumberToPreviousWord(cleaned_original_word, word_tags, index,
                prev_word_tag, prev_original_word):
            prev_word_tag["description"] = "recipient"
            prev_word_tag["commodityID"] = commodityID
        if "number" in tags:
            word_tag["commodityID"] = commodityID
            word_tag["description"] = "quantity"

        if "commodity" in tags:
            commodityID += 1
            word_tag["commodityID"] = commodityID
            word_tag["description"] = "commodity"

        if "word" in tags:
            word_tag["description"] = "sender"
            transactionID += 1
        full_transaction_id = inscription["name"] + '-' + str(transactionID)
        word_tag["transactionID"] = full_transaction_id
        current_pattern += [tags]
  
    inscription["words"] = inscription.pop("tagsForWords")
    
    #print(json.dumps(inscription, sort_keys=True, indent=4, ensure_ascii=False))
    if os.path.isfile("./final/" + inscription["name"] + '.js'):
        print(inscription["name"], "matches but file already exists")
        continue
    if os.path.isfile("./ignore/" + inscription["name"] + '.js'):
        print(inscription["name"], "matches but file already exists")
        continue
    print(inscription["name"], "written")

    output_file = open(inscription["name"] + ".js", "w")
    output_file.write(json.dumps(inscription, sort_keys=True, indent=4, ensure_ascii=False))
    output_file.close()
    


HT3 matches but file already exists
HT7b written
HT11b written
HT16 written
HT20 written
HT24a written
HT24b written
HT25a matches but file already exists
HT26a written
HT26b written
HT31 matches but file already exists
HT36 written
HT39 matches but file already exists
HT43 written
HT62+73 written
HT63 written
HT86b written
HT89 written
HT93a written
HT98a written
HT103 written
HT105 written
HT110a written
HT123+124a written
HT123+124b written
HT127b written
HT130 matches but file already exists
HT132 written
HT140 matches but file already exists
HT146 written
KH4 matches but file already exists
KH6 matches but file already exists
KH7a written
KH7b written
PE1 written
PE2 written
TY2 written
TY3b written
ZA1a written
ZA7a written
ZA9 written
ZA10a written
ZA10b written
ZA11b written
ZA14 matches but file already exists
ZA15b written
ZA20 written
ARKH5 written


In [41]:

# Write out a template file for the selected inscription
name = "HT146"
output_file = open(name + ".js", "w")
output_file.write(json.dumps([{"name": name,
                              "transactions":[],
                              "words": x["tagsForWords"] }
                              for x in inscriptions if x["name"] == name]
                             , sort_keys=True, indent=4, ensure_ascii=False))
output_file.close()




# Write out the transaction files to the site directory

In [50]:
import os

inputdir = "./final/"
output_file = open("../transactions.js", "w")
output_file.write("var transactions = [")
for subdir, dirs, files in os.walk(inputdir):
    for file in files:
        if file[-2:] != "js":
            continue
        json_file = open(subdir + os.sep + file).read()
        output_file.write(json_file)
        output_file.write(',')
output_file.write("];")
output_file.close()
