In [3]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [21]:
# %load get_roles_edited.py
import re

# ORACC_FILE = 'raw-data/p001.atf'
# Text file opened by get_drehem_p_ids(); each line of it is added to
# complete_drehem_p_sets as one P-id.
DREHEM_P_IDS_FILE = 'drehem_p_ids.txt'

# NOTE(review): NUM_TEXTS is not referenced anywhere in the visible part of
# this notebook -- confirm whether it is still needed.
NUM_TEXTS = 25
# things to keep:
#	set: (p_index)
#	dictionary: {p_index: transaction}
#	object transaction has p_index, source, receiver, 

# Accumulators filled by process_files() across all input files:
# every parsed transaction, and the subset whose P-id is in the Drehem list.
complete_transaction_ls = list()
drehem_transaction_ls = list()

# All P-ids read from DREHEM_P_IDS_FILE (filled by get_drehem_p_ids()).
complete_drehem_p_sets = set()
# P-ids of the file currently being processed that also occur in
# complete_drehem_p_sets (filled by collect_p_id_of_interest(), reset per
# file by process_files()).
p_sets_of_interest = set()
# Working list that get_transactions() appends to; process_files() rebinds
# it to a fresh list before each parse.
transaction_ls = list()

class Transaction:
	"""One text from an input file, identified by its P-index.

	Attributes:
		p_index: the P-number as a string (e.g. '100259').
		roles: dict mapping a person's name to the list of role names
			assigned to that person (e.g. {'ab-ba-sa₆-ga': ['source']}).
		people: set of words that were lemmatized as PN (filled by get_PN).
		line: the raw text of the transaction. It starts out as a single
			string (header line plus the lines appended by
			get_transactions); clean_transaction() replaces it with a
			list of 'text\\n#lem: ...' entries.
		year: text of the year formula, or None when not determined.
		ls_lines_containing_PN: entries of `line` that contain 'PN'.
	"""

	def __init__(self, p, line):
		self.p_index = p
		# can add date/place/etc.
		self.roles = {}
		self.people = set()
		self.line = line
		self.year = None
		self.ls_lines_containing_PN = list()

	def __str__(self):
		# `line` is a str before clean_transaction() and a list of str
		# afterwards; plain concatenation would raise TypeError on the list.
		if isinstance(self.line, str):
			body = self.line
		else:
			body = '\n'.join(self.line)
		return 'P' + str(self.p_index) + '\nlines: ' + body

	def get_num_people(self):
		"""Return how many distinct PN words were collected in `people`."""
		return len(self.people)

def get_p_index(line):
	"""Return the P-index found at the start of a header line.

	The index is everything before the first space with its two leading
	characters removed, so '&P100259 = ...' gives '100259'.
	"""
	header_token, _, _ = line.partition(' ')
	return header_token[2:]


def get_drehem_p_ids():
	"""Load the P-ids listed in DREHEM_P_IDS_FILE into complete_drehem_p_sets.

	Each non-blank line of the file contributes one id. Surrounding
	whitespace (including the line terminator) is stripped, so the last id
	is read correctly even when the file does not end with a newline.

	Returns:
		The module-level set complete_drehem_p_sets (updated in place).
	"""
	with open(DREHEM_P_IDS_FILE) as read_file:
		for line in read_file:
			p_id = line.strip()
			# skip blank lines instead of storing an empty id
			if p_id:
				complete_drehem_p_sets.add(p_id)
	return complete_drehem_p_sets

def collect_p_id_of_interest(file_name):
	"""Record which P-ids of `file_name` also appear in the Drehem id list.

	Calls get_drehem_p_ids() first, then scans the file for header lines
	(those starting with '&P') and adds every P-id that is in
	complete_drehem_p_sets to the module-level p_sets_of_interest.

	Returns:
		The module-level set p_sets_of_interest.
	"""
	get_drehem_p_ids()
	with open(file_name) as input_file:
		for raw_line in input_file:
			header = raw_line.strip()
			if not header.startswith('&P'):
				continue
			candidate = get_p_index(header)
			if candidate in complete_drehem_p_sets:
				p_sets_of_interest.add(candidate)
	return p_sets_of_interest

def get_transactions(file_name, p_id_set_sort=None):
	"""Split `file_name` into Transaction objects, one per '&P' header.

	Every header line starts a new Transaction; the stripped lines that
	follow are appended to its `line` string, separated by newlines.
	Finished transactions are appended to the module-level transaction_ls.

	Args:
		file_name: path of the file to read.
		p_id_set_sort: optional container of P-ids. When given, only
			transactions whose p_index is in it are kept; when None,
			all transactions are kept.

	Returns:
		The module-level list transaction_ls.
	"""
	def _keep(transaction):
		return p_id_set_sort is None or transaction.p_index in p_id_set_sort

	currentTransaction = None
	with open(file_name) as input_file:
		for line in input_file:
			line = line.strip() # remove \n
			if line.startswith('&P'):
				# a new header closes the transaction collected so far
				if currentTransaction is not None and _keep(currentTransaction):
					transaction_ls.append(currentTransaction)
				currentTransaction = Transaction(get_p_index(line), line)
			elif currentTransaction is not None:
				currentTransaction.line += "\n" + line
			# lines before the first header belong to no transaction and
			# are ignored (previously they raised AttributeError)

	# add the last transaction; there is none when the file had no header
	if currentTransaction is not None and _keep(currentTransaction):
		transaction_ls.append(currentTransaction)

	return transaction_ls


# with open(ORACC_FILE) as input_file:
# 	for line in input_file:
# 		line = line.strip()
# 		if not re.match(r'^&P|^#lem|^\d+\.|^@|^#|^\$|^\d+ʾ\.|^=:|^\s+$|^$', line):
# 			print(line);
		# if re.match(r'^&P|^#lem|^\d+\.|^\d+ʾ\.', line):
		# 	print(line)
		# elif re.match(r'|^\s+$|^$', line):
			#ignore
		# elif re.match(r'^@|^#|^\$|')
def clean_transaction(transaction):
	"""Replace transaction.line (a string) with its lemmatized entries.

	Only numbered text lines that are immediately followed by a '#lem:'
	line are kept. The result is a list of two-line strings of the form
	'1. ... \\n#lem: ...'; every other line of the text is discarded.
	"""
	numbered_line_with_lemma = re.compile(r'(\d+ʾ\..*\n#lem:.*|\d+\..*\n#lem:.*)')
	transaction.line = numbered_line_with_lemma.findall(transaction.line)


def get_PN(transaction):
	"""Collect the personal names (PN) of a cleaned transaction.

	Entries of transaction.line that contain 'PN' are appended to
	transaction.ls_lines_containing_PN. For each entry in that list the
	lemma line is split on ';' and, for every lemma containing 'PN', the
	word at the same position of the text line (line number removed) is
	added to transaction.people.
	"""
	transaction.ls_lines_containing_PN.extend(
		entry for entry in transaction.line if "PN" in entry
	)

	for entry in transaction.ls_lines_containing_PN:
		text_part, lemma_part = entry.split("\n")
		lemmas = lemma_part.replace("#lem: ", "").split(";")
		# the first token of the text line is its line number
		words = text_part.split(" ")[1:]
		for position, lemma in enumerate(lemmas):
			if "PN" in lemma:
				transaction.people.add(words[position])


def process_files():
	"""Parse raw-data/p001.atf ... raw-data/p015.atf into transactions.

	For every file the transactions are parsed once, cleaned with
	clean_transaction() and scanned with get_PN(). All of them are added
	to complete_transaction_ls; those whose P-id is in p_sets_of_interest
	(i.e. listed in the Drehem id file) are also added to
	drehem_transaction_ls.

	Note: the Drehem list holds the same Transaction objects as the
	complete list (the file is no longer parsed a second time to build it).

	Returns:
		The module-level list complete_transaction_ls.
	"""
	global p_sets_of_interest
	global transaction_ls
	global complete_transaction_ls
	global drehem_transaction_ls

	# start from empty accumulators so calling this function again does
	# not double-count the transactions of an earlier call
	complete_transaction_ls = list()
	drehem_transaction_ls = list()

	for i in range(1, 16): # oracc file number
		p_sets_of_interest = set()
		transaction_ls = list()
		ORACC_FILE = 'raw-data/p%03d.atf' % i
		collect_p_id_of_interest(ORACC_FILE)

		# get complete list of transactions
		get_transactions(ORACC_FILE)
		for trans in transaction_ls:
			clean_transaction(trans)
			get_PN(trans)
		complete_transaction_ls += transaction_ls

		# Drehem transactions are the parsed ones whose id is of interest;
		# filtering here gives the same selection as re-reading the file
		# with the id set as filter.
		drehem_subset = [
			trans for trans in transaction_ls
			if trans.p_index in p_sets_of_interest
		]
		drehem_transaction_ls += drehem_subset
		# as before, the working list ends up holding the Drehem subset
		transaction_ls = drehem_subset

		print("Got transactions from "+ORACC_FILE)

	print("***FINISH***")
	print("***Total of ", len(complete_transaction_ls), " transactions.***")
	print("***Total of ", len(drehem_transaction_ls), " Drehem transactions.***")
	return complete_transaction_ls

def main():
	"""Run process_files() and print summary counts over all transactions.

	Reports how many transactions have no PN at all and how many contain
	each of the markers 'ki[place]', 'šu ba-ti' and 'ra₂-gaba' anywhere in
	their (concatenated) cleaned lines.
	"""
	markers = ("ki[place]", "šu ba-ti", "ra₂-gaba")
	hits = dict.fromkeys(markers, 0)
	without_people = 0

	for trans in process_files():
		if not trans.get_num_people():
			without_people += 1
		whole_text = ''.join(trans.line)
		for marker in markers:
			if marker in whole_text:
				hits[marker] += 1

	print(without_people, " transactions do NOT have PN.")
	print(hits["ki[place]"], " transactions contain a word ki[place].")
	print(hits["šu ba-ti"], " transactions contain a word šu ba-ti.")
	print(hits["ra₂-gaba"], " transactions contain a word ra₂-gaba.")

# Run the pipeline when this cell executes: main() calls process_files(),
# which reads the raw-data files, and then prints the summary counts.
main()



Got transactions from raw-data/p001.atf
Got transactions from raw-data/p002.atf
Got transactions from raw-data/p003.atf
Got transactions from raw-data/p004.atf
Got transactions from raw-data/p005.atf
Got transactions from raw-data/p006.atf
Got transactions from raw-data/p007.atf
Got transactions from raw-data/p008.atf
Got transactions from raw-data/p009.atf
Got transactions from raw-data/p010.atf
Got transactions from raw-data/p011.atf
Got transactions from raw-data/p012.atf
Got transactions from raw-data/p013.atf
Got transactions from raw-data/p014.atf
Got transactions from raw-data/p015.atf
***FINISH***
***Total of  67499  transactions.***
***Total of  14594  Drehem transactions.***
3794  transactions do NOT have PN.
33241  transactions contain a word ki[place].
10993  transactions contain a word šu ba-ti.
590  transactions contain a word ra₂-gaba.


In [22]:
# Collect the year text of every Drehem transaction.
# An entry counts as a year line when its lemma line mentions "mu[year]";
# the year text is the text line without its first two tokens (the line
# number and, presumably, the word "mu" -- TODO confirm).
# When a transaction has several such entries, the last one wins.
year_ls = []
for transaction in drehem_transaction_ls:
    year_lines = (entry for entry in transaction.line if "mu[year]" in entry)
    for line in year_lines:
        txt, translit = line.split("\n")
        translit = translit.replace("#lem: ", "").split("; ")
        txt = " ".join(txt.split(" ")[2:])
        transaction.year = txt
        year_ls.append(txt)
            
        

In [23]:
# Tally how often each extracted year text occurs; the cell displays the
# resulting counts.
pd_year = pd.Series(year_ls)
pd_year.value_counts()

ša-aš-ru{ki} ba-hul                                                                          750
us₂-sa ki-maš{ki} ba-hul                                                                     575
hu-uh₂-nu-ri{ki} ba-hul                                                                      571
en eridu{ki} ba-hun                                                                          564
{d}amar-{d}suen lugal                                                                        559
en-unu₆-gal {d}inanna ba-hun                                                                 530
{d}amar-{d}suen lugal-e ur-bi₂-lum{ki} mu-hul                                                458
en-mah-gal-an-na en {d}nanna ba-hun                                                          394
ki-maš{ki} u₃ hu-ur₅-ti{ki} ba-hul                                                           303
en-unu₆-gal {d}inanna unu{ki} ba-hun                                                         298
{d}šu-{d}suen lugal           

### List of Roles

In [24]:
count = 0
pninline = 0

# Markers searched for in an entry ("text line\n#lem: lemma line").
MU_KU = "mu-kuₓ(DU)"
MASKIM = "maškim"
I_DAB = "i₃-dab"
SU_BA_TI = "šu ba-ti"
SU_BA_AN_TI = "šu ba-an-ti"
KI_PLACE = "ki[place]"
ZIGA = "ziga[expenditure]"


def _split_entry(entry):
    """Split an entry into (words, lemmas).

    `words` are the space-separated tokens of the text line without the
    leading line number; `lemmas` are the '; '-separated items of the
    '#lem:' line.
    """
    text_part, _, lemma_part = entry.partition("\n")
    words = text_part.split(" ")[1:]
    lemmas = lemma_part.replace("#lem: ", "").split("; ")
    return words, lemmas


def _add_role(transaction, person, role):
    """Append `role` to the list of roles recorded for `person`."""
    transaction.roles.setdefault(person, []).append(role)


def _own_pn(words, lemmas):
    """Return the word aligned with the first 'PN' lemma, or None.

    None is returned when there is no 'PN' lemma or when the text line
    has fewer words than the position of that lemma.
    """
    if "PN" not in lemmas:
        return None
    position = lemmas.index("PN")
    return words[position] if position < len(words) else None


def _neighbor_pn(entry, at_start, excluded):
    """Return the PN of a neighbouring entry, or None.

    The neighbour only qualifies when its first 'PN' lemma is the first
    lemma (`at_start` true) or the last lemma (`at_start` false) and when
    none of the `excluded` markers occurs in it.
    """
    words, lemmas = _split_entry(entry)
    if "PN" not in lemmas:
        return None
    position = lemmas.index("PN")
    wanted = 0 if at_start else len(lemmas) - 1
    if position != wanted or position >= len(words):
        return None
    if any(marker in entry for marker in excluded):
        return None
    return words[position]


def _resolve_person(entry, words, lemmas, neighbor=None, at_start=True, excluded=()):
    """Find the person a keyword on `entry` refers to.

    If the entry itself mentions 'PN', only the entry is considered;
    otherwise the optional `neighbor` entry is consulted.
    """
    if "PN" in entry:
        return _own_pn(words, lemmas)
    if neighbor is None:
        return None
    return _neighbor_pn(neighbor, at_start, excluded)


def _is_giri_line(words, lemmas):
    """True when 'giri₃' (or '[giri₃]') is a word lemmatized as ŋiri[foot]."""
    for form in ("giri₃", "[giri₃]"):
        if form in words:
            position = words.index(form)
            if position < len(lemmas) and lemmas[position] == "ŋiri[foot]":
                return True
    return False


def _find_ba_zi(words):
    """Index of the word 'ba-zi' (else of the last word containing it), or None."""
    if "ba-zi" in words:
        return words.index("ba-zi")
    partial = [position for position, word in enumerate(words) if "ba-zi" in word]
    return partial[-1] if partial else None


def assign_roles(transaction):
    """Fill transaction.roles from the keywords found in its entries.

    Every check below inspects the entry at index i itself; neighbouring
    entries are only read through the helpers and never replace it, so a
    neighbour lookup for one keyword cannot redirect the later checks.
    Roles are rebuilt from scratch, which makes re-running this cell safe.
    """
    entries = transaction.line
    transaction.roles = {}
    for i, entry in enumerate(entries):
        words, lemmas = _split_entry(entry)
        prev_entry = entries[i - 1] if i > 0 else None
        next_entry = entries[i + 1] if i + 1 < len(entries) else None

        # giri₃ = intermediary; PN on the same line or first on the next line
        if _is_giri_line(words, lemmas):
            person = _resolve_person(
                entry, words, lemmas, neighbor=next_entry, at_start=True,
                excluded=(MU_KU, MASKIM, I_DAB, SU_BA_TI, SU_BA_AN_TI, KI_PLACE),
            )
            if person is not None:
                _add_role(transaction, person, 'intermediary')

        # mu-kuₓ(DU) = new owner; the next line is deliberately not consulted
        if MU_KU in entry:
            person = _resolve_person(entry, words, lemmas)
            if person is not None:
                _add_role(transaction, person, 'new owner')

        # i₃-dab = recipient; PN on the same line or last on the previous line
        if I_DAB in entry:
            person = _resolve_person(
                entry, words, lemmas, neighbor=prev_entry, at_start=False,
                excluded=(MU_KU, MASKIM, SU_BA_TI, SU_BA_AN_TI, KI_PLACE),
            )
            if person is not None:
                _add_role(transaction, person, 'recipient')

        # šu ba-ti / šu ba-an-ti = recipient; same line or last on the previous line
        if (SU_BA_TI in entry) or (SU_BA_AN_TI in entry):
            person = _resolve_person(
                entry, words, lemmas, neighbor=prev_entry, at_start=False,
                excluded=(MU_KU, MASKIM, I_DAB, KI_PLACE),
            )
            if person is not None:
                _add_role(transaction, person, 'recipient')

        # maškim = representative; the previous line is deliberately not consulted
        if MASKIM in entry:
            person = _resolve_person(entry, words, lemmas)
            if person is not None:
                _add_role(transaction, person, 'representative')

        # ki ...
        if KI_PLACE in entry:
            if "-ta ba-zi" in entry:
                # ki PN-ta ba-zi: the word before ba-zi, minus its "-ta", is the source
                ba_zi_at = _find_ba_zi(words)
                if ba_zi_at is not None and ba_zi_at >= 1:
                    _add_role(transaction, words[ba_zi_at - 1][:-3], 'source')
            elif ("-ta" in entry) and ("PN" in entry):
                # ki PN-ta: every word ending in "-ta" is taken as a source
                for word in words:
                    if word.endswith("-ta"):
                        _add_role(transaction, word[:-3], 'source')

        # ziga = source; PN on the same line, or first on the next line when
        # ziga is the last lemma of this line
        if ZIGA in entry:
            follow_up = next_entry if ZIGA in lemmas[-1] else None
            person = _resolve_person(
                entry, words, lemmas, neighbor=follow_up, at_start=True,
                excluded=(MU_KU, MASKIM, I_DAB, SU_BA_TI, SU_BA_AN_TI),
            )
            if person is not None:
                _add_role(transaction, person, 'source')


for transaction in drehem_transaction_ls:
    assign_roles(transaction)
                        
                        

In [25]:
# Number of Drehem transactions in which at least one person got a role.
count = sum(1 for transaction in drehem_transaction_ls if len(transaction.roles) > 0)
print("# transactions having at least one PN:")
print(count)


# transactions having at least one PN:
12079


In [26]:
# Number of Drehem transactions in which more than one person got a role.
count = sum(1 for transaction in drehem_transaction_ls if len(transaction.roles) > 1)
print("# transactions having more than one PN:")
print(count)

# transactions having more than one PN:
8277


In [27]:
count = 0
# Normalize the role list of every person:
#   * duplicates are removed; the remaining roles are sorted so the result
#     (and the CSV written from it) does not depend on set iteration order,
#   * a person who is both 'recipient' and 'new owner' (and nothing else)
#     is kept as 'recipient' only.
# Other role combinations are left as they are.
for transaction in drehem_transaction_ls:
    for person, rolelist in transaction.roles.items():
        if len(rolelist) <= 1:
            continue
        unique_roles = sorted(set(rolelist))
        if unique_roles == ['new owner', 'recipient']:
            unique_roles = ['recipient']
        # replacing the value of an existing key is safe while iterating
        transaction.roles[person] = unique_roles
  
    

In [28]:
# One row per (person, transaction): name, list of roles, year text, P-index.
ls = [
    [name, role, transaction.year, transaction.p_index]
    for transaction in drehem_transaction_ls
    for name, role in transaction.roles.items()
]
# Passing the column names to the constructor also works when `ls` is empty
# (assigning df.columns afterwards fails on a frame with zero columns).
df = pd.DataFrame(ls, columns=["name", "role", "year", "p_index"])
df.index += 1  # number the rows from 1
df.to_csv('roles.csv')

In [29]:
df

Unnamed: 0,name,role,year,p_index
1,ab-ba-kal-la,[source],,100041
2,lu₂-dingir-ra,[source],ki-maš{ki} u₃ hu-ur₅-ti{ki} ba-hul,100189
3,ur-nigar{gar},[recipient],ki-maš{ki} u₃ hu-ur₅-ti{ki} ba-hul,100189
4,nu-i₃-da,[new owner],us₂-sa ki-maš{ki} ba-hul,100190
5,na-sa₆,[source],us₂-sa ki-maš{ki} ba-hul,100190
6,lu₂-dingir-ra,[source],[{d}]amar#-{d}suen lugal,100191
7,ur-nigar{gar},[recipient],[{d}]amar#-{d}suen lugal,100191
8,lu₂-dingir-ra,[source],{d}amar-{d}suen lugal,100211
9,ur-nigar{gar},[recipient],{d}amar-{d}suen lugal,100211
10,šu-{d}nin-šubur,[representative],en {d}nanna kar-zi-da ba-hun,100215


# Problem
1. What should happen when the same person appears more than once in a text, with a different role each time?



* ex) ba-ba-an-še-en: line 3(new owner), reverse line 1(representative).
* ex) amar-šuba: reverse line 2(source), reverse line 4(recipient)

In [64]:


# Print the assigned roles and the cleaned lines of the two example texts.
p_ids_to_inspect = ("122165", "125923")
for transaction in drehem_transaction_ls:
    if transaction.p_index not in p_ids_to_inspect:
        continue
    print(transaction.roles)
    print("\n".join(transaction.line))
    print("\n\n\n")

{'ba-ba-an-še-en': ['new owner'], 'ab-ba-sa₆-ga': ['source']}
1. 1(diš) sila₄ {d}nanna
#lem: n; sila[lamb]; DN
2. 1(diš) sila₄ {d}nin-gal
#lem: n; sila[lamb]; DN
3. mu-kuₓ(DU) ba-ba-an-še-en
#lem: mu.DU[delivery]; PN
4. 1(diš) sila₄ an
#lem: n; sila[lamb]; an[sky]
5. 1(diš) sila₄ {d}inanna
#lem: n; sila[lamb]; TN
6. mu-kuₓ(DU) en {d}inanna
#lem: mu.DU[delivery]; en[priest]; TN
1. ba-ba-an-še-en maškim
#lem: PN; maškim[administrator]
2. u₄ 1(u) 3(diš)-kam
#lem: ud[sun]; n; n
3. ki ab-ba-sa₆-ga-ta ba-zi
#lem: ki[place]; PN; PN
4. iti ezem-{d}šul-gi
#lem: itud[moon]; MN
5. mu en-unu₆-gal {d}inanna ba-hun
#lem: mu[year]; X; TN; huŋ[hire]
1. 4(diš)
#lem: n




{'amar-šuba': ['recipient', 'source'], 'dingir-dingir-su': ['recipient', 'source'], '[dingir]-dingir-su': ['recipient'], 'dingir]-dingir-su': ['source']}
1. [n] udu
#lem: n; udu[sheep]
2. [n] 9(diš) maš₂
#lem: n; n; maš[goat]
3. [amar]-šuba
#lem: PN
4. [n] 2(geš₂) 9(diš) maš₂
#lem: n; n; n; maš[goat]
5. [iti še]-sag₁₁-ku₅
#lem: itud[m