# Merge of all dataframes related to "person"

This notebook aims to merge all dataframes that are related to "person" in the Maatbase database. Specifically:

- df_person
- df_titles
- df_person_working_activity
- df_attributes
- df_attributes_position
- df_name
- df_general

In addition, the following mapping tables are used:
- df_person_title
- df_titles_general
- df_general_attributes

### database schema

![Maatbase database schema](db_schema/Maatbase_schema_colored.svg)

### imports and load

In [1]:
import os
import numpy as np
import pandas as pd
import set_path
import supp.support_load as sl
from supp.support_merge import merge, group_to_list
from supp.support_save import save_df
from supp.support_analyzer import make_excel_analysis

In [2]:
# Load the pickled collection of dataframes together with their names and the
# export date reported by the loader.
dfs, dfs_name, dfs_export_date = sl.load_pickle()
# Build the name lookups. `ntoi` turns a dataframe name into the key used to
# index `dfs` (see the extraction cell below); `iton` is presumably the inverse
# mapping — TODO confirm against supp.support_load.
iton, ntoi = sl.get_name_dicts(dfs_name)
# Show the export date as the cell output.
dfs_export_date

Pickle database loaded.
C:\Users\Stoja\OneDrive\Documents\diplomka\scr\data\dfs_complete.pickle
Applied preprocessing: merge_on_jones
Applied preprocessing: remove_jones_duplicates


'2024-10-11'

In [3]:
# Pull every table used in this notebook out of `dfs` by name. `ntoi` translates
# a dataframe name into the key under which `dfs` stores that frame. The order
# of the targets on the left must match the order of the names on the right.
(
    df_person,
    df_titles,
    df_person_working_activity,
    df_attributes,
    df_attributes_position,
    df_name,
    df_person_title,
    df_titles_general,
    df_general_attributes,
    df_general,
) = (
    dfs[ntoi[table_name]]
    for table_name in (
        'df_person',
        'df_titles',
        'df_person_working_activity',
        'df_attributes',
        'df_attributes_position',
        'df_name',
        'df_person_title',
        'df_titles_general',
        'df_general_attributes',
        'df_general',
    )
)

### heads of dataframes

In [4]:
# Preview the person table; ID_person is the key every later merge joins on.
df_person.head(2)

Unnamed: 0,ID_person,sex,social_status,age_at_death,age_category,career_length
0,322,male,,,,
1,323,male,,,,


In [5]:
# Preview the title table; ID_title is the key used when titles are attached to persons.
df_titles.head(2)

Unnamed: 0,category,completeness,general_work_classification,geographic_feature,ID_title,institution,Jones,notes_shift,period_evidence_1,period_evidence_2,...,reference_evidence_1,reference_evidence_2,reference_shift,specific_profession,specific_work_classification,title,translation_of_title,translation_1,translation_2,type
0,,,,,1,,,,,,...,,,,,,... iry cA-pr,,,,
1,,,legal matters,,2,Great Court,,,,,...,,,,legal matters,,... n Hwt-wrt,... of the great court/Hall of Justice,,,administrative title – legal matters


In [6]:
# Preview the working-activity table; the same ID_person can appear on several rows.
df_person_working_activity.head(2)

Unnamed: 0,ID_person,working_activity
62,381,household management
63,381,land management


In [7]:
# Preview the attribute lookup table (ID_attribute -> social_attribute).
df_attributes.head(2)

Unnamed: 0,ID_attribute,social_attribute
0,2.0,scepter
1,1.0,handkerchief


In [8]:
# Preview the position lookup table (ID_position -> position).
df_attributes_position.head(2)

Unnamed: 0,ID_position,position
0,1,false door
1,2,false door panel


In [9]:
# Preview the name table; its name_id column also appears in df_general.
df_name.head(2)

Unnamed: 0,ID_official_source,name,Gourdon,Ranke,Scheele-Schweitzer,name_ext,name_id
0,1,Abnfn,2/2 Ab.n.f-n(.j),,,Abnfn,0
1,2,Abdw,4/1,"I, 2/4; II, p. 153",192/18,Abdw,1


In [10]:
# Preview df_general (default number of rows); it carries ID_person, the key it is grouped by later.
df_general.head()

Unnamed: 0,buried,complete_to_date,dynasty,find_number,ID_official,ID_person,king,locality,mentioned,nome,owner,shaft_main,site,uncertain,tomb_code_id,name_id
0,,,mid 5,,1,322,Nyuserre ?,Central Abusir,yes,1 LE,,,Abusir,,AC 8,0
1,,,late 5,,2,323,Djedkare?,Central Abusir,,1 LE,,,Abusir,,AC 3,1
2,,,late 5 or 6,,3,324,,Central Abusir,,1 LE,,,Abusir,,AC 2,1
3,,,late 5 – early 6,,4,325,,Central Abusir,,1 LE,,,Abusir,,AC 3,2
4,,,late 5,,5,326,Djedkare,Central Abusir,,1 LE,,,Abusir,,AC 2,3


### merge of attributes

In [11]:
# Starting point: the mapping table that links persons to attributes and positions.
print(f'shape={df_general_attributes.shape}, df_general_attributes shape')

# Stage 1: attach the attribute columns from df_attributes via ID_attribute.
attributes_named = merge(df_general_attributes, df_attributes, on='ID_attribute')
print(f'shape={attributes_named.shape}, merge df_general_attributes and df_attributes')

# Stage 2: attach the position columns from df_attributes_position via ID_position.
attributes_positioned = merge(attributes_named, df_attributes_position, on='ID_position')
print(f'shape={attributes_positioned.shape}, merge df_atributes_all and df_attributes_position')

# Stage 3: group by ID_person so attributes and positions are collected into lists.
# The result keeps the name `df_atributes_all` (spelling included) because the
# final merge cell refers to it under that name.
df_atributes_all = group_to_list(attributes_positioned, ['ID_person'])
print(f'shape={df_atributes_all.shape}, group into list according to ID_person')
df_atributes_all.head(10)

shape=(714, 3), df_general_attributes shape
shape=(714, 4), merge df_general_attributes and df_attributes
shape=(714, 5), merge df_atributes_all and df_attributes_position
shape=(334, 5), group into list according to ID_person


Unnamed: 0,ID_person,ID_attribute_list,ID_position_list,28_social_attribute_list,30_position_list
0,1,"[1.0, 2.0, 3.0, 7.0]",[],"[heart pendant, scepter, beard, handkerchief]",[]
1,2,"[17.0, 3.0, 15.0]",[],"[holding the father's staff, heart pendant, ho...",[]
2,4,[3.0],[],[heart pendant],[]
3,5,[3.0],[11.0],[heart pendant],[representative scene of tomb owner]
4,10,"[19.0, 5.0]",[11.0],"[elusive shape, cordon]",[representative scene of tomb owner]
5,19,[13.0],[],[worship gesture],[]
6,22,[1.0],[],[handkerchief],[]
7,40,"[9.0, 11.0, 4.0, 14.0]","[11.0, 5.0]","[staff, smelling a lotus flower , sandals, fla...","[representative scene of tomb owner, offering ..."
8,41,[13.0],[],[worship gesture],[]
9,42,"[1.0, 2.0]",[],"[scepter, handkerchief]",[]


### merge working activity

In [12]:
# Report the size of the raw working-activity table before grouping.
print(f'shape={df_person_working_activity.shape}, df_person_working_activity shape')
# Group by ID_person so each person's working activities are collected into a list.
df_person_working_activity_all = group_to_list(df_person_working_activity, ['ID_person'])
print(f'shape={df_person_working_activity_all.shape}, group into list according to ID_person')
# Preview the grouped result.
df_person_working_activity_all.head()

shape=(1711, 2), df_person_working_activity shape
shape=(968, 2), group into list according to ID_person


Unnamed: 0,ID_person,09_working_activity_list
0,1,"[head of royal works, body care in the royal p..."
1,2,"[assisting King during royal ceremonies, pries..."
2,4,"[body care in the royal palace, royal mortuary..."
3,5,"[body care in the royal palace, royal mortuary..."
4,6,"[priest(ess) of divine cult , prince(ss)]"


### merge of titles

In [13]:
# Size of the raw person <-> title mapping table.
print(f'shape={df_person_title.shape}, df_person_title shape')

# Keep only the two key columns and drop repeated (ID_person, ID_title) pairs.
person_title_pairs = df_person_title[['ID_person', 'ID_title']].drop_duplicates()
print(f'shape={person_title_pairs.shape}, df_titles_all removing duplicates')

# Attach the title details from df_titles via ID_title.
person_titles_detailed = merge(person_title_pairs, df_titles, on='ID_title')
print(f'shape={person_titles_detailed.shape}, merge df_titles_all and df_titles')

# Group by ID_person so every title column is collected into a list per person.
df_titles_all = group_to_list(person_titles_detailed, ['ID_person'])
print(f'shape={df_titles_all.shape}, group into list according to ID_person')
df_titles_all.head(2)

shape=(14182, 3), df_person_title shape
shape=(13695, 2), df_titles_all removing duplicates
shape=(13695, 25), merge df_titles_all and df_titles
shape=(3998, 25), group into list according to ID_person


Unnamed: 0,ID_person,ID_title_list,35_category_list,35_completeness_list,35_general_work_classification_list,35_geographic_feature_list,35_institution_list,35_Jones_list,35_notes_shift_list,35_period_evidence_1_list,...,35_reference_evidence_1_list,35_reference_evidence_2_list,35_reference_shift_list,35_specific_profession_list,35_specific_work_classification_list,35_title_list,35_translation_of_title_list,35_translation_1_list,35_translation_2_list,35_type_list
0,1,"[391.0, 139.0, 142.0, 143.0, 144.0, 145.0, 274...",[rank title],"[no, yes]","[religion, service, treasury, organization of ...","[Upper Egypt, Lybia]","[two houses of gold, treasury, cnwt-shrine, Ho...","[nn, 84/361, 44/231, 407–408/1500, 781/2848, 4...","[head of administration, The title frequently ...",[Middle Kingdom],...,"[Doxey 1998: 107, Moreno Garcia 2013: 8]",[],"[Moreno Garcia 2013: 8, Forshaw 2022: 59]","[palace administrator, participant in the fune...","[(non-)royal mortuary cult operation, royal fa...","[imy-rA prwy-nbw, Xry-Hbt, xrp aH, Hm-nTr Nxbt...","[khet-priest/attendant of Kha, great of censin...",[Elder of a shrine Doxey categorizes it as an...,[],"[epithet connected with afterworld, honorific ..."
1,2,"[161.0, 97.0, 74.0, 363.0, 362.0, 365.0, 79.0,...",[rank title],[no],"[religion, kingship]",[Upper Egypt],[],"[800/2922, nn, 246–247/896, 321–322/1183, 892/...",[The title frequently combined with zS mDAt-nT...,[Middle Kingdom],...,[Moreno Garcia 2013: 8],[],"[Moreno Garcia 2013: 8, Forshaw 2022: 59]","[participant in the funerary ceremonies, head ...","[royal ceremony, land management, king's court...","[cmr waty, zA.f cmcw, imy-rA 5maw, Xry-Hbt, zA...","[his eldest beloved son, sole companion, overs...","[The title became the synonym for ""mayor"", gov...",[],"[rank title, administrative title – land – pro..."


### merge df_general

In [14]:
# Report the size of df_general before grouping.
print(f'shape={df_general.shape}, df_general shape')
# Group the df_general records by ID_person, collecting the other columns into lists.
df_general_all = group_to_list(df_general, ['ID_person'])
print(f'shape={df_general_all.shape}, group into list according to ID_person')

shape=(5419, 16), df_general shape
shape=(4962, 16), group into list according to ID_person


In [15]:
# Preview df_general after grouping by ID_person.
df_general_all.head()

Unnamed: 0,ID_person,00_buried_list,00_complete_to_date_list,00_dynasty_list,00_find_number_list,ID_official_list,00_king_list,00_locality_list,00_mentioned_list,00_nome_list,00_owner_list,00_shaft_main_list,00_site_list,00_uncertain_list,tomb_code_id_list,name_id_list
0,1,[],[11.04.2024],[mid 5],[E 3836 = P 48],"[2885, 1415, 236, 237, 238, 239, 240, 4652, 53...","[Neferirkare, Nyuserre]","[North Saqqara, north of Step Pyramid, Central...",[yes],[1 LE],[owner],[yes],"[Abusir, Saqqara]",[uncertain],"[AC 10, AC 22, AC 2, AC 4, AC 8, monument of K...","[562, 379, 172, 166]"
1,2,[],[],"[mid 5, late 5 – early 6, late 5]",[],"[233, 242, 665, 668]","[Nyuserre – Djedkare, Nyuserre]",[Central Abusir],[yes],[1 LE],[owner],[yes],[Abusir],[uncertain],"[AC 9, AC 8, AC 3]","[172, 174]"
2,4,[],[11.04.2024],[mid 5],[],"[489, 490, 491, 3245, 2456]",[Nyuserre],"[North Saqqara, east of Step Pyramid, Central ...",[yes],[1 LE],[owner],[yes],"[Abusir, Saqqara]",[uncertain],"[AC 8, mastaba of Nyankhkhnum and Khnumhotep]",[350]
3,5,[],[06.03.2024],[mid 5],[],"[296, 297, 2455, 295]",[Nyuserre],"[North Saqqara, east of Step Pyramid, Central ...",[yes],[1 LE],[owner],[yes],"[Abusir, Saqqara]",[],"[AC 8, mastaba of Nyankhkhnum and Khnumhotep]",[212]
4,6,[uncertain],[],[mid 5],[],"[1184, 4653, 444, 445, 446]",[Nyuserre],"[North Saqqara, north of Step Pyramid, Central...",[yes],[1 LE],[owner],[yes],"[Abusir, Saqqara]",[],"[AC 8, monument of Khaemwaset, AC 10]","[322, 562]"


### merge of all

In [16]:
# Baseline: shape of the person table before anything is attached.
print(f'shape={df_person.shape}, df_person shape')

# Attach each grouped frame in turn, always joining on ID_person, and report the
# shape after every step. Order: working activity, attributes and positions,
# titles, df_general.
df_person_all = df_person
for grouped_frame, step_label in (
    (df_person_working_activity_all, 'merge df_person and df_person_working_activity_all'),
    (df_atributes_all, 'merge df_person_all and df_atributes_all'),
    (df_titles_all, 'merge df_person_all and df_titles_all'),
    (df_general_all, 'merge df_person_all and df_general_all'),
):
    df_person_all = merge(df_person_all, grouped_frame, on='ID_person')
    print(f'shape={df_person_all.shape}, {step_label}')

# Show the resulting column names and the first rows of the merged frame.
print(pd.Series(df_person_all.columns))
df_person_all.head()

shape=(4962, 6), df_person shape
shape=(4962, 7), merge df_person and df_person_working_activity_all
shape=(4962, 11), merge df_person_all and df_atributes_all
shape=(4962, 35), merge df_person_all and df_titles_all
shape=(4962, 50), merge df_person_all and df_general_all
0                                ID_person
1                                   08_sex
2                         08_social_status
3                          08_age_at_death
4                          08_age_category
5                         08_career_length
6                 09_working_activity_list
7                        ID_attribute_list
8                         ID_position_list
9                 28_social_attribute_list
10                        30_position_list
11                           ID_title_list
12                        35_category_list
13                    35_completeness_list
14     35_general_work_classification_list
15              35_geographic_feature_list
16                     35_institution_l

Unnamed: 0,ID_person,08_sex,08_social_status,08_age_at_death,08_age_category,08_career_length,09_working_activity_list,ID_attribute_list,ID_position_list,28_social_attribute_list,...,00_king_list,00_locality_list,00_mentioned_list,00_nome_list,00_owner_list,00_shaft_main_list,00_site_list,00_uncertain_list,tomb_code_id_list,name_id_list
0,322,male,,,,,,,,,...,[Nyuserre ?],[Central Abusir],[yes],[1 LE],[],[],[Abusir],[],[AC 8],[0]
1,323,male,,,,,,,,,...,[Djedkare?],[Central Abusir],[],[1 LE],[],[],[Abusir],[],[AC 3],[1]
2,324,male,,,,,,,,,...,[],[Central Abusir],[],[1 LE],[],[],[Abusir],[],[AC 2],[1]
3,325,male,,,,,,,,,...,[],[Central Abusir],[],[1 LE],[],[],[Abusir],[],[AC 3],[2]
4,326,male,,,,,,,,,...,[Djedkare],[Central Abusir],[],[1 LE],[],[],[Abusir],[],[AC 2],[3]


### save result

In [17]:
# Base name (without extension) shared by both output files.
f_name = r'df_person_all'
# Persist the merged frame; the logged output shows it is written as <f_name>.csv.
save_df(df_person_all, f_name)
# Write the accompanying Excel analysis; the logged output shows <f_name>.xlsx.
make_excel_analysis(df_person_all, f_name)

Dataframe saved into C:\Users\Stoja\OneDrive\Documents\diplomka\scr\data\df_person_all.csv
Excel analysis save into C:\Users\Stoja\OneDrive\Documents\diplomka\scr\excel_dfs_description\df_person_all.xlsx
