# Merge of all dataframes related to "tomb"

This notebook aims to merge all dataframes that are related to "tomb" in Maatbase database. Specifically:

- df_tomb
- df_tomb_chapel
- df_tomb_osiris
- df_tomb_statue_material
- df_tomb_dim
- df_tomb_king_name

### database schema

![title](db_schema\Maatbase_schema_colored.svg)

### imports and load

In [1]:
import os
import numpy as np
import pandas as pd
import set_path
import supp.support_load as sl
from supp.support_merge import merge, group_to_list, merge_get_prefix, merge_add_prefix
from supp.support_save import save_df
from supp.support_analyzer import make_excel_analysis

In [2]:
dfs, dfs_name, dfs_export_date = sl.load_pickle()
iton, ntoi = sl.get_name_dicts(dfs_name)
dfs_export_date

Pickle database loaded.


'2024-10-11'

In [3]:
df_tomb = dfs[ntoi['df_tomb']]
df_tomb_chapel = dfs[ntoi['df_tomb_chapel']]
df_tomb_osiris = dfs[ntoi['df_tomb_osiris']]
df_tomb_statue_material = dfs[ntoi['df_tomb_statue_material']]
df_tomb_dim = dfs[ntoi['df_tomb_dim']]
df_tomb_king_name = dfs[ntoi['df_tomb_king_name']]

### heads of dataframes

In [4]:
df_tomb.head(2)

Unnamed: 0,tomb_code,tomb_type,provenance,excavator,excavation_year,spacial_distribution,number_of_shafts,label_of_shaft,type_of_decoration,ch_workmanship,ch_false_door,presence_of_wife,embracing,statues,curiosity,servant_statues,A40_great_god,tomb_code_id,catalogized
1,AC 3,pyramid complex,Raneferef's papyrus archive,,,,,,,,,,,,,,,AC 3,True
7,AC 14,pyramid complex,papyrus archive of Khentkaus,,,,,,,,,,,,,,,AC 14,True


In [5]:
df_tomb_chapel.head(2)

Unnamed: 0,tomb_code_id,chapel_dimensions,chapel_area,chapel_dimension_certainty,length,width
0,tomb of Nefer,unknown,,,unknown,unknown
1,AC 17,6.9×0.8+1.0×1.3,6.65,,6.9,0.8


In [6]:
df_tomb_osiris.head(2)

Unnamed: 0,tomb_code_id,Osiris
0,AC 17,epithet
1,AS 7,epithet


In [7]:
df_tomb_statue_material.head(2)

Unnamed: 0,tomb_code_id,statue_material
0,G 2150,travertine
1,AS 68d,limestone


In [8]:
df_tomb_dim.head(2)

Unnamed: 0,tomb_code_id,tomb_dimensions,tomb_dimension_certainty,tomb_area,length,width
0,tomb of Nefer,unknown,,,unknown,unknown
1,AC 17,11.7×7.4,,86.58,11.7,7.4


In [9]:
df_tomb_king_name.head(2)

Unnamed: 0,ID_official,tomb_code_id,source,dynasty
0,4,AC 3,personal name,V.5
1,16,AC 2,personal name,V.3


### preparing df_tomb_chapel for merge

#### removing invalid rows from df_tomb_chapel

In [10]:
df_tomb_chapel.shape

(347, 6)

In [11]:
# invalid values
invalid_values = [np.nan, pd.NA, 'unknown', '×', 'nn', 'no superstructure', 'unobservable', 'unexcavated', 'undetectable', 'destroyed', 'unobesrvable']
# invalid tomb recors
# 'No. 24 = D 38' has only chapel_area value
# 'No. 39 = C 16' has only length value
tomb_code_id_to_omit = ['No. 24 = D 38', 'No. 39 = C 16', 'G 4461']

In [12]:
# show tombs with issues
df_tomb_chapel[df_tomb_chapel['tomb_code_id'].isin(tomb_code_id_to_omit + ['H 2 = LS 17'])]

Unnamed: 0,tomb_code_id,chapel_dimensions,chapel_area,chapel_dimension_certainty,length,width
114,No. 24 = D 38,unobservable,2.05,ca,unobservable,unobservable
143,H 2 = LS 17,13.50×5.60?,75.6,,13.5,
170,No. 39 = C 16,4.35×?,,,4.35,
305,G 4461,"S 2044,S 2045, S 2046, S 2047",2.44,,,2044.0
306,G 4461,"S 2044,S 2045, S 2046, S 2047",2.44,,,2045.0
307,G 4461,"S 2044,S 2045, S 2046, S 2047",2.44,,,


In [13]:
# get all columns except tomb_code_id
columns = [col for col in df_tomb_chapel if col != 'tomb_code_id']

# remove rows where all values are in invalid_values
mask = df_tomb_chapel[columns].isin(invalid_values).all(axis=1)
df_tomb_chapel_v2 = df_tomb_chapel[~mask]

# remove insufficiently described tombs
mask2 = df_tomb_chapel_v2['tomb_code_id'].isin(tomb_code_id_to_omit)
df_tomb_chapel_v2 = df_tomb_chapel_v2[~mask2]

# tomb 'H 2 = LS 17' has potentionaly known width
df_tomb_chapel_v2.loc[df_tomb_chapel_v2['tomb_code_id']=='H 2 = LS 17', 'width'] = 5.6

df_tomb_chapel_v2.shape

(271, 6)

In [14]:
# removed rows
print(df_tomb_chapel[mask].shape)
df_tomb_chapel[mask][:10]

(71, 6)


Unnamed: 0,tomb_code_id,chapel_dimensions,chapel_area,chapel_dimension_certainty,length,width
0,tomb of Nefer,unknown,,,unknown,unknown
3,AC 1,unknown,,,unknown,unknown
7,AC 2,unknown,,,unknown,unknown
8,AC 4,unknown,,,unknown,unknown
15,nn 0,unknown,,,unknown,unknown
16,nn 1,unknown,,,unknown,unknown
17,tomb of Imem,unknown,,,unknown,unknown
22,nn 2,unknown,,,unknown,unknown
29,G 2086,×,,,,
37,"AS 68, shaft 4",nn,,,nn,nn


#### recalculate area

In [15]:
df_tomb_chapel_v2.head()

Unnamed: 0,tomb_code_id,chapel_dimensions,chapel_area,chapel_dimension_certainty,length,width
1,AC 17,6.9×0.8+1.0×1.3,6.65,,6.9,0.8
2,AC 17,6.9×0.8+1.0×1.3,6.65,,1.0,1.3
4,AC 5,4.7×2.1,9.87,,4.7,2.1
5,AC 8,5.24×3.50,18.34,,5.24,3.5
6,AC 9,5.26×2.12,11.15,,5.26,2.12


In [16]:
df_tomb_chapel_v2 = df_tomb_chapel_v2[['tomb_code_id', 'length', 'width']]
# recalculate area
df_tomb_chapel_v2['chapel_area_total'] = df_tomb_chapel_v2.apply(lambda x: round(x['length']*x['width'], 2), axis=1)
# calculate sum of area for aech tomb
df_tomb_chapel_v2['chapel_area_total_sum'] = df_tomb_chapel_v2.groupby('tomb_code_id')['chapel_area_total'].transform('sum')
# calculate number of chaper per tomb
df_tomb_chapel_v2['n_tomb_chapel'] = df_tomb_chapel_v2.groupby('tomb_code_id')['chapel_area_total'].transform('size')

In [17]:
df_tomb_chapel_v2.head()

Unnamed: 0,tomb_code_id,length,width,chapel_area_total,chapel_area_total_sum,n_tomb_chapel
1,AC 17,6.9,0.8,5.52,6.82,2
2,AC 17,1.0,1.3,1.3,6.82,2
4,AC 5,4.7,2.1,9.87,16.37,2
5,AC 8,5.24,3.5,18.34,18.34,1
6,AC 9,5.26,2.12,11.15,11.15,1


#### add prefix and columns convert to list

In [18]:
# add prefix
prefix = merge_get_prefix(df_tomb_chapel)
df_tomb_chapel_v2 = merge_add_prefix(df_tomb_chapel_v2, prefix)

In [19]:
df_tomb_chapel_v2.head()

Unnamed: 0,tomb_code_id,03_length,03_width,03_chapel_area_total,03_chapel_area_total_sum,03_n_tomb_chapel
1,AC 17,6.9,0.8,5.52,6.82,2
2,AC 17,1.0,1.3,1.3,6.82,2
4,AC 5,4.7,2.1,9.87,16.37,2
5,AC 8,5.24,3.5,18.34,18.34,1
6,AC 9,5.26,2.12,11.15,11.15,1


In [20]:
# duplicates will be removed outomatecali in next step
df_tomb_chapel_v2[df_tomb_chapel_v2.duplicated(keep=False)]

Unnamed: 0,tomb_code_id,03_length,03_width,03_chapel_area_total,03_chapel_area_total_sum,03_n_tomb_chapel
102,G 8974,10.0,1.0,10.0,20.0,2
103,G 8974,10.0,1.0,10.0,20.0,2
373,mastaba of Remni,4.3,1.2,5.16,10.32,2
374,mastaba of Remni,4.3,1.2,5.16,10.32,2


In [21]:
print(f'shape={df_tomb_chapel_v2.shape}, df_tomb_chapel_v2 shape')
# group attributes and possitions into list
df_tomb_chapel_all = group_to_list(df_tomb_chapel_v2, ['tomb_code_id'])
print(f'shape={df_tomb_chapel_all.shape}, group into list according to tomb_code_id')
df_tomb_chapel_all.head()

shape=(271, 6), df_tomb_chapel_v2 shape
shape=(243, 6), group into list according to tomb_code_id


Unnamed: 0,tomb_code_id,03_length_list,03_width_list,03_chapel_area_total_list,03_chapel_area_total_sum_list,03_n_tomb_chapel_list
0,AC 15,"[2.78, 3.5]","[1.56, 1.59]","[4.42, 5.46]",[9.879999999999999],[2]
1,AC 17,"[1.0, 6.9]","[0.8, 1.3]","[1.3, 5.52]",[6.819999999999999],[2]
2,AC 18,[2.4],[1.6],[3.84],[3.84],[1]
3,AC 19,[2.3],[1.4],[3.22],[3.22],[1]
4,AC 22,[11.4],[21.1],[240.54],[240.54],[1]


### preparing df_tomb_osiris for merge

In [22]:
df_tomb_osiris_v2 = df_tomb_osiris.drop_duplicates()

In [23]:
print(df_tomb_osiris.shape)
print(df_tomb_osiris_v2.shape)

(200, 2)
(180, 2)


In [24]:
df_tomb_osiris_v2.head()

Unnamed: 0,tomb_code_id,Osiris
0,AC 17,epithet
1,AS 7,epithet
2,AC 15,offering formula
3,AC 4,epithet
4,nn 0,offering formula


In [25]:
df_tomb_osiris_v2['Osiris'].value_counts()

Osiris
offering formula    121
epithet              58
title                 1
Name: count, dtype: int64

In [26]:
df_tomb_osiris_v2['Osiris'].isna().sum()

0

In [27]:
df_tomb_osiris_v2['tomb_code_id'].duplicated().sum()

35

In [28]:
# add prefix
prefix = merge_get_prefix(df_tomb_osiris)
df_tomb_osiris_v2 = merge_add_prefix(df_tomb_osiris_v2, prefix)

In [29]:
print(f'shape={df_tomb_osiris.shape}, df_tomb_osiris shape')
print(f'shape={df_tomb_osiris_v2.shape}, df_tomb_osiris_v2 shape')
# group attributes and possitions into list
df_tomb_osiris_all = group_to_list(df_tomb_osiris_v2, ['tomb_code_id'])
print(f'shape={df_tomb_osiris_all.shape}, group into list according to tomb_code_id')
df_tomb_osiris_all.head()

shape=(200, 2), df_tomb_osiris shape
shape=(180, 2), df_tomb_osiris_v2 shape
shape=(145, 2), group into list according to tomb_code_id


Unnamed: 0,tomb_code_id,04_Osiris_list
0,AC 15,[offering formula]
1,AC 17,[epithet]
2,AC 4,[epithet]
3,AS 16,"[epithet, offering formula]"
4,AS 19,[epithet]


### preparing df_tomb_statue_material for merge

In [30]:
df_tomb_statue_material_v2 = df_tomb_statue_material.drop_duplicates()

In [31]:
df_tomb_statue_material_v2.shape

(40, 2)

In [32]:
df_tomb_statue_material_v2.head()

Unnamed: 0,tomb_code_id,statue_material
0,G 2150,travertine
1,AS 68d,limestone
3,G 5080,travertine
4,G 5080,limestone
5,G 5080,granite


In [33]:
df_tomb_statue_material_v2['statue_material'].value_counts()

statue_material
limestone     22
travertine     6
wood           6
granite        5
diorite        1
Name: count, dtype: int64

In [34]:
df_tomb_statue_material_v2['statue_material'].isna().sum()

0

In [35]:
df_tomb_statue_material_v2['tomb_code_id'].duplicated().sum()

8

In [36]:
# add prefix
prefix = merge_get_prefix(df_tomb_statue_material)
df_tomb_statue_material_v2 = merge_add_prefix(df_tomb_statue_material_v2, prefix)

In [37]:
print(f'shape={df_tomb_statue_material.shape}, df_tomb_statue_material shape')
print(f'shape={df_tomb_statue_material_v2.shape}, df_tomb_statue_material_v2 shape')
# group attributes and possitions into list
df_tomb_statue_material_all = group_to_list(df_tomb_statue_material_v2, ['tomb_code_id'])
print(f'shape={df_tomb_statue_material_all.shape}, group into list according to tomb_code_id')
df_tomb_statue_material_all.head()

shape=(45, 2), df_tomb_statue_material shape
shape=(40, 2), df_tomb_statue_material_v2 shape
shape=(32, 2), group into list according to tomb_code_id


Unnamed: 0,tomb_code_id,05_statue_material_list
0,AS 104,[limestone]
1,AS 68d,[limestone]
2,AS 98,[wood]
3,D 208,[granite]
4,D 47,"[diorite , granite, limestone]"


### preparing df_tomb_dim for merge

In [38]:
print(df_tomb_dim.shape)
df_tomb_dim.head()

(354, 6)


Unnamed: 0,tomb_code_id,tomb_dimensions,tomb_dimension_certainty,tomb_area,length,width
0,tomb of Nefer,unknown,,,unknown,unknown
1,AC 17,11.7×7.4,,86.58,11.7,7.4
2,AC 1,unknown,,,unknown,unknown
3,AC 5,16×18.3,,292.8,16.0,18.3
4,AC 8,42.24×56.24,,2375.6,42.24,56.24


In [39]:
df_tomb_dim[df_tomb_dim['tomb_code_id'].duplicated(keep=False)].sort_values(['tomb_code_id', 'tomb_area', 'length'])

Unnamed: 0,tomb_code_id,tomb_dimensions,tomb_dimension_certainty,tomb_area,length,width
108,G 5230 = L 40,41.25×12.78+11.38×10.90,,651.22,11.38,10.9
107,G 5230 = L 40,41.25×12.78+11.38×10.90,,651.22,41.25,12.78
173,G 5270,25.3×13.0+4.5×3.10,ca,342.85,4.5,3.1
172,G 5270,25.3×13.0+4.5×3.10,ca,342.85,25.3,13.0
70,G 6042,11.7×5.7+12.5×2.5,,97.94,11.7,5.7
71,G 6042,11.7×5.7+12.5×2.5,,97.94,12.5,2.5
75,G 8882,9.3×5.36+20.8×23.0,ca,528.25,9.3,5.36
76,G 8882,9.3×5.36+20.8×23.0,ca,528.25,20.8,23.0
64,G 8911,8.4×6.0+4.3×3.4,,65.02,4.3,3.4
63,G 8911,8.4×6.0+4.3×3.4,,65.02,8.4,6.0


In [40]:
count_of_all_rooms_ids = df_tomb_dim.loc[df_tomb_dim['tomb_dimensions']=='count of all rooms', 'tomb_code_id'].to_list()
df_tomb_dim[df_tomb_dim['tomb_code_id'].isin(count_of_all_rooms_ids)]

Unnamed: 0,tomb_code_id,tomb_dimensions,tomb_dimension_certainty,tomb_area,length,width
88,G 8154 = L 89,count of all rooms,ca,50.16,,
98,G 8172 = L 86,count of all rooms,ca,58.78,,
99,G 8130,count of all rooms,,,,
100,G 8090 = LG 90,count of all rooms,,,,
102,G 8460,count of all rooms,ca,54.85,,
106,G 8080 = L 92,count of all rooms,ca,66.22,,
342,tomb of Akhethotep,count of all rooms,,22.88,,


#### removing invalid rows from df_tomb_dim

In [41]:
# invalid values
invalid_values = [np.nan, 'unknown', '×', 'nn', 'no superstructure', 'unobservable', 'unexcavated', 'undetectable', 'destroyed', 'undetectable                      ', '', 'incomplete excavation']
# invalid tomb recors
tomb_code_id_to_omit = ['tomb of Merefnebef', 'No. 65 = D 17', 'D 17']

In [42]:
df_tomb_dim[df_tomb_dim['tomb_code_id'].isin(tomb_code_id_to_omit)]

Unnamed: 0,tomb_code_id,tomb_dimensions,tomb_dimension_certainty,tomb_area,length,width
41,tomb of Merefnebef,7.80×,,54.6,7.8,
305,No. 65 = D 17,52.20×,,,52.2,
306,D 17,52.20×,,,52.2,


In [43]:
# get all columns except tomb_code_id
columns = [col for col in df_tomb_dim if col != 'tomb_code_id']

# remove rows where all values are in invalid_values
mask = df_tomb_dim[columns].isin(invalid_values) | df_tomb_dim[columns].isna()
mask = mask.all(axis=1) 
df_tomb_dim_v2 = df_tomb_dim[~mask]

# remove insufficiently described tombs
mask2 = df_tomb_dim_v2['tomb_code_id'].isin(tomb_code_id_to_omit)
df_tomb_dim_v2 = df_tomb_dim_v2[~mask2]

# remove rows with 'tomb_dimensions'=='count of all rooms'
# later put it back
df_tomb_dim_v2 = df_tomb_dim_v2[df_tomb_dim_v2['tomb_dimensions']!='count of all rooms']

df_tomb_dim_v2.shape

(258, 6)

In [44]:
df_tomb_dim_v2[50:100]

Unnamed: 0,tomb_code_id,tomb_dimensions,tomb_dimension_certainty,tomb_area,length,width
59,G 8954,14.5×8.3,,120.35,14.5,8.3
60,G 8912,8.4×6.1,,51.24,8.4,6.1
61,G 8910,15.7×7.5,,117.75,15.7,7.5
62,G 8926,11.80×8.30,,97.94,11.8,8.3
63,G 8911,8.4×6.0+4.3×3.4,,65.02,8.4,6.0
64,G 8911,8.4×6.0+4.3×3.4,,65.02,4.3,3.4
65,G 8990,5.45×3.45,,18.8,5.45,3.45
66,G 8980,13.20×7.58,,100.06,13.2,7.58
67,G 8976,13.2×8.5,ca,112.2,13.2,8.5
68,G 8983,8.3×3.4,,28.22,8.3,3.4


#### recalculate area

In [45]:
df_tomb_dim_v2.head()

Unnamed: 0,tomb_code_id,tomb_dimensions,tomb_dimension_certainty,tomb_area,length,width
1,AC 17,11.7×7.4,,86.58,11.7,7.4
3,AC 5,16×18.3,,292.8,16.0,18.3
4,AC 8,42.24×56.24,,2375.6,42.24,56.24
5,AC 9,23.10×16.30,,376.53,23.1,16.3
8,AC 18,15.8×9.1,,143.8,15.8,9.1


In [46]:
df_tomb_dim_v2 = df_tomb_dim_v2[['tomb_code_id', 'length', 'width']]
# recalculate area
df_tomb_dim_v2['tomb_area_total'] = df_tomb_dim_v2.apply(lambda x: round(x['length']*x['width'], 2), axis=1)
# calculate sum of area for aech tomb
df_tomb_dim_v2['tomb_area_total_sum'] = df_tomb_dim_v2.groupby('tomb_code_id')['tomb_area_total'].transform('sum')
# calculate number of chaper per tomb
df_tomb_dim_v2['n_tomb'] = df_tomb_dim_v2.groupby('tomb_code_id')['tomb_area_total'].transform('size')
df_tomb_dim_v2.shape

(258, 6)

In [47]:
df_tomb_dim_v2.head()

Unnamed: 0,tomb_code_id,length,width,tomb_area_total,tomb_area_total_sum,n_tomb
1,AC 17,11.7,7.4,86.58,86.58,1
3,AC 5,16.0,18.3,292.8,292.8,1
4,AC 8,42.24,56.24,2375.58,2375.58,1
5,AC 9,23.1,16.3,376.53,376.53,1
8,AC 18,15.8,9.1,143.78,143.78,1


In [48]:
print(f'shape={df_tomb_dim_v2.shape}, df_tomb_dim_v2 shape')
df1 = df_tomb_dim.loc[df_tomb_dim['tomb_dimensions']=='count of all rooms', ['tomb_code_id', 'length', 'width', 'tomb_area']]
df1.rename(columns={'tomb_area': 'tomb_area_total'}, inplace=True)
df_tomb_dim_v3 = pd.concat([df_tomb_dim_v2, df1])
print(f'shape={df_tomb_dim_v3.shape}, df_tomb_dim_v3 shape')

shape=(258, 6), df_tomb_dim_v2 shape
shape=(265, 6), df_tomb_dim_v3 shape


#### add prefix and columns convert to list

In [49]:
# add prefix
prefix = merge_get_prefix(df_tomb_dim)
df_tomb_dim_v3 = merge_add_prefix(df_tomb_dim_v3, prefix)

In [50]:
df_tomb_dim_v3[df_tomb_dim_v3.duplicated(keep=False)]

Unnamed: 0,tomb_code_id,06_length,06_width,06_tomb_area_total,06_tomb_area_total_sum,06_n_tomb


In [51]:
df_tomb_dim_v3[df_tomb_dim_v3['06_n_tomb']>1]

Unnamed: 0,tomb_code_id,06_length,06_width,06_tomb_area_total,06_tomb_area_total_sum,06_n_tomb
63,G 8911,8.4,6.0,50.4,65.02,2.0
64,G 8911,4.3,3.4,14.62,65.02,2.0
70,G 6042,11.7,5.7,66.69,97.94,2.0
71,G 6042,12.5,2.5,31.25,97.94,2.0
75,G 8882,9.3,5.36,49.85,528.25,2.0
76,G 8882,20.8,23.0,478.4,528.25,2.0
107,G 5230 = L 40,41.25,12.78,527.17,651.21,2.0
108,G 5230 = L 40,11.38,10.9,124.04,651.21,2.0
172,G 5270,25.3,13.0,328.9,342.85,2.0
173,G 5270,4.5,3.1,13.95,342.85,2.0


In [52]:
print(f'shape={df_tomb_dim_v3.shape}, df_tomb_dim_v3 shape')
# group attributes and possitions into list
df_tomb_dim_all = group_to_list(df_tomb_dim_v3, ['tomb_code_id'])
print(f'shape={df_tomb_dim_all.shape}, group into list according to tomb_code_id')
df_tomb_dim_all.head()

shape=(265, 6), df_tomb_dim_v3 shape
shape=(254, 6), group into list according to tomb_code_id


Unnamed: 0,tomb_code_id,06_length_list,06_width_list,06_tomb_area_total_list,06_tomb_area_total_sum_list,06_n_tomb_list
0,AC 10,[32.04],[16.44],[526.74],[526.74],[1.0]
1,AC 15,[18.7],[10.42],[194.85],[194.85],[1.0]
2,AC 16,[12.3],[9.1],[111.93],[111.93],[1.0]
3,AC 17,[11.7],[7.4],[86.58],[86.58],[1.0]
4,AC 18,[15.8],[9.1],[143.78],[143.78],[1.0]


### preparing df_tomb_king_name for merge

In [53]:
df_general = dfs[0]
id_officials = df_tomb_king_name['ID_official'].to_list()
id_officials = list(set(id_officials))
df_general[df_general['ID_official'].isin(id_officials)]

Unnamed: 0,buried,complete_to_date,dynasty,find_number,ID_official,ID_person,king,locality,mentioned,nome,owner,shaft_main,site,uncertain,tomb_code_id,name_id
3,,,late 5 – early 6,,4,325,,Central Abusir,,1 LE,,,Abusir,,AC 3,2
15,,,late 5 or 6,,16,336,,Central Abusir,,1 LE,,,Abusir,,AC 2,11
43,,,late 5,,44,358,Djedkare?,Central Abusir,,1 LE,,,Abusir,,AC 3,31
44,,,mid 5,,45,359,Nyuserre,Central Abusir,yes,1 LE,,,Abusir,,AC 8,32
56,,,mid 5,,57,371,Nyuserre,Central Abusir,yes,1 LE,,,Abusir,,AC 8,41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5370,,,late 5,,5453,315,Djedkare – Unis,"North Saqqara, north of Step Pyramid",,1 LE,owner,,Saqqara,,No. 14 = D 43,2415
5374,,,late 5,,5457,315,Djedkare – Unis,"North Saqqara, north of Step Pyramid",,1 LE,owner,,Saqqara,,No. 14 = D 43,2415
5378,,,late 5,,5461,317,,Western Cemetery,yes,1 LE,,,Giza,,G 4761,61
5411,certain,,late 5,,5494,4964,Unis,Gisr el-Mudir,,1 LE,,no,Saqqara,,mastaba of Ptahshepses,562


In [54]:
df_tomb_king_name.shape

(685, 4)

In [55]:
df_tomb_king_name[df_tomb_king_name['ID_official'].duplicated(keep=False)]

Unnamed: 0,ID_official,tomb_code_id,source,dynasty
22,118,AS 4,personal name,V.8
24,142,AC 17,title – sun temple,V.3
30,205,AC 5,priestly title – king,V.2
31,238,AC 8,personal name,IV.6
32,238,AC 8,personal name,V.2
...,...,...,...,...
680,5091,G 2100-I,personal name,IV.2
681,5320,D 51,epithet,V.1
682,5444,tomb of Hermeru,epithet,V.9
683,3813,G 7101,title – affiliation to pyramid complex,VI.2


In [56]:
df = pd.merge(df_general, df_tomb, on='tomb_code_id', suffixes=('_00', '_01'))
df = pd.merge(df, df_tomb_king_name, on='tomb_code_id', suffixes=('_01', '_02'))
df = df[['ID_official_01', 'ID_official_02', 'tomb_code_id']]
df[100:150]

Unnamed: 0,ID_official_01,ID_official_02,tomb_code_id
100,38,299,AC 8
101,38,300,AC 8
102,38,348,AC 8
103,38,356,AC 8
104,38,385,AC 8
105,38,447,AC 8
106,38,448,AC 8
107,38,507,AC 8
108,38,3612,AC 8
109,38,5051,AC 8


### merge tomb

In [57]:
df_tomb['tomb_code_id'].duplicated().sum()

0

In [58]:
df_tomb_chapel.duplicated().sum()

0

In [59]:
df_tomb_chapel_all
df_tomb_osiris_all
df_tomb_statue_material_all
df_tomb_dim_all





Unnamed: 0,tomb_code_id,06_length_list,06_width_list,06_tomb_area_total_list,06_tomb_area_total_sum_list,06_n_tomb_list
0,AC 10,[32.04],[16.44],[526.74],[526.74],[1.0]
1,AC 15,[18.7],[10.42],[194.85],[194.85],[1.0]
2,AC 16,[12.3],[9.1],[111.93],[111.93],[1.0]
3,AC 17,[11.7],[7.4],[86.58],[86.58],[1.0]
4,AC 18,[15.8],[9.1],[143.78],[143.78],[1.0]
...,...,...,...,...,...,...
249,tomb of Neferherenptah,[16.0],[10.2],[163.2],[163.2],[1.0]
250,tomb of Semdent,[6.6],[5.35],[35.31],[35.31],[1.0]
251,tomb of Tjetetu,[5.55],[6.45],[35.8],[35.8],[1.0]
252,tomb of Tjetji,[7.2],[4.9],[35.28],[35.28],[1.0]


In [60]:
print(f'shape={df_tomb.shape}, df_tomb shape')
# merge df_tomb and df_tomb_chapel_all
df_tomb_all = merge(df_tomb, df_tomb_chapel_all, on='tomb_code_id')
print(f'shape={df_tomb_all.shape}, merge df_tomb and df_tomb_chapel_all')
# merge df_tomb_all and df_tomb_chapel_v2
df_tomb_all = merge(df_tomb_all, df_tomb_osiris_all, on='tomb_code_id')
print(f'shape={df_tomb_all.shape}, merge df_tomb_all and df_tomb_osiris_all')
# merge df_tomb_all and df_tomb_statue_material_all
df_tomb_all = merge(df_tomb_all, df_tomb_statue_material_all, on='tomb_code_id')
print(f'shape={df_tomb_all.shape}, merge df_tomb_all and df_tomb_statue_material_all')
# merge df_tomb_all and df_tomb_statue_material_all
df_tomb_all = merge(df_tomb_all, df_tomb_dim_all, on='tomb_code_id')
print(f'shape={df_tomb_all.shape}, merge df_tomb_all and df_tomb_dim_all')
df_tomb_all.columns.to_list()

shape=(903, 19), df_tomb shape
shape=(903, 24), merge df_tomb and df_tomb_chapel_all
shape=(903, 25), merge df_tomb_all and df_tomb_osiris_all
shape=(903, 26), merge df_tomb_all and df_tomb_statue_material_all
shape=(903, 31), merge df_tomb_all and df_tomb_dim_all


['01_tomb_code',
 '01_tomb_type',
 '01_provenance',
 '01_excavator',
 '01_excavation_year',
 '01_spacial_distribution',
 '01_number_of_shafts',
 '01_label_of_shaft',
 '01_type_of_decoration',
 '01_ch_workmanship',
 '01_ch_false_door',
 '01_presence_of_wife',
 '01_embracing',
 '01_statues',
 '01_curiosity',
 '01_servant_statues',
 '01_A40_great_god',
 'tomb_code_id',
 '01_catalogized',
 '03_length_list',
 '03_width_list',
 '03_chapel_area_total_list',
 '03_chapel_area_total_sum_list',
 '03_n_tomb_chapel_list',
 '04_Osiris_list',
 '05_statue_material_list',
 '06_length_list',
 '06_width_list',
 '06_tomb_area_total_list',
 '06_tomb_area_total_sum_list',
 '06_n_tomb_list']

In [61]:
df_tomb_all.head()

Unnamed: 0,01_tomb_code,01_tomb_type,01_provenance,01_excavator,01_excavation_year,01_spacial_distribution,01_number_of_shafts,01_label_of_shaft,01_type_of_decoration,01_ch_workmanship,...,03_chapel_area_total_list,03_chapel_area_total_sum_list,03_n_tomb_chapel_list,04_Osiris_list,05_statue_material_list,06_length_list,06_width_list,06_tomb_area_total_list,06_tomb_area_total_sum_list,06_n_tomb_list
0,AC 3,pyramid complex,Raneferef's papyrus archive,,,,,,,,...,,,,,,,,,,
1,AC 14,pyramid complex,papyrus archive of Khentkaus,,,,,,,,...,,,,,,,,,,
2,nn,unknown,tomb of Nefer,,,,,,,,...,,,,,,,,,,
3,AS 4,stone mastaba,tomb of Rahotep and Izeziseneb,Miroslav Bárta,,,,,,,...,,,,,,,,,,
4,AS 10,stone mastaba,tomb of Iti,Miroslav Bárta,,,,,,,...,,,,,,,,,,


### save result

In [62]:
f_name = r'df_tomb_all'
save_df(df_tomb_all, f_name)
make_excel_analysis(df_tomb_all, f_name)

Dataframe saved into C:\Users\Stoja\OneDrive\Documents\diplomka\scr\data\df_tomb_all.csv
continuous_names={'01_statues'}
Excel analysis save into C:\Users\Stoja\OneDrive\Documents\diplomka\scr\excel_dfs_description\df_tomb_all.xlsx
