# Make a DataFrame encoding whether a person's father was a vizier

**Motivation:**  
Nepotism in the Old Kingdom was significant, so we should encode whether a given person had a vizier in the family (in this case, whether their father was a vizier).

**Workflow:**
- Load relations from `df_family_relations`.  
- Identify all male–male relationships recorded as `father of` or `son of` (the same pair may be recorded in either direction).  
- For each (father, son) pair, assign a boolean value indicating whether the father was a vizier (`father_was_vizier`).  
- Save the final DataFrame with the columns `ID_person` (son) and `father_was_vizier`.

### load

In [1]:
import os
import numpy as np
import pandas as pd
import set_path
import supp.support_load as sl
from supp.support_merge import merge, group_to_list
from supp.support_save import save_df
from supp.support_analyzer import make_excel_analysis

In [2]:
# Load the pickled database: the collection of tables, their names,
# and the date the database was exported.
dfs, dfs_name, dfs_export_date = sl.load_pickle()
# Lookup dicts built from the table names; `ntoi[name]` is used below as the
# key into `dfs` (presumably iton = index-to-name, ntoi = name-to-index —
# TODO confirm in supp.support_load).
iton, ntoi = sl.get_name_dicts(dfs_name)
# Display the export date so the data version is visible in the notebook.
dfs_export_date

Pickle database loaded.
C:\Users\Stoja\OneDrive\Documents\diplomka\scr\data\dfs_complete.pickle
Applied preprocessing: merge_on_jones
Applied preprocessing: remove_jones_duplicates


'2024-10-11'

In [3]:
# Read the three CSV inputs in a fixed order (the order drives the load log):
#   vizier        - df with viziers
#   df_person_all - df with all information about persons
#   df_sex        - df with sex of persons
vizier, df_person_all, df_sex = (
    sl.read_csv(csv_name)
    for csv_name in ('vizier', 'df_person_all', '08_sex')
)

CSV file loaded.
C:\Users\Stoja\OneDrive\Documents\diplomka\scr\data\unpacked\df_person_all\vizier.csv
CSV file loaded.
C:\Users\Stoja\OneDrive\Documents\diplomka\scr\data\df_person_all.csv
CSV file loaded.
C:\Users\Stoja\OneDrive\Documents\diplomka\scr\data\unpacked\df_person_all\08_sex.csv


In [4]:
# ID_person of every person flagged as male (male == 1)
male_id_list = df_sex.loc[df_sex['male'].eq(1), 'ID_person'].tolist()
len(male_id_list)

3930

In [5]:
# Take the family-relations table from the database and keep only the rows
# in which both persons are male (male-male relations).
df_family_relations = dfs[ntoi['df_family_relations']]
print(f'{df_family_relations.shape}\t shape of df_family_relations')
df_family_relations = df_family_relations.loc[
    lambda rel: rel['ID_person_1'].isin(male_id_list)
    & rel['ID_person_2'].isin(male_id_list)
]
print(f'{df_family_relations.shape}\t shape of df_family_relations after restriction to "male-male" relations')
df_family_relations.head()

(10531, 8)	 shape of df_family_relations
(3998, 8)	 shape of df_family_relations after restriction to "male-male" relations


Unnamed: 0,certainty,degree_of_certainty,ID_family_relation,ID_official_1,ID_official_2,type_of_relative,ID_person_1,ID_person_2
0,,,1,18,202.0,father of,39,493.0
1,,,2,80,232.0,son of,389,516.0
2,,,3,90,476.0,son of,399,683.0
3,,,7,100,2962.0,son of,409,2766.0
4,,,8,139,542.0,father of,443,732.0


### family relations

In [6]:
# Frequency of each relation type among the male-male relations
df_family_relations['type_of_relative'].value_counts()

type_of_relative
brother of           2165
son of                881
father of             853
grandson of            23
grandfather of         22
unspecified            12
brother-in-law of       3
sister of               2
nephew of               1
uncle of                1
son-in-law of           1
ancestor of             1
descendant of           1
husband of              1
Name: count, dtype: int64

In [7]:
# Relation types regarded as important, laid out so that inverse
# relations sit next to each other.
relations = [
    'son of', 'father of',
    'grandson of', 'grandfather of',
    'son/daughter of',
    'nephew of', 'uncle of',
    'ancestor of', 'descendant of',
]

#### father-son relations

In [8]:
# 'father of' rows: ID_person_1 is the father, ID_person_2 is the son.
# Written as one non-mutating chain: the original renamed columns and
# dropped duplicates in place on a frame obtained by slicing
# `df_family_relations`, which can raise SettingWithCopyWarning.
df_fathers = (
    df_family_relations
    .loc[df_family_relations['type_of_relative'] == 'father of',
         ['type_of_relative', 'ID_person_1', 'ID_person_2']]
    .rename(columns={'ID_person_1': 'father', 'ID_person_2': 'son'})
    .drop_duplicates(keep='first')
)
print(df_fathers.shape)
df_fathers.head()

(837, 3)


Unnamed: 0,type_of_relative,father,son
0,father of,39,493.0
4,father of,443,732.0
8,father of,456,611.0
9,father of,16,930.0
16,father of,1,518.0


In [9]:
# sons that have more than one father recorded in the 'father of' rows
dup_fathers = (
    df_fathers
    .loc[df_fathers.duplicated(subset='son', keep=False)]
    .sort_values(by=['son', 'father'])
)
print(dup_fathers.shape)
dup_fathers

(4, 3)


Unnamed: 0,type_of_relative,father,son
9346,father of,2890,309.0
10222,father of,4856,309.0
10320,father of,314,4901.0
10319,father of,3697,4901.0


#### son-father relations

In [10]:
# 'son of' rows: ID_person_1 is the son, ID_person_2 is the father.
# Written as one non-mutating chain: the original renamed columns and
# dropped duplicates in place on a frame obtained by slicing
# `df_family_relations`, which can raise SettingWithCopyWarning.
df_sons = (
    df_family_relations
    .loc[df_family_relations['type_of_relative'] == 'son of',
         ['type_of_relative', 'ID_person_1', 'ID_person_2']]
    .rename(columns={'ID_person_1': 'son', 'ID_person_2': 'father'})
    .drop_duplicates(keep='first')
)
print(df_sons.shape)
df_sons.head()

(850, 3)


Unnamed: 0,type_of_relative,son,father
1,son of,389,516.0
2,son of,399,683.0
3,son of,409,2766.0
6,son of,456,620.0
10,son of,475,931.0


In [11]:
# sons that have more than one father recorded in the 'son of' rows
dup_sons = (
    df_sons
    .loc[df_sons.duplicated(subset='son', keep=False)]
    .sort_values(by=['son', 'father'])
)
print(dup_sons.shape)
dup_sons.head()

(2, 3)


Unnamed: 0,type_of_relative,son,father
9345,son of,309,2890.0
10218,son of,309,4856.0


In [12]:
# show (at most) the first 50 sons with several recorded fathers
dup_sons.head(50)

Unnamed: 0,type_of_relative,son,father
9345,son of,309,2890.0
10218,son of,309,4856.0


### save df

In [13]:
# concat father-son and son-father relations
df_sons_2 = df_sons[df_fathers.columns]  # reorder columns
df_fathers_all = pd.concat([df_fathers, df_sons_2], ignore_index=True)
df_fathers_all = df_fathers_all[['father', 'son']]
print(f'{df_fathers_all.shape}\tbefore dropping duplicates')
df_fathers_all.drop_duplicates(keep='first', inplace=True)
print(f'{df_fathers_all.shape}\tafter dropping duplicates')
df_fathers_all.head()

(1687, 2)	before dropping duplicates
(860, 2)	after dropping duplicates


Unnamed: 0,father,son
0,39.0,493.0
1,443.0,732.0
2,456.0,611.0
3,16.0,930.0
4,1.0,518.0


In [14]:
# print df shapes (only for overview)
print(f'{vizier.shape}\t vizier shape')
print(f'{df_person_all.shape}\t df_person_all shape')
print(f'{df_fathers_all.shape}\t df_son_father_vizier shape')

(4962, 2)	 vizier shape
(4962, 50)	 df_person_all shape
(860, 2)	 df_son_father_vizier shape


In [15]:
# drop duplicates persons in df_person_all (if any)
df_person_id = df_person_all.loc[:, 'ID_person'].drop_duplicates()
print(f'{df_person_id.shape}\t df_person_id shape')

(4962,)	 df_person_id shape


In [16]:
# merge fathers with viziers
result = pd.merge(df_fathers_all, vizier,
                             left_on='father', right_on='ID_person',
                             how='inner')
result.rename(columns={'vizier': 'father_was_vizier'}, inplace=True)
result = result[['father', 'son', 'father_was_vizier']]
print(result.shape)
print(result['father_was_vizier'].sum())
result.head()

(860, 3)
120


Unnamed: 0,father,son,father_was_vizier
0,39.0,493.0,0
1,443.0,732.0,0
2,456.0,611.0,0
3,16.0,930.0,0
4,1.0,518.0,1


In [17]:
# Collapse to one row per son.
# A son may have two or more recorded fathers; taking the maximum of the
# flag sets father_was_vizier=1 as soon as any of them was a vizier.
result_2 = result.groupby('son', as_index=False)['father_was_vizier'].max()
print(result_2.shape)
result_2.head()

(858, 2)


Unnamed: 0,son,father_was_vizier
0,2.0,1
1,4.0,0
2,5.0,0
3,7.0,0
4,9.0,1


In [18]:
# Left-join every person to the per-son flag. Persons that never appear as
# a son have no match and get father_was_vizier = 0.
# The helper column `son` is dropped before filling/casting so that only
# the two output columns are touched.
result_3 = (
    pd.merge(df_person_id, result_2,
             left_on='ID_person', right_on='son',
             how='left')
    [['ID_person', 'father_was_vizier']]
    .fillna(0)
    .astype(int)
)

# Check the output invariants before anything is written to disk:
# exactly one row per person, no person lost or duplicated by the join.
assert result_3['ID_person'].is_unique, 'ID_person must be unique in the output'
assert len(result_3) == len(df_person_id), 'left merge must keep one row per person'

# save
save_df(result_3, 'df_father_was_vizier')

print(result_3.shape)
print(result_3['father_was_vizier'].sum())
result_3.head()

Dataframe saved into C:\Users\Stoja\OneDrive\Documents\diplomka\scr\data\df_father_was_vizier.csv
(4962, 2)
120


Unnamed: 0,ID_person,father_was_vizier
0,322,0
1,323,0
2,324,0
3,325,0
4,326,0
