Skip to content

khipulab/renaming

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

6 Commits
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

DOI

Supplemental Code for "A New Naming Convention for Andean Khipus"

The code in this repository can be used to reproduce the renaming procedure described in Brezine, Clindaniel, Ghezzi, Hyland and Medrano (2024) "A New Naming Convention for Andean Khipus."

The code is written in Python 3.9.12 and all of its dependencies can be installed by running the following in the terminal (with the requirements.txt file included in this repository):

pip install -r requirements.txt

Once Python is installed, download the current version of the OKR (v1.0.1 -- either via GitHub or Zenodo) and connect to the database via Python's sqlite3 module.

! git clone https://github.com/khipulab/open-khipu-repository.git -b v1.0.1 --single-branch --quiet
import sqlite3
import pandas as pd

conn = sqlite3.connect('open-khipu-repository/khipu.db')

def delete_khipu(khipu_to_delete, con):
    '''
    Description: Deletes khipu corresponding to `khipu_to_delete`
                 from db (erasing all entries associated with it
                 in all relevant tables)

    Input: khipu_to_delete (khipu_id in db; string of 7 integers), 
           SQLite3 connection object
           
    Returns: Nothing (prints khipu that has been deleted)
    '''
    cur = conn.cursor()
    script = f'''
    DELETE FROM knot
    WHERE
        cord_id
        IN
        (SELECT cord_id
        FROM cord
        WHERE khipu_id = {khipu_to_delete}
        );

    DELETE FROM knot_cluster
    WHERE
        cord_id
        IN
        (SELECT cord_id
        FROM cord
        WHERE khipu_id = {khipu_to_delete}
        );
        
    DELETE FROM cord
    WHERE khipu_id = {khipu_to_delete};

    DELETE FROM cord_cluster
    WHERE khipu_id = {khipu_to_delete};

    DELETE FROM ascher_cord_color
    WHERE khipu_id = {khipu_to_delete};

    DELETE FROM khipu_main
    WHERE khipu_id = {khipu_to_delete};

    DELETE FROM primary_cord
    WHERE khipu_id = {khipu_to_delete};
    '''

    cur.executescript(script)
    con.commit()
    
    print(f'khipu_id {khipu_to_delete} Deleted')

First, we drop the following khipu records that are duplicates of other khipus in the database, or are blank/incomplete records (none of these received an OKR_NUM).

# Drop khipu records that are not listed in translation table
khipu_ids_to_delete = [1000594, 1000364, 1000484, 1000498]
for khipu_id in khipu_ids_to_delete:
    delete_khipu(khipu_id, conn)
khipu_id 1000594 Deleted
khipu_id 1000364 Deleted
khipu_id 1000484 Deleted
khipu_id 1000498 Deleted

Then, we can add the OKR_NUM column to the KHIPU_MAIN table:

add_okr_num_col = 'ALTER TABLE khipu_main ADD COLUMN OKR_NUM LONGTEXT'
cur = conn.cursor()
cur.execute(add_okr_num_col)
conn.commit()

...and use the translation table (rewritten as a CSV file in this repository called renaming.csv) in the Supplemental Appendix to map an OKR_NUM onto each INVESTIGATOR_NUM:

df = pd.read_csv('renaming.csv')
df.loc[:, 'cur_num'] = df.cur_num.apply(lambda x: [i.strip() for i in x.split(",")])
df_explode = df.explode('cur_num') \
               .reset_index(drop=True)
df_explode.head()
okr_num cur_num
0 KH0001 LL01
1 KH0001 UR176
2 KH0002 AS001
3 KH0003 AS002
4 KH0004 AS003
for okr_num, investigator_num in df_explode.to_records(index=False):
    add_okr_num = f"UPDATE khipu_main SET okr_num='{okr_num}' where investigator_num='{investigator_num}'"
    cur = conn.cursor()
    cur.execute(add_okr_num)
    conn.commit()
# check to make sure it worked
pd.read_sql_query('''
                  SELECT okr_num, group_concat(investigator_num,"/")
                  FROM khipu_main
                  GROUP BY okr_num
                  ''', conn).head()
OKR_NUM group_concat(investigator_num,"/")
0 KH0001 LL01/UR176
1 KH0011 AS010
2 KH0012 AS011
3 KH0013 AS012
4 KH0014 AS013

At this point, we need to drop the remaining duplicate entries of khipus (there are 13 duplicate pairings, as per the Supplemental Appendix) and then add the INVESTIGATOR_NUM values that we drop to the canonical reading of each khipu (e.g. the reading that is most complete/most recent). First, let's gather the KHIPU_ID values for the khipus we want to drop and the ones that will serve as our canonical khipu recordings:

khipu_id_drop = pd.read_sql_query('''
                                    SELECT khipu_id
                                    FROM khipu_main
                                    WHERE investigator_num IN 
                                    ('AS070','AS030','UR044','AS208',
                                    'UR115','UR036','AS056','LL01',
                                    'AS181','AS068','AS047','AS038',
                                    'AS046')
                                    ''',
                                    conn
)

khipu_id_keep = pd.read_sql_query('''
                                    SELECT khipu_id, okr_num
                                    FROM khipu_main
                                    WHERE investigator_num IN 
                                    ('UR035','UR043','UR1031','UR083',
                                    'UR126','UR133','UR163','UR176',
                                    'UR236','UR281','HP035','HP036',
                                    'HP041')
                                    ''',
                                    conn
)

Then, we can delete the khipus that are duplicates...

for khipu_id in khipu_id_drop.values.flatten():
    delete_khipu(khipu_id, conn) #13 deleted
khipu_id 1000016 Deleted
khipu_id 1000095 Deleted
khipu_id 1000102 Deleted
khipu_id 1000153 Deleted
khipu_id 1000175 Deleted
khipu_id 1000181 Deleted
khipu_id 1000202 Deleted
khipu_id 1000237 Deleted
khipu_id 1000281 Deleted
khipu_id 1000286 Deleted
khipu_id 1000334 Deleted
khipu_id 1000360 Deleted
khipu_id 1000471 Deleted

And update the INVESTIGATOR_NUM entries for the khipus that we are keeping as canonical readings:

df.loc[:, 'agg_inv_num'] = df.cur_num.apply(lambda x: '/'.join(x))
agg_inv_num_df = khipu_id_keep.merge(df, left_on='OKR_NUM', right_on='okr_num')
agg_inv_num_df[['okr_num', 'agg_inv_num']]
okr_num agg_inv_num
0 KH0033 AS031/UR1031/UR044
1 KH0032 AS030/UR043
2 KH0083 AS070/UR035
3 KH0268 UR036/UR133
4 KH0351 UR115/UR126
5 KH0067 AS056/UR163
6 KH0228 AS208/UR083
7 KH0001 LL01/UR176
8 KH0198 AS181/UR236
9 KH0058 AS047/HP035
10 KH0049 AS038/HP036
11 KH0057 AS046/HP041
12 KH0081 AS068/UR281
agg_inv_array = agg_inv_num_df[['KHIPU_ID', 'agg_inv_num']].to_records(index=False)

for khipu_id, agg_inv_num in agg_inv_array:
    update_inv_num = f"UPDATE khipu_main SET investigator_num='{agg_inv_num}' where khipu_id='{khipu_id}'"
    cur = conn.cursor()
    cur.execute(update_inv_num)
    conn.commit()

Confirm that there are 619 unique okr_num entries and that the investigator_num updates were performed correctly:

khipu_main = pd.read_sql_query(f'''SELECT *
                                   FROM khipu_main
                                ''', conn)

print("There are {} unique OKR_NUM entries".format(len(khipu_main.OKR_NUM)))

khipu_main.loc[khipu_main.KHIPU_ID.isin(agg_inv_num_df.KHIPU_ID), ['OKR_NUM','INVESTIGATOR_NUM']]
There are 619 unique OKR_NUM entries
OKR_NUM INVESTIGATOR_NUM
95 KH0033 AS031/UR1031/UR044
99 KH0032 AS030/UR043
265 KH0083 AS070/UR035
297 KH0268 UR036/UR133
306 KH0351 UR115/UR126
359 KH0067 AS056/UR163
362 KH0228 AS208/UR083
455 KH0001 LL01/UR176
507 KH0198 AS181/UR236
524 KH0058 AS047/HP035
525 KH0049 AS038/HP036
529 KH0057 AS046/HP041
605 KH0081 AS068/UR281

Finally, there are several updates to provenance and museum numbers that need to be made as corrections to the original readings in the DB.

provenance updates (from Supplemental Appendix)

provenance_updates = [
('KH0085', 'Rancho San Juan, Ica Valley'),
('KH0086', 'Rancho San Juan, Ica Valley')
]

for okr_num, new_provenance in provenance_updates:
    update_provenance = f"UPDATE khipu_main SET provenance='{new_provenance}' where okr_num='{okr_num}'"
    cur = conn.cursor()
    cur.execute(update_provenance)
    conn.commit()
khipu_main = pd.read_sql_query(f'''SELECT *
                                   FROM khipu_main
                                   WHERE okr_num IN
                                   {tuple(i[0] for i in provenance_updates)}
                                ''', conn)

khipu_main[['OKR_NUM', 'PROVENANCE']]
OKR_NUM PROVENANCE
0 KH0086 Rancho San Juan, Ica Valley
1 KH0085 Rancho San Juan, Ica Valley

museum_num updates (from Supplemental Appendix):

museum_num_updates = [
('KH0120', 'VA24370(A)'),
('KH0121', 'VA24370(B)'),
('KH0142', 'VA63042(A)'),
('KH0143', 'VA63042(B)'),
('KH0189', 'VA16145(A)'),
('KH0190', 'VA16145(B)'),
('KH0193', 'VA37859(A)'),
('KH0194', 'VA37859(B)'),
('KH0197', 'VA66832'),
('KH0264', 'TM 4/5446'),
('KH0265', 'TM 4/5446'),
('KH0273', '32.30.30/53(A)'),
('KH0348', '1924.18.0001'),
('KH0349', '1931.37.0001'),
('KH0437', 'VA42597(A)'),
('KH0438', 'VA42597(B)'),
('KH0441', 'VA47114c(A)'),
('KH0442', 'VA47114c(B)'),
('KH0443', 'VA47114c(C)'),
('KH0447', 'VA16141(A)'),
('KH0448', 'VA16141(B)'),
('KH0450', 'VA42508(A)'),
('KH0451', 'VA42508(B)'),
('KH0458', 'VA47114b'),
('KH0463', 'VA44677a(A)'),
('KH0464', 'VA44677a(B)'),
('KH0468', 'VA63038(A)'),
('KH0469', 'VA63038(B)'),
('KH0478', 'VA42607(A)'),
('KH0479', 'VA42607(B)'),
('KH0480', 'VA42607(C)'),
('KH0481', 'VA42607(D)'),
('KH0484', 'VA42578i28'),
('KH0535', 'MSP 1389/RN 43370'),
('KH0558', 'MSP 1422/RN 43403'),
('KH0567', 'MNAAHP 4202'),
('KH0587', 'MNAAHP 30564'),
('KH0588', 'B397/T41299.22'),
('KH0589', 'B376/T41299.23'),
('KH0590', 'B388/T41299.24'),
('KH0591', 'B378/T41299.25'),
('KH0592', 'B377/T41299.26'),
('KH0593', 'B384/T41299.27'),
('KH0594', 'B372/T41299.28'),
('KH0595', 'B367/T41299.29'),
('KH0596', 'B366/T41299.30'),
('KH0597', 'B374/T41299.31'),
('KH0598', 'B375/T41299.32'),
('KH0599', 'B391/T41299.20'),
('KH0600', 'B369/T41299.33.A-B'),
('KH0601', 'B399/T41299.34'),
('KH0602', 'B373/T41299.18'),
('KH0603', 'B383&B383A/T41299.35.A-B'),
('KH0604', 'B395/T41299.36'),
('KH0605', 'B382/T41299.37'),
('KH0606', 'B371/T41299.38'),
('KH0405', '41.0/1550, B/3453A')
]

for okr_num, new_museum_num in museum_num_updates:
    update_museum_num = f"UPDATE khipu_main SET museum_num='{new_museum_num}' where okr_num='{okr_num}'"
    cur = conn.cursor()
    cur.execute(update_museum_num)
    conn.commit()
khipu_main = pd.read_sql_query(f'''SELECT *
                                   FROM khipu_main
                                   WHERE okr_num IN
                                   {tuple(i[0] for i in museum_num_updates)}
                                ''', conn)

khipu_main[['OKR_NUM', 'MUSEUM_NUM']]
OKR_NUM MUSEUM_NUM
0 KH0120 VA24370(A)
1 KH0121 VA24370(B)
2 KH0190 VA16145(B)
3 KH0142 VA63042(A)
4 KH0143 VA63042(B)
5 KH0273 32.30.30/53(A)
6 KH0189 VA16145(A)
7 KH0193 VA37859(A)
8 KH0194 VA37859(B)
9 KH0197 VA66832
10 KH0349 1931.37.0001
11 KH0264 TM 4/5446
12 KH0265 TM 4/5446
13 KH0348 1924.18.0001
14 KH0535 MSP 1389/RN 43370
15 KH0558 MSP 1422/RN 43403
16 KH0589 B376/T41299.23
17 KH0591 B378/T41299.25
18 KH0590 B388/T41299.24
19 KH0592 B377/T41299.26
20 KH0593 B384/T41299.27
21 KH0594 B372/T41299.28
22 KH0595 B367/T41299.29
23 KH0596 B366/T41299.30
24 KH0597 B374/T41299.31
25 KH0598 B375/T41299.32
26 KH0599 B391/T41299.20
27 KH0601 B399/T41299.34
28 KH0602 B373/T41299.18
29 KH0603 B383&B383A/T41299.35.A-B
30 KH0604 B395/T41299.36
31 KH0605 B382/T41299.37
32 KH0606 B371/T41299.38
33 KH0405 41.0/1550, B/3453A
34 KH0437 VA42597(A)
35 KH0438 VA42597(B)
36 KH0441 VA47114c(A)
37 KH0442 VA47114c(B)
38 KH0443 VA47114c(C)
39 KH0447 VA16141(A)
40 KH0448 VA16141(B)
41 KH0450 VA42508(A)
42 KH0451 VA42508(B)
43 KH0458 VA47114b
44 KH0463 VA44677a(A)
45 KH0464 VA44677a(B)
46 KH0468 VA63038(A)
47 KH0469 VA63038(B)
48 KH0478 VA42607(A)
49 KH0479 VA42607(B)
50 KH0481 VA42607(D)
51 KH0480 VA42607(C)
52 KH0484 VA42578i28
53 KH0567 MNAAHP 4202
54 KH0587 MNAAHP 30564
55 KH0588 B397/T41299.22