# DANAM Image Metadata Notebook
This notebook is used along with the scripts clean_json and write_csv to query and analyze DANAM's image metadata quickly.

Queries are done via pandas DataFrames.

Notes 2022.01.15:
- download missing images
- upload new images (monument names already written down!)
- create reports for select monuments and upload them
- report missing images

In [1]:
import csv, os
import pandas as pd
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 100)

#from datetime import datetime

from scripts.clean_json import clean_json
from scripts.write_csv import list_from_txt

def print_df(df):
    """Return only the columns of ``df`` that are shown when reviewing image metadata.

    The selection (in display order) is: lastModified, validCaption, filename,
    danam_caption, caption and date. A KeyError is raised by pandas if any of
    these columns is missing from ``df``.
    """
    review_columns = [
        'lastModified',
        'validCaption',
        'filename',
        'danam_caption',
        'caption',
        'date',
    ]
    return df[review_columns]


### Read DANAM json export
Always replace with the latest export

In [2]:
# Read the DANAM JSON export (always point this at the latest export file).
# The path is assembled with os.path.join instead of a hand-written
# backslash string: "\D" and "\M" are invalid escape sequences in a normal
# string literal, and a backslash-separated path only resolves on Windows.
danam_export = os.path.join("json", "DANAM", "Monument_2022-01-13_01-28-55.json")
#danam = json.load(codecs.open(danam_export, 'r', 'utf-8'))
#danam = danam['business_data']['resources']
# clean_json comes from scripts.clean_json; its result is wrapped in a
# DataFrame that every later cell queries.
# NOTE(review): presumably one record per image - confirm against clean_json.
danam_images = clean_json(danam_export)
danam_df = pd.DataFrame(danam_images)

### List of metadata columns

In [3]:
# Show the column labels from position 3 up to (but excluding) the last 19 columns.
danam_df.columns[3:-19]

Index(['danam_caption', 'empty_column', 'filename_danam', 'filetype',
       'filename', 'mon_id', 'classification', 'notes', 'heidoc', 'heidata',
       'validCaption', 'caption', 'date1', 'date2', 'date', 'date3', 'agent',
       'role', 'agent2', 'role2', 'copyright', 'source', 'class_code',
       'agent3', 'date_scan', 'license', 'url', 'rights_text', 'lastModified'],
      dtype='object')

## Queries

### Caption Validity

In [3]:
# Count the rows whose validCaption flag is true and compare with the total.
valid_captions = danam_df.loc[danam_df['validCaption']].shape[0]
all_images = danam_df.shape[0]

if all_images:
    valid_share = valid_captions / all_images
    # The labels announce a percentage, so format the ratio as one
    # (e.g. 73.60%) instead of printing the raw fraction.
    print("Percentage of images with valid captions: {:.2%}".format(valid_share))
    print("Percentage of images with invalid captions: {:.2%}".format(1 - valid_share))
else:
    # An empty export would otherwise raise ZeroDivisionError.
    print("No images found in the DANAM export.")


Percentage of images with valid captions: 0.7359996107624192
Percentage of images with invalid captions: 0.26400038923758085


### Checking images per monument

In [48]:
# Look up one monument in the metadata table to check whether it is complete.
mon_id = 'DDL0110'
print(f"Monument ID: {mon_id}")

# Match the ID exactly as typed or in its upper-case spelling.
mon = danam_df[danam_df['mon_id'].isin([mon_id, mon_id.upper()])]
print(f"Number of images in DANAM: {len(mon)}")

# Narrow the selection to filenames containing "_D_".
mon = mon[mon['filename'].str.contains("_D_")]
# Optional extra filter (disabled): only rows with an invalid caption.
#mon = mon.loc[mon['validCaption']==False]
print(f"Images that matched query: {len(mon)}")

mon.loc[:, ['validCaption', 'filename', 'caption', 'agent']].sort_values(by='validCaption')

Monument ID: DDL0110
Number of images in DANAM: 24
Images that matched query: 2


Unnamed: 0,validCaption,filename,caption,agent
15423,True,DDL0110_D_2015_floor_plan,"Ajayamerukoṭa temple group 1, śikhara temples with pavilion 1, floor plan",
15443,True,DDL0110_D_2015_site_plan,"Ajayamerukoṭa temple group 1, śikhara temples with pavilion 1, site plan",


## Preparing Metadata Uploads
### Finding Recently Updated Monuments

In [6]:
# Find recently updated monuments that are already uploaded to HeidICON,
# write their IDs to mon/recently_changed.mon and show the valid-caption
# image rows of those monuments.
# All paths are built with os.path.join so the cell runs on any OS
# (the original mixed "mon/..." and Windows-only "mon\\..." spellings).
mon_ids = list_from_txt(os.path.join('mon', 'current.mon'))

# NOTE(review): lastModified is compared as text; this assumes the column
# holds ISO-formatted timestamps so string order is chronological - confirm.
recent = danam_df.loc[danam_df['lastModified'] > '2021-12-05']
# NOTE(review): this prints the number of rows in `recent` (image records),
# not the number of distinct monuments - confirm which one is wanted.
print("Number of recently update monuments: {}".format(recent.shape[0]))
recent_mon_ids = set(recent['mon_id']).union(mon_ids)

uploaded = list_from_txt(os.path.join('mon', 'uploaded.mon'))
# Set lookup instead of scanning the list once per candidate ID; sorting
# makes the written file deterministic (set iteration order is not).
uploaded_lookup = set(uploaded)
to_update_mon = sorted(m for m in recent_mon_ids if m in uploaded_lookup)
print("Number of those monuments already uploaded to HeidIcon: {}".format(len(to_update_mon)))

# The context manager closes the file even if a write fails; the generator
# keeps its loop variable local, so the notebook-level `mon_id` used by the
# monument query cell is no longer overwritten here.
with open(os.path.join('mon', 'recently_changed.mon'), 'w') as out_file:
    out_file.writelines(m + "\n" for m in to_update_mon)

to_update = danam_df.loc[danam_df['mon_id'].isin(to_update_mon)]
to_update = to_update.loc[to_update['validCaption']]
# Same column selection as before, via the shared print_df helper.
print_df(to_update)

Number of recently update monuments: 634
Number of those monuments already uploaded to HeidIcon: 6


Unnamed: 0,lastModified,validCaption,filename,danam_caption,caption,date
2387,2021-11-08 11:12:55.773,True,BKT0515_D_2021_location_map,"Chumāgaṇeśa Mandira; location map by Niels Gutschow; 1987; updated by Anil Basukala, 2021-11-08","Chumāgaṇeśa Mandira, location map",1987
2388,2021-11-21 12:48:08.761,True,BKT0151_D_2021_site_plan,Chumāgaṇeśa Mandira; site plan by Anil Basukala; 2021-11-21,"Chumāgaṇeśa Mandira, site plan",2021-11-21
2389,2021-11-21 12:48:23.015,True,BKT0151_D_2021_floor_plan,Chumāgaṇeśa Mandira; floor plan by Anil Basukala; 2021-11-21,"Chumāgaṇeśa Mandira, floor plan",2021-11-21
2390,2021-11-28 11:58:45.820,True,BKT0151-004_P_20211126_02,"Chumā Gaṇeśa Mandira, Lion (B), view from E; photo by Anil Basukala; 2021-11-26","Chumā Gaṇeśa Mandira, Lion (B), view from E",2021-11-26
2391,2021-11-28 11:59:00.742,True,BKT0151_I_20211126_02,"Chumā Gaṇeśa Mandira, Inscription from NS 967 above the statue, view from W; photo by Anil Basukala; 2021-11-26","Chumā Gaṇeśa Mandira, Inscription from NS 967 above the statue, view from W",2021-11-26
...,...,...,...,...,...,...
17869,2021-08-24 11:19:58.200,True,KIR4013_P_20210803_05,"Gaṇeśa Bhajana Phalcā, close up of a column,view from N; photo by Anil Basukala; 2021-08-03","Gaṇeśa Bhajana Phalcā, close up of a column,view from N",2021-08-03
17870,2021-08-24 11:20:02.014,True,KIR4013_P_20210803_07,"Gaṇeśa Bhajana Phalcā, detail of a column's foot, view from N; photo by Anil Basukala; 2021-08-03","Gaṇeśa Bhajana Phalcā, detail of a column's foot, view from N",2021-08-03
17871,2021-08-24 11:20:06.747,True,KIR4013_P_20210803_03,"Gaṇeśa Bhajana Phalcā, arcade of columns, view from NW; photo by Anil Basukala; 2021-08-03","Gaṇeśa Bhajana Phalcā, arcade of columns, view from NW",2021-08-03
17872,2021-08-24 11:20:00.046,True,KIR4013_P_20210803_06,"Gaṇeśa Bhajana Phalcā, detail of a capital on a column, view from N; photo by Anil Basukala; 2021-08-03","Gaṇeśa Bhajana Phalcā, detail of a capital on a column, view from N",2021-08-03


### Metadata of recently uploaded maps

In [8]:
# Monuments for which only maps/drawings were uploaded recently.
# os.path.join keeps the path valid on non-Windows systems as well.
only_maps = list_from_txt(os.path.join("mon", "only_maps.mon"))

upload_map = danam_df.loc[danam_df['mon_id'].isin(only_maps)]
upload_map = upload_map.loc[upload_map['validCaption']]
# Literal substring match; na=False makes rows without a filename count as
# "no match" instead of raising when the mask contains NaN.
upload_map = upload_map.loc[upload_map['filename'].str.contains("_D_", regex=False, na=False)]
print_df(upload_map).sort_values("filename")


Unnamed: 0,lastModified,validCaption,filename,danam_caption,caption,date
11645,2021-12-23 12:14:30.217,True,SKH0010_D_2021_floor_plan,Jyotirliṅgeśvara Mandira; floor plan by Bijay Basukala; 2021-12-23,"Jyotirliṅgeśvara Mandira, floor plan",2021-12-23
11643,2021-12-22 12:54:17.613,True,SKH0010_D_2021_location_map,Jyotirliṅgeśvara Mandira; location map by Thomas Schrom; 2021-10,"Jyotirliṅgeśvara Mandira, location map",2021-10
11644,2021-12-23 12:14:46.334,True,SKH0010_D_2021_site_plan,Jyotirliṅgeśvara Mandira; site plan by Anil Basukala; 2021-12-23,"Jyotirliṅgeśvara Mandira, site plan",2021-12-23
11835,2021-12-21 13:37:48.949,True,SKH0011_D_2021_location_map,"Mahādevasthāna Pokharī; location map by Thomas Schrom, 2021-12-21","Mahādevasthāna Pokharī, location map",2021-12-21
11838,2021-12-21 13:38:35.126,True,SKH0011_D_2021_section,Mahādevasthāna Pokharī; section by Anil Basukala; 2021-12-21,"Mahādevasthāna Pokharī, section",2021-12-21
11836,2021-12-21 13:38:19.173,True,SKH0011_D_2021_site_plan,Mahādevasthāna Pokharī; site plan by Anil Basukala; 2021-12-21,"Mahādevasthāna Pokharī, site plan",2021-12-21
11837,2021-12-21 13:38:05.705,True,SKH0011_D_2021_top_view,Mahādevasthāna Pokharī; top view by Anil Basukala; 2021-12-21,"Mahādevasthāna Pokharī, top view",2021-12-21
3003,2021-12-08 12:47:44.860,True,SKH0015_D_2021_north_elevation,Sīdhvākhā; north elevation by Anil Basukala; 2021-12-08,"Sīdhvākhā, north elevation",2021-12-08
3002,2021-12-08 12:47:24.290,True,SKH0015_D_2021_section,Sīdhvākhā; section by Anil Basukala; 2021-12-08,"Sīdhvākhā, section",2021-12-08
3007,2021-12-16 10:37:09.458,True,SKH0015_D_20221_floor_plan,Sīdhvākhā; floor plan by Anil Basukala; 2021-12-08,"Sīdhvākhā, floor plan",2021-12-08


### Metadata of recently updated images (all)
Might be redundant due to to_update?

In [9]:
# Keep the rows whose monument ID is listed in mon_ids, then keep only
# those whose validCaption flag is true.
to_upload = (
    danam_df
    .loc[danam_df['mon_id'].isin(mon_ids)]
    .loc[lambda frame: frame['validCaption']]
)
print_df(to_upload)

Unnamed: 0,lastModified,validCaption,filename,danam_caption,caption,date
2626,2021-10-16 22:02:23.695,True,SKH0560_P_20180725_01_Bajracharya,"Caugharā Sataḥ, east wing, view from SW; photo by T M Bajracharya; 2018-07-25; courtesy of UNESCO Kathmandu","Caugharā Sataḥ, east wing, view from SW",2018-07-25
2627,2021-10-16 21:59:38.375,True,SKH0560_P_20210405_Awal,"Caugharā Sataḥ, east wing, view from SE; photo by Purushottam Awal; 2021-04-05; courtesy of UNESCO Kathmandu","Caugharā Sataḥ, east wing, view from SE",2021-04-05
2628,2022-01-01 14:10:12.497,True,SKH0560_001_I_20211031_01,"Caugharā Sataḥ, signboard; photo by Thomas Schrom; 2021-10-31","Caugharā Sataḥ, signboard",2021-10-31
2629,2022-01-01 14:19:09.107,True,SKH0560_002_I_20211217_01,"Caugharā Sataḥ, signboard; photo by Thomas Schrom; 2021-12-17","Caugharā Sataḥ, signboard",2021-12-17
2630,2022-01-01 14:38:56.217,True,SKH0560-001_P_20211205_02,"Caugharā Sataḥ, Shrine with aniconic stone; photo by Thomas Schrom; 2021-12-05","Caugharā Sataḥ, Shrine with aniconic stone",2021-12-05
...,...,...,...,...,...,...
17100,2021-12-29 14:09:35.902,True,SKH0032-001_P_20211217_01,"Vasundharā Phalcā, well with a statue of Viṣṇu, view from NW; photo by Thomas Schrom; 2021-12-17","Vasundharā Phalcā, well with a statue of Viṣṇu, view from NW",2021-12-17
17101,2021-12-29 17:53:12.610,True,SKH0032_D_2021_section,Vasundharā Phalcā; section drawing by Thomas Schrom; 2021-12,"Vasundharā Phalcā, section drawing",2021-12
17102,2021-12-29 14:04:50.977,True,SKH0032-003_P_20211217_02,"Vasundharā Phalcā, Mahākāla, view from S; photo by Thomas Schrom; 2021-12-17","Vasundharā Phalcā, Mahākāla, view from S",2021-12-17
17103,2021-12-29 17:52:04.883,True,SKH0032_D_2021_location_map,Vasundharā Phalcā; location map by Thomas Schrom; 2021-12,"Vasundharā Phalcā, location map",2021-12


### Checking for caption fixes

In [49]:
# Strip the licence / heidICON boilerplate from the captions of the
# monuments listed in mon/to_fix.mon.
to_fix = list_from_txt(os.path.join("mon", "to_fix.mon"))
upload_fix = danam_df.loc[danam_df['mon_id'].isin(to_fix)]
# Literal match, NaN-safe. The .copy() gives an independent frame so the
# caption assignments below do not write into a slice of danam_df
# (SettingWithCopyWarning).
upload_fix = upload_fix.loc[
    upload_fix['danam_caption'].str.contains("Attribution 40", regex=False, na=False)
].copy()

# Regular-expression fragments removed from the caption, applied in order
# and case-insensitively. Raw strings keep "\(" / "\)" as regex escapes
# without relying on Python passing unknown string escapes through.
fixes = [
    r"If not otherwise stated, all images and texts in this folder are published under Creative Commons",
    r"If not otherwise stated, all images and texts in this monument folder are published under Creative Commons",
    r"Attribution 4.0 License \(CC BY-SA 4.0\),",
    r"Attribution 40 License \(CC BY-SA 40\),",
    r"and the copyright lies with NHDP. All visuals of this monument folder",
    r" and more are \(or will be\) also stored in heidICON,",
    r"and more are also stored in heidICON,",
    r"the object and multimedia database of Heidelberg University",
    r"\(Type the ID-number or key words in the first line and click the search field.\)",
    r"\(type the ID-number or key words in the first line and click the search field.\)",
    r"\(type the ID-number or key words in the first line and click the search field\)",
    r"\(type the ID-number or keywords in the first line and click the search field\).",
    r"You will also find the initial report there",
    r"The latest report will always be available in DANAM \(this page\).",
    r"You will also find the initial report there. The latest report will always be available in DANAM \(this page\).",
    # Left-over full stops. The dot must be escaped: with regex=True a bare
    # "." matches every character on pandas >= 2.0 and would blank the whole
    # caption (pandas 1.x treated single-character patterns as literals).
    r"\.",
]

for fix in fixes:
    upload_fix['caption'] = upload_fix['caption'].str.replace(fix, '', regex=True, case=False)

upload_fix['caption'] = upload_fix['caption'].str.strip()
# NOTE(review): upload_fix is not referenced by the CSV export cell of this
# notebook - confirm whether the fixed captions should be exported too.

## Exporting Results to CSV for Weekly Metadata Transfer

In [10]:
# Write the collected metadata rows to the CSV used for the weekly transfer.
# Column order of the CSV (no header row is written).
cols = [
        'filename', 'caption', 'date1', 'date2', 'date', 'date3', 'agent', 'role', 'agent2', 'role2',
        'copyright', 'source', 'empty_column', 'notes', 'mon_id', 'class_code', 'classification', 'agent3', 'date_scan',
        'license', 'url', 'rights_text', 'heidata', 'heidoc'
        ]

all_upload = pd.concat([to_upload, to_update, upload_map])
# The three selections are all row subsets of danam_df and can overlap
# (e.g. a monument listed in current.mon that is also already uploaded).
# Rows keep their danam_df index label, so a repeated label is the same
# image record; keep the first occurrence so it is exported only once.
all_upload = all_upload.loc[~all_upload.index.duplicated(keep='first')]
# Semicolon-separated, every field quoted; UTF-8 is pandas' default and is
# spelled out because the captions contain diacritics.
all_upload.to_csv(
    "csv/image_metadata.csv",
    columns=cols,
    header=False,
    sep=';',
    index=False,
    quotechar="\"",
    quoting=csv.QUOTE_ALL,
    encoding="utf-8",
)
