# DANAM Image Metadata Notebook
This notebook is used along with the scripts clean_json and write_csv to query and analyze DANAM's image metadata quickly.

Queries is done via Pandas Dataframe.


In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 100)

from datetime import datetime
from IPython.display import Markdown, display

from scripts.clean_json import clean_json


### Read DANAM json export
Always replace with the latest export

In [2]:
# read DANAM json export
danam_export = "json\DANAM\Monument_2022-04-25_04-41-33.json"
danam_images = clean_json(danam_export)
danam_df = pd.DataFrame(danam_images)

## Queries
### Checking images per monument

In [5]:
headers = ['danam_caption', 'empty_column', 'filename_danam', 'filetype',
       'filename_danam_2', 'filetype_2', 'filename', 'mon_id',
       'classification', 'notes', 'heidoc', 'heidata', 'validCaption',
       'caption', 'date1', 'date2', 'date', 'date3', 'agent', 'role', 'agent2',
       'role2', 'copyright', 'source', 'class_code', 'agent3', 'date_scan',
       'license', 'url', 'rights_text', 'lastModified']

In [15]:
# query through metadata table based on mon id

mon_id = 'SKH0270'
print("Monument ID: {}".format(mon_id))
mon = danam_df.loc[(danam_df['mon_id']==mon_id) | (danam_df['mon_id']==mon_id.upper())]
print("Number of images in DANAM: {}".format(mon.shape[0]))

### special queries ###
#mon = mon.loc[mon['filename'].str.contains("_D_")]
#mon = mon.loc[mon['validCaption']==False]
print("Images that matched query: {}".format(mon.shape[0]))

#select_headers = ['validCaption','filename', 'danam_caption', 'caption', 'date', 'agent', 'classification']
select_headers = ['filename', 'danam_caption', 'caption', 'date', 'agent', 'role', 'classification', 'notes', 'copyright', 'source', 'license', 'rights_text']

mon = mon[select_headers].sort_values('filename')
#mon[select_headers].sort_values('filename')


Monument ID: SKH0270
Number of images in DANAM: 14
Images that matched query: 14


### Data Output

In [3]:
## Time Period ##
## format: year, month, date
start = datetime(2021, 10, 1)
end = datetime(2022, 3, 31)
display(Markdown(
    "### Activities between {} and {}"
    .format(start.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d"))
    ))

# Filter out danam image entries by the given time period above.
query = danam_df
query = query.loc[query['lastModified'] > start]
query = query.loc[query['lastModified'] < end]

total_monuments = set(query['mon_id'])
inscriptions = query.loc[query['filename'].str.contains("_I_")]
photos = query.loc[query['filename'].str.contains("_P_")]
drawings = query.loc[query['filename'].str.contains("_D_")]
historical = query.loc[query['filename'].str.contains("_H_")]

display(Markdown(
"""
|   |   |
|---|---|
| Monuments updated in this time period  | {0}  |
| Inscriptions updated in this time period  | {1}  |
| Photographs updated in this time period  | {2}  |
| Drawings updated in this time period  | {3}  |
| Historical images updated in this time period  | {5}  |

""".format(len(total_monuments),
            inscriptions.shape[0],
            photos.shape[0],
            drawings.shape[0],
            query.shape[0] - inscriptions.shape[0] - photos.shape[0] - drawings.shape[0],
            historical.shape[0]
)
))

### Activities between 2021-10-01 and 2022-03-31


|   |   |
|---|---|
| Monuments updated in this time period  | 242  |
| Inscriptions updated in this time period  | 394  |
| Photographs updated in this time period  | 2369  |
| Drawings updated in this time period  | 488  |
| Historical images updated in this time period  | 426  |



### Caption Validity

In [3]:
valid_captions = danam_df.loc[danam_df['validCaption']].shape[0]
all_images = danam_df.shape[0]

print("Percentage of images with valid captions: {}".format(valid_captions/all_images))
print("Percentage of images with invalid captions: {}".format(1-(valid_captions/all_images)))


Percentage of images with valid captions: 0.7565614997713763
Percentage of images with invalid captions: 0.24343850022862368
