# Data Discrepancy between HeidICON and DANAM

Compare the file names in HeidICON and DANAM, and export the results to an Excel file.

In [1]:
import pandas as pd
from scripts.compare import *
from scripts.write_csv import list_from_txt
from IPython.display import display_markdown, display, Markdown

# Display settings for this notebook: no column-width limit for cell text,
# and up to 100 rows per displayed frame.
pd.options.display.max_colwidth = None
pd.options.display.max_rows = 100

In [2]:
# Paths to the HeidICON export (Excel) and the DANAM monument export (JSON).
heidicon_export = "heidicon_export.xlsx"
# Forward slashes on purpose: "\D" and "\M" are invalid escape sequences in a
# normal string literal, and a backslash-separated path only resolves on Windows.
danam_export = "json/DANAM/Monument_2022-11-07_01-16-16.json"

In [3]:
# Load the HeidICON and the DANAM export into pandas via load_data
# (brought into scope by the star import from scripts.compare).
# NOTE(review): the *_df names suggest both results are DataFrames — confirm in load_data.
# This might take up to a minute.
heidicon_df, danam_df = load_data(heidicon_export, danam_export)

Each result returned by `get_info_for_monument` contains the following keys:

| key | description |
|-----|-------------|
| mon_id | Monument ID |
| heidicon_img | Number of images in HeidICON |
| heidicon_nometa | Number of images in HeidICON without metadata |
| danam_img | Number of images in DANAM |
| danam_nometa | Number of images in DANAM without a valid caption |
| sds_img | Number of images in SDS |
| files | Files of the monument from SDS, DANAM, and HeidICON as dataframe | 
| missing_danam | Files from HeidICON that are missing in DANAM |
| count_missing_danam | Number of files missing in DANAM |
| missing_heidicon | Files from DANAM that are missing in HeidICON |
| count_missing_heidicon | Number of files missing in HeidICON |

In [4]:
# Evaluate the file names for all monuments listed in mon/all.mon
# and export the results to the Excel file test.xlsx.
# This calculation might take up to 5 minutes, depending on the computer.

mon_ids = list_from_txt("mon/all.mon")

# Collected under a descriptive name rather than `all`, which would shadow
# the built-in all() for the rest of the kernel session.
monument_infos = [
    get_info_for_monument(mon_id, heidicon_df, danam_df)
    for mon_id in mon_ids
]

# One row per monument; written once after all monuments are evaluated.
pd.DataFrame(monument_infos).to_excel("test.xlsx")

In [4]:
# Report for a single monument: a summary table of its image counts,
# followed by its list of files on SDS, DANAM, and HeidICON.

mon_id = 'LAL4126'
res = get_info_for_monument(mon_id, heidicon_df, danam_df)

# Each placeholder is named after the key of `res` it is filled from.
summary_table = """
| Monument ID   | {mon_id}  |
|---|---|
| Number of images in HeidICON  | {heidicon_img}  |
| Number of images in HeidICON without metadata  | {heidicon_nometa}  |
| Number of images in DANAM  | {danam_img}  |
| Number of image without valid caption in DANAM | {danam_nometa}  |
| Number of images in SDS  | {sds_img}  |

"""
display(Markdown(summary_table.format_map(res)))

# Last expression of the cell, so the file listing is shown as the cell output.
res['files']


| Monument ID   | LAL4126  |
|---|---|
| Number of images in HeidICON  | 16  |
| Number of images in HeidICON without metadata  | 0  |
| Number of images in DANAM  | 18  |
| Number of image without valid caption in DANAM | 0  |
| Number of images in SDS  | 0  |



Unnamed: 0,danam,heidicon
0,LAL4126,LAL4126-001_P_20190206_01
1,LAL4126-001_P_20190206_01,LAL4126_D_2020_floor_plan
2,LAL4126_D__2020FVnE,LAL4126_D_2020_location_map
3,LAL4126_D__2020_floor_plan,LAL4126_D_2020_section
4,LAL4126_D__2020_location_map,LAL4126_D_2020_site_plan
5,LAL4126_D__2020_site_plan,LAL4126_H_1978_01
6,LAL4126_H_01,LAL4126_H_20111209_01
7,LAL4126_H_1920s_anonymous,LAL4126_H_20111209_02
8,LAL4126_H_20111208_01,LAL4126_H_20111209_03
9,LAL4126_H_20111208_02,LAL4126_H_20111209_04
