# Minimal Working Example for Image Providers
## Directory structure

In [7]:
%ls
# List the present directory; it should contain the minimal working example
# directory 2019-11-21 that the rest of this notebook works in.

2019-11-14-revisions-to-metadata-import.ipynb
[34m2019-11-21[m[m/
2019-11-21-minimal-working-example.ipynb


In [8]:
!mkdir 2019-11-21/data 2019-11-21/out
# Do the source and output subdirectories exist?

mkdir: 2019-11-21/data: File exists
mkdir: 2019-11-21/out: File exists


In [3]:
# Source directory: where the images and metadata tag files live.
data = "2019-11-21/data"

# Destination directory: where the renamed files and the metadata catalog go.
out = "2019-11-21/out"

## Function definitions

In [4]:
%run -i ../script/rdai
# Run the rdai script inside this notebook's namespace (-i) so that the
# functions it defines are available to the cells below.

## Creating the metadata catalog

In [11]:
get_fixed_seq()
# Generate the fixed sequence used for uuids.
# The global variable fixed_seq needs to be defined prior to calling mint_uuid;
# otherwise rdai reports "Failed to mint a uuid!" while the catalog is built.

In [5]:
normalized_catalog = get_normalized_catalog(data)
# We generate a normalized (nested) metadata catalog from the data directory.
# It is flattened into one entry per file in the next step.

RDAI: Failed to mint a uuid! Call 'get_fixed_seq()' before trying again.
RDAI: Tag EXIF:ImageUniqueID=54237aa2071411eaaf01985aebdcd794 already exists in file 2019-11-21/data/ncar/mesa.jpg.
RDAI: Tag EXIF:ImageUniqueID=c70da35a070d11ea88e2985aebdcd794 already exists in file 2019-11-21/data/ncar/ncar.jpg.
RDAI: Failed to mint a uuid! Call 'get_fixed_seq()' before trying again.


{'contents': [{'file_path': '2019-11-21/data/arc.csv',
   'media_type': 'text/plain',
   'uuid': None},
  {'contents': [{'file_path': '2019-11-21/data/ncar/mesa.jpg',
     'media_type': 'image/jpeg',
     'uuid': '54237aa2071411eaaf01985aebdcd794'},
    {'file_path': '2019-11-21/data/ncar/ncar.jpg',
     'media_type': 'image/jpeg',
     'uuid': 'c70da35a070d11ea88e2985aebdcd794'},
    {'file_path': '2019-11-21/data/ncar/doc.csv',
     'media_type': 'text/plain',
     'uuid': None}],
   'document.start_date': '1892-07-01',
   'document.end_date': '1892-12-31'}],
 'archive.name': 'National Archives, Brisbane',
 'archive.host_country': 'Australia',
 'archive.notes': 'Imaged for the WeatherDetective project',
 'platform.name': '',
 'document.contact_person': 'Christa Pudmenzky <Christa.Pudmenzky@usq.edu.au>',
 'document.id_within_archive': '',
 'document.id_within_archive_type': '',
 'document.record_type': 'Ship log extract',
 'document.standardized_region_list': 'global ocean',
 'documen

In [6]:
catalog = unnormalize_catalog(normalized_catalog)
# We flatten the normalized catalog into a list of entries.
# Each file in the data directory has its own entry in this catalog.
# Non-image files are still present here; they are filtered out later,
# when the images are bundled.

[{'archive.name': 'National Archives, Brisbane',
  'archive.host_country': 'Australia',
  'archive.notes': 'Imaged for the WeatherDetective project',
  'platform.name': '',
  'document.contact_person': 'Christa Pudmenzky <Christa.Pudmenzky@usq.edu.au>',
  'document.id_within_archive': '',
  'document.id_within_archive_type': '',
  'document.record_type': 'Ship log extract',
  'document.standardized_region_list': 'global ocean',
  'document.start_date': '',
  'document.end_date': '',
  'document.rights_statement': 'CC-BY',
  'document.notes': 'https://data-rescue.copernicus-climate.eu/sites/default/files/2018-10/PudmenzkyC_C3SDRS_CB1Dec2017.pdf',
  'file_path': '2019-11-21/data/arc.csv',
  'media_type': 'text/plain',
  'uuid': None},
 {'archive.name': 'National Archives, Brisbane',
  'archive.host_country': 'Australia',
  'archive.notes': 'Imaged for the WeatherDetective project',
  'platform.name': '',
  'document.contact_person': 'Christa Pudmenzky <Christa.Pudmenzky@usq.edu.au>',
  '

In [7]:
write_timestamped_catalog(catalog, out)
# We write this version of the metadata catalog to the output directory.
# NOTE(review): presumably the written file is named with a timestamp, per the
# function name — confirm in the rdai script.

## Bundling the images to send to the RDA

In [14]:
catalog = read_timestamped_catalog(out)
# We read in the most recent version of the metadata catalog from the out directory.
# Note: this rebinds `catalog`, replacing any catalog already held in memory.

In [15]:
# Keep only the catalog entries whose media type identifies them as image files.
elementary_family = list(
    filter(lambda entry: entry['media_type'].startswith("image"), catalog)
)

In [16]:
import os
# We'll perform some file renames between the data directory and the out directory.

In [13]:
# Move every image listed in the catalog into the output directory,
# naming each moved file by its uuid.
for image_entry in elementary_family:
    source_path = image_entry['file_path']
    target_path = os.path.join(out, image_entry['uuid'])
    os.rename(source_path, target_path)

In [12]:
# Undo the move: return every image listed in the catalog from the output
# directory (where it is named by its uuid) to its original path.
for image_entry in elementary_family:
    staged_path = os.path.join(out, image_entry['uuid'])
    os.rename(staged_path, image_entry['file_path'])