### First, import all of the libraries we need for our metadata wrangle

In [1]:
import pandas as pd
pd.options.display.width = 180
# os is used directly by later cells (os.path.join, os.makedirs, ...), so import it explicitly
import os
from os import walk
import shutil
import ipywidgets as widgets
from ipywidgets import *
import requests
import csv
import io

### Now let's select some metadata. 

If you have prepared metadata\* on your computer that you want to add, you can upload it into the repository locally using the [Add Metadata](00AddMetadata.ipynb) notebook before completing the following cells in this notebook. Otherwise, follow along and use the sample metadata that the following steps will help you select.

\* Prepared metadata contains a root element that has a standardized namespace and namespace prefix. Many dialects such as ISO and DIF are consistently written this way, but some dialects such as CSDGM are often written by organizations as only well-formed XML.

#### Choose an Organization

In [2]:
# List the immediate subdirectories of the MILE2 collection directory; these are the
# choices offered when selecting metadata for evaluation.
# walk() reports the top directory first, so only that first entry is consumed. If the
# directory cannot be read, walk() yields nothing and the list stays empty.
top_of_collection = next(walk('../collection/'), ('../collection/', [], []))
Organizations = list(top_of_collection[1])

#Create a function to select the organization the metadata comes from
def OrganizationChoices(organization):
    """Store the selected organization and print a confirmation.

    The selection is written to the module-level ``Organization`` variable so
    that code outside this function can read it.

    Parameters
    ----------
    organization
        The value selected by the user; it is stored unchanged.
    """
    # Only Organization is assigned here, so it is the only name declared global.
    global Organization
    Organization = organization
    print("Organization of the collection is", Organization)

# Organization selector dropdown: offers the entries of Organizations and passes the
# selected one to OrganizationChoices. Kept as the last expression of the cell so the
# widget is rendered as the cell's output.
interactive(OrganizationChoices, organization=Organizations)    

#### Choose a collection

In [6]:
# List the collections of the organization selected in the dropdown above: they are the
# immediate subdirectories of that organization's directory.
# Only the first entry produced by walk() (the top directory) is needed; an unreadable
# directory yields nothing and leaves the list empty.
organization_dir = os.path.join('../collection', Organization)
top_of_organization = next(walk(organization_dir), (organization_dir, [], []))
Collections = list(top_of_organization[1])
    
# Create a function to select the collection the metadata comes from
def CollectionChoices(collection):
    """Store the selected collection and print a confirmation.

    The selection is written to the module-level ``Collection`` variable so
    that code outside this function can read it.

    Parameters
    ----------
    collection
        The value selected by the user; it is stored unchanged.
    """
    # Only Collection is assigned here, so it is the only name declared global.
    global Collection
    Collection = collection
    # Echo the selection, as the organization and dialect selectors do, so the user
    # gets visible feedback from the dropdown.
    print("Collection is", Collection)
    
# Create a dropdown using the Collections list and the collection selector function
# (CollectionChoices). Choosing an entry sets the Collection variable. Kept as the last
# expression of the cell so the widget is rendered as the cell's output.
interactive(CollectionChoices, collection=Collections)    

Many organizations support multiple metadata dialects, and share their collections in more than one dialect. This list is created the same way the others are. It adds the different dialects the collection is shared in to a list.

In [7]:
# A collection may be shared in more than one metadata dialect. The dialects available
# for the selected collection are the immediate subdirectories of its directory, found
# the same way as the organization and collection lists.
collection_dir = os.path.join('../collection', Organization, Collection)
top_of_collection_dir = next(walk(collection_dir), (collection_dir, [], []))
Dialects = list(top_of_collection_dir[1])
# dialectList is a second name for the same list object
dialectList = Dialects

# Selector callback for the dialect that will be sent to the evaluator service.
def dialectChoice(dialect):
    """Store the selected dialect in the module-level ``Dialect`` and print it.

    Parameters
    ----------
    dialect
        The value selected by the user; it is stored unchanged.
    """
    global Dialect
    Dialect = dialect
    print("Dialect of the collection is", dialect)

# Create a dropdown using the Dialects list (via its alias dialectList) and the dialect
# selector function (dialectChoice). Choosing an entry sets the Dialect variable. Kept as
# the last expression of the cell so the widget is rendered as the cell's output.
interactive(dialectChoice,dialect=dialectList)

#### Create the package to send to the Evaluator

In [9]:
# change to the zip directory 
%cd ../zip

# Combine the Organization, Collection, and Dialect variables with the string 'xml' as a relative path 
# and save the string to a variable
MetadataDestination=os.path.join(Organization,Collection,Dialect,'xml')

#Use the path to create a directory structure in the zip directory
os.makedirs(MetadataDestination, exist_ok=True)

# Create a path to the metadata you selected earlier and save the string to a variable, 'MetadataLocation'.
MetadataLocation=os.path.join('../collection/',Organization,Collection,Dialect,'xml')

# copy the metadata to the new directory structure.
src_files = os.listdir(MetadataLocation)
for file_name in src_files:
    full_file_name = os.path.join(MetadataLocation, file_name)
    if (os.path.isfile(full_file_name)):
        shutil.copy(full_file_name, MetadataDestination)
        
# Make a zip file to upload to the evaluator service
shutil.make_archive('../upload/metadata', 'zip', os.getcwd())



/Users/scgordon/MILE2/zip


'/Users/scgordon/MILE2/upload/metadata.zip'

#### Evaluate metadata

Send metadata to the Evaluator. Get the responses with csv encoding. This step can take up to a minute and doesn't track progress, but a dataframe or an error message will be returned.

In [248]:
# Switch to upload directory
%cd ../upload 

# Send metadata package, read the response into a dataframe
url = 'http://metadig.nceas.ucsb.edu/metadata/evaluator'
files = {'zipxml': open('metadata.zip', 'rb')}
r = requests.post(url, files=files, headers={"Accept-Encoding": "gzip"})
r.raise_for_status()
EvaluatedMetadataDF = pd.read_csv(io.StringIO(r.text), quotechar='"')

# Build the output directory and file names. Results go under ../data/<Organization>/ and
# the file names are built from the collection and dialect.
Filedirectory = os.path.join('../data/', Organization)
os.makedirs(Filedirectory, exist_ok=True)

collection_dialect = '_'.join([Collection, Dialect])
Filename = '/' + collection_dialect + '_Evaluated.csv.gz'
SimplfiedFilename = '/' + collection_dialect + '_SimplifiedEvaluated.csv.gz'
FilePath = ''.join([Filedirectory, Filename])
SimplifiedFilePath = ''.join([Filedirectory, SimplfiedFilename])

# Label every row with <Organization>_<Collection>_<Dialect> in a new 'Collection' column
# at position 3, then write the full results as a gzipped CSV.
collection_label = '_'.join([Organization, Collection, Dialect])
EvaluatedMetadataDF.insert(3, 'Collection', collection_label)
EvaluatedMetadataDF.to_csv(FilePath, mode='w', compression='gzip', index=False)

# Clean up the staging artifacts. The working directory is changed step by step because
# the paths passed to rmtree below are relative to it.
%cd ../
# Delete the upload directory, including the zip archive created in it
shutil.rmtree('upload')
%cd zip
# Delete the copied metadata tree for the selected organization from the zip directory
shutil.rmtree(Organization)
# Finish in the data directory
%cd ../data

# Create a simplified XPath output and archive it.
# The clean-up is done on a copy so the full results in EvaluatedMetadataDF are not modified.
EvaluatedSimplifiedMetadataDF = EvaluatedMetadataDF.copy()
# regex is stated explicitly on every replace: pandas >= 2.0 treats the pattern as a
# literal string by default, which would make the two pattern-based steps silently match
# nothing.
EvaluatedSimplifiedMetadataDF['XPath'] = (
    EvaluatedSimplifiedMetadataDF['XPath']
    # drop the literal '/gco:CharacterString' step
    .str.replace('/gco:CharacterString', '', regex=False)
    # drop lowercase prefixes ending in ':' that follow a slash
    .str.replace('/[a-z]+:+?', '/', regex=True)
    # drop steps of the form UPPER_Name (a step such as '/MD_Metadata' matches this pattern)
    .str.replace('/[A-Z]+_[A-Za-z]+/?', '/', regex=True)
    # collapse the double slashes left behind by the removals above
    .str.replace('//', '/', regex=False)
    # strip trailing slashes (rstrip takes a set of characters, so one '/' is enough)
    .str.rstrip('/')
)
EvaluatedSimplifiedMetadataDF.to_csv(SimplifiedFilePath, mode='w', compression='gzip', index=False)

/Users/scgordon/MILE2
/Users/scgordon/MILE2/zip
/Users/scgordon/MILE2/data


### Select the notebook that prepares the data for different types of analysis

* [Create RAD Data](02RADdf.ipynb)
* [Cross Collection Comparisons](03CrossCollectionComparisons.ipynb)
* [Concept Content Consistency](04ConceptVerticals.ipynb)
* [Exploring Unknown Concepts](05ExploringUnknownConcepts.ipynb)
* [Adding Quality tests](05ExploringUnknownConcepts.ipynb)