### This notebook allows the user to evaluate the structure and content of valid XML metadata using the metadataEvaluation Python module. Documentation concepts can be identified for a broad subset of earth science metadata standards. These concepts allow for records to be assessed by many communities conventions for essential discovery metadata. 

#### Now let's select some metadata.

In [1]:
#modules to support functions
import pandas
from ipywidgets import *
import ipywidgets as widgets
from IPython.display import display
import metadataEvaluation

os.makedirs('../data/Combined', exist_ok=True)

#functions to identify and label an xml collection for Metadata Evaluation
def OrganizationChoices(organization):
    global Organization
    Organization=organization
    print("Curating Organization", Organization)
    
def CollectionName(collection):
    global Collection
    Collection=collection
    print("Collection is named", Collection)
    
def DialectName(dialect):
    global Dialect
    Dialect=dialect
    print("Collection is written in the", Dialect,"dialect")   
    
def metadataLocation(directory):
    global MetadataLocation
    MetadataLocation=directory
    print("The local directory", MetadataLocation,"contains the metadata for evaluation")    
        
w=interactive(OrganizationChoices, organization='')    
w2=interactive(CollectionName, collection='')
w3=interactive(DialectName, dialect='')
w4=interactive(metadataLocation, directory='')    

display(w)
display(w2)
display(w3)
display(w4)

Evaluate the collection and create data products

In [53]:
EvaluatedMetadataDF=metadataEvaluation.XMLeval(MetadataLocation, Organization, Collection, Dialect)
SimplifiedEvaluatedMetadataDF=metadataEvaluation.simpleXPathDataProduct(EvaluatedMetadataDF, Organization, Collection, Dialect)
xpathOccurrenceDF=metadataEvaluation.xpathOccurrence(SimplifiedEvaluatedMetadataDF, Organization, Collection, Dialect)
XpathCounts(SimplifiedEvaluatedMetadataDF, Organization, Collection, Dialect)

Unnamed: 0,Concept,Content,Record,XPath
0,Abstract,This report is a summary of meteorological obs...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
1,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WATER...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
2,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WINDS...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
3,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WATER...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
4,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC TEMPE...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
5,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC PRESS...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
6,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > PRECIPITATION > P...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
7,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC RADIA...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
8,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > CLOUDS,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...
9,Additional Attributes - Descriptive Keywords,EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WATER...,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD...


### Combine different types of evaluated data to compare between collections or observe trends over time

In [57]:
#Create a dropdown that allows multiple selection using command and click with the mouse 
#or arrow keys to select additional individual data tables, or 
#shift with the mouse to select each choice between the two clicked.

#Creates a list of all the paths to evaluated collection data.
EvaluatedMetadata=[]
for dirpath, dirnames, filenames in os.walk("../data"):
    for filename in [f for f in filenames if '.csv' in f]:
        EvaluatedMetadata.extend([os.path.join(dirpath, filename)])

#widget for selecting multiple collections        
w6=widgets.SelectMultiple(
    options=sorted(EvaluatedMetadata),
    value=['../data/Sample/Sample_Sample_Evaluated.csv.gz'],
    #rows=10,
    description='Evaluated Data',
    disabled=False
)
#Show the widget



#widget for naming the file
w5=widgets.Text(
    value='',
    placeholder='Your file name (no spaces)',
    description='Name your File:',
    disabled=False
)

display(w5)
display(w6)

create variables from the widget choices and run functions to combine 

In [62]:
CollectionComparisons=w6.value
DataDestination=os.path.join('../data/Combined', str(w5.value)+'.csv')
metadataEvaluation.CombineXPathOccurrence(CollectionComparisons,DataDestination)

In [None]:
display(w5)
display(w6)

In [62]:
CollectionComparisons=w6.value
DataDestination=os.path.join('../data/Combined', str(w5.value)+'.csv')
metadataEvaluation.CombineXPathCounts(CollectionComparisons,DataDestination)

## Now that the data products are created and combined, we can look for a specific lab or across NCAR to discover which records don't contain an element, what content a certain element contains, and if a collection contains essential metadata using the following functions.

In [25]:
fredDF=XpathCounts(EvaluatedMetadataDF, Organization, Collection, Dialect)
df2=fredDF.filter(["/characterSet", "/characterSet/@codeList"])
df2

XPath,/characterSet,/characterSet/@codeList
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1
5,1,1
6,1,1
7,1,1
8,1,1
9,1,1


In [27]:
doesntHaveDF=fredDF.loc[fredDF['/metadataMaintenance/maintenanceAndUpdateFrequency'] == 0]
doesntHaveDF

XPath,Collection,Record,/characterSet,/characterSet/@codeList,/characterSet/@codeListValue,/contact,/contact/contactInfo,/contact/contactInfo/address/electronicMailAddress,/contact/individualName,/contact/individualName/@nilReason,...,/language/LanguageCode/@codeListValue,/metadataConstraints/otherConstraints,/metadataConstraints/otherConstraints/@nilReason,/metadataConstraints/useLimitation,/metadataMaintenance/maintenanceAndUpdateFrequency,/metadataMaintenance/maintenanceAndUpdateFrequency/@codeList,/metadataMaintenance/maintenanceAndUpdateFrequency/@codeListValue,/metadataStandardName,/metadataStandardVersion,/parentIdentifier/@nilReason
4,testNCAR_NCAR_ISO,20150320160359_0%5B008(p07_p10)0_00050_16%5D__...,1,1,1,2,1,2,4,1,...,1,1,1,1,0,0,0,2,2,1


In [32]:
content=EvaluatedMetadataDF.loc[EvaluatedMetadataDF['XPath']=='/metadataMaintenance/maintenanceAndUpdateFrequency']
content

Unnamed: 0,Concept,Content,Record,Collection,XPath
211,Unknown,asNeeded,20150305171408_0%5B008(p07_p10)0_00050_16%5D__...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
403,Unknown,asNeeded,20150312190346_0%5B008(p07_p10)0_00050_16%5D__...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
605,Unknown,asNeeded,20150320113229_02___english.xml,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
825,Unknown,asNeeded,20150320155624_0%5B008(p07_p10)0_00050_16%5D__...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
1222,Unknown,asNeeded,bl_nle_1uuu_20150320161300_0_ocn19946898___eng...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
1422,Unknown,asNeeded,ch_pulea_1966_20150320154109_0_ocn122983602___...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
1612,Unknown,asNeeded,dcu_mxf_1956_20150320114638_0_ocm34124669___en...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
1818,Unknown,asNeeded,dcu_udi_1958_20150320113628_0_ocm01332775___en...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
2020,Unknown,asNeeded,ke_dlc_19uu_20150320112153_0_ocm01791199___eng...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...
2201,Unknown,asNeeded,ncu_dlc_19uu_20150320122128_0_ocm02240440___en...,testNCAR_NCAR_ISO,/metadataMaintenance/maintenanceAndUpdateFrequ...


In [36]:
ConceptVerticals=EvaluatedMetadataDF.Concept.unique()
Verticals=ConceptVerticals.tolist()

def ConceptVerticalTable(Concept):
    global VerticalTable
    VerticalTable = EvaluatedMetadataDF[EvaluatedMetadataDF.Concept == Concept]
    return VerticalTable
interact(ConceptVerticalTable, Concept=Verticals) 

<function __main__.ConceptVerticalTable>

In [51]:
from bokeh.io import show, output_file
from bokeh.plotting import figure
VerticalTable.groupby('Content').size()
data = VerticalTable.Content.str.replace(':','.')

p = Bar(data, 'Content', title="Vertical Value Occurance Count", legend=False)

show(p)

NameError: name 'plot' is not defined

In [45]:
output_file("bars.html")


p = figure(y_range=VerticalTable, plot_height=250, title="Content Consistency")

p.hbar(y=VerticalTable, top=[5, 3, 4, 2, 4, 6], width=0.9)

p.xgrid.grid_line_color = None
p.y_range.start = 0

show(p)

TypeError: Could not compare [None] with block values

In [38]:
XPathVerticals=EvaluatedMetadataDF.XPath.unique()
XPathVerticals=XPathVerticals.tolist()
XPathVerticals
def XPathVerticalTable(XPath):
    global XPathVerticalTable
    XPathVerticalTable = EvaluatedMetadataDF[EvaluatedMetadataDF.XPath == XPath]
    return XPathVerticalTable
interact(XPathVerticalTable, XPath=XPathVerticals) 


<function __main__.XPathVerticalTable>