# Part 5 - Field-Specific Helper Functions

In [1]:
from mdf_forge.forge import Forge

In [2]:
# One Forge client is shared by every cell below: the match_* helpers are called on it,
# and search() is then called on the same object to run the query.
mdf = Forge()

## Field-Specific Query Builders

### match_source_names
`match_source_names()` matches values against the `"mdf.source_name"` field. It is equivalent to chaining `match_field("mdf.source_name", value)` for each value.

In [3]:
# The value displayed for this cell is the Forge object itself (see the repr below), not
# search results; the next cell calls search() to actually run the query.
mdf.match_source_names("oqmd")

<mdf_forge.forge.Forge at 0x7fca4827ae80>

In [4]:
# limit=10 caps how many results are fetched; only the first record is displayed.
res = mdf.search(limit=10)
res[0]

{'crystal_structure': {'number_of_atoms': 4,
  'space_group_number': 225,
  'volume': 93.2374},
 'dft': {'converged': True,
  'cutoff_energy': 520.0,
  'exchange_correlation_functional': 'PBE'},
 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',
   'filename': '332513.json',
   'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/332513.json',
   'length': 10403,
   'mime_type': 'text/plain',
   'sha512': 'a4732793bff687ea39f43f741d6e3f380bb3608d38114420569282005efaee92e21799e6d6c3a4c63d7cf8b42d6388a81bc992d2a71709fd4d4f05cb3e6bb077',
   'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v13/332513.json'}],
 'material': {'composition': 'Mn1Pa1Pm2', 'elements': ['Mn', 'Pa', 'Pm']},
 'mdf': {'ingest_date': '2018-11-09T19:44:43.687681Z',
  'mdf_id': '5be5e3af2ef388650efd6705',
  'parent_id': '5be5e3ab2ef388650efd6704',
  'resource_type': 'record',
  'scroll_id': 1,
  's

### match_elements
`match_elements()` matches values against the `"material.elements"` field. It is equivalent to chaining `match_field("material.elements", value)` for each value.

In [5]:
# Several values are passed as a list.
# NOTE(review): presumably every listed element must be present (the record shown below
# contains both Al and Cu) — confirm the default match_all behaviour.
mdf.match_elements(["Al", "Cu"])

<mdf_forge.forge.Forge at 0x7fca4827ae80>

In [6]:
# The earlier "oqmd" source filter does not appear to carry over: the first hit below comes
# from cip_v1, which suggests search() clears the pending query once it has run — TODO confirm.
res = mdf.search(limit=10)
res[0]

{'cip': {'bv': '95.5',
  'energy': '-3.48',
  'forcefield': 'AlCu.eam.alloy',
  'gv': '14.3',
  'mpid': 'mp-998',
  'totenergy': '-333.69096'},
 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',
   'filename': 'classical_interatomic_potentials.json',
   'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',
   'length': 1841203,
   'mime_type': 'text/plain',
   'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',
   'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],
 'material': {'composition': 'Al64 Cu32', 'elements': ['Al', 'Cu']},
 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',
  'mdf_id': '5bd747d32ef3880b0f2142a6',
  'parent_id': '5bd747cd2ef3880b0f2135d1',
  'resource_type': 'record',
  '

### match_resource_types
`match_resource_types()` matches values against the `"mdf.resource_type"` field. It is equivalent to chaining `match_field("mdf.resource_type", value)` for each value.

In [7]:
# A single value is passed here as a bare string rather than a list.
mdf.match_resource_types("record")

<mdf_forge.forge.Forge at 0x7fca4827ae80>

In [8]:
# `res` is reused by the match_ids example further down, which reads res[1] to obtain an ID.
res = mdf.search(limit=10)
res[0]

{'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',
   'filename': 'nist_xps_27469.json',
   'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27469.json',
   'length': 1196,
   'mime_type': 'text/plain',
   'sha512': 'da4cac24fce125d061d6726a43439024dcb6be4ddf6664a93ff30c36755d0383ad47da76a4b9002dc754a3b2784c664a21432f292021a6c19c7d71f08940df3e',
   'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27469.json'}],
 'material': {'composition': 'InP', 'elements': ['In', 'P']},
 'mdf': {'ingest_date': '2018-11-06T16:57:59.847843Z',
  'mdf_id': '5be1c8512ef3883312755ed3',
  'parent_id': '5be1c8172ef388331274efdf',
  'resource_type': 'record',
  'scroll_id': 28404,
  'source_id': 'nist_xps_db_v1',
  'source_name': 'nist_xps_db',
  'version': 1},
 'nist_xps_db': {'binding_energy_ev': '186.3',
  'energy_uncertainty_ev': '',
  'notes': ''

### match_ids
`match_ids()` matches values against the `"mdf.mdf_id"` field. It is equivalent to chaining `match_field("mdf.mdf_id", value)` for each value.

In [9]:
# Take the ID of the second record from the previous search, so the example uses an ID
# that is known to exist in the index.
an_id = res[1]["mdf"]["mdf_id"]
mdf.match_ids(an_id)

<mdf_forge.forge.Forge at 0x7fca4827ae80>

In [10]:
# No limit is passed here; the whole result list is displayed rather than a single record.
mdf.search()

[{'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',
    'filename': 'nist_xps_27079.json',
    'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27079.json',
    'length': 1083,
    'mime_type': 'text/plain',
    'sha512': '2ffed70fbcc42c97119ced5905a51b651a86186586da5bc0b434f5904552d8482bfbdffead338aeab3e9db59bd3bcfbeb5a9e4e8d745736601c5d7beab84e0fe',
    'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27079.json'}],
  'material': {'composition': 'ZnI2', 'elements': ['I', 'Zn']},
  'mdf': {'ingest_date': '2018-11-06T16:57:59.847843Z',
   'mdf_id': '5be1c8512ef3883312755ed4',
   'parent_id': '5be1c8172ef388331274efdf',
   'resource_type': 'record',
   'scroll_id': 28405,
   'source_id': 'nist_xps_db_v1',
   'source_name': 'nist_xps_db',
   'version': 1},
  'nist_xps_db': {'binding_energy_ev': '1022.9',
   'energy_uncertainty_ev

### match_titles
`match_titles()` matches values against the `"dc.titles.title"` field. It is equivalent to chaining `match_field("dc.titles.title", value)` for each value.

Remember, values with special characters (like spaces) need to be wrapped in double quotes.

In [11]:
# The outer single quotes delimit the Python string; the inner double quotes are part of the
# value itself, as required for values that contain spaces.
mdf.match_titles('"The Open Quantum Materials Database"')

<mdf_forge.forge.Forge at 0x7fca4827ae80>

In [12]:
# Run the title query; the full result list is displayed.
mdf.search()

[{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/',
   'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/oqmd_v13/'},
  'dc': {'creators': [{'affiliations': ['Northwestern University'],
     'creatorName': 'Wolverton, Chris',
     'familyName': 'Wolverton',
     'givenName': 'Chris'},
    {'affiliations': ['Northwestern University'],
     'creatorName': 'Kirklin, Scott',
     'familyName': 'Kirklin',
     'givenName': 'Scott'},
    {'affiliations': ['Northwestern University'],
     'creatorName': 'Hegde, Vinay',
     'familyName': 'Hegde',
     'givenName': 'Vinay'},
    {'affiliations': ['Northwestern University'],
     'creatorName': 'Ward, Logan',
     'familyName': 'Ward',
     'givenName': 'Logan'}],
   'descriptions': [{'description': 'The OQMD is a database of DFT-calculated thermodynamic and structural properties.',
     'descriptionType'

### match_years
`match_years()` matches values against the `"dc.publicationYear"` field.

In [13]:
# Years are given here both as a string ("2015") and as an int (2010) in the same list.
mdf.match_years(["2015", 2010])

<mdf_forge.forge.Forge at 0x7fca4827ae80>

In [14]:
# Fetch up to 10 matches and show the first; its 'publicationYear' is one of the requested years.
res = mdf.search(limit=10)
res[0]

{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/mdr_item_571_v1/',
  'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/mdr_item_571_v1/'},
 'dc': {'alternateIdentifiers': [{'alternateIdentifier': 'http://hdl.handle.net/11256/272',
    'alternateIdentifierType': 'Handle'},
   {'alternateIdentifier': '571',
    'alternateIdentifierType': 'NIST DSpace ID'}],
  'creators': [{'affiliations': ['University of Maryland'],
    'creatorName': 'Joost, William J.',
    'familyName': 'Joost',
    'givenName': 'William J.'},
   {'affiliations': ['University of Maryland'],
    'creatorName': 'Ankem, Sreeramamurthy',
    'familyName': 'Ankem',
    'givenName': 'Sreeramamurthy'},
   {'affiliations': ['University of Maryland'],
    'creatorName': 'Kuklja, Maija M.',
    'familyName': 'Kuklja',
    'givenName': 'Maija M.'}],
  'publicationYear': '2015',
  'publisher': 'NIST

You can also specify a range of years.

In [15]:
# Range form: the bounds are given with start/stop instead of a list of years.
# NOTE(review): inclusive=True presumably includes both endpoints — the record shown below
# is from 2016, the stop year. Confirm against the Forge documentation.
mdf.match_years(start=2014, stop=2016, inclusive=True)

<mdf_forge.forge.Forge at 0x7fca4827ae80>

In [16]:
# Fetch up to 10 matches for the year range and show the first.
res = mdf.search(limit=10)
res[0]

{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/khazana_vasp_v4/',
  'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/khazana_vasp_v4/'},
 'dc': {'contributors': [{'affiliations': ['University of Connecticut'],
    'contributorName': 'Ramprasad, Rampi',
    'contributorType': 'ContactPerson',
    'familyName': 'Ramprasad',
    'givenName': 'Rampi'}],
  'creators': [{'affiliations': ['University of Connecticut'],
    'creatorName': 'Ramprasad, Rampi'}],
  'dates': [{'date': '2017-08-04T19:25:05.718973Z', 'dateType': 'Collected'}],
  'descriptions': [{'description': 'A computational materials knowledgebase',
    'descriptionType': 'Other'}],
  'publicationYear': '2016',
  'publisher': 'MDF (placeholder)',
  'resourceType': {'resourceType': 'JSON', 'resourceTypeGeneral': 'Dataset'},
  'subjects': [{'subject': 'DFT'}, {'subject': 'VASP'}],
  'titles': [{'tit

### match_repositories
`match_repositories()` matches values against the `"mdf.repositories"` field. It is equivalent to chaining `match_field("mdf.repositories", value)` for each value.

In [17]:
# NOTE(review): match_all=False presumably accepts records from either repository rather
# than requiring both — confirm against the Forge documentation.
mdf.match_repositories(["NIST", "DOE"], match_all=False)

In [18]:
# Fetch up to 10 matches for the repository query and show the first.
res = mdf.search(limit=10)
res[0]

{'files': [{'data_type': 'TIFF image data, big-endian, direntries=13, height=0, bps=8, PhotometricIntepretation=BlackIsZero, description=ImageJ=1.50i, width=0',
   'filename': 'eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif',
   'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif',
   'length': 213607,
   'mime_type': 'image/tiff',
   'sha512': '8460d1b7f8543bd909245d0e3456e5e58207245e619172e8b0631f7d7da7a00e6af94d0f9fcaf47be8e09984ef96826e098172c2ac968fb25c237abd223f62f1',
   'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif'}],
 'image': {'height': 393, 'megapixels': 0.213399, 'width': 543},
 'mdf':

## Premade Searches

### search_by_elements
`search_by_elements()` executes a search for the provided elements in the provided sources and returns the results. Note that calling `search()` is not required, as this helper function already performs the search.

In [19]:
# One call builds and runs the query (no separate search() call is needed);
# source_names restricts the results to the "oqmd" source.
res = mdf.search_by_elements(["Al", "Cu"], source_names=["oqmd"])
res[0]

{'crystal_structure': {'cross_reference': {'icsd': 150823},
  'number_of_atoms': 4,
  'space_group_number': 225,
  'volume': 49.3454},
 'dft': {'converged': True,
  'cutoff_energy': 520.0,
  'exchange_correlation_functional': 'PBE'},
 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',
   'filename': '1815.json',
   'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/1815.json',
   'length': 11693,
   'mime_type': 'text/plain',
   'sha512': '3f26300e0c9d4ce4a53ac5169b3cb8720927263f34d3654e3134fe52e3c0069c41c2db2d38f26cffd28d48073e266914d9a1fd517c344e022b11d4dfe94876a8',
   'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v13/1815.json'}],
 'material': {'composition': 'Al1Cu3', 'elements': ['Al', 'Cu']},
 'mdf': {'ingest_date': '2018-11-09T19:44:43.687681Z',
  'mdf_id': '5be5e4122ef388650efdf50d',
  'parent_id': '5be5e3ab2ef388650efd6704',
  'resource_type': 'record'

### search_by_titles
`search_by_titles()` executes a search for the provided titles and returns the results. As with `search_by_elements()`, calling `search()` afterwards is not required.

In [20]:
# The title is wrapped in double quotes inside the Python string because it contains spaces.
res = mdf.search_by_titles(['"High-throughput Ab-initio Dilute Solute Diffusion Database"'])
res[0]

{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/',
  'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/'},
 'dc': {'contributors': [{'affiliations': ['University of Wisconsin-Madison'],
    'contributorName': 'Morgan, Dane',
    'contributorType': 'ContactPerson',
    'familyName': 'Morgan',
    'givenName': 'Dane'}],
  'creators': [{'affiliations': ['University of Wisconsin-Madison'],
    'creatorName': 'Morgan, Dane',
    'familyName': 'Morgan',
    'givenName': 'Dane'},
   {'affiliations': ['University of Wisconsin-Madison'],
    'creatorName': 'Mayeshiba, Tam',
    'familyName': 'Mayeshiba',
    'givenName': 'Tam'},
   {'affiliations': ['University of Wisconsin-Madison'],
    'creatorName': 'Henry, Wu',
    'familyName': 'Henry',
    'givenName': 'Wu'}],
  'dates': [{'date': '2017-08-07T16:

### aggregate_sources
`aggregate_sources()` fetches and returns all the records for a provided `"mdf.source_name"` value. Calling `search()` or `aggregate()` is not required, as this helper function does that for you. Please note that it is not possible to use the `limit` argument with this helper function, so you may get back a large number of results.

In [21]:
# No limit argument is available for this helper, so the result may be large;
# only the first record is displayed.
res = mdf.aggregate_sources("ge_nanoparticles")
res[0]

{'crystal_structure': {'number_of_atoms': 131.0,
  'space_group_number': 1,
  'stoichiometry': 'A60B71',
  'volume': 8000.0},
 'files': [{'data_type': 'ASCII text',
   'filename': 'ge.cell',
   'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ge_nanoparticles_v1/CASTEP_DFPT/Ge71H60/50GPa/ge.cell',
   'length': 5686,
   'mime_type': 'text/plain',
   'sha512': '979c4f3b9cc84424d174f5f3faa5bc9c977f526b7f29c52bc99f2e5c2eeb80e98accdecd37b905b84061310f3fd810c93b756e4aaa80f3bd6c30bdf3389b5a3f',
   'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/ge_nanoparticles_v1/CASTEP_DFPT/Ge71H60/50GPa/ge.cell'}],
 'material': {'composition': 'Ge71H60', 'elements': ['Ge', 'H']},
 'mdf': {'ingest_date': '2018-11-06T17:25:23.949096Z',
  'mdf_id': '5be1ce832ef388345774efe1',
  'parent_id': '5be1ce832ef388345774efdf',
  'resource_type': 'record',
  'scroll_id': 2,
  'source_id': 'ge_nanoparticles_v1',
  'source_name': 'ge_nanopartic