In [1]:
import json
import pandas as pd
import requests
import tqdm

from io import StringIO
from multiprocessing.pool import Pool

# Materials specific packages
from ase.io import read as ase_read

# MDF Utils
from mdf_forge import forge

# Authentication
Authentication is handled automatically. Just follow the prompt once and let Forge take care of the rest.


In [2]:
# Create the Forge client; the searches in the cells below all go through `mdf`.
# Per the authentication note in this notebook, expect a one-time login prompt
# the first time this runs.
mdf = forge.Forge()

# Perform Queries

### List all MDF datasets

In [3]:
# Restrict the search to dataset-level entries rather than individual records.
query = "mdf.resource_type:dataset"
# NOTE(review): `advanced=True` presumably makes Forge pass the query string
# through as written -- confirm against the mdf_forge documentation.
advanced, limit = True, 9999

res = mdf.search(query, limit=limit, advanced=advanced)

# One (source_name, title) pair per returned entry, taken from its 'mdf' block.
mdf_resources = [
    (meta['source_name'], meta['title'])
    for meta in (entry['mdf'] for entry in res)
]
df = pd.DataFrame(mdf_resources, columns=['source_name', 'title'])

print("Number of data resources: {n_datasets}".format(n_datasets=len(df.index)))
df.head(15)

Number of data resources: 41


Unnamed: 0,source_name,title
0,fretr_bayesian_restraint,Benchmark of the FRETR Bayesian restraint
1,core_mof,Computation-Ready Experimental Metal-Organic F...
2,jcap_xps_spectral_db,JCAP XPS Spectral Database
3,gw100,Benchmark of G0W0 on 100 Molecules
4,gw_soc81,Benchmark of G0W0 on 81 Molecules with Spin-Or...
5,nist_th_ar_lamp_spectrum,NIST Spectrum of Th-Ar Hollow Cathode Lamps
6,nist_heat_transmission,NIST Heat Transmission Properties of Insulatin...
7,cys_scanning_phoq,Cys-Scanning Disulfide Crosslinking and Bayesi...
8,sluschi,Solid and Liquid in Ultra Small Coexistence wi...
9,nanomine,NanoMine


### Finding a Specific Dataset

In [4]:
# Look up the dataset-level entry for a single, known source_name.
advanced = True
query = (
    "mdf.source_name:oxygen_interstitials_deformation"
    " AND mdf.resource_type:dataset"
)

res = mdf.search(query, advanced=advanced)

# Pretty-print only the first hit, with keys sorted so the dump is stable.
print(
    json.dumps(
        res[0],
        indent=4,
        sort_keys=True,
        separators=(',', ': '),
    )
)

{
    "mdf": {
        "author": [
            {
                "email": "william.joost@gmail.com",
                "family_name": "Joost",
                "full_name": "William Joost",
                "given_name": "William",
                "institution": "University of Maryland"
            },
            {
                "family_name": "Ankem",
                "full_name": "Sreeramamurthy Ankem",
                "given_name": "Sreeramamurthy",
                "institution": "University of Maryland"
            },
            {
                "family_name": "Kuklja",
                "full_name": "Maija Kuklja",
                "given_name": "Maija",
                "institution": "University of Maryland"
            }
        ],
        "citation": [
            "Interaction Between Oxygen Interstitials and Deformation Twins in alpha-Titanium, Acta Materialia v. 105 (2016), pp. 44 - 51 http://dx.doi.org/10.1016/j.actamat.2015.12.019"
        ],
        "collection": "Oxygen Inter

### Get Records for a Dataset

In [5]:
# Same source_name as the dataset lookup, but asking for record-level entries.
query = (
    "mdf.source_name:oxygen_interstitials_deformation"
    " AND mdf.resource_type:record"
)
advanced = info = True

# With info=True the search result is unpacked as (results, stats).
res, stats = mdf.search(query, info=info, advanced=advanced)
print("Total Matching Records: " + str(stats["total_query_matches"]))

# Show at most the first five records, keys sorted for a stable dump.
print(json.dumps(res[:5], indent=4, sort_keys=True, separators=(',', ': ')))

Total Matching Records: 15
[
    {
        "mdf": {
            "collection": "Oxygen Interstitials and Deformation Twins in alpha-Titanium",
            "composition": "OTi160",
            "elements": [
                "Ti",
                "O"
            ],
            "ingest_date": "2017-08-04T19:41:27.857340Z",
            "links": {
                "landing_page": "https://materialsdata.nist.gov/dspace/xmlui/handle/11256/272#1",
                "outcar": {
                    "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                    "http_host": "https://data.materialsdatafacility.org",
                    "path": "/collections/oxygen_interstitials_deformation/0L_VASP/ShortCell_0L_C1_DFT/OUTCAR"
                },
                "parent_id": "5984cde4f2c004384aa29e22"
            },
            "mdf_id": "5984cde7f2c004384aa29e23",
            "metadata_version": "0.3.2",
            "resource_type": "record",
            "scroll_id": 1,
            "sourc

### Get records for a given dataset with a specific element in the composition

In [6]:
# Dataset to search and the element that must appear in each record.
source_name = "core_mof"
element = "Cl"

# Build the query from adjacent string literals inside parentheses. A backslash
# continuation placed inside the string literal would make the next line's
# leading indentation part of the query text, so the pieces are kept as separate
# literals that Python joins with exactly one space after each "AND".
query = (
    "mdf.source_name:{source_name} AND "
    "mdf.resource_type:record AND mdf.elements:{element}"
).format(source_name=source_name, element=element)
advanced = True
limit = 20
info = True

# With info=True the search result is unpacked as (results, stats); the total
# is read from stats, and only the first five results are printed.
res, stats = mdf.search(query, advanced=advanced, limit=limit, info=info)
print("Total Matching Records: " + str(stats['total_query_matches']))
print(json.dumps(res[0:5], sort_keys=True, indent=4, separators=(',', ': ')))

Total Matching Records: 116
[
    {
        "mdf": {
            "collection": "CoRE-MOF",
            "composition": "C24Cl8O16Th2",
            "elements": [
                "O",
                "C",
                "Th",
                "Cl"
            ],
            "ingest_date": "2017-08-04T19:15:03.584690Z",
            "links": {
                "cif": {
                    "http_host": "https://raw.githubusercontent.com",
                    "path": "/gregchung/gregchung.github.io/master/CoRE-MOFs/core-mof-v1.0-ddec/MOBBOU_clean.cif"
                },
                "landing_page": "https://github.com/gregchung/gregchung.github.io/blob/master/CoRE-MOFs/core-mof-v1.0-ddec/MOBBOU_clean.cif",
                "parent_id": "5984c750f2c00437afe55da9",
                "publication": [
                    "10.1039/b109296k"
                ]
            },
            "mdf_id": "5984c7b7f2c00437afe563be",
            "metadata_version": "0.3.2",
            "resource_type": "record