In [1]:
import json
import pandas as pd
import requests
import tqdm

from io import StringIO, BytesIO
from multiprocessing.pool import Pool

# Materials specific packages
from ase.io import read as ase_read
from ase.visualize import view

# MDF imports
from mdf_forge import forge

# Instantiate Base Class

In [2]:
mdf = forge.Forge()

### Perform a raw search

In [3]:
source_name = "cytochrome_qsar"
element = "Cl"

q = "mdf.source_name:{source_name} AND \
          mdf.resource_type:record AND mdf.elements:{element}".format(source_name = source_name, 
                                                                element=element)
res = mdf.search(q, advanced=True)

### Perform the same query with helper function

In [6]:
sources = ["cytochrome_qsar"]
elements = ["Cl"]

res, info = mdf.search_by_elements(elements=elements, sources=sources, info=True)
print(info)
print("Total Matching Records: {n_records}".format(n_records = len(res)))
print(json.dumps(res[:2], sort_keys=True,indent=4, separators=(',', ': ')))

{'total_query_matches': 335303, 'query': {'q': '(mdf.elements:Cl OR mdf.source_name:cytochrome_qsar)', 'advanced': True, 'limit': 10000}}
Total Matching Records: 10000
[
    {
        "mdf": {
            "collection": "OQMD",
            "composition": "Cl1",
            "elements": [
                "Cl"
            ],
            "ingest_date": "2017-08-04T16:28:51.675886Z",
            "links": {
                "landing_page": "http://oqmd.org/analysis/calculation/1289758",
                "metadata": {
                    "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                    "http_host": "https://data.materialsdatafacility.org",
                    "path": "/collections/oqmd/data/home/oqmd/libraries/icsd/22406/relaxation/metadata.json"
                },
                "outcar": {
                    "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                    "http_host": "https://data.materialsdatafacility.org",
                    "p

## Retrieve SDF file contents and read into a pandas dataframe

In [5]:
def get_sdf(r):
    mdf_base = r['mdf']
    URL = mdf_base['links']['sdf']['http_host']+mdf_base['links']['sdf']['path']
    response = requests.get(URL)
    r_data = ase_read(StringIO(response.text), format="sdf")
    return r_data

tasks = res
n_workers = 5

mp = Pool(n_workers)
mdf_data = mp.map(get_sdf, tasks)
mp.close()
mp.join()

formulae = [d.get_chemical_formula() for d in mdf_data]
df_cyto = pd.DataFrame({"formula":formulae, "data":mdf_data})


KeyError: 'sdf'

## Visualize a retrieved molecule

In [None]:
print(mdf_data[0].get_chemical_formula())
view(mdf_data[0], viewer='x3d')

## Get records via Globus for a larger, mixed source, result set

In [None]:
elements = ["Al","Ti"]
sources = ["trinkle_elastic_fe_bcc", "dilute_mg_alloys_dft"]
my_ep = "c8ee7e5c-6d04-11e5-ba46-22000b92c6ec"
#my_path = "/Users/ben/Desktop/blaiszik-macbookpro/Ti" # This path should be writeable by Globus
my_path = "/Users/jonathongaff/Downloads/tmpstuff"

mdf = Forge()
res = mdf.search_by_elements(elements=elements, sources=sources, limit=10)
mdf.globus_download(res, dest=my_path, 
               local_ep=my_ep, preserve_dir=True)