In [1]:
import qmpy_rester as qr

In [2]:
def structure_inspection(structure):
    """
    Need to know the coordination in a given oxide, the 2.5 Å cutoff could be tuned per element
    """
    num_o_sites = 0 
    m_coordinations = []
    for site in structure.sites:
        if str(site.specie)=='O':
            num_o_sites += 1
        else:
            # check nearest neighbours for coordination
            nn_info = structure.get_neighbors(site, 2.5)
            m_coord = 0
            for nn in nn_info:
                #print(str(nn.specie))
                if str(nn.specie)=="O":
                    m_coord+=1
                #assert str(nn.specie)=='O'
            m_coordinations.append(m_coord)
    o_percent = num_o_sites/len(structure.sites)
    ox_state = round((o_percent*2/(1-o_percent)), 3)
    return m_coordinations, ox_state

In [3]:
# Import libraries

import requests
from copy import deepcopy
import os
import shutil
import json

In [4]:
# Set the base URL for OPTIMADE REST queries

rest_base = "http://oqmd.org/optimade/structures?"

In [8]:
elements2consider = ['Ga', 'Ge', 'Se',
    #'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 
    #'Cu', 'Nb', 'Mo', 'Tc', 'Ru', 
    #'Ru', 'Rh', 'Pd', 'Sn', 'Sb', 'Te',
    #'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Tl', 'Ti', 'Nb', 'Bi', 'Pb',  'Si'
                     ]
#elements2consider = ['Te','Ta', 'Se', 'Ti']

# Create the query URL with filter, response_fields and paging requirements

# The following query filters for data of ternary non-metallic oxides
# Crystal structure parameters and Band gap values are returned in response_fields
# Crystal structures will be used to generate representational vectors (input features) for ML
# Bandgap values will be used as targets for ML
oqmd_optimade_queries = []
for ele in elements2consider:

    # filter_   = 'stability<=0 AND elements HAS "O" AND elements HAS "Sn" AND nelements<3'
    # response_ = 'id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap'
    filter_   = 'elements HAS "O" AND elements HAS "{}" AND nelements<3 AND nelements>1'.format(ele)
    response_ = 'id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability'

    page_     = ["page_offset=0", 
                 "page_limit=200"]

    filter_   = 'filter=' + filter_
    response_ = 'response_fields=' + response_
    oqmd_optimade_queries.append(rest_base + "&".join([filter_, response_]+page_))
    print("Created Query: \n\n{}".format(oqmd_optimade_queries[-1]))

Created Query: 

http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "Ga" AND nelements<3 AND nelements>1&response_fields=id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability&page_offset=0&page_limit=200
Created Query: 

http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "Ge" AND nelements<3 AND nelements>1&response_fields=id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability&page_offset=0&page_limit=200
Created Query: 

http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "Se" AND nelements<3 AND nelements>1&response_fields=id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability&page_offset=0&page_limit=200


In [9]:
response = requests.get(oqmd_optimade_queries[0])
if response.status_code == 200:
    print("Success!")
    #print(response.json())
else:
    print("Query failed. Status: {}".format(response.status_code))
    print("Error Message: {}".format(response.text))

Success!


In [10]:
# We need more than 200 datapoints for machine learning - if more data is available

# As the first step, here's the same script from the cell above, but kept inside a function 

def query_oqmd_optimade(query):
    print("\nQuerying: {}".format(query))
    print(query)
    response = requests.get(query)
    if response.status_code == 200:
        print("Success!")
        return response.json()
    else:
        print("Query failed. Status: {}".format(response.status_code))
        print("Error Message: {}".format(response.text))
        return 

    
    

# Next, we query for 1000 materials in total using 5 sequential API queries - each paginated to
# retrieve 200 materials


load_data_from_saved = False  
# This is to avoid querying OQMD repeatedly for the same data, if .
# Because the data I downloaded is already available as a JSON file in this Git repo.

# But if you'd like to try out querying OQMD, set "load_data_from_saved" as "False"
# Querying OQMD for this particular data would take about 5-10 minutes to complete

datasets = []
for idx, oqmd_optimade_query in enumerate(oqmd_optimade_queries):
    dataset = []
    query = oqmd_optimade_query
    #oqmd_optimade_query
    for i in range(5):
        jsondata = query_oqmd_optimade(oqmd_optimade_query)
        if jsondata is None:
            break
        else:
            # Get the link to the next page and query it in next loop iteration
            query = deepcopy(jsondata['links']['next'])
            dataset.append(deepcopy(jsondata))
    
    filename = 'oqmd_data/json_data/{}_data.json'.format(elements2consider[idx])
    with open(filename, 'w') as fout:
        json.dump(dataset, fout)
    datasets.append(dataset)


Querying: http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "Ga" AND nelements<3 AND nelements>1&response_fields=id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability&page_offset=0&page_limit=200
http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "Ga" AND nelements<3 AND nelements>1&response_fields=id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability&page_offset=0&page_limit=200
Success!

Querying: http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "Ga" AND nelements<3 AND nelements>1&response_fields=id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability&page_offset=0&page_limit=200
http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "

Success!

Querying: http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "Se" AND nelements<3 AND nelements>1&response_fields=id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability&page_offset=0&page_limit=200
http://oqmd.org/optimade/structures?filter=elements HAS "O" AND elements HAS "Se" AND nelements<3 AND nelements>1&response_fields=id,_oqmd_entry_id,lattice_vectors,cartesian_site_positions,species_at_sites,_oqmd_band_gap,_oqmd_spacegroup,_oqmd_delta_e,_oqmd_stability&page_offset=0&page_limit=200
Success!


In [11]:
datasets[-1]

[{'links': {'next': None,
   'previous': None,
   'base_url': {'href': 'https://oqmd.org/optimade',
    'meta': {'_oqmd_version': '1.0'}}},
  'resource': {},
  'data': [{'id': 4469247,
    'type': 'structures',
    'attributes': {'lattice_vectors': [[2.898481, 0.0, 0.0],
      [0.0, 2.898481, 0.0],
      [0.0, 0.0, 2.898481]],
     'species_at_sites': ['O', 'Se'],
     'cartesian_site_positions': [[0.0, 0.0, 0.0],
      [1.44924, 1.44924, 1.44924]],
     '_oqmd_entry_id': 305924,
     '_oqmd_band_gap': 0.0,
     '_oqmd_delta_e': 0.58,
     '_oqmd_stability': 1.295,
     '_oqmd_spacegroup': 'Pm-3m'}},
   {'id': 4501392,
    'type': 'structures',
    'attributes': {'lattice_vectors': [[1.491937, 0.861371, 3.314609],
      [-1.491937, 0.861371, 3.314609],
      [0.0, -1.722742, 3.314609]],
     'species_at_sites': ['O', 'Se'],
     'cartesian_site_positions': [[0.0, 0.0, 0.0], [0.0, 0.0, 4.97191]],
     '_oqmd_entry_id': 327008,
     '_oqmd_band_gap': 0.0,
     '_oqmd_delta_e': 0.3471,
  

In [12]:
# Here's a function to convert OPTIMADE's structure data to POSCAR. 

# Make sure that 'lattice_vectors', 'species_at_sites', and 'cartesian_site_positions' are
# included in the response_fields of query URL

def get_poscar_from_optimade_structure(structure):
    if '_oqmd_entry_id' in structure['attributes'].keys():
        poscar =  ["REST API StructureID {}, OQMD Entry ID {}".format(
            structure['id'], structure['attributes']['_oqmd_entry_id']
        )]
        filename = "ID-{}_OQMD-EnID-{}.poscar".format(structure['id'],structure['attributes']['_oqmd_entry_id'])
    else:
        poscar =  ["REST API StructureID {}".format(structure['id'])]
        filename = "ID-{}.poscar".format(structure['id'])
        
    poscar.append("1.0")
    
    poscar += [" ".join([str(jtem) for jtem in item]) 
               for item in structure['attributes']['lattice_vectors']
              ]
    
    elems  = []
    counts = []
    for item in structure['attributes']['species_at_sites']:
        if item in elems:
            assert elems.index(item) == len(elems)-1
            counts[-1] += 1
        else:
            elems.append(deepcopy(item))
            counts.append(1)
    poscar.append(" ".join(elems))
    poscar.append(" ".join([str(item) for item in counts]))
    
    poscar.append("Cartesian")
    
    poscar += [" ".join([str(jtem) for jtem in item]) 
               for item in structure['attributes']['cartesian_site_positions']
              ]
    poscar = "\n".join(poscar)
    return (poscar, filename)

In [13]:
datasets[0][0]['data']

[{'id': 4379688,
  'type': 'structures',
  'attributes': {'lattice_vectors': [[2.805856, 0.0, 0.0],
    [0.0, 2.805856, 0.0],
    [0.0, 0.0, 2.805856]],
   'species_at_sites': ['Ga', 'O'],
   'cartesian_site_positions': [[0.0, 0.0, 0.0], [1.40293, 1.40293, 1.40293]],
   '_oqmd_entry_id': 305101,
   '_oqmd_band_gap': 0.0,
   '_oqmd_delta_e': -0.6004,
   '_oqmd_stability': 1.186,
   '_oqmd_spacegroup': 'Pm-3m'}},
 {'id': 4511184,
  'type': 'structures',
  'attributes': {'lattice_vectors': [[1.978074, 1.978074, 0.0],
    [0.0, 3.956147, 0.0],
    [0.0, 0.0, 2.809984]],
   'species_at_sites': ['Ga', 'O'],
   'cartesian_site_positions': [[0.0, 0.0, 0.0], [0.0, 1.97807, 1.40499]],
   '_oqmd_entry_id': 336643,
   '_oqmd_band_gap': 0.0,
   '_oqmd_delta_e': -0.3925,
   '_oqmd_stability': 1.394,
   '_oqmd_spacegroup': 'P4/mmm'}},
 {'id': 5492068,
  'type': 'structures',
  'attributes': {'lattice_vectors': [[0.0, 2.277563, 2.277563],
    [2.277563, 0.0, 2.277563],
    [2.277563, 2.277563, 0.0]],


In [14]:
for idx, ele in enumerate(elements2consider):
    #print(ele)
    if ele=='Si':
        continue
    # Call the OPTIMADE structure -> POSCAR conversion function 
    # and save all structures in directory "./input_poscars"

    # Also save the bandgap values in a file "target_properties.csv"

    poscar_dir = "oqmd_data/input_poscars/input_poscars_{}o2".format(ele)
    if os.path.exists(poscar_dir):
        shutil.rmtree(poscar_dir)
    os.mkdir(poscar_dir)

    properties = []

    for dt in datasets[idx]:
        for st in dt['data']:
            print(st)
            poscar, filename = get_poscar_from_optimade_structure(deepcopy(st))
            target_value = deepcopy(st['attributes']['_oqmd_delta_e'])
            spg = deepcopy(st['attributes']['_oqmd_spacegroup'])
            properties.append(",".join([filename,str(target_value), str(spg)]))
            with open(os.path.join(poscar_dir,filename),"w") as fout:
                fout.write(poscar)
    with open("oqmd_data/target_props/target_properties_{}o2.csv".format(ele),"w") as fout:
        fout.write("filename, _oqmd_delta_e, _oqmd_spacegroup \n")
        fout.write("\n".join(properties))

{'id': 4379688, 'type': 'structures', 'attributes': {'lattice_vectors': [[2.805856, 0.0, 0.0], [0.0, 2.805856, 0.0], [0.0, 0.0, 2.805856]], 'species_at_sites': ['Ga', 'O'], 'cartesian_site_positions': [[0.0, 0.0, 0.0], [1.40293, 1.40293, 1.40293]], '_oqmd_entry_id': 305101, '_oqmd_band_gap': 0.0, '_oqmd_delta_e': -0.6004, '_oqmd_stability': 1.186, '_oqmd_spacegroup': 'Pm-3m'}}
{'id': 4511184, 'type': 'structures', 'attributes': {'lattice_vectors': [[1.978074, 1.978074, 0.0], [0.0, 3.956147, 0.0], [0.0, 0.0, 2.809984]], 'species_at_sites': ['Ga', 'O'], 'cartesian_site_positions': [[0.0, 0.0, 0.0], [0.0, 1.97807, 1.40499]], '_oqmd_entry_id': 336643, '_oqmd_band_gap': 0.0, '_oqmd_delta_e': -0.3925, '_oqmd_stability': 1.394, '_oqmd_spacegroup': 'P4/mmm'}}
{'id': 5492068, 'type': 'structures', 'attributes': {'lattice_vectors': [[0.0, 2.277563, 2.277563], [2.277563, 0.0, 2.277563], [2.277563, 2.277563, 0.0]], 'species_at_sites': ['Ga', 'O'], 'cartesian_site_positions': [[0.0, 0.0, 0.0], [2.2

{'id': 4113867, 'type': 'structures', 'attributes': {'lattice_vectors': [[-1.816313, 1.816313, 4.833074], [1.816313, -1.816313, 4.833074], [1.816313, 1.816313, -4.833074]], 'species_at_sites': ['Ge', 'Ge', 'Ge', 'O'], 'cartesian_site_positions': [[0.0, 1.81631, 2.41654], [1.81631, 0.0, 2.41654], [0.0, 0.0, 4.83308], [0.0, 0.0, 0.0]], '_oqmd_entry_id': 299872, '_oqmd_band_gap': 0.0, '_oqmd_delta_e': 0.1492, '_oqmd_stability': 0.858, '_oqmd_spacegroup': 'I4/mmm'}}
{'id': 4132560, 'type': 'structures', 'attributes': {'lattice_vectors': [[0.0, 3.107368, 3.107368], [3.107368, 0.0, 3.107368], [3.107368, 3.107368, 0.0]], 'species_at_sites': ['Ge', 'Ge', 'Ge', 'O'], 'cartesian_site_positions': [[0.0, 0.0, 0.0], [3.10737, 3.10737, 3.10737], [1.55368, 1.55368, 1.55368], [4.66105, 4.66105, 4.66105]], '_oqmd_entry_id': 310414, '_oqmd_band_gap': 0.0, '_oqmd_delta_e': 0.1339, '_oqmd_stability': 0.843, '_oqmd_spacegroup': 'Fm-3m'}}
{'id': 4149540, 'type': 'structures', 'attributes': {'lattice_vectors

In [15]:
elements2consider

['Ga', 'Ge', 'Se']