# Querying the Materials Project (MP) for structure files for the Fabini data

In [1]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from jarvis.core.atoms import pmg_to_atoms
from jarvis.db.jsonutils import dumpjson
from jarvis.db.jsonutils import loadjson
from mp_api.client import MPRester
from pymatgen.io.jarvis import JarvisAtomsAdaptor as JAA

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the Fabini data file; its top-level keys are used below as the MP ids to query.
# NOTE(review): the name `abs` shadows Python's builtin abs() for the rest of the notebook.
abs = loadjson('fabini_abs.json')

In [3]:
# Iterating a dict yields its keys, so this collects every key of the loaded
# data; these are later passed to the MP search as `material_ids`.
ids = list(abs)

In [4]:
# Query the Materials Project summary endpoint for every id in `ids`.
# The API key is read from the MP_API_KEY environment variable rather than
# being hard-coded: a key committed in a notebook is leaked to everyone who can
# read the file (the previously embedded key should be revoked and regenerated).
# A KeyError here means MP_API_KEY has not been set.
with MPRester(os.environ["MP_API_KEY"]) as mpr:
    docs = mpr.materials.summary.search(
        material_ids=ids,
        fields=["material_id", "band_gap", "structure", "energy_above_hull"],
    )

Retrieving SummaryDoc documents: 100%|███████████████████████████████████████████| 718/718 [00:00<00:00, 8779913.33it/s]


In [5]:
# Turn each returned summary document into a plain record:
#   'jid'        -> the MP material id as a string
#   'atoms'      -> the structure converted to JARVIS atoms, then to_dict()
#   'MP bandgap' -> the band gap reported by MP
all_mats = [
    {
        'jid': str(doc.material_id),
        'atoms': JAA.get_atoms(doc.structure).to_dict(),
        'MP bandgap': doc.band_gap,
    }
    for doc in docs
]

In [17]:
# Set of material ids that the query actually returned, for membership tests.
mp_mats = {record['jid'] for record in all_mats}

In [20]:
# Requested ids that have no matching record in the query results,
# kept in the same order as they appear in `ids`.
missing = [
    requested_id
    for requested_id in ids
    if requested_id not in mp_mats
]

In [21]:
# Display the requested ids that the first MP query did not return.
missing

['mp-645421',
 'mp-867194',
 'mp-867203',
 'mp-505677',
 'mp-867334',
 'mp-504878',
 'mp-867335',
 'mp-567326',
 'mp-867359']

In [23]:
# Load the label table; its `mpid` and `formula` columns are read in the next cell.
df = pd.read_csv('fabini_labels.csv')

In [26]:
# Pull the id and formula columns out as plain lists (row order preserved),
# so the two lists line up element by element.
mpids, forms = list(df['mpid']), list(df['formula'])

In [27]:
# Map each mpid to its formula (for a repeated mpid the last row wins, as before).
# Built with dict(zip(...)) instead of a `for id, form in ...` loop so that no
# notebook-level variable named `id` is left behind shadowing the builtin id().
id_to_form = dict(zip(mpids, forms))

In [28]:
# Print the formula of every id the MP query did not return, one per line,
# so a replacement entry can be looked up for each.
for unresolved_id in missing:
    formula = id_to_form[unresolved_id]
    print(formula)

LaP5
Sr(MgAs)2
Sr2CdAs2
Hg2MoO4
KYGeS4
VBiO4
K2SbAu
Bi2MoO6
AlTlSe2


In [40]:
# Hand-entered replacement MP ids, keyed by the formula of each id that the
# first query did not return (same order as the formulas printed above).
new_mpids = {
    "LaP5": "mp-1864",
    "Sr(MgAs)2": "mp-863260",
    "Sr2CdAs2": "mp-863261",
    "Hg2MoO4": "mp-558802",
    "KYGeS4": "mp-863755",
    "VBiO4": "mp-23044",
    "K2SbAu": "mp-863757",
    "Bi2MoO6": "mp-23064",
    "AlTlSe2": "mp-863762",
}

In [33]:
# Map each unresolved (old) id to its replacement id.
# The link is made through the formula (old id -> formula -> new id) rather
# than by pairing the i-th entry of `missing` with the i-th key of `new_mpids`:
# positional pairing silently produces wrong mappings if the hand-typed dict is
# ever reordered or `missing` changes. A KeyError here means a formula has no
# replacement entry in `new_mpids`.
old_to_new = {
    old_id: new_mpids[id_to_form[old_id]]
    for old_id in missing
}

In [34]:
# Display the old-id -> replacement-id mapping for a visual check.
old_to_new

{'mp-645421': 'mp-1864',
 'mp-867194': 'mp-863260',
 'mp-867203': 'mp-863261',
 'mp-505677': 'mp-558802',
 'mp-867334': 'mp-863755',
 'mp-504878': 'mp-23044',
 'mp-867335': 'mp-863757',
 'mp-567326': 'mp-23064',
 'mp-867359': 'mp-863762'}

In [35]:
# Save the old-id -> replacement-id mapping to disk.
dumpjson(old_to_new, "fab_replace_mpids.json")

In [36]:
# Swap every unresolved id in `ids` for its replacement, leaving all other ids
# and the overall order untouched.
# Rebuilding the list with a lookup (instead of ids.index(...) plus in-place
# assignment) keeps this cell idempotent: re-running it no longer raises
# ValueError because the old id has already been replaced. It also replaces
# every occurrence of an old id, not just the first, and avoids a linear scan
# per replacement.
ids = [old_to_new.get(mpid, mpid) for mpid in ids]

In [37]:
# Re-run the Materials Project summary query with the updated id list.
# The API key is read from the MP_API_KEY environment variable rather than
# being hard-coded: a key committed in a notebook is leaked to everyone who can
# read the file (the previously embedded key should be revoked and regenerated).
# A KeyError here means MP_API_KEY has not been set.
with MPRester(os.environ["MP_API_KEY"]) as mpr:
    docs = mpr.materials.summary.search(
        material_ids=ids,
        fields=["material_id", "band_gap", "structure", "energy_above_hull"],
    )

Retrieving SummaryDoc documents: 100%|███████████████████████████████████████████| 727/727 [00:00<00:00, 5544107.29it/s]


In [38]:
# Build one record per document returned by the second query:
#   'jid'        -> the MP material id as a string
#   'atoms'      -> the structure converted to JARVIS atoms, then to_dict()
#   'MP bandgap' -> the band gap reported by MP
new_mats = [
    {
        'jid': str(doc.material_id),
        'atoms': JAA.get_atoms(doc.structure).to_dict(),
        'MP bandgap': doc.band_gap,
    }
    for doc in docs
]

In [39]:
# Write the records (id, atoms dict, MP band gap) built from the second query to disk.
dumpjson(new_mats,"fab_structures.json")