## Site Volume Dictionary 
By: Tasha Lewis
01/10/2023

In [1]:
from pymatgen.core import Structure, Element, Lattice, PeriodicSite
from pymatgen.ext.matproj import MPRester
mpr = MPRester()
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
from pprint import pprint
import numpy as np
from pydash import get

  from tqdm.autonotebook import tqdm


In [3]:
from maggma.stores.advanced_stores import MongograntStore
from maggma.stores.compound_stores import ConcatStore

In [4]:
from pymatgen.core import Structure
from pymatgen.analysis.local_env import VoronoiNN, CrystalNN
vnn = VoronoiNN()
cnn = CrystalNN()
from pymatgen.analysis.structure_matcher import StructureMatcher
sm = StructureMatcher()

### Connect to Databases

In [5]:
materials = MongograntStore("ro:mongodb07-ext.nersc.gov/fw_acr_mv", "materials_2022", key="task_id")
materials.connect()

ConfigurationError: Unknown option username

In [5]:
# material_id and task_ids
materials = MongograntStore("ro:mongodb03.nersc.gov/fw_acr_mv", "materials_2022",key="task_id")
materials.connect()

# "material_ids" & "host_material_ids" & "insertion_material_ids" 
sgroup = MongograntStore("ro:mongodb03.nersc.gov/fw_acr_mv","sgroups_2022",key="_id")
sgroup.connect()

# "material_ids" & "host_structure" & "battery_id" & "entries_composition_summary" 
insert_elec = MongograntStore("ro:mongodb03.nersc.gov/fw_acr_mv","insertion_electrodes_2022",key="_id")
insert_elec.connect()

# "battery_id" & "migration_graph" & "matrix_supercell_structure" & "inserted_ion_coords
mig_graph = MongograntStore("ro:mongodb03.nersc.gov/fw_acr_mv","migration_graph_2022",key="task_id")
mig_graph.connect()

aneba_store = MongograntStore("ro:mongodb03.nersc.gov/fw_acr_mv", "approx_neb_analysis", key="wf_uuid")
aneba_store.connect()

elec_store = MongograntStore("ro:mongodb03.nersc.gov/fw_acr_mv","vw_elec",key="battid")
elec_store.connect()

# "battery_id" & "host_structure" & "material_ids" & "migration_graph" & "inserted_ion_coords"
new_elec_store = MongograntStore("ro:mongodb03.nersc.gov/fw_acr_mv","rank_electrodes_2022")
new_elec_store.connect()


No credentials for read:mongodb03.nersc.gov/fw_acr_mv found in local config
Requesting credentials from https://grantmedb.materialsproject.org


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

# Documents from approx_neb_analysis

In [6]:
aneba_doc_list = list(aneba_store.query({'battid':{'$regex':'Mg'}}))

In [6]:
# aneba_doc_list

In [7]:
len(aneba_doc_list)

147

In [8]:
aneba_doc_list[0]['data'].keys()

dict_keys(['0+1', '5+6', '3+4'])

In [9]:
aneba_doc_list[0]['battid']

'6827_Mg'

## Structures

In [17]:
battery_ids = []
matching_count = 0
for doc in aneba_doc_list:
    battid = doc['battid']
    wf_uuid = doc['wf_uuid']
    struct = Structure.from_dict(doc['host']['input'])
    elements = list(struct.composition.as_dict().keys())
    elec_doc = elec_store.query_one({"approx_neb_wf_uuid":wf_uuid,'battid':battid})  # The old way to query with battery id and wf uuid
    
    if elec_doc:
            material_ids = [str(i) for i in elec_doc['material_ids']]   # get material ids
            material_ids.extend(['js-'+i for i in material_ids])  
            
            if new_elec_store.count({'material_ids':{'$in':material_ids},'elements':{'$all':elements}}) == 1:
                elec_doc = new_elec_store.query_one({'material_ids':{'$in':material_ids},'elements':{'$all':elements}})
                mg = MigrationGraph.from_dict(elec_doc['migration_graph'])   # get migration graph
                battery_id = elec_doc['battery_id']
                battery_ids.append(battery_id)
                matching_count += len(list(elec_doc))
                print(matching_count)
                #host_structure = elec_doc['battery_id']
        #print(battery_ids[0])
                #print("structures don't match for "+battid) if sm.fit(struct,mg.host_structure)==False else None
        
            #else:
                #print(str(new_elec_store.count({'material_ids':{'$in':material_ids},'elements':{'$all':elements}}))+ ' documents found for '+battid,elec_doc['formula_charge'])         
            

49
98
147
196
245
294
343
392
441
490
539
588
637
686
735
784
833
882
931
980
1029
1078
1127
1176
1225
1274
1323
1372
1421
1470
1519
1568
1617
1666
1715
1764
1813
1862
1911
1960
2009
2058
2107
2156
2205
2254
2303
2352
2401
2450
2499
2548
2597
2646
2695
2744
2793
2842
2891
2940
2989
3038
3087
3136
3185
3234
3283
3332
3381
3430
3479


In [134]:
matching_count = 0
battery_id = []

for doc in aneba_doc_list:
    battid = doc['battid']
    wf_uuid = doc['wf_uuid']
    struct = Structure.from_dict(doc['host']['input'])
    elements = list(struct.composition.as_dict().keys())
    elec_doc = elec_store.query_one({"approx_neb_wf_uuid":wf_uuid,'battid':battid})  # The old way to query with battery id and wf uuid
    
    matching_count += elec_store.count({"approx_neb_wf_uuid": wf_uuid, 'battid': battid})
    pprint(elements)
#print("Matching count:", matching_count)

    if elec_doc: 
        material_ids = [str(i) for i in elec_doc['material_ids']]   # get material ids
        material_ids.extend(['js-'+i for i in material_ids]) 
        #pprint(len(list(material_ids)))
        if new_elec_store.query_one({'elements':{'$all':elements}}) == 1:
            elec_doc_new= new_elec_store.find_one({'elements':{'$all':elements}})
            #mg = MigrationGraph.from_dict(elec_doc['migration_graph'])   # get migration graph
            #battery_id = elec_doc['battery_id']
            #battery_ids.append(battery_id)
            #host_struct = Structure.from_dict(elec_doc["host_structure"]["sites"])
            #working_ion = elec_doc['working_ion']
            #pprint(elec_doc.as_dict())
            #pprint(elec_doc_new["elements"])
        else:
            print("No matching document found for material_ids and elements")
            

['Nb', 'Ag', 'P', 'S']
['Cr', 'Ag', 'O']
No matching document found for material_ids and elements
['V', 'P', 'O']
No matching document found for material_ids and elements
['Cr', 'O', 'F']
No matching document found for material_ids and elements
['Fe', 'Ni', 'Sb', 'P', 'O']
No matching document found for material_ids and elements
['Rb', 'Sn', 'I', 'O']
No matching document found for material_ids and elements
['Na', 'Ni', 'O']
No matching document found for material_ids and elements
['V', 'P', 'O']
No matching document found for material_ids and elements
['Mn', 'Co', 'O']
No matching document found for material_ids and elements
['Yb', 'Cu', 'Ge', 'O']
No matching document found for material_ids and elements
['Ta', 'W', 'S']
No matching document found for material_ids and elements
['Ag', 'Te', 'Mo', 'O']
No matching document found for material_ids and elements
['Cu', 'O', 'F']
No matching document found for material_ids and elements
['V', 'O', 'F']
No matching document found for material_

In [None]:
        
#     if elec_doc:
#         material_ids = [str(i) for i in elec_doc['material_ids']]   # get material ids
#         material_ids.extend(['js-'+i for i in material_ids])  
        
#         if new_elec_store.count_documents({'material_ids':{'$in':material_ids},'elements':{'$all':elements}}) == 1:
#             matching_count += 1
#             elec_doc = new_elec_store.find_one({'material_ids':{'$in':material_ids},'elements':{'$all':elements}})
#             mg = MigrationGraph.from_dict(elec_doc['migration_graph'])   # get migration graph
#             battery_id = elec_doc['battery_id']
#             battery_ids.append(battery_id)
        

In [None]:
doc_list = list(new_elec_store.query({'battery_id':{'$regex':'Mg'}}))
doc_list

## Elec doc: How many match?

In [None]:
matching_count = 0
for doc in aneba_doc_list:
    battid = doc['battid']  # Old battery id
    wf_uuid = doc['wf_uuid']
    
    elec_doc = elec_store.query({'approx_neb_wf_uuid': wf_uuid, 'battid': battid})
    #new_elec_doc = new_elec_store.query({'approx_neb_wf_uuid': wf_uuid})
    matching_count += len(list(elec_doc))

print("Number of matching documents in elec_store:", matching_count)

In [None]:
#lec_doc = elec_store.query({'approx_neb_wf_uuid': wf_uuid, 'battid': battid})
#list(elec_doc[0])

# Ex

In [None]:
doc_1 = aneba_doc_list[0]
keys_1 = list(doc_1['data'].keys())
#struct_1 = Structure.from_dict(doc_1['data'][keys_1[0]]['input_structure'])
Structure.from_dict(doc_1['data'][keys_1[0]]['input_structures'][0])

In [None]:
for n,d in vnn.get_voronoi_polyhedra(struct_1,0).items():
    print(n,d["volume"])

## Battid list

In [None]:
battid_list = []
for doc in aneba_doc_list:
    battid = doc['battid']
    battid_list.append(battid)
    #battid_list.append({"battid": battid})
battid_list[0]

## Information about materials in aneba doc

In [None]:
def information(battid):
    doc = aneba_store.query_one({"battid": battid})
    battid = doc['battid'] 
    wf_uuid = doc['wf_uuid']
    hop_keys = list(doc['data'].keys())
    struct = Structure.from_dict(doc['data'][hop_keys[0]])
    #struct_input = Structure.from_dict(doc['host']['input'])
    
    #struct_output = Structure.from_dict(doc['host']['output'])
    #elements = list(struct.composition.as_dict().keys())
    return {"wf_uuid":wf_uuid,
            "battid":battid,
            "hop_keys":hop_keys,
            
            #"structs_input":struct_input,
            #"structs_output":struct_output,
            
            #"doc":doc
            }

In [None]:
information(battid_list[0])

In [None]:
elec_doc[0]

## Base task id site volume

In [None]:
for doc in elec_doc:
    battid = doc['battid']  # Old battery id
    wf_uuid = doc['wf_uuid']
    #struct = Structure.from_dict(doc['host']['input'])
print(battid)

In [None]:
structs = [Structure.from_dict(s) for s in doc["data"][hop_key]["output_structures"]]

## Approx Neb site volume

In [None]:
for doc in aneba_doc_list:
    battid = doc['battid']  # Old battery id
    wf_uuid = doc['wf_uuid']
    struct = Structure.from_dict(doc['host']['input'])
    elements = list(struct.composition.as_dict().keys())
    if elec_store.count({'battid':battid})>1:
        print('more than one elec doc found for battid'+battid)
        
    elec_doc = elec_store.query_one({"approx_neb_wf_uuid":wf_uuid,'battid':battid})  # The old way to query with battery id and wf uuid
    if elec_doc:
        material_ids = [str(i) for i in elec_doc['material_ids']]   # get material ids
        material_ids.extend(['js-'+i for i in material_ids])  
        if new_elec_store.count({'material_ids':{'$in':material_ids},'elements':{'$all':elements}}) == 1:
            elec_doc = new_elec_store.query_one({'material_ids':{'$in':material_ids},'elements':{'$all':elements}})
            mg = MigrationGraph.from_dict(elec_doc['migration_graph'])   # get migration graph
            print("structures don't match for "+battid) if sm.fit(struct,mg.host_structure)==False else None
        else:
            print(str(new_elec_store.count({'material_ids':{'$in':material_ids},'elements':{'$all':elements}}))+ ' documents found for '+battid,elec_doc['formula_charge'])         
    

## Migration Graph

In [None]:
doc_mig = mig_graph.query_one({"battery_id": "19117_Mg"})