In [None]:
import os
import json
import gzip
import multiprocessing as mp
import warnings
warnings.filterwarnings('ignore')
from tqdm.autonotebook import tqdm
from atomate2.lobster.schemas import LobsterTaskDocument, CondensedBondingAnalysis
from pydantic import create_model
from pymatgen.io.lobster import Charge,MadelungEnergies

In [None]:
# Remember the notebook's starting directory only the first time this cell runs.
# Re-capturing it on a re-run would record 'Results/' itself as the parent and
# make the relative chdir below fail (or descend into Results/Results).
if 'parent' not in globals():
    parent = os.getcwd()
os.chdir(parent)
os.chdir('Results/') #Directory consisting of lobster outputs

In [None]:
# Sub-directories of the current working directory, skipping names that start
# with 't', '.' or '__'.
mpids = [
    entry
    for entry in os.listdir()
    if not entry.startswith(('t', '.', '__')) and os.path.isdir(entry)
]
# Unique prefixes (the text before the first '_') in sorted order.
mats = sorted({dir_name.split('_')[0] for dir_name in mpids})

In [None]:
def get_lobster_lightweight_json(mpid, json_save_dir='/path/to/store/lobsterlightweightjsons/'):
    """Write a gzipped lightweight JSON summary of the LOBSTER outputs found in ``mpid``.

    The file ``<json_save_dir>/<mpid>.json.gz`` contains a JSON array with four
    objects, in this order:

    1. ``{"cation_anion_bonds": {...}}`` - analysis with ``which_bonds='cation-anion'``
    2. ``{"all_bonds": {...}}``          - analysis with ``which_bonds='all'``
    3. ``{"madelung_energies": {...}}``  - read from ``MadelungEnergies.lobster.gz``
    4. ``{"charges": {...}}``            - read from ``CHARGE.lobster.gz``

    If the bonding analysis raises ``AttributeError`` for a bond type, an empty
    dict is stored under that bond type's key.

    Args:
        mpid: Name of the directory (relative to the current working directory)
            holding the LOBSTER outputs; also used as the output file stem.
        json_save_dir: Directory in which the ``.json.gz`` file is written.

    Returns:
        The string ``mpid + ' Done'``.

    Note:
        The output file is only opened after every section has been serialised,
        so a failure while reading the inputs does not leave a truncated file.
    """
    which_bonds = ['cation-anion', 'all']
    serialized_sections = []

    for which_bond in which_bonds:
        # Build the key up front: the except-branch needs it even when the
        # analysis fails before anything else in the try-body has run.
        bond_key = which_bond.replace('-', '_') + '_bonds'
        try:
            (lobsterpy_data,
             lobsterpy_text,
             sb_icobi,
             sb_icohp,
             sb_icoop) = CondensedBondingAnalysis.from_directory(dir_name=mpid, which_bonds=which_bond,
                                                           save_cohp_plots=False)

            bond_data = {
                'lobsterpy_data': lobsterpy_data.dict(),
                'lobsterpy_text': ["".join(lobsterpy_text.text)],
                'sb_icobi': sb_icobi.dict(),
                'sb_icohp': sb_icohp.dict(),
                'sb_icoop': sb_icoop.dict(),
            }

            cohp_plot_data = bond_data['lobsterpy_data']['cohp_plot_data']
            for key, value in cohp_plot_data.items():
                # objects exposing `as_dict` (i.e. pymatgen objects) are not JSON
                # serialisable as-is, so replace them by their dict form
                if hasattr(value, 'as_dict'):
                    cohp_plot_data[key] = value.as_dict()

            section = json.dumps({bond_key: bond_data})
        except AttributeError:
            section = json.dumps({bond_key: {}})
        serialized_sections.append(section)

    madelung_energies_path = os.path.join(mpid, "MadelungEnergies.lobster.gz")
    charge_path = os.path.join(mpid, "CHARGE.lobster.gz")

    madelung_obj = MadelungEnergies(filename=madelung_energies_path)
    madelung_energies = {'madelung_energies': {
        "Mulliken": madelung_obj.madelungenergies_Mulliken,
        "Loewdin": madelung_obj.madelungenergies_Loewdin,
        "Ewald_splitting": madelung_obj.ewald_splitting,
    }}
    serialized_sections.append(json.dumps(madelung_energies))

    charge = Charge(charge_path)
    charges = {"charges": {"Mulliken": charge.Mulliken, "Loewdin": charge.Loewdin}}
    serialized_sections.append(json.dumps(charges))

    # Sections are joined with a bare comma to form one JSON array.
    with gzip.open(os.path.join(json_save_dir, "{}.json.gz".format(mpid)), 'wt', encoding='UTF-8') as f:
        f.write('[' + ','.join(serialized_sections) + ']')
    return mpid + ' Done'

In [None]:
# Caution : Make sure to change the number of parallel processes as per your system
# One material is dispatched per task (chunksize=1); statuses are collected in
# completion order, which need not match the order of `mats`.
with mp.Pool(processes=12, maxtasksperchild=1) as pool:
    task_iter = pool.imap_unordered(get_lobster_lightweight_json, mats, chunksize=1)
    results = tqdm(task_iter, total=len(mats))
    row = list(results)