# Imports required to query from/work with AiiDA

In [1]:
import numpy as np
from aiida import load_profile
from aiida.orm import load_node, Node, Group, Computer, User, CalcJobNode, Code, Data, StructureData, KpointsData, Dict, UpfData, Int, WorkChainNode, List, CifData, CalcFunctionNode
from aiida.plugins import CalculationFactory, DataFactory
from aiida.orm import QueryBuilder
from aiida.orm import load_node
import pandas as pd
import copy
# Load our profile
load_profile()

<aiida.manage.configuration.profile.Profile at 0x7fd6345109a0>

## Use this to analyze JSON Data

In [30]:
# Tags (matched against `extras.tag4`) of the gas calculations we care about:
#   isotmt_* -> isotherms at all pressures for several temperatures (h2)
#   isot_*   -> isotherms; n2/co2 have varying pressures, o2/ch4 share all pressures
#   kh_*     -> Henry coefficient only
gases = [
    "isotmt_h2",
    "isot_o2", "isot_co2", "isot_ch4", "isot_n2",
    "kh_xe", "kh_kr", "kh_h2o", "kh_h2s",
]

# Sanity-check query using the CH4 isotherm tag: each result row holds
# [group description, original CIF filename, attributes of the CH4 node].
qb = QueryBuilder()
qb.append(
    Group,
    tag='group',
    filters={'type_string': {'!==': 'core.import'}},  # skip groups typed 'core.import'
    project='description',
)
qb.append(
    CifData,
    with_group='group',
    filters={'extras.tag4': 'orig_cif'},
    project='attributes.filename',
)
qb.append(
    Node,
    with_group='group',
    filters={'extras.tag4': 'isot_ch4', 'attributes.is_porous': True},
    project='attributes',
)
cofs = qb.all()

# Number of rows returned by the query
print(len(cofs))





585


## Query for all gases
### Certain gases have different properties, so they need to be queried slightly differently
#### co2 and n2 have pressures that vary between COFs, so we only take the pressures that stay constant, 0.001 bar and 30 bar. o2 and ch4 have constant pressures across the queried COFs, so we take all of them. Finally, h2 has 3 temperatures, each with its own pressures and values, so we need to pull extra data.

# Check whether there are multiple temperatures instead of hard-coding isot and isotmt

In [21]:
# informative names
# change to dictionary
# return the dataframe for the gas
def DataCollection(cofs, gas):
    """Flatten the queried results for one gas into a DataFrame, one row per COF.

    Parameters
    ----------
    cofs : sequence
        Query result rows. Only ``row[1]`` (stored in the ``cof`` column) and
        ``row[2]`` (the attribute dictionary of the gas calculation) are read.
    gas : str
        Tag of the form ``"<prefix>_<name>"``, e.g. ``"isot_co2"``. The prefix
        selects the layout that is read from the attributes:

        * ``isot``   - single isotherm plus Henry coefficient,
        * ``isotmt`` - one isotherm per temperature, no Henry coefficient,
        * anything else (e.g. ``kh``) - Henry coefficient only.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``cofs``; empty if ``cofs`` is empty.

    Notes
    -----
    For ``isot`` co2/n2 and for ``isotmt`` the pressure/temperature labels in
    the column names are taken from the FIRST row (``cofs[0]``) so that every
    row lands in the same columns; the loading values come from each row.
    """
    gasPrefix, gasName = gas.split("_")

    # Collect plain dicts and build the frame once at the end.
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    records = []

    for row in cofs:
        attributes = row[2]
        record = {"cof": row[1]}

        if gasPrefix == "isot":
            isotherm = attributes['isotherm']
            loadings = isotherm['loading_absolute_average']

            if gasName in ("co2", "n2"):
                # Pressures vary for co2 and n2, so keep only the first and
                # last point, labelled with the first row's pressures.
                referencePressures = cofs[0][2]['isotherm']['pressure']
                record['{0}_{1}bar'.format(gasName, referencePressures[0])] = loadings[0]
                record['{0}_{1}bar'.format(gasName, referencePressures[-1])] = loadings[-1]
            else:
                # o2 and ch4 don't vary, so every pressure point gets a column.
                for index, pressure in enumerate(isotherm['pressure']):
                    record['{0}_{1}bar'.format(gasName, pressure)] = loadings[index]

            # In both cases we want the loading unit.
            record['{0}_ads_unit'.format(gasName)] = isotherm['loading_absolute_unit']

        elif gasPrefix == "isotmt":
            # H2: one isotherm per temperature; labels come from the first row.
            referenceTemperatures = cofs[0][2]['temperature']
            referenceIsotherms = cofs[0][2]['isotherm']
            for tIndex, temperature in enumerate(referenceTemperatures):
                for pIndex, pressure in enumerate(referenceIsotherms[tIndex]['pressure']):
                    column = '{0}_{1}K_{2}bar'.format(gasName, temperature, int(pressure))
                    record[column] = attributes['isotherm'][tIndex]['loading_absolute_average'][pIndex]

            record['{0}_ads_unit'.format(gasName)] = attributes['isotherm'][0]['loading_absolute_unit']

        # Henry data is added for everything except H2, which has a long list
        # of Henry coefficient averages because of the different temperatures.
        if gasPrefix != "isotmt":
            record['{0}_henry'.format(gasName)] = attributes['henry_coefficient_average']
            record['{0}_henry_unit'.format(gasName)] = attributes['henry_coefficient_unit']

        records.append(record)

    return pd.DataFrame(records)

# Driver

In [41]:
# Hold all of our dataframes to be merged at the end
# The nine gas tags to collect (matched against `extras.tag4` below)
gases = [
    "isotmt_h2",
    "isot_o2", "isot_co2", "isot_ch4", "isot_n2",
    "kh_xe", "kh_kr", "kh_h2o", "kh_h2s",
]

# Maps each gas tag to its own DataFrame; the frames are merged in a later cell
dataFrames = {}

for gas in gases:
    # Same query shape for every gas; only the `extras.tag4` value on the
    # last node changes. Rows come back as [description, filename, attributes].
    qb = QueryBuilder()
    qb.append(
        Group,
        tag='group',
        filters={'type_string': {'!==': 'core.import'}},
        project='description',
    )
    qb.append(
        CifData,
        with_group='group',
        filters={'extras.tag4': 'orig_cif'},
        project='attributes.filename',
    )
    qb.append(
        Node,
        with_group='group',
        filters={'extras.tag4': gas, 'attributes.is_porous': True},
        project='attributes',
    )
    res = qb.all()

    # Flatten the rows of this gas into a per-COF table
    dataFrames[gas] = DataCollection(res, gas)
    

Unnamed: 0,cof,o2_1bar,o2_5bar,o2_10bar,o2_20bar,o2_30bar,o2_50bar,o2_80bar,o2_100bar,o2_140bar,o2_200bar,o2_ads_unit,o2_henry,o2_henry_unit
0,20610N2.cif,0.267540,1.287679,2.479882,4.707451,6.756054,10.142231,14.573220,16.697586,20.609347,24.944485,mol/kg,2.679060e-06,mol/kg/Pa
1,21052N2.cif,0.432725,1.839179,3.516703,6.188280,8.363036,12.067272,16.230583,18.457954,22.116277,26.012643,mol/kg,4.444560e-06,mol/kg/Pa
2,20473N2.cif,0.313438,1.507559,3.026248,5.838340,8.468914,13.304997,19.482671,23.181372,29.767765,38.090693,mol/kg,3.224810e-06,mol/kg/Pa
3,15161N2.cif,0.233824,1.120439,2.144863,3.981059,5.518666,8.050689,10.894914,12.371725,14.566810,17.023229,mol/kg,2.433280e-06,mol/kg/Pa
4,17061N2.cif,0.342248,1.499412,2.644759,4.204660,5.211985,6.443870,7.456196,7.894414,8.529768,9.187391,mol/kg,3.525770e-06,mol/kg/Pa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586,08010N2.cif,0.079500,0.332880,0.529973,0.783069,0.900030,1.101567,1.286836,1.370930,1.486977,1.627038,mol/kg,8.490070e-07,mol/kg/Pa
587,20640N2.cif,0.271469,1.309162,2.455822,4.613120,6.436640,9.297867,12.314989,13.936243,16.155864,18.626698,mol/kg,2.770250e-06,mol/kg/Pa
588,16056N2.cif,0.292629,1.382149,2.633638,4.638823,6.297488,8.898399,11.800897,13.302141,15.727506,18.343249,mol/kg,3.021830e-06,mol/kg/Pa
589,21013N2.cif,0.264184,1.289830,2.538601,4.710200,6.721380,10.237045,14.263876,16.543527,20.223637,24.590429,mol/kg,2.721720e-06,mol/kg/Pa


In [44]:
# Seed the combined table with an independent copy of the H2 frame so the
# per-gas frame stored in `dataFrames` is left untouched.
df = dataFrames["isotmt_h2"].copy(deep=True)

# Outer-join every remaining gas on the COF name; COFs missing for some gas
# keep their row and get NaN in that gas's columns.
# NOTE(review): gases[1:] assumes "isotmt_h2" is the first entry of `gases`.
for gas in gases[1:]:
    df = pd.merge(df, dataFrames[gas], on='cof', how='outer')

df

isot_o2
isot_co2
isot_ch4
isot_n2
kh_xe
kh_kr
kh_h2o
kh_h2s


Unnamed: 0,cof,h2_77K_1bar,h2_77K_5bar,h2_77K_25bar,h2_77K_50bar,h2_77K_75bar,h2_77K_100bar,h2_198K_1bar,h2_198K_5bar,h2_198K_25bar,...,n2_henry,n2_henry_unit,xe_henry,xe_henry_unit,kr_henry,kr_henry_unit,h2o_henry,h2o_henry_unit,h2s_henry,h2s_henry_unit
0,20610N2.cif,8.608329,18.943764,33.019479,42.110343,48.077294,52.526693,0.220603,1.091591,4.971186,...,2.492560e-06,mol/kg/Pa,0.000022,mol/kg/Pa,0.000006,mol/kg/Pa,3.355660e-06,mol/kg/Pa,0.000021,mol/kg/Pa
1,21052N2.cif,12.492448,24.194547,40.296970,48.517777,53.710548,56.759958,0.275615,1.299559,5.850303,...,3.663020e-06,mol/kg/Pa,0.000068,mol/kg/Pa,0.000013,mol/kg/Pa,4.110900e-05,mol/kg/Pa,0.000164,mol/kg/Pa
2,20473N2.cif,7.351015,18.898034,39.891623,56.159212,69.075600,79.111762,0.323447,1.542330,7.367576,...,3.078940e-06,mol/kg/Pa,0.000021,mol/kg/Pa,0.000007,mol/kg/Pa,2.626000e-05,mol/kg/Pa,0.000019,mol/kg/Pa
3,15161N2.cif,11.103453,19.127537,29.268909,34.150457,36.880407,38.670559,0.182272,0.874597,3.948490,...,1.709490e-06,mol/kg/Pa,0.000033,mol/kg/Pa,0.000007,mol/kg/Pa,8.488530e-06,mol/kg/Pa,0.000029,mol/kg/Pa
4,17061N2.cif,11.473854,15.031012,17.343098,18.156936,18.552978,18.818870,0.158416,0.736588,2.972030,...,2.183040e-06,mol/kg/Pa,0.000144,mol/kg/Pa,0.000017,mol/kg/Pa,9.238350e-06,mol/kg/Pa,0.000122,mol/kg/Pa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,16056N2.cif,9.601632,16.119540,25.716533,31.354143,34.988888,37.406117,0.198080,0.947045,4.090201,...,2.187860e-06,mol/kg/Pa,0.000049,mol/kg/Pa,0.000009,mol/kg/Pa,6.895100e-06,mol/kg/Pa,0.000045,mol/kg/Pa
589,21013N2.cif,9.128204,18.733539,32.621997,42.024845,47.935489,52.415050,0.218663,1.109665,4.897949,...,2.538220e-06,mol/kg/Pa,0.000024,mol/kg/Pa,0.000007,mol/kg/Pa,2.143000e-04,mol/kg/Pa,0.000031,mol/kg/Pa
590,19292N2.cif,12.682302,18.935154,25.033135,27.678384,29.176354,30.008674,0.212761,0.981538,4.091099,...,2.892340e-06,mol/kg/Pa,0.000189,mol/kg/Pa,0.000022,mol/kg/Pa,1.889470e-05,mol/kg/Pa,0.000215,mol/kg/Pa
591,20542N2.cif,,,,,,,,,,...,3.535510e-06,mol/kg/Pa,0.000177,mol/kg/Pa,0.000019,mol/kg/Pa,7.180830e-06,mol/kg/Pa,,


In [None]:
# Keep only the COFs that have a value in every column (rows containing any
# NaN are dropped) and write them to CSV without the index column.
complete_cofs = df.dropna()
complete_cofs.to_csv('test_data.csv', index=False)