# Usage of ABCD database with extra information

In [1]:
# Enable IPython's autoreload extension so edited modules are re-imported automatically.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from abcd import ABCD
from abcd.backends.atoms_properties import Properties

First of all, we need to define the url of the database. It could be local or remote:

- direct access: url = 'opensearch://admin:admin@localhost:9200'
- api access: url = 'http://localhost/api'

Use a `with` statement to catch raised exceptions. You can skip it, but in that case you need to handle all the unexpected events yourself (cannot connect to the database, lost connection, wrong filter, wrong URL, etc.).

In [3]:
import os

# Connection URL for the database. The default targets a local OpenSearch
# instance; set the ABCD_URL environment variable to point somewhere else
# without editing the notebook (and to keep real credentials out of it).
url = os.environ.get('ABCD_URL', 'opensearch://admin:admin@localhost:9200')
abcd = ABCD.from_url(url)

print(abcd)

OpenSearchDatabase(url=localhost:9200, index=atoms) 


In [4]:
# Summarize the connection settings and the number of stored configurations.
abcd.print_info()

      type: opensearch
      host: localhost
      port: 9200
        db: abcd
     index: atoms
number of confs: 0
      type: opensearch


## Cleanup 

WARNING!! This removes all elements from the database.
It is only supported in the case of local access.

In [5]:
# Remove all elements currently stored in the database.
abcd.destroy()

In [6]:
# Set the database up again after destroy() — presumably creates a fresh, empty index; confirm against the backend.
abcd.create()

In [7]:
# Check the state after the reset; "number of confs" should read 0.
abcd.print_info()

      type: opensearch
      host: localhost
      port: 9200
        db: abcd
     index: atoms
number of confs: 0
      type: opensearch


## Uploading configurations

In [8]:
from ase.io import iread, read
# Show the working directory the notebook is running from.
!pwd

/home/ubuntu/abcd/tutorials


Data can be entered into the database as ASE Atoms objects, allowing any format readable by ase.io.read to be used.

In [9]:
# Location of the structure file whose configurations are uploaded below.
directory = Path('/home/ubuntu/data/')
file = directory.joinpath('input.data.2055.xyz')

In [10]:
%%time
with abcd as db:
    for atoms in iread(file.as_posix(), index=slice(None)):
        db.push(atoms, store_calc=False)

CPU times: user 15 s, sys: 181 ms, total: 15.2 s
Wall time: 25 s


Extra information can be added manually via a dictionary, or read in from a CSV/Excel file. A template for the structure files corresponding to each row in the data file can also be inferred, as can units given in the form `field (unit)` or `field / unit`.

In [11]:
# CSV file holding the extra properties, plus the filename pattern for the
# structure files; the {struct_name} placeholder is left unformatted here.
directory = Path('/home/ubuntu/data/')
data_file = directory.joinpath('DATA_copy.csv')
struct_file_template = f"{directory}/{{struct_name}}_FSR-out.cif"

In [12]:
# Build the Properties object from the CSV file. Structure files are located
# through the template, using the "MOF_name" label (presumably the CSV column
# that supplies {struct_name} — confirm against Properties). Units are
# inferred rather than passed explicitly.
properties = Properties(
    data_file=data_file,
    struct_file_template=struct_file_template,
    struct_name_label="MOF_name",
    store_struct_file=True,
    infer_units=True,
    # Disabled alternative: an explicit units mapping.
    # units={"Density": "g/cm3"}
)

Using the inferred structure file, the structure and its data can then be uploaded together. The `extra_info`, and properties in general, do not need to match those of documents already stored.

In [13]:
# Upload only the entry named "EWIKAX03", attaching its row of data as
# extra_info. `data` and `atoms` keep their names because later cells read
# them after the loop has finished.
target_name = "EWIKAX03"
for row_index, data in enumerate(properties.to_list()):
    if data['MOF_name'] != target_name:
        continue
    atoms = read(properties.struct_files[row_index])
    with abcd as db:
        db.push(atoms, store_calc=False, extra_info=data)

In [14]:
# `data` is the loop variable left over from the upload loop, i.e. the last
# entry of properties.to_list() — not necessarily the entry that was uploaded.
data["units"]

{'Dos at Fermi energy': 'eln/cell',
 'Dos at VBM': 'eln/cell',
 'Dos at CBM': 'eln/cell',
 'Density': 'g/cm3',
 'Accessible Surface Area': 'm2/g'}

In [15]:
# Check the stored count after both uploads.
abcd.print_info()

      type: opensearch
      host: localhost
      port: 9200
        db: abcd
     index: atoms
number of confs: 2056
      type: opensearch


In [16]:
# `atoms` is whichever structure was bound last by the upload cells above.
print(atoms.info)

{'spacegroup': Spacegroup(1, setting=1), 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'

In [17]:
# Show the property names stored in the database, grouped by category.
print(abcd.properties())

{'info': ['1aromatico-up', '2D', '2aromatici-up', '5-m-rings', '5m-ring-leg2met', '6m-rings', 'Accessible Surface Area', 'Band_gap', 'CN-M', 'COOM', 'Cell volume', 'Crit: metal', 'Crit: pi-pi stacking', 'Crit: redox active linker', 'Crit: redox match', 'Criteria#', 'Density', 'Dos at CBM', 'Dos at Fermi energy', 'Dos at VBM', 'HSE band gap', 'LCD', 'M-C-C-TRIANG', 'M-H2O-M', 'M-N-NM-N-M', 'M-h2o', 'MOF_name', 'Metal', 'Metal density', 'Metals number', 'Multiplier_Sum', 'N3--NCN up', 'PLD', 'Space_group', 'Space_group#', 'Temp', 'Volume Fraction', 'Year', 'Zprime', 'benzene', 'cell', 'energy', 'formula', 'metal-N', 'metal-O', 'metal-S', 'metal-halogen', 'n_atoms', 'occupancy', 'pbc', 'pyridine', 'pyrimidine', 'spacegroup', 'unit_cell', 'units', 'volume', 'without ions'], 'derived': ['elements', 'hash', 'hash_structure', 'modified', 'uploaded', 'username', 'volume'], 'arrays': ['forces', 'numbers', 'positions', 'spacegroup_kinds']}


In [18]:
# print(abcd.property("6m-rings"))
# Count the values of this property; the output appears to map each value to its number of occurrences.
print(abcd.count_property("Dos at Fermi energy"))

{29.868799209594727: 1}


In [19]:
# Select documents by an exact field value. The matches are fetched once and
# reused, instead of sending the same query to the database for each print.
query = 'n_atoms: 140'
items = list(abcd.get_items(query))
print(len(items))
print(items[0])

1
{'_id': 'J4TzUYoBQvF7oZdWKd8C', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.787902740200000

In [20]:
# Search without a field name, using a trailing wildcard.
query = 'Accessible  Surface*'
matches = list(abcd.get_items(query))
print(len(matches))

1


In [21]:
# Range query on the Year field.
query = 'Year: [2006 TO 2009]'
matches = list(abcd.get_items(query))
print(len(matches))

1


In [22]:
# Search without a field name, using a leading wildcard.
query = '*ubuntu'
matches = list(abcd.get_items(query))
print(len(matches))

2056


In [23]:
# Combine two conditions with AND: a range on username and a formula pattern
# whose first character is the `?` wildcard.
query = 'username:[ubunta TO ubuntx] AND formula:?48H28O32Zr6'
matches = list(abcd.get_items(query))
print(len(matches))

316


In [24]:
# Select every document that has a MOF_name field. The matches are fetched
# once and reused, instead of sending the same query three times.
query = 'MOF_name: *'
items = list(abcd.get_items(query))
print(len(items))
first_item = items[0]
print(first_item)
print(first_item.keys())

1
{'_id': 'J4TzUYoBQvF7oZdWKd8C', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.787902740200000

In [25]:
# Add a new key/value pair to the documents selected by `query`.
query = 'MOF_name: *'
example_data = {"example_property": "example_value"}
abcd.add_property(data=example_data, query=query)

In [28]:
# refresh() is called before reading back — presumably so the preceding write
# is visible to the query; confirm against the backend.
abcd.refresh()
abcd.property("example_property", query)

['example_value']

In [29]:
# Rename the property on the documents selected by `query`.
abcd.rename_property(name="example_property", new_name="renamed_property", query=query)

In [30]:
# Read the value back under its new name, using the same `query` as before.
abcd.refresh()
abcd.property("renamed_property", query)

['example_value']

In [31]:
# Remove the renamed property from the documents selected by `query`.
abcd.delete_property(name="renamed_property", query=query)

In [32]:
# After the deletion, the property should no longer be returned for `query`.
abcd.refresh()
# abcd.property("example_property", query)
abcd.property("renamed_property", query)

[]