# Basic usage of ABCD database

In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from ase.io import iread, read

from abcd import ABCD

First of all, we need to define the URL of the database. It can be local or remote:

- direct access: url = 'opensearch://admin:admin@localhost:9200'
- api access: url = 'http://localhost/api'

Use a `with` statement to catch the raised exceptions. You can omit it, but in that case you need to handle all the unexpected events yourself (cannot connect to the database, lost connection, wrong filter, wrong URL, etc.).

In [3]:
# Direct connection to an OpenSearch instance running on localhost.
# NOTE(review): the credentials are embedded in the URL; for anything other
# than a local test instance they should presumably come from the environment.
url = 'opensearch://admin:admin@localhost:9200'
abcd =  ABCD.from_url(url)

# The printed representation shows the backend class, host:port and index.
print(abcd)

OpenSearchDatabase(url=localhost:9200, index=atoms) 


In [4]:
# Print a summary of the connection and the number of stored configurations.
abcd.print_info()

      type: opensearch
      host: localhost
      port: 9200
     index: atoms
number of confs: 82204
      type: opensearch


## Cleanup 

**WARNING:** the following cells remove all elements from the database.
This is only supported in the case of local access.

In [11]:
# Destructive: per the warning above, this removes everything stored in the database.
abcd.delete()

In [12]:
# Presumably re-creates the (now empty) index after the deletion — confirm
# against the ABCD API.
abcd.create()

In [13]:
# Check the reset: the number of configurations should now be 0.
abcd.print_info()

      type: opensearch
      host: localhost
      port: 9200
     index: atoms
number of confs: 0
      type: opensearch


## Uploading configurations

In [14]:
# Show the directory the notebook is running in (shell command).
!pwd

/home/ubuntu/abcd/tutorials


In [15]:
# Location of the example trajectory. This is a machine-specific absolute
# path; adjust it to wherever the data lives on your system.
directory = Path('/home/ubuntu/data/')
file = directory.joinpath('input.data.2055.xyz')

Uploading configurations one by one, directly from ASE `Atoms` objects:

In [16]:
%%time
with abcd as db:
    for atoms in iread(file.as_posix(), index=slice(None)):
        
        # Hack to fix the representation of forces
        
#         atoms.calc.results['forces'] = atoms.arrays['force']
#         del(atoms.info['energy'])
            
        db.push(atoms, store_calc=False)
        # break

CPU times: user 15.3 s, sys: 162 ms, total: 15.5 s
Wall time: 23.7 s


In [8]:
%%time
# Bulk upload: read the file 32 times, collect every frame in one list and
# push the whole list with a single call.
# NOTE(review): this cell's execution count is out of sequence with the
# surrounding cells — confirm it is still meant to be part of the tutorial.
with abcd as db:

    atoms_list = []
    for i in range(32):
        for atoms in iread(file.as_posix(), index=slice(None)):
            atoms_list.append(atoms)
    db.push(atoms_list, store_calc=False)

CPU times: user 7min 27s, sys: 6.73 s, total: 7min 33s
Wall time: 9min 28s


In [17]:
# Check how many configurations the database holds after the upload.
abcd.print_info()

      type: opensearch
      host: localhost
      port: 9200
     index: atoms
number of confs: 2055
      type: opensearch


In [18]:
# `atoms` is the loop variable left over from the upload loop above, i.e. the
# last frame that was read from the file.
atoms.info

{'energy': -55970.4941491558}

Reading the trajectory from file:

In [19]:
%%time
# Load every frame of the file into memory in one call.
traj = read(file.as_posix(), index=slice(None))
# Number of frames that were read.
len(traj)

CPU times: user 2.7 s, sys: 60.1 ms, total: 2.76 s
Wall time: 2.76 s


2055

Pushing the whole trajectory to the database:

In [20]:
# Inspect the first configuration of the trajectory.
traj[0]

Atoms(symbols='C48H28O32Zr6', pbc=True, cell=[[14.759483662029265, 0.0, 0.0], [7.380258413807584, 12.781786651387147, 0.0], [7.380243655055182, 4.260782501715179, 12.050631347394049]], forces=..., calculator=SinglePointCalculator(...))

In [22]:
%%time
db.push(traj, store_calc=False)

CPU times: user 12.5 s, sys: 116 ms, total: 12.6 s
Wall time: 16.1 s


Uploading a whole file and injecting it into the database on the server side:

In [23]:
%%time
# Pass the file path instead of Atoms objects; parsing is presumably done by
# the backend (see the text above) — confirm against the ABCD API.
abcd.upload(file.as_posix(), store_calc=False)

CPU times: user 15.2 s, sys: 172 ms, total: 15.4 s
Wall time: 19 s


In [24]:
# Connection details and the configuration count, returned as a dict.
abcd.info()

{'host': 'localhost',
 'port': 9200,
 'index': 'atoms',
 'number of confs': 8220,
 'type': 'opensearch'}

In [26]:
# For every stored property: how many configurations have it, plus its
# category and dtype.
abcd.count_properties()

{'cell': {'count': 8220, 'category': 'info', 'dtype': 'array(float)'},
 'elements': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(dict)'},
 'energy': {'count': 8220, 'category': 'info', 'dtype': 'scalar(float)'},
 'filename': {'count': 2055, 'category': 'info', 'dtype': 'scalar(str)'},
 'forces': {'count': 8220,
  'category': 'arrays',
  'dtype': 'array(float, N x 3)'},
 'formula': {'count': 8220, 'category': 'info', 'dtype': 'scalar(str)'},
 'hash': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},
 'hash_structure': {'count': 8220,
  'category': 'derived',
  'dtype': 'scalar(str)'},
 'modified': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},
 'n_atoms': {'count': 8220, 'category': 'info', 'dtype': 'scalar(int)'},
 'numbers': {'count': 8220, 'category': 'arrays', 'dtype': 'vector(int, N)'},
 'pbc': {'count': 8220, 'category': 'info', 'dtype': 'vector(bool)'},
 'positions': {'count': 8220,
  'category': 'arrays',
  'dtype': 'array(float, N x 3)'