Skip to content

Commit

Permalink
Addition of to_pandas() method for raw ASE data and general
Browse files Browse the repository at this point in the history
improvements.

- Improved documentation about visualizing with plotly.
- Bumped version.
- ml4chem.data.utils:
    * Addition of `ase_to_xyz()` function.
- data.handler:
    * Addition of `to_pandas()` method.
    * Improved documentation.
  • Loading branch information
muammar committed Jan 12, 2020
1 parent a041d4c commit 6e73579
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 12 deletions.
6 changes: 5 additions & 1 deletion docs/source/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Introduction
==============
Data is central in Machine Learning and ML4Chem provides some tools to
prepare your Datas. We support the following:
prepare your data. We support the following input formats:

1. `Atomic Simulation Environment (ASE) <https://wiki.fysik.dtu.dk/ase/>`_.

Expand Down Expand Up @@ -62,3 +62,7 @@ created.
.. raw:: html
:file: _static/pca_visual.html

To activate plotly in Jupyter or JupyterLab follow the instructions shown in
`https://plot.ly/python/getting-started/#jupyter-notebook-support <https://plot.ly/python/getting-started/#jupyter-notebook-support>`_


2 changes: 1 addition & 1 deletion ml4chem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@


__all__ = ["Potentials"]
__version__ = "0.0.5"
__version__ = "0.0.6-dev"
33 changes: 24 additions & 9 deletions ml4chem/data/handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from collections import OrderedDict
from ml4chem.data.utils import ase_to_xyz
from ml4chem.utils import get_hash
import datetime
import logging
import pandas as pd

logger = logging.getLogger()

Expand Down Expand Up @@ -51,13 +53,6 @@ def prepare_images(self, images, purpose=None):
The purpose of the data so that structure is prepared accordingly.
Supported are: 'training', 'inference'
Returns
-------
self.images : dict
Ordered dictionary of images corresponding to order of self.targets
list.
self.targets : list
Targets used for training the model.
"""
logger.info("Preparing images for {}...".format(purpose))
self.images = OrderedDict()
Expand Down Expand Up @@ -171,12 +166,21 @@ def get_unique_element_symbols(self, images=None, purpose=None):
return self.unique_element_symbols

def get_data(self, purpose=None):
"""
"""A method to get data
Parameters
----------
purpose : str
The purpose of the data so that structure is prepared accordingly.
Supported are: 'training', 'inference'
Returns
-------
self.images : dict
Ordered dictionary of images corresponding to order of self.targets
list.
self.targets : list
Targets used for training the model.
"""

if purpose == "training":
Expand All @@ -185,7 +189,18 @@ def get_data(self, purpose=None):
return self.images

def get_total_number_atoms(self):
    """Get the total number of atoms

    Returns
    -------
    total
        Sum of the entries stored in self.atoms_per_image.
    """
    per_image_counts = self.atoms_per_image
    return sum(per_image_counts)

def to_pandas(self):
    """Convert data to pandas DataFrame

    Returns
    -------
    df : pandas.DataFrame
        DataFrame indexed by the keys of self.images. The "xyz" column
        holds the string returned by ase_to_xyz(atoms, file=False) for each
        image, and the "energy" column is filled from self.targets.

    Notes
    -----
    self.images and self.targets must already be populated; presumably
    this is done by prepare_images() -- verify against the caller.
    """
    columns = ["xyz"]

    # Keep the insertion order of self.images so that rows line up with
    # self.targets when the "energy" column is assigned below.
    images = OrderedDict(
        (key, ase_to_xyz(atoms, file=False))
        for key, atoms in self.images.items()
    )

    df = pd.DataFrame.from_dict(images, orient="index", columns=columns)
    df["energy"] = self.targets

    return df
22 changes: 21 additions & 1 deletion ml4chem/data/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from ase.io import Trajectory
from io import StringIO
import random


def split_Data(
def split_data(
images,
training_name="training_images.traj",
test_name="test_images.traj",
Expand Down Expand Up @@ -64,3 +65,22 @@ def split_Data(

log.write(str(test_images))
log.close()


def ase_to_xyz(atoms, comment="", file=True):
    """Convert ASE to xyz

    This function is useful to save xyz to DataFrame.

    Parameters
    ----------
    atoms : object
        ASE Atoms-like object. Only ``get_chemical_symbols()`` and the
        ``positions`` attribute (an iterable of ``(x, y, z)`` triples) are
        used.
    comment : str
        Text written on the second line of the xyz block.
    file : bool
        If True, return the in-memory text stream; otherwise return its
        content as a string.

    Returns
    -------
    xyz : io.StringIO or str
        The xyz representation: atom count, comment line, then one
        "symbol x y z" line per atom. When ``file`` is True the stream is
        rewound to its start so it can be read immediately.
    """
    xyz = StringIO()
    symbols = atoms.get_chemical_symbols()
    natoms = len(symbols)
    xyz.write("%d\n%s\n" % (natoms, comment))

    for s, (x, y, z) in zip(symbols, atoms.positions):
        xyz.write("%-2s %22.15f %22.15f %22.15f\n" % (s, x, y, z))

    if not file:
        return xyz.getvalue()

    # After writing, the stream position is at the end; rewind so that
    # consumers calling read()/readline() get the content rather than "".
    xyz.seek(0)
    return xyz

0 comments on commit 6e73579

Please sign in to comment.