# AtomsToGraph

> Routines to convert ASE `Atoms` objects to graphs

In [None]:
#| default_exp AtomsToGraph

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *

In [None]:
#| export
from ase.io import read, write
from ase import Atoms
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import torch
import torch_geometric

## ASE File IO

Read in an extxyz file using ASE's `read` function.

In [None]:
# Load the methane test structure, then show (one per line) the Atoms summary,
# the atomic positions, and the chemical symbols.
read_atoms = read("../tests/methane.extxyz")
print(
    read_atoms,
    read_atoms.get_positions(),
    read_atoms.get_chemical_symbols(),
    sep="\n",
)

Atoms(symbols='CH4', pbc=False)
[[ 0.     0.     0.   ]
 [ 0.625  0.625  0.625]
 [-0.625 -0.625  0.625]
 [-0.625  0.625 -0.625]
 [ 0.625 -0.625 -0.625]]
['C', 'H', 'H', 'H', 'H']


In [None]:
#| hide
# Build methane by hand (one carbon followed by four hydrogens) so the
# structure read from file can be checked against a known reference.
atom_symbols = ["C"] + ["H"] * 4
coords = [
    [0.000, 0.000, 0.000],
    [0.625, 0.625, 0.625],
    [-0.625, -0.625, 0.625],
    [-0.625, 0.625, -0.625],
    [0.625, -0.625, -0.625],
]
atoms = Atoms(symbols=atom_symbols, positions=coords)

# The file-based and hand-built structures must agree on positions and symbols.
test_eq(read_atoms.get_positions(), atoms.get_positions())
test_eq(read_atoms.get_chemical_symbols(), atoms.get_chemical_symbols())

# Pairwise distance matrix of the hand-built structure.
pairwise_distances = atoms.get_all_distances()
print(pairwise_distances)

[[0.         1.08253175 1.08253175 1.08253175 1.08253175]
 [1.08253175 0.         1.76776695 1.76776695 1.76776695]
 [1.08253175 1.76776695 0.         1.76776695 1.76776695]
 [1.08253175 1.76776695 1.76776695 0.         1.76776695]
 [1.08253175 1.76776695 1.76776695 1.76776695 0.        ]]


In [None]:
#| export
def generate_onehot_features(symbols, categories=None):
    """Generate one-hot features for a list of atom symbols.

    Args:
        symbols (list): List of N atom symbols.
        categories (list, optional): Ordered list of unique symbols that
            defines the feature columns. Pass the same list for every
            structure to get a consistent column layout across structures.
            If None (default), the sorted unique values found in `symbols`
            are used, so the layout depends on the input.

    Returns:
        numpy.ndarray: Float array of shape (N, M) with exactly one 1.0 per
            row, where M is the number of categories. Empty input yields an
            array with zero rows.

    Raises:
        ValueError: If `categories` contains duplicates, or if `symbols`
            contains a value that is not listed in `categories`.
    """
    flat_symbols = np.asarray(symbols).reshape(-1)  # accept any list/array shape

    if categories is None:
        # np.unique gives the sorted vocabulary and, for each atom, the index
        # of its symbol within that vocabulary (i.e. its one-hot column).
        vocabulary, columns = np.unique(flat_symbols, return_inverse=True)
        columns = columns.reshape(-1)
    else:
        vocabulary = list(categories)
        column_of = {symbol: col for col, symbol in enumerate(vocabulary)}
        if len(column_of) != len(vocabulary):
            raise ValueError(f"categories must be unique, got {vocabulary}")

        symbol_list = flat_symbols.tolist()
        # dict.fromkeys de-duplicates while keeping first-seen order.
        unknown = [s for s in dict.fromkeys(symbol_list) if s not in column_of]
        if unknown:
            raise ValueError(
                f"Symbols {unknown} are not present in categories {vocabulary}"
            )
        columns = np.array([column_of[s] for s in symbol_list], dtype=np.intp)

    # Row i of the identity matrix is the one-hot vector for category i.
    return np.eye(len(vocabulary))[columns]

## One Hot Encoding

Instead of raw atom symbols, we use one-hot encoding to represent each atom's type as a numeric feature vector.

In [None]:
# Encode the symbols of the structure read from file; the resulting array is
# the cell's displayed value.
methane_symbols = read_atoms.get_chemical_symbols()
generate_onehot_features(methane_symbols)

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.]])

In [None]:
#| hide
# Expected encoding for C, H, H, H, H: carbon in column 0, hydrogen in column 1.
carbon_row, hydrogen_row = [1., 0.], [0., 1.]
methane_onehot = np.array([carbon_row] + [hydrogen_row] * 4)
test_eq(generate_onehot_features(atom_symbols), methane_onehot)

In [None]:
#| hide
# Run nbdev's export step for this notebook.
import nbdev

nbdev.nbdev_export()