# Smooth Overlap of Atomic Positions (SOAP) Descriptors
Prominent 3D representations for molecules include the [Coulomb matrix](./Coulomb-Matrices.ipynb), Smooth Overlap of Atomic Positions (SOAP), Atom-Centered Symmetry Functions (ACSF), the atomic cluster expansion (ACE), atomic features built by the hierarchically interacting particle neural network (HIP-NN), and the N-body iterative contraction of equivariants (NICE).

Here, we demonstrate how to generate SOAP descriptors for molecules.

We will use the same kernel as in the [Fingerprints and SMILES](./1-Fingerprints-and-SMILES.ipynb) notebook, and the QM7 dataset.

## Loading the data
This particular notebook uses `asaplib`, a library containing "Automatic Selection And Prediction" tools for materials and molecules. It provides tools for analyzing and visualizing atomic simulation data.

In [None]:
from asaplib.data import ASAPXYZ

# Import QM7 coordinates using `asaplib`.
# The path is relative to the notebook's working directory; periodic=False
# is passed because the input is a set of molecules (presumably this turns
# off periodic boundary handling — confirm against the asaplib docs).
asapxyz = ASAPXYZ('../data/qm7.xyz', periodic=False)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from asaplib.hypers import universal_soap_hyper
from torch import nn

# Species list reported by asaplib for the loaded structures; the SOAP
# hyperparameters below are generated for this list.
global_species = asapxyz.get_global_species()

# Start from asaplib's 'minimal' universal SOAP preset.
# NOTE(review): dump=True presumably also writes the spec to disk — confirm.
universal_soap = 'minimal'
soap_spec = universal_soap_hyper(global_species, universal_soap, dump=True)

# Override two settings on every entry of the preset.
for spec in soap_spec.values():
    spec['rbf'] = 'gto'
    spec['crossover'] = False

# Keep this as the last expression of the cell so that the notebook displays
# the final (already modified) specification.
soap_spec

In [None]:
# Show the species list returned by asapxyz.get_global_species().
print(global_species)

In [None]:
# Descriptor specification handed to asaplib.

# Reducer settings. Supported reducer types:
# [average], [sum], [moment_average], [moment_sum]
reducer_spec = {
    'reducer1': {
        'reducer_type': 'average',
        'element_wise': False,
    },
}

# The 'avgsoap' descriptor combines the SOAP atomic descriptor spec with the
# reducer defined above.
desc_spec = {
    'avgsoap': {
        'atomic_descriptor': soap_spec,
        'reducer_function': reducer_spec,
    },
}


In [None]:
# Compute descriptors for the whole structures, as specified by `desc_spec`.
# NOTE(review): sbs=[] presumably means "no subset, use every structure" and
# n_process=4 presumably sets the number of worker processes — confirm
# against the asaplib documentation.
asapxyz.compute_global_descriptors(desc_spec_dict=desc_spec,
                                    sbs=[],
                                    keep_atomic=False, # set to True to keep the atomic descriptors
                                    tag='qm7',
                                    n_process=4)

In [None]:
# Collect the computed 'avgsoap' descriptors; they are the model inputs.
# Despite the name, X_train holds every sample: the train/validation split
# is only made later with random_split.
X_train = asapxyz.fetch_computed_descriptors(['avgsoap'])

In [None]:
# Inspect the shape of the descriptor array
# (presumably (n_structures, n_features) — its second axis is later used as
# the network's input size).
np.shape(X_train)

In [None]:
# Name of the property used as the regression target.
fy = 'atomization_energy'
# Fetch that property for the loaded structures.
# NOTE(review): an `extensive` argument was tried here earlier and left
# disabled — confirm whether it is needed for this target.
y_train = asapxyz.get_property(fy)

In [None]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Turn the NumPy arrays into float tensors. The explicit .float() cast on
# the targets is reported to be necessary on some computers.
y_train = torch.from_numpy(y_train).float()
X_train = torch.from_numpy(X_train).float()

# Pair every descriptor with its target, then split 90% / 10% into a
# training and a validation subset.
dataset = TensorDataset(X_train, y_train)
train_ds, val_ds = random_split(dataset, [0.9, 0.1])

# Both loaders use the same batch size and reshuffle their subset.
batch_size = 1
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=True)

In [None]:
class NNModel(nn.Module):
    """Feed-forward network with one sigmoid-activated hidden layer.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of values predicted per sample.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int) -> None:
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the network output for the input batch ``x``.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries.

        Returns:
            Tensor whose last dimension has ``output_size`` entries.
        """
        # torch.sigmoid is the functional form of nn.Sigmoid; using it avoids
        # constructing a new activation module on every forward pass.
        hidden = torch.sigmoid(self.layer1(x))
        return self.layer2(hidden)

# Network dimensions: the input width is taken from the second axis of the
# descriptor array; the hidden width and the single output are fixed here.
hidden_size, output_size = 8, 1
input_size = X_train.shape[1]

nnmodel = NNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

In [None]:
# Display the model so the notebook prints its layer summary.
nnmodel

In [None]:
# Optimisation settings.
learning_rate = 0.00001

# Mean-squared error between predicted and reference target values.
loss_fn = nn.MSELoss(reduction='mean')

optimizer = torch.optim.Adam(nnmodel.parameters(), lr=learning_rate)

num_epochs = 100
log_epochs = 1  # print the loss every `log_epochs` epochs

# Mean training loss of each epoch. The plotting cell that follows reads
# this list, so it has to be filled here.
loss_hist = []

for epoch in range(num_epochs):
    epoch_loss = 0.0

    for x_batch, y_batch in train_loader:
        # 1. Generate predictions; [:, 0] drops the trailing output axis so
        #    the prediction has the same shape as y_batch.
        pred = nnmodel(x_batch)[:, 0]

        # 2. Calculate loss
        loss = loss_fn(pred, y_batch)

        # 3. Compute gradients
        loss.backward()

        # 4. Update parameters using gradients
        optimizer.step()

        # 5. Reset the gradients to zero
        optimizer.zero_grad()

        # The loss is a batch mean, so weight it by the batch size to get a
        # correct per-sample average over the epoch.
        epoch_loss += loss.item() * x_batch.size(0)

    loss_hist.append(epoch_loss / len(train_loader.dataset))

    if epoch % log_epochs == 0:
        # Report the epoch mean rather than the loss of the last mini-batch,
        # which is very noisy for small batch sizes.
        print(f'Epoch {epoch}  Loss {loss_hist[-1]:.4e}')

In [None]:
# Plot the values in `loss_hist` as the training-loss curve; the x-axis is
# labelled by epoch.
fig, ax = plt.subplots(figsize=(8, 5))

ax.plot(loss_hist, lw=3)
ax.set_title('Training loss', size=15)
ax.set_xlabel('Epoch', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)

plt.tight_layout()
plt.show()

## Tips💡

### TODO
