In [2]:
# Mount Google Drive inside the Colab VM so that later cells can reach files
# below the mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [5]:
!ls
%cd gdrive/MyDrive/Colab\ Notebooks
!ls

gdrive	sample_data
/content/gdrive/MyDrive/Colab Notebooks
 Autoencoder.ipynb	      sgd_stability_files
'Boosting experiments'	      Sounak-Paul-FinalProject.ipynb
 Cryo-EM		      Untitled0.ipynb
 FundamentalsOfDL	      Variational_Autoencoder_detailed.ipynb
 GNNs			      Variational_Autoencoder.ipynb
 Good_ResNet_Tutorial.ipynb


In [6]:
# Only do this once
!git clone https://github.com/atomistic-machine-learning/schnetpack.git

Cloning into 'schnetpack'...
remote: Enumerating objects: 10493, done.[K
remote: Counting objects: 100% (1551/1551), done.[K
remote: Compressing objects: 100% (421/421), done.[K
remote: Total 10493 (delta 1232), reused 1246 (delta 1130), pack-reused 8942[K
Receiving objects: 100% (10493/10493), 29.88 MiB | 9.82 MiB/s, done.
Resolving deltas: 100% (6875/6875), done.
Checking out files: 100% (177/177), done.


In [8]:
%cd schnetpack/
!pip install -r requirements.txt
!pip install .

/content/gdrive/MyDrive/Colab Notebooks/schnetpack
Looking in indexes: https://pypi.python.org/simple/
Collecting ase>=3.19
  Downloading ase-3.22.1-py3-none-any.whl (2.2 MB)
[K     |████████████████████████████████| 2.2 MB 12.7 MB/s 
[?25hCollecting tensorboardX
  Downloading tensorboardX-2.5-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 52.1 MB/s 
Collecting pytest-datadir
  Downloading pytest_datadir-1.3.1-py2.py3-none-any.whl (5.9 kB)
Collecting pre-commit
  Downloading pre_commit-2.17.0-py2.py3-none-any.whl (195 kB)
[K     |████████████████████████████████| 195 kB 50.4 MB/s 
[?25hCollecting black
  Downloading black-22.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[K     |████████████████████████████████| 1.4 MB 41.2 MB/s 
Collecting cfgv>=2.0.0
  Downloading cfgv-3.3.1-py2.py3-none-any.whl (7.3 kB)
Collecting identify>=1.0.0
  Downloading identify-2.4.12-py2.py3-none-any.whl (98 kB)
[K     |█████████████████████████

In [None]:
# Optional sanity check: run pytest from the current working directory to test
# whether everything was loaded properly.
!pytest

In [10]:
import schnetpack as spk
import os

# Working directory for the files this tutorial produces.
forcetut = './forcetut'
# exist_ok=True makes the call a no-op when the directory is already there,
# replacing the separate existence check and keeping the cell re-runnable.
os.makedirs(forcetut, exist_ok=True)

In [11]:
from schnetpack.datasets import MD17

# NOTE: this cell takes a long time (it was interrupted in the recorded run),
# so skip it unless the MD17 ethanol dataset is really needed.
ethanol_db_path = os.path.join(forcetut, 'ethanol.db')
ethanol_data = MD17(ethanol_db_path, molecule='ethanol')

# Fetch the entry with index 0 and list the names of its properties.
atoms, properties = ethanol_data.get_properties(0)
property_lines = ['{:s}\n'.format(name) for name in properties.keys()]
print('Loaded properties:\n', *property_lines)

KeyboardInterrupt: ignored

In [12]:
if not os.path.exists('./ethanol_dft.zip'):
    !wget http://quantum-machine.org/gdml/data/xyz/ethanol_dft.zip

if not os.path.exists('./ethanol.xyz'):
    !unzip ./ethanol_dft.zip

# The following code is taking a lot of time, so skip it please
#!spk_parse.py ./ethanol.xyz ./ethanol.db --atomic_properties Properties=species:S:1:pos:R:3:forces:R:3 --molecular_properties energy

--2022-03-18 11:26:25--  http://quantum-machine.org/gdml/data/xyz/ethanol_dft.zip
Resolving quantum-machine.org (quantum-machine.org)... 130.149.80.145
Connecting to quantum-machine.org (quantum-machine.org)|130.149.80.145|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 194215234 (185M) [application/zip]
Saving to: ‘ethanol_dft.zip’


2022-03-18 11:26:29 (44.7 MB/s) - ‘ethanol_dft.zip’ saved [194215234/194215234]

Archive:  ./ethanol_dft.zip
  inflating: ethanol.xyz             


In [22]:
from schnetpack import AtomsData
from ase.io import read
import numpy as np

# Number of leading frames of the trajectory to parse.
N_MOLECULES = 1000

# Load atoms from the xyz file. Only the first N_MOLECULES frames are parsed.
atoms = read('./ethanol.xyz', index=':{:d}'.format(N_MOLECULES))

# ASE stores the xyz comment line as a key of the info dictionary; here that
# key is the energy, which is why the whole dictionary is printed.
print('Energy:', atoms[0].info)
print()

# Parse the properties into a list of dictionaries, one per molecule.
property_list = []
for at in atoms:
    # All properties need to be stored as numpy arrays.
    # Note: the shape for scalars should be (1,), not ()
    # Note: GPUs work best with float32 data
    # The first key of the info dictionary is the energy as a string.
    energy = np.array([float(next(iter(at.info)))], dtype=np.float32)
    property_list.append({'energy': energy})

print('Properties:', property_list[:5])

Energy: {'-97208.40600498248': True}

Properties: [{'energy': array([-97208.41], dtype=float32)}, {'energy': array([-97208.375], dtype=float32)}, {'energy': array([-97208.04], dtype=float32)}, {'energy': array([-97207.5], dtype=float32)}, {'energy': array([-97206.84], dtype=float32)}]


In [25]:
# Start from a fresh database file. The existence check avoids the error that
# removing a missing file produces the first time the cell is run.
# NOTE(review): presumably add_systems appends to an existing database, which
# is why the file is removed first — confirm against the schnetpack version.
new_dataset_path = './new_dataset.db'
if os.path.exists(new_dataset_path):
    os.remove(new_dataset_path)

# Create the dataset with a single property and add the parsed molecules.
new_dataset = AtomsData(new_dataset_path, available_properties=['energy'])
new_dataset.add_systems(atoms, property_list)

print('Number of reference calculations:', len(new_dataset))
print('Available properties:')

for p in new_dataset.available_properties:
    print('-', p)
print()

# Show the shape of every entry stored for the first molecule.
example = new_dataset[0]
print('Properties of molecule with id 0:')

for k, v in example.items():
    print('-', k, ':', v.shape)

Number of reference calculations: 1000
Available properties:
- energy

Properties of molecule with id 0:
- energy : torch.Size([1])
- _atomic_numbers : torch.Size([9])
- _positions : torch.Size([9, 3])
- _neighbors : torch.Size([9, 8])
- _cell : torch.Size([3, 3])
- _cell_offset : torch.Size([9, 8, 3])
- _idx : torch.Size([1])


In [39]:
from ase.visualize import view
# Fetch one entry from the dataset and render it inline with the x3d viewer.
# NOTE(review): this rebinds `atoms` (previously the list read from the xyz
# file) to what get_properties returns — confirm later cells expect that.
molecule_index = 10
atoms, properties = new_dataset.get_properties(molecule_index)
#print('Loaded properties:\n', *['{:s}\n'.format(i) for i in properties.keys()])
view(atoms, viewer='x3d')

In [36]:
# Print every field stored for the dataset entry with index 1.
second_entry = new_dataset[1]
for k, v in second_entry.items():
    print('-', k, ':', v)

- energy : tensor([-97208.3750])
- _atomic_numbers : tensor([6, 6, 8, 1, 1, 1, 1, 1, 1])
- _positions : tensor([[ 1.0958e-02, -5.6557e-01,  6.0480e-06],
        [-1.2826e+00,  2.4957e-01,  1.4012e-03],
        [ 1.1295e+00,  3.1435e-01, -8.7197e-04],
        [ 4.4488e-02, -1.2016e+00,  8.8911e-01],
        [ 7.6492e-03, -1.1856e+00, -8.8519e-01],
        [-1.3380e+00,  8.6895e-01,  8.8439e-01],
        [-1.3187e+00,  8.7079e-01, -8.9766e-01],
        [-2.1451e+00, -4.2465e-01, -5.4017e-03],
        [ 1.9757e+00, -1.5174e-01,  1.1824e-02]])
- _neighbors : tensor([[1, 2, 3, 4, 5, 6, 7, 8],
        [0, 2, 3, 4, 5, 6, 7, 8],
        [0, 1, 3, 4, 5, 6, 7, 8],
        [0, 1, 2, 4, 5, 6, 7, 8],
        [0, 1, 2, 3, 5, 6, 7, 8],
        [0, 1, 2, 3, 4, 6, 7, 8],
        [0, 1, 2, 3, 4, 5, 7, 8],
        [0, 1, 2, 3, 4, 5, 6, 8],
        [0, 1, 2, 3, 4, 5, 6, 7]])
- _cell : tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
- _cell_offset : tensor([[[0., 0., 0.],
         [0., 0