# Imports and Settings

In [1]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import os
from scipy.spatial.distance import squareform, cdist
from scipy.spatial import distance_matrix
import time
from numpy.testing import *
from ase.db import connect

In [2]:
# Relative paths to the three ISO17 ASE databases that are converted below.
# reference.db is the source of the "train" arrays; the two test_*.db files
# are the sources of the "within" and "other" arrays.
train_db = "./Dataset/iso17/reference.db"
within_db = "./Dataset/iso17/test_within.db"
other_db = "./Dataset/iso17/test_other.db"

# Transformation Function

In [3]:
def db_to_numpy(path_to_db):
    """Convert every row of an ASE database into a per-molecule feature matrix.

    For each row the pairwise distance matrix of the atomic positions is
    computed (scipy's ``distance_matrix`` with its default norm), the zero
    diagonal is removed so each atom keeps its distances to the *other*
    atoms, and the atomic numbers are appended as a final column.

    Parameters
    ----------
    path_to_db : str
        Path handed to ``ase.db.connect``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(features, energies)``. ``features`` stacks one
        ``(n_atoms, n_atoms)`` block per row: ``n_atoms - 1`` distances plus
        one atomic-number column. ``energies`` holds each row's
        ``total_energy``.
        NOTE(review): stacking assumes every row has the same atom count --
        confirm for databases other than ISO17.
    """
    features = []
    targets = []
    with connect(path_to_db) as database:
        for entry in database.select():
            coords = entry['positions']
            # Column vector so it can be stacked next to the distance columns.
            atomic_numbers = entry['numbers'][:, np.newaxis]
            targets.append(entry['total_energy'])

            pairwise = distance_matrix(coords, coords)
            n_atoms = pairwise.shape[0]
            # Boolean mask selecting everything except the self-distances.
            off_diagonal = ~np.eye(n_atoms, dtype=bool)
            pairwise = pairwise[off_diagonal].reshape(n_atoms, -1)

            features.append(np.hstack((pairwise, atomic_numbers)))
    return np.array(features), np.array(targets)

# Perform Transformation

In [4]:
# Convert each ISO17 split with db_to_numpy and save its feature and energy
# arrays. The three splits go through identical steps, so they share one loop
# instead of three copy-pasted stanzas. After the loop, `molecules`,
# `energies`, `data_path` and `label_path` hold the values of the last split
# ("other").
for db_path, split_name in (
    (train_db, 'train'),
    (within_db, 'within'),
    (other_db, 'other'),
):
    molecules, energies = db_to_numpy(db_path)
    data_path = './Dataset/iso17_{}_X'.format(split_name)
    label_path = './Dataset/iso17_{}_Y'.format(split_name)
    # np.save appends the '.npy' extension because these names lack one.
    np.save(data_path, molecules)
    np.save(label_path, energies)