In [3]:
import os
import os.path as path
import csv
import numpy as np

In [4]:
dataset_path = r"./dat/data-set-2016-TiO2/"

In [9]:
def check_periodics():
    count = 0

    # ripped and slightly modified from xsf_clean.py
    for s_no in range(1, 7816):
        file = path.join(
            dataset_path,
            f"structure{str(s_no).zfill(4)}.xsf"
        )
    
        with open(file, "r") as f:
            lines = f.readlines()
            if "PRIMVEC\n" in lines:
                count += 1

    print(f"non-periodic file count: {count}")

check_periodics()

non-periodic file count: 7815


In [5]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
# suppress tensorflow warnings
import tensorflow as tf
import keras

from keras.models import Sequential 
from keras import Input
from keras.layers import Dense, \
                         Dropout

In [6]:
class BPNN(keras.Model):
    """
    The Model discussed in the 2007 paper by
    Behler and Parinello.

    This class contains an adaptive model which is
    the subnet for one atom.

    The subnet takes all the atom coordinates,
    and outputs the atomic contribution of energy
    """
    def __init__(self, layers: list, num_syms = 14):
        super().__init__()

        # properties of the subnet
        self.subnet   = Sequential([
            Input(shape = (num_syms,)),
            *layers
        ])
        self.num_syms = num_syms

    def call(self, inputs, training = False):
        """
        Feed-forward algorithm for the model.
        split each row and feed to the subnet
        for each atom and applies to the same layers.
        """
        # splits the rows
        syms = tf.unstack(inputs, axis = 1)

        sym_e_contribution = []
        for sym in syms:
            # feed to subnet
            subnet_out = self.subnet(sym, training = training)
            sym_e_contribution.append(subnet_out)

        # turns the list into another tensor
        sym_preproc = tf.stack(sym_e_contribution, axis = 1)

        # add all the values in the tensor
        return tf.reduce_sum(sym_preproc)

In [7]:
import pandas as pd
from sklearn.utils import shuffle

N_SYSTEMS = 61

# read all systems
X = np.array(
    [
        pd.read_csv(
            f"./dat/symmetries/sym{str(idx).zfill(4)}.csv"
        ).to_numpy()
        for idx in range(1, N_SYSTEMS + 1)
    ]
)

# read energy file
y = pd.read_csv(
    "./dat/energies.csv"
)[0:N_SYSTEMS]

In [6]:
N_atoms = np.array([np.shape(x)[0] for x in X])
np.unique(N_atoms)

array([95])

In [8]:
print(np.shape(X))
X, y = shuffle(X, y)

lo = int(0.6 * N_SYSTEMS)
hi = int(0.8 * N_SYSTEMS)

X_train = X[:lo]
X_val   = X[lo:hi]
X_test  = X[hi:]

y_train = y[:lo]
y_val   = y[lo:hi]
y_test  = y[hi:]

(61, 95, 14)


In [26]:
model = BPNN([
        Dense(20, activation = "relu"),
        Dense(20, activation = "relu")
    ])

model.compile(
    optimizer = keras.optimizers.RMSprop(),
    loss = keras.losses.MeanAbsoluteError(),
    metrics = [keras.metrics.RootMeanSquaredError()]
)

AttributeError: module 'keras.losses' has no attribute 'RootMeanSquaredError'

In [25]:
n_epochs = 60
batch_sz = 2

bpnn = model.fit(
    X_train, y_train,
    batch_size = batch_sz,
    epochs = n_epochs,
    validation_data = (X_val, y_val),
    verbose = 0
)

In [24]:
import matplotlib.pyplot as plt

xax = np.arange(1, n_epochs + 1)

fig, ax = plt.subplots(nrows= 1, ncols=1)
ax.plot(bpnn.history["root_mean_squared_error"])
ax.plot(bpnn.history["val_root_mean_squared_error"])

ax.plot(bpnn.history["loss"])
ax.plot(bpnn.history["val_loss"])

ax.set_yscale("log")
ax.set_xscale("log")
plt.show()


NameError: name 'n_epochs' is not defined

In [1]:
from SymmetryCalculator import SymmetryCalculator

In [2]:
sym_calc = SymmetryCalculator()
sym_calc.write_symmetries(r"./dat/symmetries/", 1)