In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# matplotlib.use("Agg")

from ase import Atoms
from ase.build import bulk
from ase.io import read
from agox.databases import Database
from agox.environments import Environment
from agox.utils.graph_sorting import Analysis

import numpy as np
from sklearn.decomposition import PCA
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)

In [None]:
## Set up the plotting environment: LaTeX text rendering, 12 pt 'cmr10' font,
## and mathtext in the axis formatters
# matplotlib.rcParams.update(matplotlib.rcParamsDefault)
plt.rcParams.update({
    "text.usetex": True,
    "font.family": 'cmr10',
    "font.size": 12,
    "axes.formatter.use_mathtext": True,
})

In [None]:
## Select which run to analyse
# `seed` and `identifier` are interpolated into the input paths
# ("DOutput<identifier>/..._seed<seed>...") and into the name of the saved figure.
seed = 0
identifier = "4"
# min_energy = -9.064090728759766

In [None]:
## Set the descriptors
# SOAP descriptor built for carbon-only structures with r_cut=5.0; it is used
# below to compute the local features that are averaged into 'super atoms'.
from agox.models.descriptors import SOAP
local_descriptor = SOAP.from_species(["C"], r_cut=5.0)

In [None]:
## Set the calculators
# A default-constructed CHGNetCalculator is assigned to the loaded structures and
# to the diamond/graphite references; SinglePointCalculator is imported so that
# pre-computed energies can be attached to structures in a later cell.
from chgnet.model import CHGNetCalculator
from ase.calculators.singlepoint import SinglePointCalculator
calc = CHGNetCalculator()

In [None]:
## Load every frame of the unrelaxed-structure trajectory for this run
unrlxd_traj_path = "DOutput"+identifier+"/unrlxd_structures_seed"+str(seed)+".traj"
unrlxd_structures = read(unrlxd_traj_path, index=":")
# Attach the shared calculator to each frame
for atoms in unrlxd_structures:
    atoms.calc = calc

In [None]:
## Load every frame of the relaxed-structure trajectory for this run
rlxd_traj_path = "DOutput"+identifier+"/rlxd_structures_seed"+str(seed)+".traj"
rlxd_structures = read(rlxd_traj_path, index=":")
# Attach the shared calculator to each frame
for atoms in rlxd_structures:
    atoms.calc = calc

In [None]:
# Read the pre-computed energies from energies_{unrlxd,rlxd}_seed<seed>.txt and attach
# them to the respective structures using a SinglePointCalculator.
# Each non-empty line of those files has the form "index energy".
def _attach_energies_from_file(structures, filename):
    """Attach the energies listed in `filename` to the entries of `structures`.

    Parameters
    ----------
    structures : list
        Structures indexed by the integer in the first column of the file.
    filename : str
        Path of a whitespace-separated text file with one "index energy"
        pair per line. Blank lines are skipped.

    Notes
    -----
    The energy read from the file is multiplied by the number of atoms in the
    structure before being stored, i.e. the file value is treated as an energy
    per atom. The structure's existing calculator is replaced.
    """
    with open(filename) as f:
        for line in f:
            # A blank (e.g. trailing) line would otherwise break the two-value unpacking
            if not line.strip():
                continue
            index, energy = line.split()
            atoms = structures[int(index)]
            atoms.calc = SinglePointCalculator(atoms, energy=float(energy) * len(atoms))


filename = "DOutput"+identifier+"/energies_unrlxd_seed"+str(seed)+".txt"
_attach_energies_from_file(unrlxd_structures, filename)

filename = "DOutput"+identifier+"/energies_rlxd_seed"+str(seed)+".txt"
_attach_energies_from_file(rlxd_structures, filename)

In [None]:
# Reference carbon structures: diamond is built in place, graphite is read from file;
# both are evaluated with the shared calculator.
diamond = bulk("C", "diamond", a=3.567)  # Lattice constant for diamond cubic carbon
graphite = read("graphite.vasp")
for reference in (diamond, graphite):
    reference.calc = calc

# Total energies first, then the per-atom values
diamond_energy = diamond.get_potential_energy()
graphite_energy = graphite.get_potential_energy()
diamond_energy_per_atom = diamond_energy / len(diamond)
graphite_energy_per_atom = graphite_energy / len(graphite)

In [None]:
# Energies per atom of the relaxed structures; their minimum becomes the
# reference (`min_energy`) that the energy differences are measured from.
energies_per_atom = []
for structure in rlxd_structures:
    energies_per_atom.append(structure.get_potential_energy() / len(structure))
min_energy = np.min(energies_per_atom)
rlxd_delta_en_per_atom = np.asarray(energies_per_atom) - min_energy
print("Relaxed min energy: ", min_energy)

In [None]:
# Energies per atom of the unrelaxed structures, expressed relative to the
# previously computed `min_energy`. Note that this overwrites `energies_per_atom`.
energies_per_atom = []
for structure in unrlxd_structures:
    energies_per_atom.append(structure.get_potential_energy() / len(structure))
unrlxd_delta_en_per_atom = np.asarray(energies_per_atom) - min_energy
print("Unrelaxed min energy: ", np.min(energies_per_atom))

In [None]:
# Sanity check: warn when the smallest value in `energies_per_atom` differs from
# `min_energy` by more than 0.05.
# NOTE(review): `energies_per_atom` is reassigned by the unrelaxed-energy cell, so on a
# top-to-bottom run this compares the unrelaxed minimum with the relaxed minimum — confirm
# that this is the intended comparison.
min_energy_gap = abs(np.min(energies_per_atom) - min_energy)
if min_energy_gap > 5e-2:
    print("Minimum energy per atom is not zero. Check the energy calculation.")

In [None]:
## Set up the PCA
# Two components are kept; they are used as the x and y coordinates of the scatter plots.
pca = PCA(n_components=2)

In [None]:
## Choose which saved PCA model is used for the projection
# `rlxd_string` is interpolated into the filename of the pickled PCA model that is
# loaded, selects between the two sets of hard-coded axis limits, and appears in the
# name of the saved figure. It does not change which descriptors the PCA is fitted on
# in this notebook (the fit always uses the relaxed super atoms).
# NOTE(review): the alternative value is presumably "unrlxd" — confirm.
rlxd_string = "rlxd"

In [None]:
## Get the 'super atom' descriptors for the unrelaxed structures:
## the mean over axis 0 of each structure's local-descriptor features
## (presumably one feature row per atom — confirm against the descriptor)
unrlxd_super_atoms = [
    np.mean(local_descriptor.get_features(atoms), axis=0)
    for atoms in unrlxd_structures
]

In [None]:
## Get the 'super atom' descriptors for the relaxed structures:
## the mean over axis 0 of each structure's local-descriptor features
## (presumably one feature row per atom — confirm against the descriptor)
rlxd_super_atoms = [
    np.mean(local_descriptor.get_features(atoms), axis=0)
    for atoms in rlxd_structures
]

In [None]:
## Fit the PCA on the relaxed super atoms and save the fitted model
import pickle

# Switch for the (re)fit; when False the model is only loaded from disk below.
refit_pca = True
if refit_pca:
    pca.fit(np.squeeze(rlxd_super_atoms))
    with open("pca_model_all_rlxd_"+str(seed)+".pkl", "wb") as model_file:
        pickle.dump(pca, model_file)

## Load the PCA model that is used for all projections below
# NOTE(review): the model is saved under the literal "rlxd" and the current `seed`, but
# loaded using `rlxd_string` and the literal seed 0. The two names coincide only when
# seed == 0 and rlxd_string == "rlxd" — confirm that loading the seed-0 model is intended.
# pickle.load executes arbitrary code from the file; only load trusted model files.
with open("pca_model_all_"+rlxd_string+"_0.pkl", "rb") as model_file:
    pca = pickle.load(model_file)

In [None]:
# Super atom descriptors (axis-0 mean of the local features) for the graphite and
# diamond references, each wrapped in a one-element list like the structure lists.
graphite_features = local_descriptor.get_features(graphite)
graphite_super_atoms = [ np.mean(graphite_features, axis=0) ]

diamond_features = local_descriptor.get_features(diamond)
diamond_super_atoms = [ np.mean(diamond_features, axis=0) ]

In [None]:
## Transform the unrelaxed and relaxed structures to the reduced space
def _as_sample_matrix(super_atoms):
    """Stack a list of super-atom descriptors into a 2-D (n_samples, n_features) array.

    The sample axis is always kept, so a list holding a single descriptor yields
    one row instead of collapsing to a 1-D array (which `pca.transform` would
    reject). Any extra length-1 axes of the individual descriptors are flattened
    into the feature axis.
    """
    return np.asarray(super_atoms).reshape(len(super_atoms), -1)


unrlxd_X_reduced = pca.transform(_as_sample_matrix(unrlxd_super_atoms))
rlxd_X_reduced = pca.transform(_as_sample_matrix(rlxd_super_atoms))
# The references are one-element lists, giving a single-row projection each
graphite_X_reduced = pca.transform(_as_sample_matrix(graphite_super_atoms))
diamond_X_reduced = pca.transform(_as_sample_matrix(diamond_super_atoms))

In [None]:
## Position of the lowest-energy entry among the relaxed structures
min_energy_index = rlxd_delta_en_per_atom.argmin()
print(min_energy_index)

In [None]:
## Build the two-panel PCA figure: unrelaxed on the left, relaxed on the right
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 6))
fig.subplots_adjust(wspace=0.05, hspace=0)

## Shared upper limit of the colour scale, capped at 3.5
largest_delta = max(np.max(unrlxd_delta_en_per_atom), np.max(rlxd_delta_en_per_atom))
max_en = min(3.5, largest_delta)

## Scatter the projected structures, coloured by their energy difference per atom
panels = (
    (axes[0], unrlxd_X_reduced, unrlxd_delta_en_per_atom),
    (axes[1], rlxd_X_reduced, rlxd_delta_en_per_atom),
)
for ax, coords, delta_en in panels:
    ax.scatter(coords[:, 0], coords[:, 1], c=delta_en, cmap="viridis", vmin = 0, vmax = max_en)

## Mark the diamond and graphite references on both panels (open red circles,
## diamond half-transparent)
reference_markers = (
    ('diamond', diamond_X_reduced, [1.0, 0.0, 0.0, 0.5]),
    ('graphite', graphite_X_reduced, [1.0, 0.0, 0.0, 1.0]),
)
for ax in axes:
    for ref_label, ref_point, ref_edge in reference_markers:
        ax.scatter(ref_point[0,0], ref_point[0,1], s=200, edgecolor=ref_edge, facecolor='none', linewidth=2, label=ref_label)

## Legend on the right-hand panel only, with the entries in reversed order
handles, labels = axes[1].get_legend_handles_labels()
axes[1].legend(handles[::-1], labels[::-1], facecolor='white', framealpha=1.0, edgecolor='black', fancybox=False, loc='upper right', fontsize=20, handletextpad=0.1)

## Axis labels and panel titles (the x label is shared and placed on the figure)
fig.text(0.5, 0.0, 'Principal component 1', ha='center', fontsize=20)
axes[0].set_ylabel('Principal component 2', fontsize=20)
for ax, panel_title in zip(axes, ('Unrelaxed', 'Relaxed')):
    ax.set_title(panel_title, fontsize=20)

## Hard-coded axis limits, chosen according to rlxd_string
if rlxd_string != "rlxd":
    xlims, ylims = [-5, 13], [-6.5, 13]
else:
    xlims, ylims = [-310, 310], [-53, 53]

## Inward-pointing major and minor ticks and identical limits on both panels
for ax in axes:
    ax.tick_params(axis='both', direction='in', length=6, labelsize=20)
    # ax.yaxis.set_major_locator(MultipleLocator(3))
    for axis in (ax.yaxis, ax.xaxis):
        axis.set_minor_locator(AutoMinorLocator(2))
    ax.tick_params(axis='both', which='minor', length=3, direction='in')
    ax.set_xlim(xlims)
    ax.set_ylim(ylims)

## Unify tick labels: reuse the left panel's x ticks that lie within the limits
xticks = axes[0].get_xticks()
xticks = xticks[(xticks >= xlims[0]) & (xticks <= xlims[1])]
axes[1].set_xticks(xticks)

## Both panels get x tick labels at the bottom and tick marks on the top edge
for ax in axes:
    ax.tick_params(axis='x', labelbottom=True, top=True)

## Tick marks on the right edge of both panels; y tick labels on the left panel only.
# `labelbottom` has no effect on the y axis (tick_params discards it there), so the
# y tick labels are controlled with `labelleft`.
axes[1].set_yticklabels([])
axes[0].tick_params(axis='y', labelleft=True, right=True)
axes[1].tick_params(axis='y', labelleft=False, right=True)

## Make axes[0] and axes[1] the same shape
for ax in axes:
    ax.set_box_aspect(1.7)

## Add one colorbar next to both axes, taken from the first collection on the
## right-hand panel
energy_mappable = axes[1].collections[0]
cbar = fig.colorbar(energy_mappable, ax=axes, orientation='vertical', fraction=0.085, pad=0.02)
cbar.ax.tick_params(labelsize=20)
cbar_axis = cbar.ax.yaxis
cbar_axis.set_major_locator(MultipleLocator(1))
cbar_axis.set_minor_locator(AutoMinorLocator(2))
cbar.set_label('Formation energy (eV/atom)', fontsize=20)

## Save the figure; the name encodes identifier, rlxd_string and seed
figure_name = 'C_RAFFLE'+identifier+'_pca_'+rlxd_string+'_fit_seed'+str(seed)+'.pdf'
plt.savefig(figure_name, bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor(), edgecolor='none')

In [None]:
# Display how many relaxed structures were loaded
len(rlxd_structures)