In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# matplotlib.use("Agg")

from ase import Atoms
from ase.build import bulk
from ase.io import read
from agox.databases import Database
from agox.environments import Environment
from agox.utils.graph_sorting import Analysis
from raffle.generator import raffle_generator

import numpy as np
from sklearn.decomposition import PCA
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)

In [None]:
## Configure global matplotlib styling in one place: LaTeX text rendering,
## the 'cmr10' font family at size 12, and mathtext-formatted axis offsets.
# To start from matplotlib's defaults first, one could run:
# matplotlib.rcParams.update(matplotlib.rcParamsDefault)
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "cmr10",
    "font.size": 12,
    "axes.formatter.use_mathtext": True,
})

In [None]:
## Set the run identifiers used to build input and output file names
seed = "0-14"  # seed label embedded in the trajectory, pickle and figure file names
identifier = ""  # optional suffix appended to the output directory name and to figure names
output_directory_prefix = "DOutput"  # prefix of the directory holding the .traj files
# A fixed reference energy can be pinned here instead of being computed from the
# relaxed structures further down:
# min_energy = -9.064090728759766

In [None]:
## Set the descriptors
from agox.models.descriptors.fingerprint import Fingerprint
from agox.models.descriptors import Voronoi

# Host structure read from disk; it is the template the environment is built on.
template = read("../POSCAR_host_gb")

# Confinement region: a copy of the full template cell, anchored at the origin.
confinement_cell = template.cell.copy()
confinement_corner = np.array([0, 0, 0])

environment = Environment(
    template=template,
    symbols="C8",
    box_constraint_pbc=[True, True, True],  # Confinement is periodic in all directions.
    confinement_corner=confinement_corner,
    confinement_cell=confinement_cell,
)

# Structure descriptor used for the PCA projections in later cells.
descriptor = Fingerprint(environment=environment)

# Voronoi graph descriptor (constructed without an environment).
graph_descriptor = Voronoi(
    environment=None,
    n_points=8,
    angle_from_central_atom=20,
    covalent_bond_scale_factor=1.3,
)


In [None]:
def get_furthest_structure_index(pcs, min_energy_index):
    """Return the row index of `pcs` farthest from the reference row.

    Parameters
    ----------
    pcs : numpy array with one row per structure (coordinates in the reduced space).
    min_energy_index : index of the reference row (the minimum-energy structure).

    Returns
    -------
    Index of the row with the largest Euclidean distance to
    `pcs[min_energy_index]`; on ties the first such row is returned (np.argmax).
    """
    offsets = pcs - pcs[min_energy_index]
    return np.argmax(np.linalg.norm(offsets, axis=1))

In [None]:
def get_second_furthest_structure_index(pcs, min_energy_index, furthest_index):
    """Return the row index of `pcs` farthest from two reference rows combined.

    The score of each row is the sum of its Euclidean distances to
    `pcs[min_energy_index]` and to `pcs[furthest_index]`; the index of the
    highest score is returned (first one on ties, as given by np.argmax).
    """
    dist_to_min, dist_to_far = (
        np.linalg.norm(pcs - pcs[anchor], axis=1)
        for anchor in (min_energy_index, furthest_index)
    )
    return np.argmax(dist_to_min + dist_to_far)

In [None]:
## Set the calculators
from chgnet.model import CHGNetCalculator
from ase.calculators.singlepoint import SinglePointCalculator
# Instantiate a CHGNet calculator with its default arguments.
# NOTE(review): neither `calc` nor `SinglePointCalculator` is referenced again in
# the visible cells - confirm whether this cell is still required.
calc = CHGNetCalculator()

In [None]:
## Load the unrelaxed structures (index=":" reads every frame of the trajectory)
unrlxd_traj_path = f"{output_directory_prefix}{identifier}/unrlxd_structures_seed{seed}.traj"
unrlxd_structures = read(unrlxd_traj_path, index=":")

In [None]:
## Load the relaxed structures (index=":" reads every frame of the trajectory)
rlxd_traj_path = f"{output_directory_prefix}{identifier}/rlxd_structures_seed{seed}.traj"
rlxd_structures = read(rlxd_traj_path, index=":")

In [None]:
# Energy per atom of every relaxed structure; the lowest value becomes the
# reference (`min_energy`) that all energy offsets are measured from.
energies_per_atom = [
    atoms.get_potential_energy() / len(atoms)
    for atoms in rlxd_structures
]
min_energy = np.min(energies_per_atom)
rlxd_delta_en_per_atom = np.array(energies_per_atom) - min_energy
print("Relaxed min energy: ", min_energy)

In [None]:
# Energy per atom of every unrelaxed structure, expressed relative to the
# relaxed minimum (`min_energy`). Note that `energies_per_atom` is reused here
# and now holds the unrelaxed values.
energies_per_atom = [
    atoms.get_potential_energy() / len(atoms)
    for atoms in unrlxd_structures
]
unrlxd_delta_en_per_atom = np.asarray(energies_per_atom) - min_energy
print("Unrelaxed min energy: ", np.min(energies_per_atom))

In [None]:
# Warn when the lowest value in `energies_per_atom` differs from `min_energy`
# by more than 0.05.
# NOTE(review): at this point `energies_per_atom` holds the UNRELAXED energies
# (it is reassigned in the unrelaxed-energy cell), so this compares the unrelaxed
# minimum against the relaxed minimum; the printed message does not describe
# that comparison - confirm the intended check.
if abs( np.min(energies_per_atom) - min_energy ) > 5e-2:
  print("Minimum energy per atom is not zero. Check the energy calculation.")

In [None]:
## Set up the PCA
# Two components are requested (later cells plot columns 0 and 1 of the projection).
# NOTE: this object is replaced by the pickled model loaded in the save/load cell.
pca = PCA(n_components=2)

In [None]:
## Select which PCA fit to use. The string is inserted into the pickled-model
## path and the PCA figure file name, and it selects the axis limits of the PCA plot.
rlxd_string = "rlxd"

In [None]:
## Save pca model
import pickle
# Disabled branch: change the condition to True to re-fit the PCA on the
# features of the relaxed structures and pickle the fitted model into the
# current working directory.
if False:
  pca.fit(np.squeeze([arr for arr in descriptor.get_features(rlxd_structures)]))
  with open("pca_model"+identifier+"_all_rlxd_"+str(seed)+".pkl", "wb") as f:
    pickle.dump(pca, f)

## Load pca model
# NOTE: the loaded model replaces whatever `pca` held before. The path points at
# the "_0" model under ../DRAFFLE and does not match the file name the disabled
# branch above would write.
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
with open("../DRAFFLE/pca_model_all_"+rlxd_string+"_0.pkl", "rb") as f:
  pca = pickle.load(f)

In [None]:
## Transform the unrelaxed and relaxed structures to the reduced space.
## Both projections are needed: the PCA figure plots `unrlxd_X_reduced` in its
## left panel and `rlxd_X_reduced` in its right panel, so leaving the unrelaxed
## projection undefined makes a fresh "Restart & Run All" fail with a NameError.
unrlxd_X_reduced = pca.transform(np.squeeze([arr for arr in descriptor.get_features(unrlxd_structures)]))
rlxd_X_reduced = pca.transform(np.squeeze([arr for arr in descriptor.get_features(rlxd_structures)]))

In [None]:
## Locate the relaxed structure with the lowest energy per atom
min_energy_index = rlxd_delta_en_per_atom.argmin()
print("Minimum energy index: ", min_energy_index)

In [None]:
## Get the index of the structure furthest away in the PCA space from the minimum energy structure
# Distances are taken between rows of `rlxd_X_reduced`, i.e. in the projected coordinates.
furthest_idx = get_furthest_structure_index(rlxd_X_reduced, min_energy_index)
print("Furthest structure index:", furthest_idx)

In [None]:
def _project_reference(atoms):
    """Project one reference structure into the fitted PCA space.

    The descriptor features are averaged over axis 0 and reshaped into a
    single-row array before being passed to `pca.transform`.
    """
    mean_features = np.mean(descriptor.get_features(atoms), axis=0)
    return pca.transform(mean_features.reshape(1, -1))


# Reference grain-boundary structures read from disk
gb1 = read("../graphene-gb1.xyz")
gb2 = read("../graphene-gb2.xyz")
gb3 = read("../graphene-gb3.xyz")

# Their coordinates in the reduced (PCA) space
gb1_reduced, gb2_reduced, gb3_reduced = map(_project_reference, (gb1, gb2, gb3))

In [None]:
def get_closest_structure(X_reduced, expected_reduced, energy_per_atom):
    """Return the index of the row of `X_reduced` nearest to `expected_reduced`.

    Parameters
    ----------
    X_reduced : numpy array with one row per structure (reduced-space coordinates).
    expected_reduced : reference point(s), broadcast against `X_reduced`.
    energy_per_atom : numpy array of energies, indexable by row index.

    Returns
    -------
    Index of the nearest row (Euclidean distance). When several rows are exactly
    equidistant, the one with the lowest `energy_per_atom` is chosen.

    Side effect: prints the tuple of tied nearest indices.
    """
    gaps = np.linalg.norm(X_reduced - expected_reduced, axis=1)
    closest_indices = np.nonzero(gaps == gaps.min())
    print("Closest indices: ", closest_indices)
    candidates = closest_indices[0]
    # Break exact-distance ties by energy
    return candidates[np.argmin(energy_per_atom[closest_indices])]

# Relaxed structure nearest (in PCA space) to each reference grain boundary
closest_gb1_index, closest_gb2_index, closest_gb3_index = (
    get_closest_structure(rlxd_X_reduced, reference, rlxd_delta_en_per_atom)
    for reference in (gb1_reduced, gb2_reduced, gb3_reduced)
)

# Report the matched indices first, then the relative energy of each match
for message, value in (
    ("Closest GB1 index: ", closest_gb1_index),
    ("Closest GB2 index: ", closest_gb2_index),
    ("Closest GB3 index: ", closest_gb3_index),
    ("Energy of closest GB1 structure: ", rlxd_delta_en_per_atom[closest_gb1_index]),
    ("Energy of closest GB2 structure: ", rlxd_delta_en_per_atom[closest_gb2_index]),
    ("Energy of closest GB3 structure: ", rlxd_delta_en_per_atom[closest_gb3_index]),
):
    print(message, value)


In [None]:
## PCA scatter figure: unrelaxed structures on the left, relaxed on the right
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 6))
ax_unrlxd, ax_rlxd = axes

plt.subplots_adjust(wspace=0.05, hspace=0)

## Shared colour scale: largest energy offset in either set, capped at 0.4
max_en = min(0.4, max(np.max(unrlxd_delta_en_per_atom), np.max(rlxd_delta_en_per_atom)))
scatter_style = dict(cmap="viridis", vmin=0, vmax=max_en)

## Energy-coloured scatter of both sets
ax_unrlxd.scatter(unrlxd_X_reduced[:, 0], unrlxd_X_reduced[:, 1], c=unrlxd_delta_en_per_atom, **scatter_style)
rlxd_points = ax_rlxd.scatter(rlxd_X_reduced[:, 0], rlxd_X_reduced[:, 1], c=rlxd_delta_en_per_atom, **scatter_style)

## Ring the relaxed structures matched to each reference grain boundary
for gb_index, ring_colour, gb_label in (
    (closest_gb1_index, 'lightblue', 'GB-I'),
    (closest_gb2_index, 'grey', 'GB-II'),
    (closest_gb3_index, 'red', 'GB-III'),
):
    ax_rlxd.scatter(
        rlxd_X_reduced[gb_index, 0], rlxd_X_reduced[gb_index, 1],
        s=200, edgecolor=ring_colour, facecolor='none', linewidth=2, label=gb_label,
    )

## Legend on the relaxed panel, entries listed in reverse plotting order
handles, labels = ax_rlxd.get_legend_handles_labels()
ax_rlxd.legend(
    handles[::-1], labels[::-1],
    facecolor='white', framealpha=1.0, edgecolor='black', fancybox=False,
    loc='upper right', fontsize=20, handletextpad=0.1,
)

## Titles and axis labels (one x label shared by both panels)
fig.text(0.5, 0.0, 'Principal component 1', ha='center', fontsize=20)
ax_unrlxd.set_ylabel('Principal component 2', fontsize=20)
ax_unrlxd.set_title('Unrelaxed', fontsize=20)
ax_rlxd.set_title('Relaxed', fontsize=20)

## Axis limits depend on which PCA fit is in use
xlims, ylims = (
    ([-0.6, 1.3], [-0.5, 1]) if rlxd_string == "rlxd" else ([-5, 13], [-6.5, 13])
)

for ax in axes:
    ax.tick_params(axis='both', direction='in', length=6, labelsize=20)
    ax.yaxis.set_major_locator(MultipleLocator(0.5))
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))
    ax.xaxis.set_minor_locator(AutoMinorLocator(2))
    ax.tick_params(axis='both', which='minor', length=3, direction='in')
    ax.set_xlim(xlims)
    ax.set_ylim(ylims)

## Give the right panel the left panel's in-range x ticks and hide its y tick labels
xticks = ax_unrlxd.get_xticks()
in_range = (xticks >= xlims[0]) & (xticks <= xlims[1])
xticks = xticks[in_range]
ax_rlxd.set_xticks(xticks)
ax_rlxd.set_yticklabels([])

## Mirror the ticks onto the top and right edges of both panels
# NOTE(review): `labelbottom` on axis='y' looks like it was meant to be `labelleft` - confirm.
for ax in axes:
    ax.tick_params(axis='x', labelbottom=True, top=True)
    ax.tick_params(axis='y', labelbottom=True, right=True)

## Same box aspect so both panels get the same width
for ax in axes:
    ax.set_box_aspect(1.7)

## Colourbar (driven by the relaxed scatter) placed next to both panels
cbar = fig.colorbar(rlxd_points, ax=axes, orientation='vertical', fraction=0.085, pad=0.02)
cbar.ax.tick_params(labelsize=20)
cbar.ax.yaxis.set_major_locator(MultipleLocator(0.1))
cbar.ax.yaxis.set_minor_locator(AutoMinorLocator(2))
cbar.set_label('Formation energy (eV/atom)', fontsize=20)

## Save the figure
pca_figure_path = f'graphene-gb_RSS{identifier}_pca_{rlxd_string}_fit_seed{seed}.pdf'
plt.savefig(pca_figure_path, bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor(), edgecolor='none')

In [None]:
# Create a RAFFLE generator; the following cells use only its `distributions`
# member (fingerprint generation and the distribution-grid parameters).
generator = raffle_generator()

In [None]:
# Fingerprints of the relaxed structures matched to GB-III, GB-II and GB-I
fingerprint_gb3, fingerprint_gb2, fingerprint_gb1 = (
    generator.distributions.generate_fingerprint(rlxd_structures[matched_index])
    for matched_index in (closest_gb3_index, closest_gb2_index, closest_gb1_index)
)

In [None]:
# One panel per n-body distribution function (2-body, 3-body, 4-body)
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

distributions = generator.distributions
labelled_fingerprints = (
    (fingerprint_gb3, 'GB-III'),
    (fingerprint_gb2, 'GB-II'),
    (fingerprint_gb1, 'GB-I'),
)

for j, panel in enumerate(axes):
    # Grid of x values from the lower cutoff up to (and including) the upper cutoff
    x = np.arange(
        distributions.cutoff_min[j],
        distributions.cutoff_max[j] + distributions.width[j],
        distributions.width[j],
    )

    for fingerprint, name in labelled_fingerprints:
        panel.plot(x, fingerprint[j], label=name)

    # Recolour the first two curves after plotting; the third keeps the colour
    # it was given by the default cycle.
    panel.lines[0].set_color('black')
    panel.lines[1].set_color('red')
    # panel.set_title(f'{j+2}-body fingerprint')
    panel.legend()

# Axis labels and ranges
axes[0].set_ylabel('Distribution function (arb. units)', fontsize=20)
for panel, xlabel, x_upper, y_upper in zip(
    axes,
    ('Bond length (Å)', '3-body angle (radians)', '4-body angle (radians)'),
    (6, np.pi, np.pi),
    (None, 0.2, 0.1),
):
    panel.set_xlabel(xlabel, fontsize=20)
    panel.set_xlim(0, x_upper)
    panel.set_ylim(0, y_upper)

# Tick-label and legend font sizes
for ax in axes:
    for label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
        label.set_fontsize(16)
    ax.legend(fontsize=16)

# Limit the first panel's major ticks (MaxNLocator(3) on each axis)
axes[0].xaxis.set_major_locator(plt.MaxNLocator(3))
axes[0].yaxis.set_major_locator(plt.MaxNLocator(3))

# Fixed ticks for the two angular panels
for panel, yticks in zip(axes[1:], ([0, 0.1, 0.2], [0, 0.05, 0.1])):
    panel.set_xticks([0, 1, 2, 3])
    panel.set_yticks(yticks)

# Inward-pointing ticks on all four sides, plus minor ticks
for ax in axes:
    for which, length in (('major', 10), ('minor', 5)):
        ax.tick_params(axis='both', which=which, direction='in', length=length, width=1)
    ax.tick_params(axis='x', which='both', bottom=True, top=True)
    ax.tick_params(axis='y', which='both', left=True, right=True)
    ax.xaxis.set_minor_locator(AutoMinorLocator(2))
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))

plt.tight_layout()
plt.show()


In [None]:
# Violin plot of the relaxed-structure energy offsets (rlxd_delta_en_per_atom)
import seaborn as sns
fig, ax = plt.subplots(figsize=(8, 6))

violin_style = dict(inner="quartile", linewidth=1.25, color='lightblue', scale='width', cut=0.0)
sns.violinplot(data=rlxd_delta_en_per_atom, ax=ax, **violin_style)

# Title and axis labels share one font size
for set_text, text in (
    (ax.set_title, 'Relaxed Structures Energy Distribution'),
    (ax.set_ylabel, 'Energy (eV/atom)'),
    (ax.set_xlabel, 'Relaxed Structures'),
):
    set_text(text, fontsize=20)

# Inward ticks; only the major ticks carry an explicit label size
for which, length, extra in (('major', 10, {'labelsize': 20}), ('minor', 5, {})):
    ax.tick_params(axis='both', which=which, direction='in', length=length, width=1, **extra)

ax.yaxis.set_major_locator(MultipleLocator(0.1))
for axis in (ax.xaxis, ax.yaxis):
    axis.set_minor_locator(AutoMinorLocator(2))
plt.tight_layout()

# Optional: horizontal lines marking the energies of the structures matched to gb1, gb2, gb3
# ax.axhline(rlxd_delta_en_per_atom[closest_gb1_index], color='lightblue', linestyle='--', linewidth=1.25, label='GB-I')
# ax.axhline(rlxd_delta_en_per_atom[closest_gb2_index], color='grey', linestyle='--', linewidth=1.25, label='GB-II')
# ax.axhline(rlxd_delta_en_per_atom[closest_gb3_index], color='red', linestyle='--', linewidth=1.25, label='GB-III')

# Layout is recomputed once more right before saving
plt.tight_layout()
violin_figure_path = f'graphene-gb_RAFFLE{identifier}_violin_rlxd_seed{seed}.pdf'
plt.savefig(violin_figure_path, bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor(), edgecolor='none')
plt.show()

In [None]:
def _load_relaxed_energies(traj_path):
    """Read every frame of `traj_path` and compute its energies.

    Returns a tuple (structures, energies per atom as a list, energies per atom
    relative to the module-level `min_energy` as a numpy array).
    """
    structures = read(traj_path, index=":")
    per_atom = [atoms.get_potential_energy() / len(atoms) for atoms in structures]
    return structures, per_atom, np.array(per_atom) - min_energy


# Relaxed structures and energies from the RSS run ...
rss_rlxd_structures, rss_energies_per_atom, rss_delta_en_per_atom = _load_relaxed_energies(
    "DOutput/rlxd_structures_seed0-14.traj"
)

# ... and from the RAFFLE run, both measured against the same `min_energy`
raffle_rlxd_structures, raffle_energies_per_atom, raffle_delta_en_per_atom = _load_relaxed_energies(
    "../DRAFFLE/DOutput/rlxd_structures_seed0-14.traj"
)

In [None]:
import numpy as np
import pandas as pd

# Flatten both energy arrays into plain Python lists
rss_list = np.asarray(rss_delta_en_per_atom).ravel().tolist()
raffle_list = np.asarray(raffle_delta_en_per_atom).ravel().tolist()

# Optional renormalisation so that the area under the curve is 1:
# rss_list = rss_list / np.trapz(rss_list)
# raffle_list = raffle_list / np.trapz(raffle_list)

# Long-format table: one row per structure, tagged with the run it came from
energy_column = rss_list + raffle_list
source_column = ['RSS'] * len(rss_list) + ['RAFFLE'] * len(raffle_list)
df = pd.DataFrame({'Energy per atom': energy_column, 'Source': source_column})


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Expects `df` with 'Energy per atom' and 'Source' columns.
# Layout: a tall KDE panel above one thin rug strip per source, sharing the x axis.
fig, (ax_kde, ax_rug_rss, ax_rug_raffle) = plt.subplots(
    nrows=3, ncols=1, figsize=(6, 6), sharex=True, height_ratios=[6, 1, 1],
    gridspec_kw={"hspace": 0.01},
)

source_colours = {'RSS': 'royalblue', 'RAFFLE': 'forestgreen'}

# KDE curves (skipped for a source whose energies have no spread)
for source, colour in source_colours.items():
    subset = df[df['Source'] == source]
    if not subset['Energy per atom'].std() > 0:
        continue
    sns.kdeplot(
        data=subset,
        x='Energy per atom',
        ax=ax_kde,
        label=source,
        color=colour,
        fill=True,
        alpha=0.3,
        linewidth=2,
        warn_singular=False,
        common_norm=False,
        # bw_method=1.0,
        bw_adjust=0.4,  # scales the bandwidth; values below 1 give a less smoothed curve
    )

# Rug strips: one row of vertical markers per source
for (source, colour), ax in zip(source_colours.items(), (ax_rug_rss, ax_rug_raffle)):
    xvals = df.loc[df['Source'] == source, 'Energy per atom']
    ax.plot(
        xvals, [0] * len(xvals),  # every marker sits on y = 0
        marker='|', linestyle='None',
        markersize=12, markeredgewidth=2,
        color=colour, alpha=0.4,
    )
    # ax.set_yticks(['RSS', 'RAFFLE'])
    # ax.set_yticklabels(['original', 'modified'], fontsize=20)
    ax.tick_params(axis='x', which='major', direction='in', length=6, width=1, labelsize=16)
    ax.tick_params(axis='y', which='major', length=0)
    ax.set_ylim(-1, 1)

    # The bottom spine becomes a thicker x axis running through y = 0
    bottom_spine = ax.spines['bottom']
    bottom_spine.set_position(('data', 0))
    bottom_spine.set_linewidth(1.25)
    # x ticks cross the axis (visible above and below it)
    ax.tick_params(axis='x', which='major', direction='inout', length=10, width=1.25)

    # Drop the remaining frame lines and the y ticks
    for side in ('top', 'left', 'right'):
        ax.spines[side].set_visible(False)
    ax.yaxis.set_ticks([])

    # The source name doubles as the strip's horizontal y label
    ax.set_ylabel(source, fontsize=16, rotation=0, labelpad=20, ha='right', va='center')

# Set on the last rug strip; the x axis is shared, so all three panels follow
ax_rug_raffle.set_xlim(-0.03, 0.4)

# KDE panel cosmetics: no x label, density y label, legend, inward ticks
ax_kde.set_xlabel('')
ax_kde.set_ylabel('Density', fontsize=20)
ax_kde.legend(title='', fontsize=16)
for which, length in (('minor', 4), ('major', 8)):
    ax_kde.tick_params(axis='both', which=which, direction='in', length=length, width=1, labelsize=16)
ax_kde.xaxis.set_minor_locator(AutoMinorLocator(2))
ax_kde.yaxis.set_minor_locator(AutoMinorLocator(2))
ax_kde.yaxis.set_major_locator(MultipleLocator(5))

# X-axis label at the bottom strip only
ax_rug_raffle.set_xlabel('Formation energy (eV/atom)', fontsize=20)
for ax in [ax_rug_rss, ax_rug_raffle]:
    for which, length in (('minor', 8), ('major', 16)):
        ax.tick_params(axis='both', which=which, direction='inout', length=length, width=1.5, labelsize=16)

    # Pull the y label closer to the strip
    ax.yaxis.set_label_coords(-0.02, 0.5)

# Quartile markers on the KDE panel: Q1 dashed, Q2 dash-dotted, Q3 dotted
quartile_styles = (
    (0.25, 'Q1', dict(linestyle='--', dashes=(10, 3))),
    (0.5, 'Q2', dict(linestyle='-.')),
    (0.75, 'Q3', dict(linestyle=':')),
)
for source, colour in source_colours.items():
    energies = df[df['Source'] == source]['Energy per atom']
    for fraction, tag, line_kw in quartile_styles:
        ax_kde.axvline(
            energies.quantile(fraction),
            color=colour, linewidth=1.7, label=f'{source} {tag}', **line_kw,
        )

plt.tight_layout()
# Save the figure
kde_figure_path = f'graphene-GB{identifier}_kde_rug_rlxd_seed{seed}.pdf'
plt.savefig(kde_figure_path, bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor(), edgecolor='none')

plt.show()
