In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# matplotlib.use("Agg")

from ase import Atoms
from ase.build import bulk
from ase.io import read
from agox.databases import Database
from agox.environments import Environment
from agox.utils.graph_sorting import Analysis

import numpy as np
from sklearn.decomposition import PCA
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)

In [None]:
## Global Matplotlib text settings used by every figure in this notebook
# Text is typeset through LaTeX (usetex), so a TeX installation is needed when
# figures are drawn or saved. Tick labels go through mathtext, which is the
# setting Matplotlib recommends alongside the cmr10 font.
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "cmr10",
        "font.size": 12,
        "axes.formatter.use_mathtext": True,
    }
)

In [None]:
## Run selection: these values decide which trajectory files are read and how
## the exported figures are named.
output_directory_prefix = "DOutput"  # directory prefix of the .traj files
identifier = ""  # optional tag appended to the directory and to figure names
seed = 0  # seed index embedded in the file names
# Optional fixed reference energy per atom, currently disabled:
# min_energy = -9.064090728759766

In [None]:
## Set the descriptors
from agox.models.descriptors.fingerprint import Fingerprint
from agox.models.descriptors import Voronoi

# Single source of truth for the cubic lattice parameter: it sizes both the
# empty search cell and the diamond reference, so the two cannot drift apart.
lattice_constant = 3.567  # lattice constant for diamond cubic carbon

# Empty, fully periodic cubic cell handed to the environment as its template.
template = Atoms("", cell=np.eye(3) * lattice_constant, pbc=True)
# Diamond-cubic carbon reference built with the same lattice constant.
diamond = bulk("C", "diamond", a=lattice_constant)

# The confinement region is a copy of the template cell anchored at the origin.
confinement_cell = template.cell.copy()
confinement_corner = np.array([0, 0, 0])
environment = Environment(
    template=template,
    symbols="C8",
    confinement_cell=confinement_cell,
    confinement_corner=confinement_corner,
    box_constraint_pbc=[True, True, True],  # Confinement is periodic in all directions.
)

# Descriptor that supplies the feature vectors for the PCA.
descriptor = Fingerprint(environment=environment)
# Voronoi graph descriptor; constructed without an environment.
graph_descriptor = Voronoi(
    covalent_bond_scale_factor=1.3, n_points=8, angle_from_central_atom=20, environment=None
)


In [None]:
## Set the calculators
# Instantiate a CHGNet calculator with its default constructor arguments.
# NOTE(review): none of the visible cells attaches `calc` to a structure (the
# attachment loops after the trajectory loads are commented out), so the
# energies read later presumably come from the trajectory files — confirm.
from chgnet.model import CHGNetCalculator
calc = CHGNetCalculator()

In [None]:
## Read every frame of the unrelaxed-structure trajectory for this run
unrlxd_traj_path = f"{output_directory_prefix}{identifier}/unrlxd_structures_seed{seed}.traj"
unrlxd_structures = read(unrlxd_traj_path, index=":")  # ":" selects all frames
# No calculator is attached here. To attach one, set `structure.calc = calc`
# on each entry of `unrlxd_structures`.

In [None]:
## Read every frame of the relaxed-structure trajectory for this run
rlxd_traj_path = f"{output_directory_prefix}{identifier}/rlxd_structures_seed{seed}.traj"
rlxd_structures = read(rlxd_traj_path, index=":")  # ":" selects all frames
# No calculator is attached here. To attach one, set `structure.calc = calc`
# on each entry of `rlxd_structures`.

In [None]:
# Per-atom energies of the relaxed structures. Their minimum becomes the
# reference (`min_energy`) that all energy offsets are measured from.
energies_per_atom = [
    atoms.get_potential_energy() / len(atoms) for atoms in rlxd_structures
]
min_energy = np.min(energies_per_atom)
rlxd_delta_en_per_atom = np.asarray(energies_per_atom) - min_energy
print("Relaxed min energy: ", min_energy)

In [None]:
# Per-atom energies of the unrelaxed structures, expressed relative to the
# relaxed-set minimum (`min_energy`) so both sets share one energy scale.
# Note: this rebinds `energies_per_atom`, which held the relaxed values before.
energies_per_atom = [
    atoms.get_potential_energy() / len(atoms) for atoms in unrlxd_structures
]
unrlxd_delta_en_per_atom = np.asarray(energies_per_atom) - min_energy
print("Unrelaxed min energy: ", np.min(energies_per_atom))

In [None]:
## Set up the PCA
# Unfitted two-component PCA. It is only fitted if the (disabled) refit branch
# in the "Save pca model" step runs; in either case `pca` is rebound to an
# unpickled model in the "Load pca model" step before it is used to transform.
pca = PCA(n_components=2)

In [None]:
## Select which pre-fitted PCA model the figures are based on.
# The value is inserted into the name of the pickle file that is loaded, picks
# the axis limits of the PCA figure ("rlxd" vs. anything else), and appears in
# that figure's output file name.
rlxd_string = "rlxd"

In [None]:
## Save pca model
import pickle

# Named switch in place of a bare `if False:`. Set to True to fit `pca` on the
# unrelaxed structures and pickle it in the working directory. The load step
# below still runs afterwards and rebinds `pca` to the unpickled model.
refit_pca = False
if refit_pca:
    pca.fit(np.squeeze(list(descriptor.get_features(unrlxd_structures))))
    with open(f"pca_model{identifier}_all_unrlxd_{seed}.pkl", "wb") as f:
        pickle.dump(pca, f)

## Load pca model
# SECURITY: unpickling can execute arbitrary code; only load files you trust.
# The file name is selected by `rlxd_string`; its trailing index is fixed to 0
# and does not follow `seed`.
with open(f"../DRAFFLE/pca_model_all_{rlxd_string}_0.pkl", "rb") as f:
    pca = pickle.load(f)

In [None]:
## Project the unrelaxed and relaxed structures into the reduced PCA space
def _pca_project(structures):
    """Return the PCA coordinates of ``structures`` computed from their descriptor features."""
    # NOTE(review): np.squeeze drops every length-1 axis, so a single input
    # structure would presumably collapse to a 1-D array — confirm if needed.
    feature_rows = np.squeeze(list(descriptor.get_features(structures)))
    return pca.transform(feature_rows)


unrlxd_X_reduced = _pca_project(unrlxd_structures)
rlxd_X_reduced = _pca_project(rlxd_structures)

In [None]:
## Position of the lowest-energy entry among the relaxed structures
min_energy_index = rlxd_delta_en_per_atom.argmin()
print(min_energy_index)

In [None]:
## Plot the PCA: unrelaxed (left) and relaxed (right) structures in the reduced
## space, coloured by energy above the relaxed minimum.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 6))

fig.subplots_adjust(wspace=0.05, hspace=0)

## Upper end of the shared colour scale, capped at 3.5
max_en = min(3.5, max(np.max(unrlxd_delta_en_per_atom), np.max(rlxd_delta_en_per_atom)))
colour_scale = dict(cmap="viridis", vmin=0, vmax=max_en)

## Energy-coloured scatter of both structure sets. The right-hand handle is
## kept so the colourbar does not have to look it up by collection index.
axes[0].scatter(unrlxd_X_reduced[:, 0], unrlxd_X_reduced[:, 1], c=unrlxd_delta_en_per_atom, **colour_scale)
rlxd_scatter = axes[1].scatter(rlxd_X_reduced[:, 0], rlxd_X_reduced[:, 1], c=rlxd_delta_en_per_atom, **colour_scale)

## Ring the lowest-energy relaxed structure on both panels
for ax in axes:
    ax.scatter(rlxd_X_reduced[min_energy_index, 0], rlxd_X_reduced[min_energy_index, 1], s=200, edgecolor='red', facecolor='none', linewidth=2, label='diamond')

## Legend on the right-hand panel only
handles, labels = axes[1].get_legend_handles_labels()
axes[1].legend(handles[::-1], labels[::-1], facecolor='white', framealpha=1.0, edgecolor='black', fancybox=False, loc='upper right', fontsize=20, handletextpad=0.1)

## Add labels
fig.text(0.5, 0.0, 'Principal component 1', ha='center', fontsize=20)
axes[0].set_ylabel('Principal component 2', fontsize=20)
axes[0].set_title('Unrelaxed')
axes[1].set_title('Relaxed')

## Axis limits depend on which fitted PCA model was loaded
if rlxd_string == "rlxd":
    xlims = [-11, 8]
    ylims = [-5, 6]
else:
    xlims = [-5, 13]
    ylims = [-6.5, 13]

for ax in axes:
    ax.tick_params(axis='both', direction='in', length=6, labelsize=20)
    ax.yaxis.set_major_locator(MultipleLocator(3))
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))
    ax.xaxis.set_minor_locator(AutoMinorLocator(2))
    ax.tick_params(axis='both', which='minor', length=3, direction='in')
    ax.set_xlim(xlims)
    ax.set_ylim(ylims)

## Unify tick labels: reuse the left panel's in-range x ticks on the right panel
xticks = axes[0].get_xticks()
xticks = xticks[(xticks >= xlims[0]) & (xticks <= xlims[1])]

axes[1].set_xticks(xticks)
axes[1].set_yticklabels([])
for ax in axes:
    ax.tick_params(axis='x', labelbottom=True, top=True)
# The y axis is controlled by labelleft/labelright. Matplotlib silently drops
# labelbottom when axis='y', so the previous labelbottom=True had no effect.
# The right panel keeps its y labels hidden, matching set_yticklabels([]).
axes[0].tick_params(axis='y', labelleft=True, right=True)
axes[1].tick_params(axis='y', labelleft=False, right=True)

## Make axes[0] and axes[1] the same width
for ax in axes:
    ax.set_box_aspect(1.7)

## Add colorbar next to the axes
cbar = fig.colorbar(rlxd_scatter, ax=axes, orientation='vertical', fraction=0.085, pad=0.02)
cbar.ax.tick_params(labelsize=20)
cbar.ax.yaxis.set_major_locator(MultipleLocator(1))
cbar.ax.yaxis.set_minor_locator(AutoMinorLocator(2))
cbar.set_label('Formation energy (eV/atom)', fontsize=20)

## Save the figure
fig.savefig(f'C_RSS{identifier}_pca_{rlxd_string}_fit_seed{seed}.pdf', bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor(), edgecolor='none')

In [None]:
# DRAFFLE reference data: relaxed structures and their energies per atom
raffle_rlxd_structures = read("../DRAFFLE/DOutput/rlxd_structures_seed0.traj", index=":")
raffle_energies_per_atom = [
    atoms.get_potential_energy() / len(atoms) for atoms in raffle_rlxd_structures
]
# Offsets are measured from `min_energy`, the minimum of the relaxed set loaded
# earlier in this notebook, not from the DRAFFLE minimum.
raffle_delta_en_per_atom = np.asarray(raffle_energies_per_atom) - min_energy

In [None]:
import numpy as np
import pandas as pd

# Flatten both offset arrays into plain Python lists
rlxd_list = np.ravel(rlxd_delta_en_per_atom).tolist()
raffle_list = np.ravel(raffle_delta_en_per_atom).tolist()

# Long-format table: one row per structure, labelled with the method it came
# from ('RSS' for this notebook's relaxed set, 'RAFFLE' for the DRAFFLE set).
energy_column = rlxd_list + raffle_list
source_column = ['RSS'] * len(rlxd_list) + ['RAFFLE'] * len(raffle_list)
df = pd.DataFrame({'Energy per atom': energy_column, 'Source': source_column})


In [None]:
# Violin plot comparing the relaxed-structure energy distributions of the two
# sources in `df` (RSS and RAFFLE).
import seaborn as sns

fig, ax = plt.subplots(figsize=(6, 6))
sns.violinplot(
    data=df,
    x='Source', hue='Source',  # hue mirrors x so each source gets its own palette colour
    y='Energy per atom', ax=ax,
    inner='quartile',  # draw the quartiles inside each violin
    linewidth=1.25,
    palette=['lightblue', 'lightgreen'],
    cut=0,  # keep each violin within the range of the observed data
    legend=False,
)

ax.set_ylabel('Formation energy (eV/atom)', fontsize=20)
ax.set_xlabel('')
ax.tick_params(axis='both', which='major', direction='in', length=10, width=1, labelsize=20)
ax.tick_params(axis='both', which='minor', direction='in', length=5, width=1)
ax.set_ylim(0.0)  # only the lower limit is pinned; the upper limit stays automatic
ax.yaxis.set_major_locator(MultipleLocator(1))
ax.xaxis.set_minor_locator(AutoMinorLocator(2))
ax.yaxis.set_minor_locator(AutoMinorLocator(2))

# Lay out once, then export and display that same layout. The former second
# tight_layout() after saving was redundant and has been removed.
fig.tight_layout()
fig.savefig(f'C_RSS{identifier}_violin_rlxd_seed{seed}.pdf', bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor(), edgecolor='none')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# KDE + rug comparison of the two sources. Requires `df` with the columns
# 'Energy per atom' and 'Source' from the DataFrame cell.
sources = ['RSS', 'RAFFLE']
colours = ['royalblue', 'forestgreen']

fig, (ax_kde, ax_rug_rss, ax_rug_raffle) = plt.subplots(
    3, 1, figsize=(6, 6), sharex=True, height_ratios=[6, 1, 1],
    gridspec_kw={"hspace": 0.01}
)
rug_axes = [ax_rug_rss, ax_rug_raffle]

# KDE plot: one filled density curve per source
for source, colour in zip(sources, colours):
    subset = df[df['Source'] == source]
    # Skip sources whose energies have no spread (std is 0 or NaN)
    if subset['Energy per atom'].std() > 0:
        sns.kdeplot(
            data=subset,
            x='Energy per atom',
            fill=True,
            alpha=0.3,
            linewidth=2,
            color=colour,
            ax=ax_kde,
            label=source,
            warn_singular=False,
            common_norm=False,
            bw_adjust=0.4,  # scales the default bandwidth by 0.4
        )

# Rug plot: one strip per source, drawn as vertical tick markers along y = 0
for source, colour, ax in zip(sources, colours, rug_axes):
    xvals = df[df['Source'] == source]['Energy per atom']
    ax.plot(
        xvals, [0] * len(xvals),
        marker='|', linestyle='None',
        markersize=12, markeredgewidth=2,
        color=colour, alpha=0.4
    )
    ax.set_ylim(-1, 1)
    # Move the x-axis line to y = 0 (through the markers) and thicken it
    ax.spines['bottom'].set_position(('data', 0))
    ax.spines['bottom'].set_linewidth(1.25)
    # Remove the remaining borders and all y ticks
    for side in ('top', 'left', 'right'):
        ax.spines[side].set_visible(False)
    ax.yaxis.set_ticks([])
    # Horizontal source name as the y label, pulled in close to the axis
    ax.set_ylabel(source, fontsize=16, rotation=0, labelpad=20, ha='right', va='center')
    ax.yaxis.set_label_coords(-0.02, 0.5)
    # Ticks straddle the axis line. These are the settings that were
    # effective before; earlier tick_params passes were fully overridden.
    ax.tick_params(axis='both', which='minor', direction='inout', length=8, width=1.5, labelsize=16)
    ax.tick_params(axis='both', which='major', direction='inout', length=16, width=1.5, labelsize=16)

# The x axis is shared, so setting the limits once covers all three panels.
# (Previously this went through the leaked loop variable `ax`.)
ax_kde.set_xlim(-0.24, 2.25)

# Remove KDE x-axis label and set density label
ax_kde.set_xlabel('')
ax_kde.set_ylabel('Density', fontsize=20)
# The legend is built before the quartile lines are drawn, so it lists only
# the KDE curves.
ax_kde.legend(title='', fontsize=16)
ax_kde.tick_params(axis='both', which='minor', direction='in', length=4, width=1, labelsize=16)
ax_kde.tick_params(axis='both', which='major', direction='in', length=8, width=1, labelsize=16)
ax_kde.xaxis.set_minor_locator(AutoMinorLocator(2))
ax_kde.yaxis.set_minor_locator(AutoMinorLocator(2))
ax_kde.yaxis.set_major_locator(MultipleLocator(1))

# X-axis label at bottom only
ax_rug_raffle.set_xlabel('Formation energy (eV/atom)', fontsize=20)

# Quartile lines on the KDE panel, one linestyle per quartile
for source, colour in zip(sources, colours):
    subset = df[df['Source'] == source]
    q1, q2, q3 = subset['Energy per atom'].quantile([0.25, 0.5, 0.75])

    ax_kde.axvline(q1, color=colour, linestyle='--', linewidth=1.7, label=f'{source} Q1', dashes=(10, 3))
    ax_kde.axvline(q2, color=colour, linestyle='-.', linewidth=1.7, label=f'{source} Q2')
    ax_kde.axvline(q3, color=colour, linestyle=':',  linewidth=1.7, label=f'{source} Q3')
    # TODO: axvline spans the full panel height; clipping each line to the
    # height of the density curve is not implemented.

fig.tight_layout()
# Save the figure
fig.savefig(f'C{identifier}_kde_rug_rlxd_seed{seed}.pdf', bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor(), edgecolor='none')

plt.show()
