In [None]:
import matplotlib.pyplot as plt
import numpy as np

from sklearn.neighbors import KernelDensity
from scipy.constants import Boltzmann, Avogadro
from ase.geometry.cell import cell_to_cellpar

In [None]:
# Simulation temperature per system; systems not listed use DEFAULT_TEMPERATURE.
# NOTE(review): values are presumably in kelvin -- confirm against the MD setup.
SYSTEM_TEMPERATURES = {'XXII': 150}
DEFAULT_TEMPERATURE = 300

# Loop-invariant factor multiplying the per-molecule cell volume in the Gibbs term.
# NOTE(review): looks like P = 1 bar (1e5 Pa) with volumes in Angstrom^3 (1e-30 m^3),
# converted to kJ/mol (Avogadro * 1e-3) -- confirm the units of the stored cells.
PREFACTOR_P = 1e5 * Avogadro * 1e-33

for SYSTEM_NAME in ['XXII', 'XXIII', 'XXVI']:
    T = SYSTEM_TEMPERATURES.get(SYSTEM_NAME, DEFAULT_TEMPERATURE)
    # Boltzmann * Avogadro is the molar gas constant (J/mol/K); 1e-3 rescales J -> kJ.
    kbT = (Boltzmann * Avogadro * T * 1e-3)

    # SECURITY: allow_pickle=True unpickles arbitrary Python objects; only load
    # files from a trusted source.
    DATA = np.load(f'data/MD_DATA_{SYSTEM_NAME}.npy', allow_pickle=True).item()
    energies, names = [], []
    MD_TERMS = {}
    for POLY_KEY in DATA:
        poly_data = DATA[POLY_KEY]
        # First entry along the leading axis of the stored cells; iterated below
        # as a sequence of cell matrices (one per frame, presumably 3x3 -- TODO confirm).
        frame_cells = poly_data['cells'][0]

        # Dividing the cell vectors by n_molecules**(1/3) divides the cell volume
        # by n_molecules, i.e. all quantities below are per molecule.
        norm = np.power(poly_data['n_molecules'], 1/3)
        cells = frame_cells / norm

        # Not used further in this cell; kept because, as notebook globals, they
        # may be inspected by other cells.
        cell_lengths = np.linalg.norm(cells, axis=-1)
        nonzero_indices = np.nonzero(frame_cells[0] != 0)

        # Cell parameters per frame (lengths and angles, angles in radians).
        cellparams = np.array([cell_to_cellpar(x, radians=True) for x in cells])

        # Gaussian KDE over the cell parameters, evaluated back on the same
        # frames; the frame with the highest log-density is taken as the mode.
        kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(cellparams)
        log_dens = kde.score_samples(cellparams)
        max_index = np.argmax(log_dens)
        ll_vol = log_dens[max_index]

        # Volume of the most likely (normalized) cell. The determinant is signed:
        # a left-handed ordering of the cell vectors yields a negative value, so
        # take the absolute value to always obtain a physical (positive) volume.
        # (Despite the name, this is the volume at the density maximum, not an average.)
        mean_volume = np.abs(np.linalg.det(cells[max_index]))

        # PREFACTOR_P * volume plus kbT times the log-density at the mode.
        gibbs_term = PREFACTOR_P * mean_volume + kbT * ll_vol
        print(POLY_KEY, mean_volume, gibbs_term, poly_data['V_pot_mu'])

        energies.append(poly_data['V_pot_mu'])
        names.append(POLY_KEY)
        MD_TERMS[POLY_KEY] = {
            'gibbs_term': gibbs_term,
            'V_pot_mu': poly_data['V_pot_mu'],
            # Stored cell is the original one, NOT divided by `norm`.
            'cell': frame_cells[max_index],
        }

    # np.save pickles the dict into a 0-d object array; read it back with
    # np.load(..., allow_pickle=True).item().
    np.save(f'data/MD_TERMS_{SYSTEM_NAME}.npy', MD_TERMS)

In [None]:
# 1-D scan of the fitted KDE: keep every cell parameter of the most likely frame
# fixed except the last one, which is swept over `xs`.
# NOTE: `cellparams`, `max_index` and `kde` are whatever the last iteration of the
# loop in the previous cell left behind.
xs = np.linspace(1, 1.2, 1000)
sample = cellparams[max_index]

fixed_part = sample[:-1]
scan_points = np.column_stack((
    np.broadcast_to(fixed_part, (xs.size, fixed_part.size)),
    xs,
))
ll = kde.score_samples(scan_points)
sample

In [None]:
# Log-density returned by the KDE along the swept last cell parameter (`xs`).
fig, ax = plt.subplots()
ax.scatter(xs, ll)
ax.set_xlabel('last cell parameter (swept)')
ax.set_ylabel('KDE log-density')
ax.set_title('KDE log-density along the last cell parameter');