# Exploration/reproducing LJ cluster energies from Wales-Doye paper

2024-04-30

Paper doi: https://sci-hub.ee/10.1021/jp970984n

Process:
1. Download the data from the paper
2. Wrangle, clean, compute energy per atom and visualise how this changes with atom number
3. Reproduce some results: write algo for basin hopping, run Scipy basin hopping to verify

In [3]:
import numpy as np
import pandas as pd

import plotly.express as px

In [1]:
s = """
2 D∞h -1.000 000 2 57 Cs -288.342 625 10
3 D3h -3.000 000 2/3 58 C 3v -294.378 148 10
4 Td -6.000 000 2/3 59 C 2v -299.738 070 10
5 D3h -9.103 852 2/3 60 Cs -305.875 476 10
6 Oh -12.712 062 2/3 61 C 2v -312.008 896 10
7 D5h -16.505 384 2/3 62 Cs -317.353 901 10
8 Cs -19.821 489 2/3 63 C 1 -323.489 734 10
9 C2v -24.113 360 2/3 64 Cs -329.620 147 10
10 C3v -28.422 532 2 65 C 2 -334.971 532 12
11 C2v -32.765 970 2 66 C 1 -341.110 599 11/12
12 C5v -37.967 600 2 67 C s -347.252 007 10
13 Ih -44.326 801 2/3 68 C 1 -353.394 542 10
14 C3v -47.845 157 2/3 69 C5V -359.882 566 present-work
15 C2v -52.322 627 2 70 C 5v -366.892 251 10
16 Cs -56.815 742 2 71 C 5v -373.349 661 10
17 C2 -61.317 995 7 72 Cs -378.637 253 11
18 C5v -66.530 949 2 73 Cs -384.789 377 10
19 D5h -72.659 782 2 74 Cs -390.908 500 10
20 C2v -77.177 043 2 75 D 5h -397.492 331 14
21 C2v -81.684 571 2 76 Cs -402.894 866 14
22 Cs -86.809 782 10 77 C 2v -409.083 517 14
23 D3h -92.844 472 8 78 C s -414.794 401 present-work
24 Cs -97.348 815 9 79 C 2v -421.810 897 10
25 Cs -102.372 663 2 80 Cs -428.083 564 10
26 Td -108.315 616 2 81 C 2v -434.343 643 10
27 C2v -112.873 584 10 82 C 1 -440.550 425 10
28 Cs -117.822 402 10 83 C 2v -446.924 094 10
29 D3h -123.587 371 2 84 C 1 -452.657 214 10
30 C2v -128.286 571 10 85 C 3v -459.055 799 10
31 Cs -133.586 422 10 86 C 1 -465.384 493 10
32 C2v -139.635 524 10 87 Cs -472.098 165 10
33 Cs -144.842 719 10 88 Cs -479.032 630 16
34 C2v -150.044 528 10 89 C 3v -486.053 911 10
35 C1 -155.756 643 10 90 Cs -492.433 908 10
36 Cs -161.825 363 10 91 Cs -498.811 060 10
37 C1 -167.033 672 10 92 C 3v -505.185 309 10
38 Oh -173.928 427 13/14 93 C 1 -510.877 688 10
39 C5v -180.033 185 10 94 C 1 -517.264 131 10
40 Cs -185.249 839 10 95 C 1 -523.640 211 10
41 Cs -190.536 277 10 96 C1 -529.879 146 10
42 Cs -196.277 534 10 97 C1 -536.681 383 10
43 Cs -202.364 664 10 98 Cs -543.642 957 16
44 C1 -207.688 728 10 99 C 2v -550.666 526 10
45 C1 -213.784 862 10 100 Cs -557.039 820 10
46 C2V -220.680 330 10 101 C 2v -563.411 308 10
47 C1 -226.012 256 10 102 C 2v -569.363 652 15
48 Cs -232.199 529 10 103 Cs -575.766 131 15
49 C3v -239.091 864 10 104 C 2v -582.086 642 15
50 Cs -244.549 926 10 105 C 1 -588.266 501 10
51 C2v -251.253 964 10 106 C 1 -595.061 072 10
52 C3v -258.229 991 10 107 C s -602.007 110 present-work
53 C2v -265.203 016 10 108 C s -609.033 011 10
54 C5v -272.208 631 10 109 C 1 -615.411 166 10
55 Ih -279.248 470 4 110 Cs -621.788 224 10
56 C3v -283.643 105 10
"""

In [5]:
lines = s.replace("C ", "C").replace("D ", "D").strip("\n").split("\n")
df = pd.DataFrame([line.split() for line in lines])

# select and rename relevant columns
df = df[[0, 1, 2, 5, 6, 7]]
df.columns = ["n", "symmetry", "energy", "n2", "symmetry2", "energy2"]

# select two columns of data
df1 = df[['n', 'symmetry', 'energy']]
df2 = df[['n2', 'symmetry2', 'energy2']]
df2.columns = ['n', 'symmetry', 'energy']

# concatenate
dff = pd.concat([df1, df2], axis=0, ignore_index=True)
dff = dff.dropna()

# compute
dff['unit_energy'] = dff['energy'].astype(float) / dff['n'].astype(float)

In [39]:
df.head(10)

Unnamed: 0,n,symmetry,energy,n2,symmetry2,energy2
0,2,D∞h,-1.0,57,Cs,-288.342
1,3,D3h,-3.0,58,C3v,-294.378
2,4,Td,-6.0,59,C2v,-299.738
3,5,D3h,-9.103,60,Cs,-305.875
4,6,Oh,-12.712,61,C2v,-312.008
5,7,D5h,-16.505,62,Cs,-317.353
6,8,Cs,-19.821,63,C1,-323.489
7,9,C2v,-24.113,64,Cs,-329.62
8,10,C3v,-28.422,65,C2,-334.971
9,11,C2v,-32.765,66,C1,-341.11


In [6]:
px.scatter(dff, x="n", y="unit_energy") # , log_x=True)

## Basin hopping for LJ clusters

In [9]:
from scipy.optimize import basinhopping

In [27]:
def potential(r, epsilon=1.0, sigma=1.0):
    return 4 * epsilon * ((sigma / r) ** 12 - (sigma / r) ** 6)

def force(r, epsilon=1.0, sigma=1.0):
    return 4 * epsilon * (12 * (sigma / r) ** 13 - 6 * (sigma / r) ** 7)

def total_energy(R, epsilon=1.0, sigma=1.0):
    V = 0.0
    for i, _ in enumerate(R):
        for j in range(i):
            rij = np.linalg.norm(R[i] - R[j])
            V += potential(rij, epsilon, sigma)
    return V

def total_energy_bh(r, epsilon=1.0, sigma=1.0):
    """flat matrix of positions"""
    V = 0.0
    R = r.reshape(-1, 3)
    for i, _ in enumerate(R):
        for j in range(i):
            rij = np.linalg.norm(R[i] - R[j])
            V += potential(rij, epsilon, sigma)
    return V

In [60]:
def print_fun(x, f, accepted):
    print("at minimum %.4f accepted %d" % (f, int(accepted)))

In [78]:
# test minimal energy for 2 and 3 particle clusters

# 2 particles
R = np.array([[0.0, 0.0, 0.0], [2**(1/6), 0.0, 0.0]])
total_energy_bh(R.flatten())

-1.0

In [150]:
np.random.seed(42)

N = 20
R = np.random.uniform(0, 2, size=(N, 3))

In [151]:
# test energy and equivalence
total_energy(R) / 1e6, total_energy_bh(R.flatten()) / 1e6

(2485.388101314041, 2485.388101314041)

In [152]:
%%time

res = basinhopping(
    total_energy_bh,
    R.flatten(),
    niter=100,
    minimizer_kwargs=dict(method='L-BFGS-B', args=(1.0, 1.0)),
    callback=print_fun,
    niter_success=10
)

at minimum -72.2434 accepted 1
at minimum -73.9933 accepted 1
at minimum -72.3737 accepted 1
at minimum -72.3412 accepted 1
at minimum -75.2294 accepted 1
at minimum -75.2294 accepted 1
at minimum -71.2735 accepted 0
at minimum -75.6384 accepted 1
at minimum -70.6595 accepted 0
at minimum -74.7350 accepted 0
at minimum -74.0008 accepted 0
at minimum -74.0972 accepted 1
at minimum -73.9121 accepted 1
at minimum -74.4696 accepted 1
at minimum -73.0209 accepted 0
at minimum -74.8291 accepted 1
at minimum -72.0155 accepted 0
at minimum -74.7554 accepted 1
at minimum -74.8360 accepted 1
CPU times: user 1min 32s, sys: 1.23 s, total: 1min 33s
Wall time: 1min 42s


In [153]:
res

                    message: ['success condition satisfied']
                    success: True
                        fun: -75.63840473386608
                          x: [ 9.117e-01  1.679e+00 ...  5.531e-01
                               1.796e+00]
                        nit: 18
      minimization_failures: 0
                       nfev: 112728
                       njev: 1848
 lowest_optimization_result:  message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
                              success: True
                               status: 0
                                  fun: -75.63840473386608
                                    x: [ 9.117e-01  1.679e+00 ...
                                         5.531e-01  1.796e+00]
                                  nit: 187
                                  jac: [ 9.251e-04 -2.215e-03 ...
                                         2.231e-04 -9.805e-04]
                                 nfev: 12810
                                 njev: 

In [154]:
res_DB = {
    2: -1.0,
    3: -3.0,
    4: -6.0,
    5: -9.103852415707523,
    6: -12.71206225680801,
    7: -16.505384168011513,
    8: -19.821489192027727,
    9: -24.113360426738065,
    10: -28.422531875296897,
    15: -52.32262719388441,
    20: -75.63840473386608,
}

#### Notes:
- for N = 7 and CG, need 10 success iterations than for lower N
- for N >= 8 and CG, 100 iters not enough, stuck at a rather high local minimum, need to use LBFGS
- for N ~= 20, did not get correct result in 100 iters, error within 3%


In [125]:
# database of wrong results
# res_DB_wrong = {
#     2: -3.0651362881419146,
#     3: -4.099658416989308,
#     4: -6.134311998154777,
#     5: -11.307904824879119,
#     6: -17.551024824456363,
#     7: -20.655221197581174,
#     8: -23.75941779839724,
# }