In [2]:
import numpy as np
from pprint import pprint as pp
import re
import math
from scipy.optimize import minimize
from scipy.optimize import SR1, BFGS
from scipy.optimize import Bounds
from scipy.optimize import LinearConstraint
from scipy.optimize import NonlinearConstraint
from ase.units import kB
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from ase import Atoms
from clusterx.parent_lattice import ParentLattice
from clusterx.clusters.clusters_pool import ClustersPool
from clusterx.correlations import CorrelationsCalculator
import numdifftools as nd
import sys

pattern1 = re.compile("\n\n\n")
pattern2 = re.compile("\n\n")

In [3]:
# Read clusters.out
clusters = {}


with open('clusters.out','r') as fclusters:
    temp_clusters = fclusters.read().split('\n\n')

for idx, cluster in enumerate(temp_clusters):
    if cluster == '':
        continue
    line = cluster.split('\n')
    multiplicity = int(line[0])
    length = float(line[1])
    num_points = int(line[2])
    clusters[idx] = {'mult':multiplicity, 'length':length, 'type':num_points}
    
num_clusters = len(clusters)

In [4]:
# Read config.out
configs = {}

fconfig = open('config.out','r')
_ = next(fconfig)
temp_config = fconfig.read()#.split('\n\n')
temp_config = pattern1.split(temp_config)

for idx, config in enumerate(temp_config):
    if config == '':
        continue
    num_points = int(config[0])
    config = pattern2.split(config[2:])
    min_coords = []
    for _ in range(num_points):
        min_coords.append(config[_].split('\n')[0])
    configs[idx] = {'subclus': list(map(int,min_coords)), 'num_of_subclus': len(min_coords)}

In [5]:
# Read kb.out

kb = {}

fkb = open('kb.out','r')
_ = next(fkb)

temp_kb = fkb.read()
temp_kb = temp_kb.split('\n')

for idx, kbcoeff in enumerate(temp_kb):
    if kbcoeff == '':
        continue
    kb[idx] = float(kbcoeff)

fkb.close()

In [6]:
# Read configcoeff.out

configcoef = {}

with open('configcoef.out','r') as fsubmult:
    _ = next(fsubmult)
    temp_submult = fsubmult.read()
    temp_submult = pattern2.split(temp_submult)
    
for idx, submult in enumerate(temp_submult):
    submult = submult.split('\n')
    while("" in submult) :
        submult.remove("")
    configcoef[idx] = list(map(float,submult[1:]))

In [7]:
# Read vmat.out

vmat = {}

with open('vmat.out') as fvmat:
    _ = next(fvmat)
    temp_vmat = fvmat.read()
    #temp_vmat = temp_vmat.split('\n')
    temp_vmat = pattern2.split(temp_vmat)
    
    while("" in temp_vmat) :
        temp_vmat.remove("")
    
    for clus_idx, mat in enumerate(temp_vmat):
        mat = mat.split('\n')
        mat_float = np.empty(list(map(int, mat[0].split(' '))))
        for idx, row in enumerate(mat[1:]):
            mat_float[idx] = list(map(float,row.split(' ')[:-1]))
        
        vmat[clus_idx] = mat_float

In [8]:
# Read eci
eci = {}

with open('eci.out') as feci:
    _ = next(feci)
    temp_eci = feci.read()
    temp_eci = temp_eci.split('\n')

for idx, eci_val in enumerate(temp_eci):
    if eci_val == '':
        continue
    eci[idx] = float(eci_val)

In [26]:
#eci = {0: 0, 1: -0.0189, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0}
eci = {0: 1, 1: -0.5, 2: 0, 3: 0, 4: 0, 5: 0}

In [27]:
eci

{0: 1, 1: -0.5, 2: 0, 3: 0, 4: 0, 5: 0}

In [105]:
#corrs = np.array([1.    , 0.5   , 0.25  , 0.25  , 0.125 , 0.0625])
corrs = np.array([ 1.0, 0,  0.08,  0.23,-0.00054,  0.165])
#corrs = np.array([ 1.  , 0.0 ,  0.25 ,  -0.25 ,  0.125 ,  0.0])

## Optimisation

### Setting up F, F Jacobian and F Hessian

In [93]:
def F(corrs, vmat, kb, clusters, configs, configcoef,T):

    S = 0
    H = 0
    
    def clus_prob(cluster_idx):
        rho = np.matmul(vmat[cluster_idx],corrs)

        return rho
    
    def inner_sum(cluster_idx):
        isum = 0
        rho = clus_prob(cluster_idx)
        #print(rho)
        try:
            for i in range(configs[cluster_idx]['num_of_subclus']):
                isum += configcoef[cluster_idx][i] * rho[i] * math.log(rho[i])
        except ValueError as ve:
            print('Math Domain Error. Check the validity of the correlations')
            pass
        return isum 
            
    for cluster_idx, cluster in clusters.items():
        H += cluster['mult']*eci[cluster_idx]*corrs[cluster_idx]
        S += -kb[cluster_idx]*inner_sum(cluster_idx)
        
    #return kB*S
    return H + kB*T*S

In [94]:
T = 50
F(corrs, vmat, kb, clusters, configs, configcoef,T)

Math Domain Error. Check the validity of the correlations


1.0044906860393616

In [116]:
def F_jacobian(corrs, vmat, kb, clusters, configs, configcoef,T):
    
    def get_corr_sum(corrs,vmat,cluster_index,config_idx):
        corrsum = 0
        for corr_idx, corr in enumerate(corrs):
            corrsum += vmat[cluster_idx][config_idx][corr_idx]*corr
        #print(corrsum,cluster_idx)
        return corrsum
            
    
    F_jac = []
    for corr_idx, corr in enumerate(corrs):
        temp_S_jac = 0
        for cluster_idx, cluster in clusters.items():
            temp_config_sum = 0
            for config_idx, config in enumerate(configcoef[cluster_idx]):
                try:
                    corrsum = get_corr_sum(corrs,vmat,cluster_idx,config_idx)
                    temp_config_sum += config*vmat[cluster_idx][config_idx][corr_idx]*(1 + math.log(corrsum))
                except ValueError as ve:
                    print('Math Domain Error. Check the validity of the correlations')
                    print(f'corrs: {corrs} \n Correlation Sum: {corrsum}')
            
            temp_S_jac += kb[cluster_idx]*temp_config_sum
        
        F_jac.append(cluster['mult']*eci[cluster_idx] + kB*T*temp_S_jac)
        
    return np.array(F_jac)

In [117]:
F_jacobian(corrs, vmat, kb, clusters, configs, configcoef,T)

array([-7.89627893e-03,  2.38136917e-06,  6.41465063e-04,  1.77478766e-03,
       -5.49244548e-06,  5.13618208e-04])

In [114]:
def F_hessian(corrs, vmat, kb, clusters, configs, configcoef,T):
    F_hess = np.empty([len(corrs),len(corrs)])
    
    def get_corr_sum(corrs,vmat,cluster_index,config_idx):
        corrsum = 0
        for corr_idx, corr in enumerate(corrs):
            corrsum += vmat[cluster_idx][config_idx][corr_idx]*corr
        #print(corrsum,cluster_idx)
        return corrsum
    
    for corr_idx_1, corr_1 in enumerate(corrs):
        for corr_idx_2, corr_2 in enumerate(corrs):
            temp_hess = 0
            for cluster_idx, cluster in clusters.items():
                temp_config_sum = 0
                for config_idx, config in enumerate(configcoef[cluster_idx]):
                    corrsum = get_corr_sum(corrs,vmat,cluster_idx,config_idx)
                    temp_config_sum += config*vmat[cluster_idx][config_idx][corr_idx_1]*vmat[cluster_idx][config_idx][corr_idx_2]/corrsum
                temp_hess += kb[cluster_idx]*temp_config_sum
            F_hess[corr_idx_1][corr_idx_2] = temp_hess
    
    return F_hess
                    

In [123]:
F_hessian(corrs, vmat, kb, clusters, configs, configcoef,T)

array([[ 1.11291011e+00, -8.99827492e-04, -9.90097804e-02,
        -3.96110791e-01,  1.19131116e-03, -8.41405958e-02],
       [-8.99827492e-04,  3.39965943e+00,  4.86883156e-04,
         2.38811034e-05, -1.02350722e+00,  1.83448658e-03],
       [-9.90097804e-02,  4.86883156e-04,  1.91328293e+00,
        -1.44566519e-01,  1.23802887e-03, -1.26072037e-01],
       [-3.96110791e-01,  2.38811034e-05, -1.44566519e-01,
         2.05165389e+00,  8.38574910e-04, -3.89114114e-01],
       [ 1.19131116e-03, -1.02350722e+00,  1.23802887e-03,
         8.38574910e-04,  2.36064046e+00, -1.26351423e-03],
       [-8.41405958e-02,  1.83448658e-03, -1.26072037e-01,
        -3.89114114e-01, -1.26351423e-03,  1.11346620e+00]])

### Setting up Constraints

In [29]:
def constraint_rhos_sum(corrs, vmat, clusters, configcoef,):
    
    rho_sum = []

    def clus_prob(cluster_idx):
        rho = np.matmul(vmat[cluster_idx],corrs)
        return rho
    
    for cluster_idx, _ in clusters.items():
        rho = clus_prob(cluster_idx)
        rho_sum.append(np.sum(configcoef[cluster_idx]*rho))
    
    return np.sum(1 - np.array(rho_sum))

def constraint_singlet(corrs,FIXED_CORR_1):
    return corrs[1] - FIXED_CORR_1   

def constraint_zero(corrs):
    return 1 - corrs[0]  

In [33]:
def get_random_corr():
    
    conc = 0.0
    a=0.5
    pri = Atoms(positions=[[0,0,0]],cell=[[a,a,a],[a,-a,a],[a,a,-a]],pbc=(1,1,1))
    plat = ParentLattice(pri,site_symbols=[['Cs','Cl',]])

    cpool = ClustersPool(plat, npoints=[0,1,2,2,3,4], radii=[0,0,1.0,0.866,1.0,1.0],)
    corrcal = CorrelationsCalculator("trigonometric", plat, cpool)
    
    #return corrcal.get_binary_random_structure_correlations(0.5)
    while conc <= 1.0:
        yield corrcal.get_binary_random_structure_correlations(conc)
        conc += 0.25

In [32]:
def callback(corrs, res):
    for cluster_idx in clusters:
        print(np.matmul(vmat[cluster_idx],res.x))

    print('=====================')
    

### Perform the Optimisation

In [55]:
results = pd.DataFrame(columns = ['T', '1-point_corr', 'F','corrs'])

for T in tqdm([10000]):#tqdm(np.logspace(0.01, 5.0, num=10)):
    for x in tqdm(np.linspace(-1,1,17)):
        FIXED_CORR_1 = x
        #corrs0 = get_random_corr()
        #corrs0 = np.array([ 1.  , FIXED_CORR_1 ,  0.25 ,  -0.25 ,  0.125 ,  0.0])
        corrs0 = np.array([1.    , 0.5   , 0.25  , 0.25  , 0.125 , 0.0625])
        
        linear_constraints = []

        for cluster_idx, _ in clusters.items():
            if cluster_idx == 0:
                linear_constraints.append(LinearConstraint(vmat[cluster_idx],
                                                           [1]*len(configcoef[cluster_idx]),
                                                           [1]*len(configcoef[cluster_idx])))
            else:
                linear_constraints.append(LinearConstraint(vmat[cluster_idx],
                                                           [0]*len(configcoef[cluster_idx]),
                                                           [1]*len(configcoef[cluster_idx])))
        
        bounds_corrs = Bounds([1, FIXED_CORR_1,*[-1]*(len(clusters)-2)],
                              [1, FIXED_CORR_1,*[1]*(len(clusters)-2)]
                             )
        
        options = {'verbose' : 1,
                   'maxiter' : 5000,
                   'xtol'    : 1e-15,
                   'initial_constr_penalty' : 2,
                  }
        
        res = minimize(F, 
                       corrs0, 
                       args=(vmat, kb, clusters, configs, configcoef,T),
                       method='trust-constr', 
                       options=options,
                       jac='3-point', hess=BFGS(),
                       constraints=[{'fun': constraint_rhos_sum, 'type': 'eq', 'args': [vmat, clusters, configcoef,]},
                                    *linear_constraints, 
                                    {'fun': constraint_singlet, 'type': 'eq', 'args': [FIXED_CORR_1]},
                                    {'fun': constraint_zero, 'type':'eq'},
                                   ],
                       bounds=bounds_corrs,
                       #callback=callback
                      )
        
        #rhos = []
        #for cluster_idx in clusters:
        #    rhos.append(np.matmul(vmat[cluster_idx],res.x))
        
        results = results.append({'T' : T, 
                                  '1-point_corr' : x, 
                                  'F' : res.fun, 
                                  'corrs': res.x,
                                  #'rhos': np.array(rhos)
                                 }, 
                                 
                                 ignore_index = True
                                )
#

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]


Singular Jacobian matrix. Using SVD decomposition to perform the factorizations.


delta_grad == 0.0. Check if the approximated function is linear. If the function is linear better results can be obtained by defining the Hessian as zero instead of using quasi-Newton approximations.



`gtol` termination condition is satisfied.
Number of iterations: 41, function evaluations: 416, CG iterations: 0, optimality: 8.15e-09, constraint violation: 7.96e-09, execution time:  0.4 s.
`xtol` termination condition is satisfied.
Number of iterations: 199, function evaluations: 468, CG iterations: 0, optimality: 2.14e-03, constraint violation: 2.22e-16, execution time:  0.6 s.
`xtol` termination condition is satisfied.
Number of iterations: 206, function evaluations: 507, CG iterations: 0, optimality: 4.18e-03, constraint violation: 1.11e-16, execution time: 0.68 s.
`xtol` termination condition is satisfied.
Number of iterations: 199, function evaluations: 416, CG iterations: 0, optimality: 8.57e-03, constraint violation: 0.00e+00, execution time: 0.61 s.
`xtol` termination condition is satisfied.
Number of iterations: 204, function evaluations: 481, CG iterations: 0, optimality: 2.01e-03, constraint violation: 0.00e+00, execution time: 0.64 s.
`xtol` termination condition is sati

In [56]:
    for cluster_idx in clusters:
        print(np.matmul(vmat[cluster_idx],res.x))

    print('=====================')

[1.]
[3.30393862e-09 9.99999997e-01]
[-3.39552220e-09  6.69946082e-09  9.99999990e-01]
[-1.82520832e-09  5.12914694e-09  9.99999992e-01]
[-1.55105720e-10 -3.24041645e-09  9.93987727e-09 -1.67010256e-09
  8.36956336e-09  9.99999982e-01]
[ 1.56289165e-09 -1.71799740e-09  4.78948617e-11 -1.52241903e-09
  9.89198240e-09  9.99999972e-01]


In [52]:
results[results['1-point_corr'] == 0]['corrs'].values

array([array([ 1.00000000e+00,  1.11222454e-32, -2.94352001e-02,  2.98108459e-01,
               5.12900283e-02,  3.05682967e-01])                                 ],
      dtype=object)

In [53]:
np.stack(results[results['1-point_corr'] == 0.0]['corrs'].values)[:,1]

array([1.11222454e-32])

In [57]:
fig = go.Figure()

for T in results['T'].unique():
    fig.add_trace(go.Scatter(x = results[results['T'] == T]['1-point_corr'],
                             y = results[results['T'] == T]['F'],
                             mode='markers+lines',
                             name=f'T = {T}',
                            )
                 )

fig.update_layout(
    title="F vs 1-point Corr",
    xaxis_title="1-point Corr",
    yaxis_title="F",
    legend_title="Temperature",
    template='seaborn'
)
#fig.update_traces(texttemplate='%{text:.2s}', textposition='top center')
fig.show()

In [None]:
np.stack(results['corrs'].values)[:,3]

In [None]:
results

In [None]:
fig = go.Figure()
#spec = 
for T in results['T'].unique():
    fig.add_trace(go.Scatter(x = results[results['1-point_corr'] == 0.0]['T'],
                             y = results[results['1-point_corr'] == 0.0]['F'],
                             #y = results[results['1-point_corr'] == 0.0]['corrs'],
                             #y =np.stack(results[results['1-point_corr'] == 0.0]['corrs'].values)[:,2],
                             mode='markers+lines',
                             name=f'T = {T}',
                            )
                 )

fig.update_layout(
    title="F vs 1-point Corr",
    xaxis_title="1-point Corr",
    yaxis_title="F",
    legend_title="Temperature",
    template='seaborn'
)
#fig.update_traces(texttemplate='%{text:.2s}', textposition='top center')
fig.show()

In [None]:
np.linspace(-1,1,26)

In [None]:
pp(np.logspace(0.01, 5.0, num=10))

In [None]:
temp_df = results[results['1-point_corr'] == 0.0]
temp_df

In [None]:
a = np.array([-0.1089001,   0.3044507,   0.3749987,  -0.12499498, -0.05446074, -0.14107723])

In [None]:
results['corrs'].values

In [None]:
eci

In [None]:
pp(clusters)

In [None]:
np.linspace(-1,1,17)

In [None]:
temp = next(corrs0)

In [None]:
temp

In [None]:
np.gradient(temp,F)

In [None]:
next(get_random_corr())