In [1]:
%load_ext aiida
%aiida

In [2]:
import json
import os
import pathlib
from itertools import combinations

import numpy as np
import pandas as pd

In [3]:
# Root directory holding one sub-directory of JSON result files per code.
# The location can be overridden with the AIIDA_CWF_DATA_PATH environment variable so the
# notebook also runs on other machines; the default is the original hard-coded location.
data_path = pathlib.Path(
    os.environ.get('AIIDA_CWF_DATA_PATH', '/Users/treents/project/aiida-cwf/data')
)

In [4]:
# Collect the fitted central volume ('min_volume') of every system from each result file
# and assemble them into one table with a (code, functional, filename) column MultiIndex.
codes = ['fleur', 'wien2k', 'sirius_cp2k']

indices = []
central_volume_series = []

for code in codes:
    for data_file in (data_path / code).iterdir():
        # The functional is inferred from the file name. 'pbesol' has to be tested before
        # 'pbe' because every name containing 'pbesol' also contains 'pbe'.
        file_name = data_file.name.lower()
        if 'pbesol' in file_name:
            functional = 'pbesol'
        elif 'pbe' in file_name:
            functional = 'pbe'
        elif 'lda' in file_name:
            functional = 'lda'
        else:
            continue
        try:
            with open(data_file, 'r') as f:
                data = json.load(f)
        except (OSError, ValueError) as exc:
            # OSError: the entry cannot be opened/read (e.g. it is a directory).
            # ValueError: covers json.JSONDecodeError and UnicodeDecodeError (not valid JSON text).
            # Anything else (including KeyboardInterrupt) is deliberately allowed to propagate.
            print(f'Skipping {data_file.name}: {exc!r}')
            continue
        # Entries whose fit data is None are left out; they show up as NaN after the concat.
        central_volumes = {k: v['min_volume'] for k, v in data['BM_fit_data'].items() if v is not None}
        central_volume_series.append(pd.Series(central_volumes))
        indices.append((code, functional, data_file.name))

        print(data_file.name, len(central_volumes), sep=':\t')

# One column per file; rows are aligned on the system labels, columns sorted lexicographically.
df_central_volumes = pd.concat(
    central_volume_series, keys=indices, names=['code', 'functional', 'filename'], axis=1
).sort_index(axis=1)

results-unaries-PBEsol-fleur_centralVolume.json:	412
results-unaries-LDA-VWN-fleur_centralVolume.json:	412
unaries_prec3_lda.json:	412
unaries_prec3_pbesol.json:	412
results-unaries-verification-v1-cp2k_PBEsol_v2.json:	382
LDA_v3.json:	379
PBEsol_v3.json:	379
LDA_v1.json:	256
results-unaries-verification-v1-cp2k_PBE.json:	380
PBEsol_v1.json:	268


In [18]:
# NOTE(review): the output saved below already contains the 'mean' and 'median' columns, which are
# only added by a later cell, and this cell's execution count is out of sequence. On a fresh
# "Restart & Run All" the frame displayed here will not have those two columns.
df_central_volumes

code,fleur,fleur,sirius_cp2k,sirius_cp2k,sirius_cp2k,sirius_cp2k,sirius_cp2k,sirius_cp2k,wien2k,wien2k,mean,median
functional,lda,pbesol,lda,lda,pbe,pbesol,pbesol,pbesol,lda,pbesol,lda,lda
filename,results-unaries-LDA-VWN-fleur_centralVolume.json,results-unaries-PBEsol-fleur_centralVolume.json,LDA_v1.json,LDA_v3.json,results-unaries-verification-v1-cp2k_PBE.json,PBEsol_v1.json,PBEsol_v3.json,results-unaries-verification-v1-cp2k_PBEsol_v2.json,unaries_prec3_lda.json,unaries_prec3_pbesol.json,Unnamed: 11_level_2,Unnamed: 12_level_2
Ac-X/BCC,41.291856,42.875699,,41.350655,45.895638,,42.820513,42.820513,41.3712,42.8651,41.337904,41.350655
Ac-X/Diamond,114.392270,119.551727,,114.454714,129.576955,,119.387542,119.387542,114.6270,119.5860,114.491328,114.454714
Ac-X/FCC,40.659391,42.307589,,40.691468,45.500860,,42.266212,42.266212,40.7351,42.3140,40.695320,40.691468
Ac-X/SC,44.435010,46.115078,,44.364978,49.760991,,45.999801,45.999801,44.4039,46.0584,44.401296,44.403900
Ag-X/BCC,16.103086,16.764519,15.997339,16.139327,17.978911,16.758430,16.756601,16.756601,16.1122,16.7645,16.087988,16.107643
...,...,...,...,...,...,...,...,...,...,...,...,...
Zn-X/SC,16.051438,16.866885,15.905865,16.067631,18.180604,16.863613,16.863963,16.863963,16.0711,16.8638,16.024008,16.059535
Zr-X/BCC,21.090717,21.828021,20.886065,21.110404,22.843383,21.830519,21.830251,21.830251,21.1123,21.8313,21.049871,21.100561
Zr-X/Diamond,55.338310,57.846390,54.487066,55.430393,61.896721,57.844927,57.844927,57.844927,55.4494,57.8623,55.176292,55.384351
Zr-X/FCC,21.546222,22.250352,21.349380,21.563006,23.211860,22.249262,22.249091,22.249091,21.5659,22.2496,21.506127,21.554614


In [5]:
# Relative difference (in percent, relative to the first dataset) of the central volumes for
# every pair of datasets that come from different codes but use the same functional.
# For sirius_cp2k only the files whose name contains 'v3' take part in the comparison.
code_differences = {}
for column1, column2 in combinations(df_central_volumes.columns, 2):
    code1, functional1, file1 = column1
    code2, functional2, file2 = column2
    df1 = df_central_volumes[column1]
    df2 = df_central_volumes[column2]

    comparable = (
        df1.shape[0] == df2.shape[0]
        and functional1 == functional2
        and code1 != code2
    )
    if not comparable:
        continue

    non_v3_sirius = any(
        code == 'sirius_cp2k' and 'v3' not in file_name
        for code, file_name in ((code1, file1), (code2, file2))
    )
    if non_v3_sirius:
        continue

    pair_key = (code1, code2, functional1, f'{file1} - {file2}')
    code_differences[pair_key] = (df1 - df2) / df1 * 100

In [6]:
# Turn the pairwise differences into a table: columns sorted, values rounded to 3 decimals.
df_code_differences = pd.DataFrame(code_differences).sort_index(axis=1).round(3)

# Collapse the two leading column levels (code1, code2) into a single 'code1 - code2' label.
merged_columns = []
for col in df_code_differences.columns.values:
    code_pair = ' - '.join(col[:2]).strip()
    merged_columns.append((code_pair, *col[2:]))
df_code_differences.columns = pd.MultiIndex.from_tuples(merged_columns)

# Order the rows so that the largest summed absolute difference comes first.
row_order = df_code_differences.abs().sum(axis=1).sort_values(ascending=False).index
df_code_differences = df_code_differences.loc[row_order]

In [7]:
# Row-wise mean and median of the LDA central volumes over the per-file datasets.
# The results are stored back into df_central_volumes under the 'lda' functional, so the
# summary columns must be excluded from the selection; otherwise re-running this cell would
# fold the previously written 'mean'/'median' columns into the new statistics.
is_summary_column = df_central_volumes.columns.get_level_values(0).isin(['mean', 'median'])
lda_volumes = df_central_volumes.loc[:, ~is_summary_column].loc(axis=1)[:, 'lda']
mean_lda = lda_volumes.mean(axis=1)
median_lda = lda_volumes.median(axis=1)
df_central_volumes[('mean', 'lda', '')] = mean_lda
df_central_volumes[('median', 'lda', '')] = median_lda

In [8]:
# Percentage deviation of every 'lda' column from the row-wise LDA median, relative to the
# column's own value: (column - median) / column * 100.
# The selection also picks up the 'mean' and 'median' columns themselves, since they carry
# the 'lda' functional label too.
lda_columns = df_central_volumes.loc(axis=1)[:, 'lda']
median_reference = df_central_volumes[('median', 'lda', '')].values
deviation_from_median = lda_columns.sub(median_reference, axis=0)
relative_differences = deviation_from_median.div(lda_columns).mul(100)

In [9]:
relative_differences

code,fleur,sirius_cp2k,sirius_cp2k,wien2k,mean,median
functional,lda,lda,lda,lda,lda,lda
filename,results-unaries-LDA-VWN-fleur_centralVolume.json,LDA_v1.json,LDA_v3.json,unaries_prec3_lda.json,Unnamed: 5_level_2,Unnamed: 6_level_2
Ac-X/BCC,-0.142397,,0.000000,0.049661,-0.030846,0.0
Ac-X/Diamond,-0.054587,,0.000000,0.150302,0.031980,0.0
Ac-X/FCC,-0.078894,,0.000000,0.107111,0.009464,0.0
Ac-X/SC,0.070012,,-0.087732,0.000000,-0.005865,0.0
Ag-X/BCC,-0.028300,-0.689513,0.196318,0.028284,-0.122171,0.0
...,...,...,...,...,...,...
Zn-X/SC,-0.050444,-0.966119,0.050393,0.071964,-0.221705,0.0
Zr-X/BCC,-0.046671,-1.026982,0.046627,0.055604,-0.240805,0.0
Zr-X/Diamond,-0.083200,-1.646787,0.083061,0.117312,-0.377081,0.0
Zr-X/FCC,-0.038948,-0.961311,0.038918,0.052331,-0.225457,0.0


In [10]:
# df_relative_differences = relative_differences.abs().sort_values(
#     by=('sirius_cp2k', 'lda', ''), axis=0, ascending=False
#     ).drop([('mean', 'lda', ''), ('median', 'lda', '')], axis=1)

In [11]:
def highlight_cells(val):
    """
    Return a CSS background color based on the magnitude of the cell value.

    Parameters
    ----------
    val : scalar
        Cell value; a number or a missing value (NaN, None, pd.NA).

    Returns
    -------
    str
        'background-color: ...' declaration:
        grey for missing values, light green for |val| <= 1,
        yellow for 1 < |val| <= 2 and red for |val| > 2.
    """
    # Missing values are checked first: comparing None or pd.NA with a number raises
    # TypeError, so they must never reach the range tests below.
    if pd.isna(val):
        return 'background-color: #999999'

    magnitude = abs(val)
    if magnitude <= 1:
        return 'background-color: lightgreen'  # Within ±1
    if magnitude <= 2:
        return 'background-color: #FFFF33'  # Within ±2 but outside ±1
    return 'background-color: #CC3333'  # Outside ±2
    


In [15]:
# Rows in index order, values rounded to 3 decimals, every cell colored by highlight_cells.
differences_by_label = df_code_differences.sort_index().round(3)
styled_df = differences_by_label.style.map(highlight_cells)

In [16]:
# Write the color-coded table to an Excel workbook in the current working directory.
styled_df.to_excel(
    excel_writer="central_volumes_differences_v2.xlsx",
    engine="openpyxl",
)