In [1]:
import numpy as np
import pandas as pd
from ast import literal_eval

In [2]:
# Element symbol of the metal to analyse; it is interpolated into the
# result-file paths that the following cell loads.
metal = "Cr" # Fe Mn Ru Re Cr Co Ni Cu (presumably the symbols with result files available — TODO confirm)

In [3]:
# Metal oxidation states (OS) assigned by cell2mol or BVS; only covers the
# structures whose cell reconstruction finished successfully.
df_bvs = pd.read_csv(
    f"results/BVS_result_{metal}.txt",
    sep="\t",
)

# Metal oxidation states taken from the .cif files.
df_cif = pd.read_csv(
    f"results/cif_charge_{metal}.txt",
    sep="\t",
)

In [4]:
# Sanity check of the BVS table: number of rows, number of distinct
# refcodes (equal numbers mean one row per refcode), and the metal symbols
# that occur in it.
print(df_bvs.shape[0])
print(df_bvs["refcode"].nunique(dropna=False))
df_bvs["metal"].unique()

1311
1311


array(['Cr'], dtype=object)

In [5]:
# Sanity check of the .cif table: number of rows, number of distinct
# refcodes (equal numbers mean one row per refcode), and the metal symbols
# that occur in it.
print(df_cif.shape[0])
print(df_cif["refcode"].nunique(dropna=False))
df_cif["metal"].unique()

1465
1465


array(['Cr'], dtype=object)

In [6]:
# Columns kept from the merged table, in display order (names before the
# final renaming).
cols = ['refcode', 'metal', 'charge_cif', 'charge_cell2mol', 'charge_BVS', 'dictionary']

# Left join: every row of df_bvs is kept and the .cif data is attached by
# refcode. Both tables carry a `metal` column, so the merge suffixes them
# with _x / _y; only the left-hand copy is kept, under its plain name.
df = (
    df_bvs
    .merge(df_cif, how='left', on='refcode')
    .drop(columns=['metal_y', 'unique_indices'])
    .rename(columns={'metal_x': 'metal'})
)

# The `dictionary` column is parsed from its text form into a Python object
# (a dict, judging by how later cells use it).
df['dictionary'] = df['dictionary'].apply(lambda raw: literal_eval(str(raw)))

# Keep the selected columns and give the three charge columns short names.
df = df[cols].rename(
    columns={
        'charge_cif': 'Cif',
        'charge_cell2mol': 'cell2mol',
        'charge_BVS': 'BVS',
    }
)

In [7]:
# Threshold on the dictionary values: `diff` is only filled in for rows whose
# two smallest values are both <= thre_max.
thre_max = 0.5


def _two_lowest(deltas):
    """Return the two entries of ``deltas`` that have the smallest values.

    Args:
        deltas: dict with at least two entries whose values can be ordered.
            An ``IndexError`` is raised when fewer than two are present.

    Returns:
        Tuple ``(key_min, value_min, key_second, value_second)``. Entries
        with equal values keep the dict's own key order because ``sorted``
        is stable.
    """
    ranked = sorted(deltas, key=deltas.get)
    lowest, runner_up = ranked[0], ranked[1]
    return lowest, deltas[lowest], runner_up, deltas[runner_up]


def _gap_if_both_within(lowest_value, second_value, limit):
    """Return ``second_value - lowest_value`` if both are <= ``limit``, else None."""
    if lowest_value <= limit and second_value <= limit:
        return second_value - lowest_value
    return None


# Sort each dictionary a single time; every derived column below reads from
# this Series of 4-tuples instead of re-sorting the dictionary.
_lowest_two = df['dictionary'].apply(_two_lowest)

df['bvs_min'] = _lowest_two.apply(lambda entry: entry[0])          # key with the smallest value
df['min'] = _lowest_two.apply(lambda entry: entry[1])              # the smallest value itself
df['bvs_second_min'] = _lowest_two.apply(lambda entry: entry[2])   # key with the 2nd smallest value
df['second_min'] = _lowest_two.apply(lambda entry: entry[3])       # the 2nd smallest value itself

# (second_min - min) where both values are within thre_max, missing otherwise.
df['diff'] = _lowest_two.apply(
    lambda entry: _gap_if_both_within(entry[1], entry[3], thre_max)
)

In [8]:
# Optional: uncomment to show column contents at full width instead of a truncated preview.
# pd.set_option('display.max_colwidth', None)

In [9]:
# Show the merged table together with the derived BVS columns; the notebook
# renders a truncated preview (first and last rows only).
df

Unnamed: 0,refcode,metal,Cif,cell2mol,BVS,dictionary,bvs_min,min,bvs_second_min,second_min,diff
0,ACAYOT,Cr,1,9999,3,"{2: 1.362228, 3: 0.185311, 4: 9999, 5: 9999, 6...",3,0.185311,2,1.362228,
1,ACEGOH,Cr,3,3,2,"{2: 0.021766, 3: 1.172873, 4: 9999, 5: 9999, 6...",2,0.021766,3,1.172873,
2,ACRTUR,Cr,3,3,7777,"{2: 1.286757, 3: 0.097025, 4: 0.001089, 5: 1.2...",4,0.001089,3,0.097025,0.095936
3,ACUDOR,Cr,0,0,9999,"{2: 9999, 3: 9999, 4: 9999, 5: 9999, 6: 9999}",2,9999.000000,3,9999.000000,
4,ACUDUX,Cr,0,0,9999,"{2: 9999, 3: 9999, 4: 9999, 5: 9999, 6: 9999}",2,9999.000000,3,9999.000000,
...,...,...,...,...,...,...,...,...,...,...,...
1306,ZUMMIG,Cr,0,0,9999,"{2: 9999, 3: 9999, 4: 9999, 5: 9999, 6: 9999}",2,9999.000000,3,9999.000000,
1307,ZURVIR,Cr,2,2,2,"{2: 0.170744, 3: 0.943479, 4: 9999, 5: 9999, 6...",2,0.170744,3,0.943479,
1308,ZURVUD,Cr,2,3,3,"{2: 0.920124, 3: 0.201946, 4: 9999, 5: 9999, 6...",3,0.201946,2,0.920124,
1309,ZUWJAE,Cr,3,3,8888,"{2: 0.529229, 3: 0.603857, 4: 9999, 5: 9999, 6...",2,0.529229,3,0.603857,


In [10]:
# Re-check the merged table: number of distinct refcodes and the metal
# symbols that occur in it.
print(df["refcode"].nunique(dropna=False))
df["metal"].unique()

1311


array(['Cr'], dtype=object)

# cell2mol 

In [11]:
# cell2mol doesn't work ==> cell2mol code : 9999
c2m_failed = df['cell2mol'].eq(9999)
c2m_worked = ~c2m_failed
# True where the cell2mol charge equals the charge read from the .cif file.
c2m_equals_cif = df['cell2mol'].eq(df['Cif'])

print(f"Total cases : {df.shape[0]}")

# cell2mol works
print(f"\ncell2mol works : {int(c2m_worked.sum())}")
print(f"cell2mol works, Same metal charge by cell2mol and cif : {int((c2m_equals_cif & c2m_worked).sum())}")
print(f"cell2mol works, Different metal charge by cell2mol and cif : : {int((~c2m_equals_cif & c2m_worked).sum())}")

# cell2mol doesn't work
print(f"\ncell2mol doesn't work : {int(c2m_failed.sum())}")

Total cases : 1311

cell2mol works : 1063
cell2mol works, Same metal charge by cell2mol and cif : 994
cell2mol works, Different metal charge by cell2mol and cif : : 69

cell2mol doesn't work : 248


# BVS

## Missing bond valence parameters for all oxidation states

In [12]:
# BVS code : 9999 -- when even the smallest dictionary value is 9999, every
# value in that row's dictionary is at least 9999.
no_bvs_value = df['min'].eq(9999.0)

print(f"Total cases : {df.shape[0]}")
print(f"Missing bond valence paramenters for all oxidation states : {int(no_bvs_value.sum())}\n")

Total cases : 1311
Missing bond valence paramenters for all oxidation states : 705



## Only one OS available by BVS method

In [13]:
# Rows where exactly one oxidation state has a usable value: the smallest
# value is not the 9999 code but the second smallest is.
single_os = df['min'].ne(9999.0) & df['second_min'].eq(9999.0)

# The cut-off is `thre_max` (set in the cell that builds `min`/`second_min`)
# rather than a repeated literal, so the labels and the masks always agree
# with the threshold used for the `diff` column.
single_os_above = single_os & df['min'].gt(thre_max)   # BVS code : 8888
single_os_within = single_os & df['min'].le(thre_max)

print(f"Only one OS available by BVS : {int(single_os.sum())}")
print(f"Only one OS available by BVS, delta >  {thre_max} : {int(single_os_above.sum())}")
print(f"Only one OS available by BVS, delta <=  {thre_max} : {int(single_os_within.sum())}")

Only one OS available by BVS : 73
Only one OS available by BVS, delta >  0.5 : 21
Only one OS available by BVS, delta <=  0.5 : 52


## More than one OS available by BVS method

In [14]:
# Rows where at least two oxidation states have a usable value: neither the
# smallest nor the second smallest value is the 9999 code.
multi_os = df['min'].ne(9999.0) & df['second_min'].ne(9999.0)

# The cut-off is `thre_max` (set in the cell that builds `min`/`second_min`)
# rather than a repeated literal, so the labels and the masks always agree
# with the threshold used for the `diff` column.
best_within = df['min'].le(thre_max)
second_within = df['second_min'].le(thre_max)

# second_min >= min, so "smallest above the threshold" already implies that
# the second smallest is above it as well.
both_above = multi_os & df['min'].gt(thre_max)                              # BVS code : 8888
only_best_within = multi_os & best_within & df['second_min'].gt(thre_max)
both_within = multi_os & best_within & second_within                        # BVS code : 7777

print(f"More than one OS available by BVS : {int(multi_os.sum())}")
print(f"OS with smallest Δ > {thre_max}, Another OS with 2nd smallest Δ > {thre_max} : {int(both_above.sum())}")
print(f"OS with smallest Δ ≤ {thre_max}, Another OS with 2nd smallest Δ > {thre_max} : {int(only_best_within.sum())}")
print(f"OS with smallest Δ ≤ {thre_max}, Another OS with 2nd smallest Δ ≤ {thre_max} : {int(both_within.sum())}")

More than one OS available by BVS : 533
OS with smallest Δ > 0.5, Another OS with 2nd smallest Δ > 0.5 : 67
OS with smallest Δ ≤ 0.5, Another OS with 2nd smallest Δ > 0.5 : 427
OS with smallest Δ ≤ 0.5, Another OS with 2nd smallest Δ ≤ 0.5 : 39


## BVS vs Cif

In [15]:
# Only rows whose smallest dictionary value is not the 9999 code are compared.
bvs_available = df['min'].ne(9999.0)
# True where the .cif charge equals the key with the smallest value.
cif_is_best = df['Cif'].eq(df['bvs_min'])

print(f"Same metal charge by BVS and cif : {int((cif_is_best & bvs_available).sum())}")
print(f"Different metal charge by BVS and cif : {int((~cif_is_best & bvs_available).sum())}")

Same metal charge by BVS and cif : 492
Different metal charge by BVS and cif : 114


In [16]:
# Rows where exactly one oxidation state has a usable value: the smallest
# value is not the 9999 code but the second smallest is.
single_os = df['min'].ne(9999.0) & df['second_min'].eq(9999.0)
# True where the .cif charge equals the key with the smallest value.
cif_is_best = df['Cif'].eq(df['bvs_min'])

# The cut-off is `thre_max` (set in the cell that builds `min`/`second_min`)
# rather than a repeated literal, so the labels and the masks always agree
# with the threshold used for the `diff` column.
single_os_above = single_os & df['min'].gt(thre_max)
single_os_within = single_os & df['min'].le(thre_max)

print(f"Only one OS, BVS = cif : {int((single_os & cif_is_best).sum())}")
print(f"Only one OS, BVS != cif : {int((single_os & ~cif_is_best).sum())}")

print(f"Only one OS, delta >  {thre_max}, BVS = cif : {int((single_os_above & cif_is_best).sum())}")
print(f"Only one OS, delta >  {thre_max}, BVS != cif : {int((single_os_above & ~cif_is_best).sum())}")

print(f"Only one OS, delta <= {thre_max}, BVS = cif : {int((single_os_within & cif_is_best).sum())}")
print(f"Only one OS, delta <= {thre_max}, BVS != cif : {int((single_os_within & ~cif_is_best).sum())}")

Only one OS, BVS = cif : 53
Only one OS, BVS != cif : 20
Only one OS, delta >  0.5, BVS = cif : 1
Only one OS, delta >  0.5, BVS != cif : 20
Only one OS, delta <= 0.5, BVS = cif : 52
Only one OS, delta <= 0.5, BVS != cif : 0


In [17]:
def _count_rows(mask):
    """Return the number of True entries of a boolean Series as a plain int."""
    return int(mask.sum())


# Rows where at least two oxidation states have a usable value: neither the
# smallest nor the second smallest value is the 9999 code.
multi_os = df['min'].ne(9999.0) & df['second_min'].ne(9999.0)

# Agreement of the .cif charge with the key of the smallest / second
# smallest dictionary value.
cif_is_best = df['Cif'].eq(df['bvs_min'])
cif_is_second = df['Cif'].eq(df['bvs_second_min'])

# The cut-off is `thre_max` (set in the cell that builds `min`/`second_min`)
# rather than a repeated literal, so the labels and the masks always agree
# with the threshold used for the `diff` column.
best_within = df['min'].le(thre_max)
second_within = df['second_min'].le(thre_max)

print(f"More than one OS, BVS = cif : {_count_rows(multi_os & cif_is_best)}")
print(f"More than one OS, BVS != cif : {_count_rows(multi_os & ~cif_is_best)}")

# One (label, mask) pair per threshold category; each gets the same
# four-line breakdown. The trailing space in the second label is kept so the
# printed text stays exactly as before.
threshold_groups = [
    (
        f"OS with smallest Δ > {thre_max}, Another OS with 2nd smallest Δ > {thre_max}",
        multi_os & df['min'].gt(thre_max),
    ),
    (
        f"OS with smallest Δ ≤ {thre_max}, Another OS with 2nd smallest Δ > {thre_max} ",
        multi_os & best_within & df['second_min'].gt(thre_max),
    ),
    (
        f"OS with smallest Δ ≤ {thre_max}, Another OS with 2nd smallest Δ ≤ {thre_max}",
        multi_os & best_within & second_within,
    ),
]

for label, in_group in threshold_groups:
    # Rows of this category where the .cif charge is not the best BVS key;
    # they are split further by whether it matches the second-best key.
    disagrees = in_group & ~cif_is_best

    print("")
    print(f"{label}, BVS = cif : {_count_rows(in_group & cif_is_best)}")
    print(f"{label}, BVS != cif : {_count_rows(disagrees)}")
    print(f"Another OS by BVS = cif : {_count_rows(disagrees & cif_is_second)}")
    print(f"Another OS by BVS != cif : {_count_rows(disagrees & ~cif_is_second)}")

More than one OS, BVS = cif : 439
More than one OS, BVS != cif : 94

OS with smallest Δ > 0.5, Another OS with 2nd smallest Δ > 0.5, BVS = cif : 5
OS with smallest Δ > 0.5, Another OS with 2nd smallest Δ > 0.5, BVS != cif : 62
Another OS by BVS = cif : 3
Another OS by BVS != cif : 59

OS with smallest Δ ≤ 0.5, Another OS with 2nd smallest Δ > 0.5 , BVS = cif : 408
OS with smallest Δ ≤ 0.5, Another OS with 2nd smallest Δ > 0.5 , BVS != cif : 19
Another OS by BVS = cif : 13
Another OS by BVS != cif : 6

OS with smallest Δ ≤ 0.5, Another OS with 2nd smallest Δ ≤ 0.5, BVS = cif : 26
OS with smallest Δ ≤ 0.5, Another OS with 2nd smallest Δ ≤ 0.5, BVS != cif : 13
Another OS by BVS = cif : 12
Another OS by BVS != cif : 1
