In [1]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import codecs, json
from ase.visualize import view
from ase.io.trajectory import Trajectory
from ase.formula import Formula

def collate_csv_files(directory_path):
    """Combine every ``*hbond.csv`` file in a directory into one DataFrame.

    Files are read in sorted path order so that the row order of the result
    is reproducible; ``glob`` on its own returns paths in arbitrary order.

    Parameters
    ----------
    directory_path : str
        Directory searched (non-recursively) for files ending in ``hbond.csv``.

    Returns
    -------
    pandas.DataFrame
        Rows of all readable files stacked with a fresh 0..n-1 index, or an
        empty DataFrame when no file could be read.
    """
    csv_files = sorted(glob.glob(os.path.join(directory_path, '*hbond.csv')))

    dataframes = []
    for file in csv_files:
        try:
            dataframes.append(pd.read_csv(file))
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest
            print(f"Error reading {file}: {e}")

    if not dataframes:
        return pd.DataFrame()
    return pd.concat(dataframes, ignore_index=True)

In [2]:
# csv_files = glob.glob(os.path.join("./csv_files/", '*hbond.csv'))

parent_dir = "./alpha-PbO2-4comp/"  # your parent directory
csv_files = glob.glob(os.path.join(parent_dir, "csv_files", "*hbond.csv"))

# One DataFrame per structure; they are concatenated once, after the loop.
dataframes = []

# Each structure is looked up by a zero-padded three-digit prefix (000, 001, ...)
# shared by its hbond CSV, Ei JSON and trajectory file names.
# NOTE(review): this assumes the prefixes run contiguously from 000 up to the
# number of CSV files — confirm against the directory contents.
for i in range(len(csv_files)):
    prefix = str(i).zfill(3)
    csv_file = glob.glob(os.path.join(parent_dir, "csv_files/", prefix + '*hbond.csv'))
    Ei_file = glob.glob(os.path.join(parent_dir, "Ei_dicts/", prefix + '*.json'))
    traj_file = glob.glob(os.path.join(parent_dir, "trajectories/", prefix + '*.traj'))
    if len(Ei_file) == 0:
        continue
    if len(csv_file) == 0 or len(traj_file) == 0:
        # Skip incomplete structures instead of failing on an empty glob result
        print(f"Skipping structure {prefix}: missing hbond CSV or trajectory file")
        continue

    # Load the Ei values; the context manager closes the file handle
    # (presumably one value per atom, since they are zipped with the symbols below)
    with open(Ei_file[0], 'r', encoding='utf-8') as ei_handle:
        b_new = json.load(ei_handle)
    Ei_list = np.array(b_new)

    # Read the last frame once and close the trajectory reader afterwards
    with Trajectory(traj_file[0]) as traj:
        final_atoms = traj[-1]

    df_Eis = pd.DataFrame(
        list(zip(final_atoms.get_chemical_symbols(), Ei_list)),
        columns=['Element', 'Ei'],
    )

    # Turn dictionary keys into list: the distinct element symbols of the structure
    symbols = list(Formula(final_atoms.get_chemical_formula()).count().keys())

    # Reset per structure so a value from the previous iteration can never leak in
    O_sigma = np.nan
    cation_sum = 0.0
    ncat = 0

    for label in symbols:
        filtered_data = df_Eis.loc[df_Eis['Element'] == label, 'Ei']
        if label == "O":
            O_sigma = np.std(filtered_data)
        else:
            cation_sum = cation_sum + np.std(filtered_data)
            ncat = ncat + 1
    # Mean of the per-cation standard deviations; NaN when there is no cation
    cation_sigma = cation_sum / ncat if ncat else np.nan

    try:
        # Read CSV file into DataFrame
        df = pd.read_csv(csv_file[0])
    except Exception as e:
        print(f"Error reading {csv_file[0]}: {e}")
        continue
    df["O sigma"] = O_sigma
    df["Cation sigma"] = cation_sigma
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame (once, outside the loop,
# so combined_df is defined even when no structure was processed)
if dataframes:
    combined_df = pd.concat(dataframes, ignore_index=True)
else:
    combined_df = pd.DataFrame()

combined_df

Unnamed: 0,Formula,Energy,sigma bond length,Bond length,O sigma,Cation sigma
0,"['Ge', 'Ti', 'Sn', 'Hf']",-6082.692117,0.096572,2.030407,0.357529,0.038340
1,"['Ge', 'Ti', 'Sn', 'Zr']",-5945.960922,0.102665,2.036595,0.350118,0.047105
2,"['Ge', 'Ti', 'Sn', 'Pb']",-5141.669870,0.121046,2.059649,0.451350,0.042278
3,"['Ge', 'Ti', 'Sn', 'Ce']",-5751.738381,0.147834,2.068038,0.307319,0.067593
4,"['Ge', 'Ti', 'Sn', 'Rh']",-5348.842398,0.069796,2.002193,0.364189,0.036194
...,...,...,...,...,...,...
813,"['Pb', 'Rh', 'Ru', 'Pt']",-4766.473377,0.099219,2.068647,0.183460,0.056858
814,"['Pb', 'Rh', 'Ru', 'Nb']",-5470.787199,0.117130,2.079392,0.478405,0.072446
815,"['Pb', 'Rh', 'Ru', 'V']",-5153.737493,0.123975,2.048221,0.237143,0.062711
816,"['Pb', 'Rh', 'Ir', 'Pt']",-4705.781039,0.084402,2.065083,0.162554,0.065970


In [3]:
#summary_df = collate_csv_files("./csv_files/")
# Work on a copy so that adding columns below does not also modify combined_df
# (plain assignment would only create a second name for the same DataFrame)
summary_df = combined_df.copy()

# Placeholder column, initialised to NaN
summary_df["DeltaH"] = np.nan
# summary_df

In [4]:
# CSV with the binary oxide data
file_path = './min_enth_AO2_binary_oxide_enthalpies.csv'

# Read it into a DataFrame, using the first CSV column as the row index
binary_df = pd.read_csv(filepath_or_buffer=file_path, index_col=0)
# binary_df.columns

In [5]:
# For every cation keep only the row holding its smallest EperFormula
lowest_rows = binary_df.groupby("Cation")["EperFormula"].idxmin()
binary_df = binary_df.loc[lowest_rows]
binary_df

Unnamed: 0,EperFormula,Cation,File
18,-26.191137,Ce,./Binary_oxides/CeO2-fluorite-ICSD_CollCode721...
13,-19.352317,Ge,./Binary_oxides/GeO2-rutile-ICSD_CollCode92551...
5,-30.516988,Hf,./Binary_oxides/HfO2-baddeleyite-ICSD_CollCode...
19,-21.096925,Ir,./Binary_oxides/IrO2-rutile-ICSD_CollCode84577...
113,-21.345044,Mn,./Binary_oxides/mp_Mn-O_cif_files/mp-1182678_M...
62,-28.884972,Nb,./Binary_oxides/mp_Nb-O_cif_files/mp-1595_Nb2O...
164,-16.038234,Pb,./Binary_oxides/mp_Pb-O_cif_files/mp-22633_Pb3...
1,-17.297321,Pt,./Binary_oxides/PtO2-Pnnm-ICSD_CollCode4415.cif
25,-19.392168,Rh,./Binary_oxides/RhO2-rutile-ICSD_CollCode28498...
21,-22.177257,Ru,./Binary_oxides/RuO2-rutile-ICSD_CollCode11765...


In [6]:
from ase.io.trajectory import Trajectory
from ase.visualize import view

# Open a single trajectory, print how many frames it holds, then open the
# viewer on its last frame
traj = Trajectory("./alpha-PbO2-4comp/trajectories/403TiZrRhPtalphaPbO24comp.traj")
n_frames = len(traj)
print(n_frames)
final_frame = traj[-1]
view(final_frame)

1


<Popen: returncode: None args: ['/home/ubc_oadicks/anaconda3/envs/2024-06-24...>

In [None]:
from ase.formula import Formula

# Parse the chemical formula of the first frame and show its count()
first_formula = Formula(traj[0].get_chemical_formula())
first_formula.count()

In [None]:
import ast

# Normalisation constants for the mixing enthalpy.
# NOTE(review): these look like 64 atoms of each cation and 512 O atoms
# (256 AO2 formula units) per supercell — confirm against the structures.
numcat = 64.0
numformula = 512.0/2.0

# Build the cation -> EperFormula lookup once instead of filtering binary_df for
# every element of every row. drop_duplicates keeps the first row per cation,
# which is the same row the former `.values[0]` lookup selected.
enbin_by_cation = (
    binary_df.drop_duplicates(subset='Cation')
    .set_index('Cation')['EperFormula']
    .to_dict()
)

deltaH_values = []
for formula_str, energy in zip(summary_df['Formula'], summary_df['Energy']):
    # The Formula column stores a Python list literal such as "['Ge', 'Ti', ...]"
    comp = ast.literal_eval(formula_str)

    # Fail with a clear message rather than an opaque IndexError
    missing = [el for el in comp if el not in enbin_by_cation]
    if missing:
        raise KeyError(
            f"No binary oxide reference energy for cation(s) {missing} in {formula_str}"
        )

    # Subtract numcat * EperFormula for each cation from the mixed-cell energy
    deltaH = float(energy)
    for el in comp:
        deltaH = deltaH - (numcat * enbin_by_cation[el])
    deltaH_values.append(deltaH/numformula)

# Single column assignment instead of one .loc write per row
summary_df['DeltaH'] = pd.Series(deltaH_values, index=summary_df.index, dtype=float)
    

In [None]:
kB = 8.6173303E-5 #eV/K
ncat = 4
# Divide the whole DeltaH column by kB * ln(ncat) in one vectorised step
# NOTE(review): kB * ln(ncat) looks like an ideal mixing entropy term — confirm
kB_ln_ncat = kB * np.log(ncat)
summary_df['T/K'] = summary_df['DeltaH'] / kB_ln_ncat
summary_df

In [None]:
# Write the summary table to disk, leaving out the index column
csvfilestr = "summary_alpha-PbO2.csv"
summary_df.to_csv(path_or_buf=csvfilestr, index=False)

In [None]:
import matplotlib.pyplot as plt

# Scatter of the 'Cation sigma' column against 'DeltaH', one point per row
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(summary_df['DeltaH'], summary_df['Cation sigma'])

# Customizing the plot
# Raw strings keep the LaTeX backslashes from being read as invalid escapes
ax.set_xlabel(r'$\Delta$H (eV/formula unit)')
ax.set_ylabel(r'E$_{i}$ $\sigma$ (eV)')

ax.set_title("4-component alpha-PbO$_{2}$")
ax.grid(True)

# ax.set_xlim(-1.0, 1.0)  # Set the x-axis boundary from -1.0 to 1.0
# ax.set_ylim(0, 0.12)  # Set the y-axis boundary from 0 to 0.12

# Shade the rectangle 0.0 <= x <= 0.4, 0.0 <= y <= 0.1
ax.fill_betweenx([0.0, 0.1], 0.0, 0.4, color='green', alpha=0.3, label="Filled square")

# Create the legend only after the labelled artist exists; calling it earlier
# yields an empty legend and a "no artists with labels" warning
ax.legend()

# Save before showing, then show the plot
fig.savefig('deltaH_v_Eisigma_alphaPbO2_4comp.png')
plt.show()

In [None]:
# Order the rows by increasing 'Cation sigma' once, then reuse the result
sorted_summary = summary_df.sort_values('Cation sigma')

# Show every row and column of the sorted table
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(sorted_summary)

# Save the same sorted table without the index column
csvfilestr = "summary_alphaPbO2_deltaHvsigma_ordered_Ei_minbin.csv"
sorted_summary.to_csv(csvfilestr, index=False)