In [10]:
! pip install seaborn

In [None]:
import glob
import os
import subprocess
import numpy as np

# Collect the 'eos-*.txt' input files from the current working directory.
# os.listdir() returns entries in arbitrary order, so the list is sorted to
# make the pair ordering (and therefore the output file names and the matrix
# row/column order) reproducible from run to run.
txt_files = sorted(
    file for file in os.listdir()
    if file.startswith('eos-') and file.endswith('.txt')
)

# Create the output folder if it doesn't exist
output_folder = 'pairs-Delta-out'
os.makedirs(output_folder, exist_ok=True)

# Report file that calcDelta.py is expected to leave in the working directory
# (this is the file the workflow archives after each run).
delta_report = 'Delta-out.txt'

# Run calcDelta.py once for every unordered pair (i < j) of EOS files
for i in range(len(txt_files)):
    for j in range(i + 1, len(txt_files)):
        file1 = txt_files[i]
        file2 = txt_files[j]

        # Remove any report left over from a previous pair, so that a failed
        # run can never be archived under the wrong pair's name.
        if os.path.exists(delta_report):
            os.remove(delta_report)

        # Pass the arguments as a list (no shell) so that file names with
        # spaces or shell metacharacters are handled correctly.
        result = subprocess.run(['python', 'calcDelta.py', file1, file2])
        if result.returncode != 0 or not os.path.exists(delta_report):
            # Best effort: report the failure and carry on with the other pairs.
            print(f'WARNING: calcDelta.py produced no report for {file1} / {file2}; skipping this pair')
            continue

        # Archive the report under a name that identifies the pair.
        # os.replace overwrites an existing archive, like `mv` did.
        output_file = f'{output_folder}/Delta-out-{file1}-{file2}'
        os.replace(delta_report, output_file)
        

# Build a symmetric matrix holding the mean Delta value of every pair of EOS
# files, read from the archived reports in `output_folder`.


def _read_mean_delta(path):
    """Return the mean Delta stored in the report at `path`, or None.

    The value is the second whitespace-separated field of the first line
    containing "np.mean". None is returned when the file does not exist or
    when no such line can be parsed (a warning is printed in the latter case).
    """
    try:
        with open(path, 'r') as f:
            for line in f:
                if "np.mean" in line:
                    return float(line.split()[1])
    except FileNotFoundError:
        return None
    except (IndexError, ValueError):
        print(f'WARNING: could not parse the np.mean line in {path}')
        return None
    print(f'WARNING: no np.mean line found in {path}')
    return None


n_files = len(txt_files)
delta_matrix = np.zeros((n_files, n_files), dtype=float)  # diagonal stays 0

for i in range(n_files):
    for j in range(i + 1, n_files):
        file1 = txt_files[i]
        file2 = txt_files[j]

        # A report is written for only one ordering of each pair, and that
        # ordering depends on the file listing order at the time of the run.
        # Accept either ordering, but use a single value per pair so that
        # stale reports for both orderings cannot be summed together.
        value = None
        for first, second in ((file1, file2), (file2, file1)):
            value = _read_mean_delta(f'{output_folder}/Delta-out-{first}-{second}')
            if value is not None:
                break

        # A missing or unreadable report keeps the placeholder value 0.
        if value is None:
            value = 0.0

        # Fill both triangles directly; no later "+= transpose" step is needed.
        delta_matrix[i, j] = value
        delta_matrix[j, i] = value

print(delta_matrix)

    

In [None]:
import pandas as pd
import glob

# Collect the per-element values of every pairwise report that involves QTM
# into one dataframe: one row per element, one column per code.

# Get a list of all files starting with 'Delta-out-' and ending with '.txt'
file_list = glob.glob('./pairs-Delta-out/Delta-out-*.txt')

print(file_list)


def _code_name_from_report(path):
    """Derive the column label for a report named 'Delta-out-<file1>-<file2>'.

    The label is the fourth '-'-separated field of the file name (with its
    last extension removed), stripped of a trailing '.txt'. When that label
    is exactly "QTM", the sixth field is used instead, when present
    (NOTE(review): this looks like the intended "use the partner code's name"
    fallback -- confirm against the actual file names).
    """
    stem = ".".join(path.split('/')[-1].split('.')[:-1])
    parts = stem.split('-')
    code_name = parts[3]
    if code_name.endswith('.txt'):
        code_name = code_name[:-4]
    # The previous fallback joined the characters of the extension ('t.x.t')
    # and indexed past the end of the split result, so it always raised
    # IndexError when reached.
    if code_name == "QTM" and len(parts) > 5:
        code_name = parts[5]
    return code_name


# Create an empty dataframe to store the extracted data
df = pd.DataFrame()

# Iterate over each file
for file in file_list:
    # Only reports whose path mentions QTM are of interest here
    if "QTM" not in file:
        continue

    code_name = _code_name_from_report(file)

    # Read the first two tab-separated columns, skipping the first 6 lines of
    # the report and discarding the last 6 parsed rows.
    data = pd.read_csv(file, delimiter='\t', skiprows=6, header=None, names=['Element', code_name], usecols=[0, 1])
    data = data.iloc[:-6]
    display(data)

    # Add the data to the dataframe. If it's the first file, create the dataframe
    if df.empty:
        df = data.copy()
    else:
        df = df.merge(data, on='Element', how='outer')

print(df)

# Fail with a clear message instead of an opaque KeyError from set_index
if 'Element' not in df.columns:
    raise FileNotFoundError(
        "No report matching './pairs-Delta-out/Delta-out-*.txt' with 'QTM' in its name was found"
    )

# Remove rows with null values
df = df.dropna()
df = df.set_index('Element')

# Print the resulting dataframe
display(df)


In [None]:
# Plot the violin plot
# make the resolution higher
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
# Violin plot of the per-element Delta values for a selection of codes,
# annotated with each code's mean Delta, saved as PNG and PDF.

# Make the resolution higher and set the font size
mpl.rcParams['figure.dpi'] = 600
plt.rcParams.update({'font.size': 13})

# Convert all data to float, except the 'Element' column (if present),
# mapping the 'N/A' placeholders to NaN first
for column in df.columns:
    if column != 'Element':
        df[column] = df[column].replace('N/A', float('nan')).replace(' N/A ', float('nan')).astype(float)

# Remove rows with null values
df = df.dropna()

# Calculate the mean delta value of every code and order the columns by it
mean_delta = df.mean()
mean_delta = mean_delta.sort_values()
display(mean_delta)
df = df[mean_delta.index]

print(df.columns)

# Rename QTM+SG15v1.2 to QE+SG15v1.2. The same rename is applied to
# mean_delta; otherwise the annotation lookup below fails with a KeyError
# on the first run after a kernel restart.
label_map = {'QTM+SG15v1.2': 'QE+SG15v1.2'}
df = df.rename(columns=label_map)
mean_delta = mean_delta.rename(index=label_map)
print(df.columns)

# Plot the violin plot only for QE+SG15v1.2, WIEN2k, ABINIT, and VASP
df = df[['QE+SG15v1.2', 'WIEN2k', 'ABINIT+ONCV', 'VASPv5.4']]

# Columns shown in the violin plot
non_element_columns = df.columns[:]

# Overall data range, used to place the annotations and extend the y-axis
value_min = df.min().min()
value_range = df.max().max() - value_min

fig, ax = plt.subplots(figsize=(7, 4))

# cut=0 truncates each violin at the observed data range instead of
# extending the density estimate beyond it
sns.violinplot(data=df[non_element_columns], cut=0, ax=ax)

# Raw string: '\D' is not a valid escape sequence in a normal string literal
ax.set_ylabel(r'$\Delta$ Value (meV/atom)')

# Add the mean delta value as text below the violins
for i, column in enumerate(non_element_columns):
    ax.text(i, -0.035 * value_range, f"{mean_delta[column]:.3f} meV", ha='center', va='top')

# Extend the y-axis by 10% of the data range to create space below the violins
ax.set_ylim(bottom=value_min - 0.1 * value_range)
fig.tight_layout()
fig.savefig('delta_violin.png')
fig.savefig('delta_violin.pdf')

