In [1]:
import pandas as pd
import os
import re

In [2]:
# Capture the current working directory; later cells build their data paths from it.
pwd = os.getcwd()
# Echo the path so the rendered notebook records where it was executed.
print(pwd)

e:\research\HEA_phase_classification\weighted_features\classification\new_alloy_modified_ranges


In [3]:
# Read the alloy workbook stored under the working directory's data folder
input_path = pwd + "/data/weighted_features_new_alloys.xlsx"
df = pd.read_excel(input_path)
df.head()

Unnamed: 0,Composition,EquiTibrium conditions,phase_count,PhaseType
0,Al0.25MoNbTiV,AC,s,BCC
1,Al0.5MoNbTiV,AC,S,BCC
2,Al0.75MoNbTiV,AC,S,BCC
3,Al1.5MoNbTiV,AC,S,BCC
4,AlMoNbTiV,AC,s,BCC


The next cell finds the unique elements across all alloy compositions and, for each element, the maximum atomic fraction (the largest coefficient that follows its symbol in any formula).

In [4]:
# Element symbol followed by an optional numeric coefficient
pattern = re.compile(r'([A-Z][a-z]*)(\d*\.?\d*)')

# Largest coefficient seen so far for each element symbol
max_atomic_fractions = {}

# Scan every composition string and keep the running maximum per element
for composition in df["Composition"]:
    for element, amount in pattern.findall(composition):
        # A symbol with no trailing number counts as 1
        fraction = float(amount) if amount else 1
        if element not in max_atomic_fractions or fraction > max_atomic_fractions[element]:
            max_atomic_fractions[element] = fraction

print(max_atomic_fractions)

{'Al': 1.5, 'Mo': 2.0, 'Nb': 1.54, 'Ti': 2.0, 'V': 3.0, 'Ta': 1.54, 'Zr': 1.5, 'Hf': 1, 'Cr': 1.5, 'Co': 1, 'W': 1, 'Si': 0.5}


In [5]:
# Tabulate the per-element maxima (one row per element), largest first
df_max_atomic_fractions = (
    pd.DataFrame.from_dict(
        max_atomic_fractions,
        orient='index',
        columns=['Max Atomic Fraction'],
    )
    .sort_values(by="Max Atomic Fraction", ascending=False)
)
df_max_atomic_fractions

Unnamed: 0,Max Atomic Fraction
V,3.0
Mo,2.0
Ti,2.0
Nb,1.54
Ta,1.54
Al,1.5
Zr,1.5
Cr,1.5
Hf,1.0
Co,1.0


In [6]:
import itertools
import pandas as pd

# Elements that take the role of the base (largest-share) component, one at a time
base_elements = [
    'V', 'Mo', 'Ti',
    'Nb', 'Ta', 'Al',
]
# Pool from which the remaining components of each alloy are drawn
other_elements = [
    'Zr', 'Cr', 'W',
    'Hf', 'Co',
]

def generate_distributions(remaining, num_elements, min_fraction=10, step=5):
    """Enumerate ordered ways to split ``remaining`` across ``num_elements`` slots.

    Every slot except the last takes a value from
    ``min_fraction, min_fraction + step, ...``; the last slot receives whatever
    is left over. The upper bound of each range reserves ``min_fraction`` for
    every slot still to be filled, so no slot produced by the recursion ends
    up below ``min_fraction``.

    Parameters
    ----------
    remaining : int
        Total amount to distribute.
    num_elements : int
        Number of slots to fill; must be at least 1.
    min_fraction : int, optional
        Smallest value a slot may receive (default 10).
    step : int, optional
        Increment between candidate values (default 5); must be at least 1.

    Returns
    -------
    list[list[int]]
        Each inner list has ``num_elements`` entries summing to ``remaining``.
        The list is empty when ``remaining`` cannot cover ``min_fraction`` for
        every slot. With ``num_elements == 1`` the single result is
        ``[[remaining]]`` regardless of ``min_fraction``.

    Raises
    ------
    ValueError
        If ``num_elements`` or ``step`` is smaller than 1.
    """
    if num_elements < 1:
        # Without this guard the recursion below never reaches its base case.
        raise ValueError("num_elements must be at least 1")
    if step < 1:
        raise ValueError("step must be at least 1")
    if num_elements == 1:
        return [[remaining]]

    # Largest value the first slot may take while leaving min_fraction for each other slot
    upper = remaining - min_fraction * (num_elements - 1)
    distributions = []
    for first in range(min_fraction, upper + 1, step):
        for rest in generate_distributions(remaining - first, num_elements - 1, min_fraction, step):
            distributions.append([first] + rest)
    return distributions

# One {element: percentage} mapping per generated alloy
data = []

# The split of the non-base remainder depends only on the base percentage, so
# it is computed once per percentage instead of once per (base, combination) pair.
base_fractions = range(30, 46, 5)  # base element takes 30-45% in steps of 5
distributions_by_fraction = {
    base_fraction: generate_distributions(100 - base_fraction, 4)
    for base_fraction in base_fractions
}

# Loop through the base elements and their allowed percentages
for base in base_elements:
    for base_fraction in base_fractions:
        # Choose four companions from 'other_elements' to share what the base leaves over
        for comb in itertools.combinations(other_elements, 4):
            for distribution in distributions_by_fraction[base_fraction]:
                data.append({base: base_fraction, **dict(zip(comb, distribution))})

# Elements absent from an alloy come out as NaN in the frame; record them as 0
df_HEA = pd.DataFrame(data).fillna(0)

def row_to_formula(row):
    """Convert one row of element amounts into an HEA formula string.

    Each non-zero entry is divided by the row maximum and rounded to two
    decimals. The element label is followed by that ratio, except that a
    ratio of exactly 1.0 is omitted (the bare label is written). Zero entries
    are skipped, so a row of all zeros yields an empty string.

    Parameters
    ----------
    row : pandas.Series
        Element labels as the index, numeric amounts as the values.

    Returns
    -------
    str
        Concatenated ``<label><ratio>`` terms in the row's index order.
    """
    max_val = row.max()
    terms = []
    # Series.items() replaces Series.iteritems(), which is deprecated and
    # was removed in pandas 2.0.
    for idx, val in row.items():
        if val != 0:
            norm_val = round(val / max_val, 2)
            terms.append(f"{idx}{norm_val if norm_val != 1.0 else ''}")
    return "".join(terms)

# Build the formula string for every row, then keep only that column
df_HEA = df_HEA.assign(HEA_formula=df_HEA.apply(row_to_formula, axis=1))[['HEA_formula']]

df_HEA.head()

  for idx, val in row.iteritems():


Unnamed: 0,HEA_formula
0,V0.75Zr0.25Cr0.25W0.25Hf
1,V0.86Zr0.29Cr0.29W0.43Hf
2,VZr0.33Cr0.33W0.67Hf
3,VZr0.33Cr0.33W0.83Hf0.83
4,VZr0.33Cr0.33WHf0.67


In [7]:
# Report the (rows, columns) size of the generated formula table
print(df_HEA.shape)

(5850, 1)


In [8]:
from pymatgen.core import Composition

In [9]:
# Parse each formula with pymatgen and keep only the element symbols it contains
elements_list = [
    [element.symbol for element in Composition(formula).elements]
    for formula in df_HEA['HEA_formula']
]

# Store the symbol lists (without mole fractions) alongside the formulas
df_HEA['Elements'] = elements_list

# Preview the augmented DataFrame
df_HEA.head()

Unnamed: 0,HEA_formula,Elements
0,V0.75Zr0.25Cr0.25W0.25Hf,"[V, Zr, Cr, W, Hf]"
1,V0.86Zr0.29Cr0.29W0.43Hf,"[V, Zr, Cr, W, Hf]"
2,VZr0.33Cr0.33W0.67Hf,"[V, Zr, Cr, W, Hf]"
3,VZr0.33Cr0.33W0.83Hf0.83,"[V, Zr, Cr, W, Hf]"
4,VZr0.33Cr0.33WHf0.67,"[V, Zr, Cr, W, Hf]"


In [10]:
# Write the generated alloys to the data folder, leaving out the index column
output_path = pwd + "/data/df_HEA.xlsx"
df_HEA.to_excel(output_path, index=False)