In [1]:
import sys
import re
import numpy as np
import pandas as pd

from collections import OrderedDict
from decimal import Decimal, ROUND_HALF_UP


# References
# [1] S.Guo and C.T.Liu, Progress in Natural Science: Materials International, 21 (2011) 433-446.
# [2] O.N. Senkov and D.B. Miracle, Materials Research Bulletin, 36 (12) (2001) 2183-2198.
# [3] A. Takeuchi and A. Inoue, Materials Transactions, 46 (12) (2005) 2817-2829.
# The dictionary of mixing enthalpies (heats of reaction) of binary systems, binary_mixH_dict, is defined in a later cell.






def unfold_brackets(mat_formula):
    """Rewrite a formula containing parentheses as a flat formula string.

    The number that follows a closing parenthesis (1.0 when there is none)
    is shared among the elements of that group in proportion to their
    coefficients inside the group, rounded half-up to 4 decimals.  Elements
    outside any group keep their own coefficient (1.0 when none is given).
    The result lists every grouped element first, then the ungrouped ones,
    each immediately followed by its coefficient.

    A formula without any parenthesis is returned unchanged.
    """
    # Nothing to expand: hand the formula back untouched.
    if not re.search(r'\(|\)', mat_formula):
        return mat_formula

    # Split into element symbols, numbers and parentheses; numbers become floats.
    tokens = [
        float(piece) if re.search(r'[0-9.]+', piece) else piece
        for piece in re.findall(r'[A-Z][a-z]*|[0-9.]+|\(|\)', mat_formula)
    ]
    last = len(tokens) - 1

    def is_element(token):
        # Parentheses are strings too, hence the extra pattern check.
        return type(token) == str and bool(re.search('[A-Z][a-z]*', token))

    # Amount attached to each group, in order of the closing parentheses.
    group_amounts = []
    for pos, token in enumerate(tokens):
        if token != ")":
            continue
        if pos < last and type(tokens[pos + 1]) == float:
            group_amounts.append(tokens[pos + 1])
        else:
            # followed by a non-number, or the formula ends with ')'
            group_amounts.append(1.0)

    # Coefficients of the elements inside each group, keyed by 1-based group number.
    groups = OrderedDict()
    group_no = 0
    inside = False
    for pos, token in enumerate(tokens):
        if token == "(":
            group_no += 1
            inside = True
        elif token == ")":
            inside = False

        if not inside:
            continue

        members = groups.setdefault(group_no, {})
        if is_element(token):
            follower = tokens[pos + 1]
            if type(follower) == str:
                members[token] = 1.0
            elif type(follower) == float:
                members[token] = follower

    # Turn the coefficients of each group into fractions of the group total.
    for members in groups.values():
        group_sum = sum(members.values())
        for symbol in members:
            members[symbol] = members[symbol] / group_sum

    # Scale the fractions by the amount attached to the group and round.
    for number, members in groups.items():
        for symbol in members:
            scaled = members[symbol] * group_amounts[number - 1]
            members[symbol] = float(
                Decimal(str(scaled)).quantize(Decimal('1e-4'), rounding=ROUND_HALF_UP))

    # Coefficients of the elements that sit outside every group.
    loose = OrderedDict()
    inside = False
    for pos in range(last):
        token = tokens[pos]
        if token == "(":
            inside = True
        elif token == ")":
            inside = False

        if inside:
            continue

        if is_element(token):
            follower = tokens[pos + 1]
            if type(follower) == str:
                loose[token] = 1.0
            elif type(follower) == float:
                loose[token] = follower

    # A trailing element has no follower, so it gets the default coefficient.
    if is_element(tokens[last]):
        loose[tokens[last]] = 1.0

    # Grouped elements first, ungrouped ones afterwards.
    pieces = []
    for members in groups.values():
        for symbol, amount in members.items():
            pieces.append(symbol + str(amount))
    for symbol, amount in loose.items():
        pieces.append(symbol + str(amount))

    return "".join(pieces)


# Make the dictionary to collect the coefficients corresponding to the elements
def makecoef_dict(mat_formula):
    """Parse a flat formula string into an ordered {element: coefficient} mapping.

    The formula is cut at every capital letter; each piece is an element
    symbol optionally followed by its coefficient.  A symbol without a
    coefficient gets 1.0.  If a symbol occurs more than once, the last
    occurrence wins.
    """
    elemcoef_dict = OrderedDict()
    for piece in re.findall('[A-Z][^A-Z]*', mat_formula):
        symbol = re.findall('[A-Z][a-z]*', piece)[0]
        if piece == symbol:
            # bare symbol, no explicit coefficient
            elemcoef_dict[symbol] = 1.0
        else:
            # everything after the symbol is the coefficient
            elemcoef_dict[symbol] = float(piece[len(symbol):])
    return elemcoef_dict


# Calculate the sum of the coefficients and concentrations of the elements
def makeconcent_dict(elemcoef_dict):
    """Convert element coefficients into concentrations (fractions of the total).

    Each coefficient is divided by the sum of all coefficients and rounded
    half-up to 4 decimals.  The element order of the input is preserved.
    """
    total = sum(elemcoef_dict.values())

    elemconcent_dict = OrderedDict()
    for symbol, coefficient in elemcoef_dict.items():
        share = Decimal(str(coefficient / total))
        elemconcent_dict[symbol] = float(share.quantize(Decimal('1e-4'), rounding=ROUND_HALF_UP))

    return elemconcent_dict

In [24]:
# Lookup table of binary mixing enthalpies used by calc_Hmix.
# Each key is the concatenation of two element symbols and each value is the
# enthalpy of that pair (presumably in kJ/mol, as stated on calc_Hmix -- confirm
# against the references listed at the top of the notebook).
# Several pairs are stored under both orderings (e.g. "AlCo" and "CoAl");
# calc_Hmix tries both orderings, so a single ordering per pair is sufficient.
# NOTE(review): "HfSc": 104 is far outside the range of every other entry
# (the next largest positive value is 34) -- verify it against the source table.
binary_mixH_dict = {"CoFe": -1, "CoNi": 0, "FeNi": -2, "CoSi": -38, "FeSi": -35, "NiSi": -40, "AlCo": -19,
                    "AlFe": -11, "AlNi": -22, "CoCr": -4, "CrFe": -1, "CrNi": -7, "CoMo": -5, "CrMo": 0, "FeMo": -2,
                    "MoNi": -7,
                    "CoNb": -25, "CrNb": -7, "FeNb": -16, "NbNi": -30, "CoTi": -28, "CrTi": -7, "FeTi": -17,
                    "NiTi": -35,
                    "AlCr": -10, "AlC": -36, "CCo": -42, "CCr": -61, "CFe": -50, "CNi": -39, "AlMo": -5, "AlNb": -18,
                    "AlSi": -19,
                    "CrSi": -37, "AlTi": -30, "AlV": -16, "AlZr": -44, "CoV": -14, "CoZr": -41, "CrV": -2, "CrZr": -12,
                    "FeV": -7,
                    "FeZr": -25, "NiV": -18, "NiZr": -49, "TiV": -2, "TiZr": 0, "VZr": -4, "CoMn": -5, "CrMn": 2,
                    "FeMn": 0,
                    "MnNi": -8, "MnV": -1, "AlMn": -19, "NiMo": -7, "CoCu": 6, "CrCu": 12, "CuFe": 13, "AlCu": -1,
                    "CuNi": 4,
                    "AlCa": 0,
                    "CuTi": -9, "CuMn": 4, "CuV": 5, "AlB": 0, "BCo": -24, "BCr": -31, "BCu": 0, "BFe": -26, "BNi": -24,
                    "AlY": -38,
                    "CoY": -22, "CrY": 11, "CuY": -22, "NiY": -31, "TiY": 15, "CoSn": 0, "CuSn": 7, "FeSn": 11,
                    "NiSn": -4, "CuNb": 3,
                    "CuSi": -19, "CuZr": -23, "MnSn": -7, "CuMo": 19, "AlLi": -4, "AlMg": -2, "AlSn": 4, "AlZn": 1,
                    "LiMg": 0,
                    "LiSn": -18, "LiZn": -7, "MgSn": -9, "MgZn": -4, "SnZn": 1, "AlSc": -38, "LiSc": 12, "LiTi": 34,
                    "MgSc": -3,
                    "MgTi": 16, "ScTi": 8, "CuLi": -5, "CuMg": -3, "CuZn": 1, "NbTi": 2, "NbV": -1, "AlHf": -39,
                    "AlTa": -19, "HfNb": 4,
                    "HfTa": 3, "HfTi": 0, "HfZr": 0, "NbTa": 0, "NbZr": 4, "TaTi": 1, "TaZr": 3, "MoNb": -6, "MoTa": -5,
                    "MoTi": -4,
                    "MoZr": -6, "MoV": 0, "TaV": -1, "CrHf": -9, "CrTa": -7, "HfMo": -4, "HfSi": -77, "MoSi": -35,
                    "NbSi": -56,
                    "SiTi": -66, "SiZr": -84, "HfC": -123, "MoC": -67, "NbC": -102, "TiC": -109, "ZrC": -131, "ZrTi": 0,
                    "HfV": -2,
                    "SiV": -48, "MoW": 0, "NbW": -8, "TaW": -7, "VW": -1, "TiW": -6, "NbMo": -6, "TiMo": -4, "VMo": 0,
                    "MnTi": -8, "CoAl": -19,
                    "CrAl": -10, "CuAl": -1, "FeAl": -11, "MnAl": -19, "NiAl": -22, "TiAl": -30, "VAl": -16, "MnZn": -6,
                    "NiZn": -9,
                    "CuCo": 6, "CuCr": 12, "NiCo": 0, "NiCr": -7, "NiFe": -2, "ZrTa": 3, "ZrNb": 4, "ZrSi": -84,
                    "TaNb": 0, "TaSi": -56,
                    "TiNb": 2, "TiSi": -66, "NiNb": -30, "PdPt": 2, "PdCu": -14, "PdNi": 0, "PdP": -36.5, "PtCu": -12,
                    "PtNi": -5,
                    "PtP": -34.5, "CuP": -17.5, "NiP": -34.5, "SrCa": 1, "SrYb": 1, "SrMg": -4, "SrZn": -21, "CaYb": 0,
                    "CaMg": -6,
                    "CaZn": -22, "YbMg": -6, "YbZn": -21, "SrCu": -9, "CaCu": -13, "YbCu": -12, "MgCu": -3, "ZnCu": 1,
                    "LiSr": 0,
                    "LiCa": -1, "LiYb": -1, "MgSr": -4, "MgCa": -6, "MgYb": -6, "ErTb": 0, "ErDy": 0, "ErNi": -34,
                    "ErAl": -38,
                    "TbDy": 0, "TbNi": -32, "TbAl": -39, "DyNi": -32, "DyAl": -38, "ZrNi": -49, "MgNi": -4, "ZrHf": 0,
                    "ZrCu": -23,
                    "HfCu": -17, "HfNi": -42, "TiCu": -9, "TiNi": -35, "ZrFe": -25, "HfFe": -21, "TiFe": -17,
                    "ZrCo": -41, "HfCo": -35,
                    "TiCo": -28, "SiTa": -56, "FeCo": -1, "FeCr": -1, "SiCr": -37, "SiAl": -19, "WNb": -8, "WMo": 0,
                    "WTa": -7, "WV": -1,
                    "FeCu": 13, "NiCu": 4, "NbCr": -7, "NbFe": -16, "NbMn": -4, "NbCo": -25, "CrCo": -4, "MnCo": -5,
                    "TiCr": -7,
                    "TiMn": -8, "VCr": -2, "VCu": 5, "VFe": -7, "VMn": -1, "VCo": -14, "VNi": -18, "VY": 17, "YZr": 9,
                    "ZrV": -4,
                    "ZrBe": -43, "TiBe": -30, "VBe": -16, "CuBe": 0, "NiBe": -4, "ZrAl": -44, "HfAl": -39, "FeC": -50,
                    "FeB": -26,
                    "FeY": -1, "CoC": -42, "CoB": -24, "CrC": -61, "CrB": -31, "MoB": -34, "MoY": 24, "CB": -10,
                    "CY": -117, "BY": -50,
                    "MgAg": -10, "MgGd": -6, "CuAg": 2, "CuGd": -22, "AgGd": -29, "ZrY": 9, "YSc": 1, "YAl": -38,
                    "YCo": -22, "ScAl": -38,
                    "ScCo": -30, "CoEr": -24, "CrEr": 8, "MoEr": 20, "CEr": -118, "BEr": -52,
                    # appended on September 26th, 2020
                    "AgAl": -4, "AgCo": 19, "AgCr": 27, "AgCu": 2, "AgNi": 15, "AlAu": -22, "AuCo": 7, "AuCr": 0,
                    "AuCu": -9, "AuNi": 7,
                    "NiMn": -8, "AlNd": -38, "CoNd": -20, "CuNd": -22, "FeNd": 1, "NdNi": -30, "CoZn": -5, "TiZn": -15,
                    "CoGe": -21.5,
                    "CrGe": -18.5, "FeGe": -15.5, "GeMn": -31.5, "GeNi": -23.5, "MnNb": -4, "CoPd": -1, "CrPd": -15,
                    "FePd": -4,
                    "NiPd": 0, "MnMo": 5, "MnZr": -15, "DyGd": 0, "DyLu": 0, "DyTb": 0, "DyTm": 0, "GdLu": 0, "GdTb": 0,
                    "GdTm": 0,
                    "LuTb": 0, "LuTm": 0, "TbTm": 0, "DyY": 0, "GdY": 0, "LuY": 0, "TbY": 0, "BeCu": 0, "BeNi": -4,
                    "BeTi": -30, "BeV": -16,
                    "BeZr": -43, "CoHf": -35, "CuHf": -17, "FeHf": -21, "MoPd": -15, "MoRh": -15, "MoRu": -14,
                    "PdRh": 2, "PdRu": 6,
                    "RhRu": 1, "CaSi": -51, "LiSi": -30, "MgSi": -26, "ZnSi": -18, "MnSi": -45, "YSi": -73, "AlW": -2,
                    "CrW": 1, "CuW": 22, "FeW": 0, "MnW": 6, "CoW": -1, "NiW": -3, "HfW": -6, "ZrW": -9,
                    "SiW": -31, "ReW": -4,
                    "CW": -60, "MoRe": -7, "NbRe": -26, "TiRe": -25, "ZrRe": -35, "CRe": -42,
                    "TaRe": -24, "HfY": 11, "CaY": 11, "LiY": 8, "MgY": -6, "ZrSc": 4, "HfSc": 104, "CrMg": 24,
                    "FeMg": 18,
                    "FeZn": 4, "CrZn": 5, "MgMn": 10, "MgZr": 6, "ZnZr": -29, "CMn": -66, "CTa": -101, "MnPd": 7}

# Names of the per-element properties to aggregate; init_dict reads a CSV
# column with each of these names.
properties = ['radius', 'VEC', 'allen_en', 'ion', 'c11',
              'GGA', 'melting_point']

# One lookup table (element symbol -> value) per entry of `properties`, in the
# same order.  Built with a comprehension so that every slot is its own dict:
# `[{}] * n` would put the *same* dict object in every slot.
AllDict = [{} for _ in properties]

def init_dict(filename):
    """Fill AllDict from a CSV file of element properties.

    The file is read with pandas and must provide a 'symbol' column plus one
    column for every name in `properties`.  Slot k of AllDict is replaced by
    a dict mapping each symbol to its value in the column properties[k].
    """
    table = pd.read_csv(filename)
    symbols = table['symbol']
    for slot, column_name in enumerate(properties):
        AllDict[slot] = dict(zip(symbols, table[column_name]))
# Populate AllDict as soon as this cell runs.
# NOTE(review): the driver cell further down calls init_dict again with the
# same file name, so one of the two loads looks redundant.
init_dict("elementfull.csv")

# Calculate the mixing entropy

def calc_Smix(element_dict):
    """Return the ideal mixing entropy -R * sum(c * ln(c)), rounded to 2 decimals.

    Parameters
    ----------
    element_dict : mapping of element symbol -> concentration.

    Returns
    -------
    float
        The entropy rounded half-up to two decimals.

    Elements with a concentration of exactly 0 (e.g. the ``Al0`` member of a
    composition series) are skipped: c*ln(c) tends to 0 as c -> 0, whereas
    evaluating it directly gives 0 * -inf = nan, which cannot be rounded.
    """
    gas_constant = 8.314  # R; presumably in J/(mol*K)

    weighted_logs = 0
    for concentration in element_dict.values():
        if concentration == 0:
            continue
        weighted_logs += concentration * np.log(concentration)

    Smix = -gas_constant * weighted_logs
    return float(Decimal(str(Smix)).quantize(
        Decimal('0.01'), rounding=ROUND_HALF_UP))

# Calculate the mixing enthalpy (in kJ/mol)
def calc_Hmix(element_dict, mixH_dict=None):
    """Return the mixing enthalpy 4 * sum_{i<j} H_ij * c_i * c_j, rounded to 2 decimals.

    Parameters
    ----------
    element_dict : mapping of element symbol -> concentration.
    mixH_dict : optional mapping of concatenated symbol pairs -> binary
        mixing enthalpy.  Defaults to the module-level binary_mixH_dict.

    Returns
    -------
    float
        The enthalpy rounded half-up to two decimals, or 0.0 when at least
        one element pair is missing from the table.

    Every pair is looked up under both key orderings ("AB" then "BA").
    Each missing pair is printed.  The result is forced to 0 only after all
    pairs have been visited; resetting the running sum in the middle of the
    loop would let later pairs accumulate on top of it and make the result
    depend on the order of the elements.
    """
    if mixH_dict is None:
        mixH_dict = binary_mixH_dict

    symbols = list(element_dict)
    Hmix = 0
    pair_missing = False
    for i, first in enumerate(symbols):
        for second in symbols[i + 1:]:
            if first + second in mixH_dict:
                pair_enthalpy = mixH_dict[first + second]
            elif second + first in mixH_dict:
                pair_enthalpy = mixH_dict[second + first]
            else:
                print(first + second)
                pair_missing = True
                continue
            Hmix += pair_enthalpy * element_dict[first] * element_dict[second]

    if pair_missing:
        Hmix = 0

    Hmix = Hmix * 4
    return float(Decimal(str(Hmix)).quantize(
        Decimal('0.01'), rounding=ROUND_HALF_UP))

def cacl_delta(radius_diff, Smix):
    """Return Smix divided by the square of radius_diff, rounded half-up to 2 decimals.

    A radius_diff of zero is not guarded against.
    """
    squared_mismatch = radius_diff ** 2
    ratio = Decimal(str(Smix * 1.0 / squared_mismatch))
    return float(ratio.quantize(Decimal('0.01'), rounding=ROUND_HALF_UP))


def cacl_property(element_dict, dict_index):
    """Return the concentration-weighted sum of one property.

    dict_index selects the lookup table in AllDict; every element of
    element_dict must be present in that table.
    """
    lookup = AllDict[dict_index]
    return sum(concentration * lookup[symbol]
               for symbol, concentration in element_dict.items())
def cacl_property_diff(element_dict, proper, dict_index):
    """Return 100 * sqrt(sum(c_i * (1 - p_i / proper) ** 2)), rounded half-up to 2 decimals.

    p_i is looked up in AllDict[dict_index]; `proper` is the reference value
    each p_i is compared against (main passes the result of cacl_property).
    """
    lookup = AllDict[dict_index]

    weighted_squares = 0
    for symbol, concentration in element_dict.items():
        relative_gap = 1 - lookup[symbol] / proper
        weighted_squares += concentration * relative_gap ** 2

    mismatch = Decimal(str(np.sqrt(weighted_squares) * 100))
    return float(mismatch.quantize(Decimal('0.01'), rounding=ROUND_HALF_UP))

In [25]:
def main(inputfile, outputfile):
    """Append the computed alloy descriptors to every row of a CSV file.

    Parameters
    ----------
    inputfile : path of the CSV to read.  The first line is treated as the
        header; on every other line the first comma-separated field is the
        material formula.
    outputfile : path of the CSV to write (overwritten if it exists).

    Each output row is the stripped input row followed by, for every entry
    of `properties`, the weighted value and its "_diff" value, and finally
    Hmix, Smix and delta.  Blank input lines are skipped: an empty formula
    has no elements, which would end in a division by zero in cacl_delta.

    If a file cannot be found, a message is printed and SystemExit is raised.
    """
    # Extra header columns: "<property>" and "<property>_diff" for each property.
    headers = []
    for proper in properties:
        headers.append(proper)
        headers.append(proper + "_diff")

    try:
        with open(inputfile, "r") as infile:
            with open(outputfile, "w+", encoding="utf_8") as outfile:
                header_line = infile.readline()
                outfile.write(header_line.strip() + "," + ",".join(str(h) for h in headers)
                              + ",Hmix,Smix,delta\n")

                for oneline in infile:
                    record = oneline.strip()
                    if not record:
                        continue
                    materialname = record.split(',')[0]

                    newmatname = unfold_brackets(materialname)
                    # The dictionary containing the coefficients corresponding to the elements
                    elemcoef_dict = makecoef_dict(newmatname)
                    # The dictionary containing the concentrations corresponding to the elements
                    elemconcent_dict = makeconcent_dict(elemcoef_dict)

                    row_values = []
                    for index, _ in enumerate(properties):
                        p_value = cacl_property(elemconcent_dict, index)
                        p_diff_value = cacl_property_diff(elemconcent_dict, p_value, index)
                        row_values.append(p_value)
                        row_values.append(p_diff_value)

                    Hmix = calc_Hmix(elemconcent_dict)
                    Smix = calc_Smix(elemconcent_dict)
                    # row_values[1] is the "_diff" of the first property,
                    # i.e. the radius mismatch while properties[0] is 'radius'.
                    delta = cacl_delta(row_values[1], Smix)
                    row_values.append(Hmix)
                    row_values.append(Smix)
                    row_values.append(delta)

                    line = ",".join(str(value) for value in row_values)
                    outfile.write(record + "," + line + "\n")

    except FileNotFoundError:
        print("Input file not found!\n")
        sys.exit()

In [26]:
# Driver cell: load the element property tables, then compute the descriptors
# for every row of the input CSV and write them to the output CSV.
element_properties_file = 'elementfull.csv'
init_dict(element_properties_file)
# `inputfile` is read again by the preview cell that follows.
inputfile = 'Composition.csv'
outputfile = 'YieldStress_database.csv'
main(inputfile, outputfile)

In [27]:
# Preview the table stored in `inputfile` (set in the driver cell).
# Note that this displays the input file, not the generated `outputfile`.
df = pd.read_csv(inputfile)
df

Unnamed: 0,Composition,YS (MPa)
0,Al0.0154Co0.1231Cr0.1231Cu0.1231Fe0.1231Mn0.12...,1862.0
1,Al0.0174Cu0.3275Mn0.3275Ni0.3275,330.0
2,Al0.0196Co0.1961Cr0.1961Fe0.1961Mn0.1961Ni0.1961,1631.0
3,Al0.0196Cr0.1961Fe0.3922Mn0.2451Ni0.1471,354.0
4,Al0.0303Co0.1212Cr0.1212Cu0.1212Fe0.1212Mn0.12...,1465.0
...,...,...
338,Nb0.303Re0.0909Ti0.303Zr0.303,1244.0
339,Nb0.303Ta0.0909Ti0.303Zr0.303,882.0
340,Nb0.303Ti0.303V0.0909Zr0.303,866.0
341,Nb0.3333Ta0.3333Ti0.3333,573.0
