# Material to class

How to infer a superconducting class from the material formula?



## 1st level classification

This code assigns each material to a class at the 1st level of classification.

In [None]:
import csv
import re

import pandas as pd
import pymatgen as pg
from pymatgen.core import Composition

## Ordered first-level classification rules consumed by get_class.
    # and_compounds is satisfied if ALL of the listed elements are present
    # or_compounds is satisfied if ANY of the listed elements is present
    # get_class stops at the first rule that matches, so the order matters.

composition_map = (
    [{"and_compounds": ["O", "Cu"], "name": "Cuprate"}]
    + [{"and_compounds": ["Fe", pnictogen], "name": "Iron-pnictide"}
       for pnictogen in ("P", "As")]
    + [{"and_compounds": ["Fe", chalcogen], "name": "Iron-chalcogenides"}
       for chalcogen in ("S", "Se", "Te")]
    + [{"and_compounds": [symbol], "name": label}
       for symbol, label in (
           ("H", "Hydrides"),
           ("C", "Carbides"),
           ("N", "Nitrides"),
           ("F", "Fluorides"),
       )]
    # "Ch" is not an atomic symbol, but "Ch" and others like "copper" would be
    # useful later for sub-classes.
    + [
        {"or_compounds": ["S", "Se", "Te"], "name": "Chalcogenides"},
        {"or_compounds": ["P", "As"], "name": "Pnictides"},
        {"and_compounds": ["B"], "name": "Borides"},
        {"and_compounds": ["O"], "name": "Other oxides"},
    ]
    # Alloys ---> anything that satisfies none of the rules above.
)

def get_class(c):
    """Return the first-level class name for a material formula.

    The formula is parsed into its element symbols and the rules of the
    module-level ``composition_map`` are tried in order; the name of the
    first rule that matches is returned.

    Parameters
    ----------
    c : str
        Material formula, e.g. ``"Bi2Sr2CaCu2O8+x"``.

    Returns
    -------
    str
        The name of the first matching rule, ``"Alloy"`` when no rule
        matches, or ``''`` when the formula is not a string or cannot be
        parsed even after stripping doping suffixes.
    """
    # Values read from a CSV column are not guaranteed to be strings (an empty
    # cell typically arrives as NaN). Without this guard the regex fallback
    # below raises an uncaught TypeError and aborts the whole batch.
    if not isinstance(c, str):
        print("Skipping non-string formula: " + str(c))
        return ''

    try:
        elements = set(Composition(c, strict=False).as_dict())
    except Exception as first_error:
        print ("Exception when parsing "+str(c)+". Error: " + str(first_error))
        # Retry after dropping variable-doping suffixes such as "+x" or "-y".
        c_with_replacements = re.sub(r'[+-][ZXYzxy]', '', c)
        try:
            print("Trying to parse " + str(c_with_replacements))
            elements = set(Composition(c_with_replacements, strict=False).as_dict())
        except Exception as second_error:
            print ("Exception when parsing "+str(c_with_replacements)+". Error: " + str(second_error))
            # We give up... skipping this record
            return ''

    for composition in composition_map:
        and_compounds = composition.get('and_compounds', [])
        or_compounds = composition.get('or_compounds', [])

        # A rule carrying and_compounds is evaluated on those alone;
        # or_compounds is only consulted when and_compounds is absent/empty.
        if and_compounds:
            matched = all(elem in elements for elem in and_compounds)
        elif or_compounds:
            matched = any(elem in elements for elem in or_compounds)
        else:
            matched = False

        if matched:
            # First match wins: rule order in composition_map sets priority.
            return composition['name']

    return "Alloy"

## Tests

### Processing list of elements

In [None]:
# Classify every formula in the 'name' column of testinput.csv and write one
# "<formula>,<class>" row per material to output.csv.
df = pd.read_csv('testinput.csv', usecols=['name'])

# newline='' hands line-ending control to the csv module; without it an extra
# blank line is written after every row on platforms that translate '\n'.
with open('output.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for c in df['name']:
        output_class = get_class(c)
        writer.writerow([c, output_class])


### Test for invalid cases


In [None]:
# Print "<formula>, <class>" for every formula listed in test-cases.csv.
df = pd.read_csv('test-cases.csv', usecols={'name'}, sep=",")

for c in df['name']:
    output_class = get_class(c)
    print(f"{c}, {output_class}")
        

## Tagging approach

We try to assign multiple tags to each material based on its composition.

In [None]:
## This map defines the rules for assigning first-level tags.
    # and_compounds is satisfied if ALL of the listed elements are present
    # or_compounds is satisfied if ANY of the listed elements is present
    # not_compounds is satisfied if NONE of the listed elements is present

composition_map_1st_level = [
    {"and_compounds": ["O", "Cu"], "name": "Cuprate"},
    {"and_compounds": ["Fe", "P"], "name": "Iron-pnictide"},
    {"and_compounds": ["Fe", "As"], "name": "Iron-pnictide"},
    {"and_compounds": ["Fe", 'S'], "name": "Iron-chalcogenides"},
    {"and_compounds": ["Fe", 'Se'], "name": "Iron-chalcogenides"},
    {"and_compounds": ["Fe", 'Te'], "name": "Iron-chalcogenides"},
    {"and_compounds": ["H"], "name": "Hydrides"},
    {"and_compounds": ["C"], "name": "Carbides"},
    {"and_compounds": ["N"], "name": "Nitrides"},
    {"and_compounds": ["F"], "name": "Fluorides"},
    # "Ch" is not an atomic symbol, but "Ch" and others like "copper" would be
    # useful later for sub-classes.
    {"or_compounds": ["S", "Se", "Te"], "name": "Chalcogenides"},
    {"or_compounds": ["P", "As"], "name": "Pnictides"},
    {"and_compounds": ["B"], "name": "Borides"},
    {"and_compounds": ["O"], "name": "Oxides"},
    # Alloys ---> materials that satisfy none of the rules above. The list
    # therefore holds every element that triggers one of those rules:
    #   - "B" was spelled " B" (leading space) and could never match, so
    #     borides were additionally tagged as alloys;
    #   - "H" was missing, so hydrides were additionally tagged as alloys.
    {"not_compounds": ["H", "O", "B", "C", "N", "F", "P", "S", "As", "Se", "Te"], "name": "Alloys"}
]

Tag assignment

In [None]:
import pymatgen as pg
import re

def assign_tags(material_formula, composition_map):
    """Return the set of tags whose rules match a material formula.

    Unlike a first-match classification, every rule of ``composition_map`` is
    evaluated and all matching names are collected.

    Parameters
    ----------
    material_formula : str
        Material formula, e.g. ``"Bi2Sr2CaCu2O8+x"``.
    composition_map : list of dict
        Rules to evaluate. Each rule has a ``"name"`` and one of
        ``"and_compounds"`` (all listed elements present), ``"or_compounds"``
        (any listed element present) or ``"not_compounds"`` (none of the
        listed elements present). When several of these keys are given, only
        the first non-empty one in that order is evaluated. Entries that are
        not strings (e.g. ``{"S": 2}``) are skipped: amount constraints are
        not evaluated.

    Returns
    -------
    set of str
        Names of all matching rules. An empty set when nothing matches, when
        the formula is not a string, or when it cannot be parsed.
    """
    # Values read from a CSV column are not guaranteed to be strings (an empty
    # cell typically arrives as NaN). Without this guard the regex fallback
    # below raises an uncaught TypeError and aborts the whole batch.
    if not isinstance(material_formula, str):
        print("Skipping non-string formula: " + str(material_formula))
        return set()

    try:
        elements = set(Composition(material_formula, strict=False).as_dict())
    except Exception as first_error:
        print ("Exception when parsing "+str(material_formula)+". Error: " + str(first_error))
        # Retry after dropping variable-doping suffixes such as "+x" or "-y".
        material_formula_with_replacements = re.sub(r'[+-][ZXYzxy]', '', material_formula)
        try:
            print("Trying to parse " + str(material_formula_with_replacements))
            elements = set(Composition(material_formula_with_replacements, strict=False).as_dict())
        except Exception as second_error:
            print ("Exception when parsing "+str(material_formula_with_replacements)+". Error: " + str(second_error))
            # We give up... skipping this record. Return an empty set so the
            # return type is the same as on the success path.
            return set()

    output_tags = set()
    for composition in composition_map:
        and_compounds = composition.get('and_compounds', [])
        or_compounds = composition.get('or_compounds', [])
        not_compounds = composition.get('not_compounds', [])

        # The branch is chosen on the raw list; only string entries take part
        # in the membership test itself.
        if and_compounds:
            matched = all(elem in elements for elem in and_compounds if isinstance(elem, str))
        elif or_compounds:
            matched = any(elem in elements for elem in or_compounds if isinstance(elem, str))
        elif not_compounds:
            matched = not any(elem in elements for elem in not_compounds if isinstance(elem, str))
        else:
            matched = False

        if matched:
            output_tags.add(composition['name'])

    return output_tags

### Testing



### Test cases

In [None]:
# Print "<formula>, <tags>" for every formula listed in test-cases.csv.
df = pd.read_csv('test-cases.csv', usecols=['name'], sep=",")

for c in df['name']:
    # assign_tags requires the rule table as its second argument; calling it
    # with the formula alone raises TypeError.
    output_class = assign_tags(c, composition_map_1st_level)
    print(str(c) + ", " + str(output_class))
        

### Supercon small

In [None]:
# Tag every formula in the 'name' column of the SuperCon extract and write one
# "<formula>,<tags>" row per material to output-supercon-big.csv.
df = pd.read_csv('test-cases-from-supercon-big.csv', usecols=['name'], sep=",")

# newline='' hands line-ending control to the csv module; without it an extra
# blank line is written after every row on platforms that translate '\n'.
with open('output-supercon-big.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for c in df['name']:
        # assign_tags requires the rule table as its second argument; calling
        # it with the formula alone raises TypeError.
        output_class = assign_tags(c, composition_map_1st_level)
        writer.writerow([c, output_class])
        

In [None]:
#"Fe 1 Te 0.4 Se 0.6"

# Sample formula used to demonstrate first- and second-level tagging.
material_name_sample = "Bi2Sr2CaCu2O8+x"

# First-level tags come from the generic rule table.
tags = assign_tags(material_name_sample, composition_map_1st_level)
print(f"First level tags for {material_name_sample}: {tags}")

# There are different types of definitions:
# - based on the ingredient atoms
# - structure based (R-123)
#     The first atom has a rounded amount of 1, the second atom a rounded amount of 2, and Cu a rounded amount of 3, e.g. Y: 0.8   Ba: 1.8  Cu: 3
# - T' (T-prime) applies only to (Nd,Ce)CuO and (Pr,La,Ce)CuO, to my knowledge

# C60 ---> C: 60, for example Cs3C60
# MCh2 ---> Ch: 2, for example VSe2
# Transition metals:   Sc, Ti, V, Cr, Mn, Fe, Co, Ni, Cu, Zn, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, La, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg
# Pyrochlore oxides:  A2 B2 O7
# Spinel oxides: A1 B2 O4
# Heavy fermions: "Ce", "Pr", "Nd", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "U"

# Second-level rule tables, keyed by first-level tag name. Each value is a
# rule list in the format accepted by assign_tags; an empty list means the
# first-level tag has no sub-classes defined yet.
second_level_composition = {
    'Cuprate': [
        {"and_compounds": ["Bi"], "name": "Bi-based"},
        {"and_compounds": ["Hg"], "name": "Hg-based"},
        {"and_compounds": ["Tl"], "name": "Tl-based"},
        {"and_compounds": ["La"], "name": "La-based"},
        {"and_compounds": ["Nd", "Cu", "O"], "name": "T'"},
        # Was "Nd," (comma inside the string), which is not an element symbol
        # and made this rule impossible to satisfy.
        {"and_compounds": ["Nd", "Ce", "Cu", "O"], "name": "T'"},
        {"and_compounds": ["Pr", "Ce", "Cu", "O"], "name": "T'"},
        {"and_compounds": ["Pr", "Ce", "La", "Cu", "O"], "name": "T'"},
    ],
    'Iron-pnictide': [],
    'Iron-chalcogenides': [],
    'Hydrides': [
        # NOTE(review): the label probably means "sulfur hydride"; left
        # unchanged because it is emitted as a tag at runtime.
        {"and_compounds": ["H", "S"], "name": "Sulfure Hydrate"}
    ],
    'Carbides': [
        {"and_compounds": ["B", "C"], "name": "Borocarbides"},
        {"and_compounds": ["O", "C"], "name": "organics"}
    ],
    'Chalcogenides': [
        # The {"element": amount} entries express an amount constraint
        # (Ch: 2); assign_tags currently skips non-string entries, so these
        # rules match on "Bi" alone.
        {"and_compounds": ["Bi", {"S": 2}], "name": "BiCh2"},
        {"and_compounds": ["Bi", {"Se": 2}], "name": "BiCh2"},
        {"and_compounds": ["Bi", {"Te": 2}], "name": "BiCh2"},
    ],
    'Oxides': [
        {"or_compounds": ["Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "La", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg"], "name": "Transition Metal-Oxides"}
        #{"and_compounds": ["O", "C"], "name": "Pyrochlore Oxides"},
        #{"and_compounds": ["O", "C"], "name": "Pyrochlore Oxides"}
    ],
    'Alloys': [
        {"or_compounds": ["Ce", "Pr", "Nd", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "U"], "name": "possible Heavy-fermions"}
    ]

}


# Refine each first-level tag with its second-level rule table, if it has one.
for tag in tags:
    # .get() rather than [...]: first-level tags such as "Nitrides",
    # "Fluorides", "Pnictides" or "Borides" have no entry in
    # second_level_composition, and plain indexing raises KeyError for them
    # before the "no composition map" branch below can ever run.
    composition = second_level_composition.get(tag)
    if composition is not None:
        second_level_tags = assign_tags(material_name_sample, composition)

        print(tag + " -> " + str(second_level_tags))

    else:
        print("The tag: " + tag + " does not have any assigned composition map. ")
        
        

In [None]:
# Inspect the element -> amount mapping pymatgen produces for a formula with
# fractional amounts. Composition is taken from pymatgen.core because the
# root-level alias (pymatgen.Composition) is not available in pymatgen
# releases from 2022.0 onwards.
dc = Composition("LaOBiS1.8Se0.2", strict=False).as_dict()

print(dc)


In [None]:
import math

# Scratch check of how a fractional amount such as 1.8 rounds up and down.
# A notebook cell only displays its last expression, so the ceiling and the
# floor are collected into one tuple to make both values visible.
rounding_bounds = (math.ceil(1.8), math.floor(1.8))
rounding_bounds

In [None]:
# Scratch check: displays the type reported for a string literal.
type('a')