In [19]:
import numpy as np

## Build up elementary property dictionaries
- function `buildDict` creates a dictionary that maps each element name to the value of one attribute
- build a list of such dictionaries, one for each of the different elementary properties

In [1]:
def buildDict (attribute_index, refsheet, data_type):
    """Map each element name in ``refsheet`` to the value of one attribute column.

    Rows ``1 .. refsheet.nrows - 1`` are read (row 0 is skipped). Column 0
    supplies the element name and column ``attribute_index`` the value.

    Parameters
    ----------
    attribute_index : int
        Column index of the attribute to extract.
    refsheet : sheet-like
        Any object exposing ``nrows`` and ``cell(row, col).value``.
    data_type : str
        ``'d'`` converts every value with ``float``; any other value stores
        the cell content unchanged.

    Returns
    -------
    dict
        ``{element_name: value}``. In ``'d'`` mode, blank cells (empty or
        whitespace-only text) are stored as the sentinel ``-1.111``; cells
        that cannot be converted to ``float`` are printed together with their
        row and column index and left out of the dictionary.
    """
    attribute = dict()
    as_float = (data_type == 'd')
    for row_index in range(1, refsheet.nrows):
        element_name = refsheet.cell(row_index, 0).value
        data = refsheet.cell(row_index, attribute_index).value
        if not as_float:
            attribute[element_name] = data
            continue
        # Any whitespace-only text counts as "no data", not just '' and ' '.
        if data == '' or (isinstance(data, str) and not data.strip()):
            attribute[element_name] = -1.111
            continue
        try:
            attribute[element_name] = float(data)
        except (TypeError, ValueError):
            # Best effort: report the offending cell and keep going. Only
            # conversion failures are caught so that unrelated errors (and
            # KeyboardInterrupt) still propagate.
            print (data, row_index, attribute_index)
    return attribute

In [13]:
from xlrd import open_workbook
# Load the reference workbook of per-element properties and build one
# {element name: value} dictionary per attribute column.
# NOTE(review): xlrd 2.0+ only reads .xls files, so opening this .xlsx
# presumably requires xlrd < 2.0 -- confirm against the environment.
refbk = open_workbook('Elementary_property.xlsx')
refst = refbk.sheet_by_index(0)
dicList = []
# Columns 1-25 are converted to float ('d').
for attribute_index in range(1, 26):
    dicList.append(buildDict(attribute_index, refst, 'd'))
# Columns 27 onward are stored unchanged ('s'). Column 26 is read by neither
# loop -- NOTE(review): confirm that skipping it is intentional.
# The bound must come from `refst`: the name `refsheet` exists only as a
# parameter inside buildDict, so using it here raised NameError on a fresh kernel.
for attribute_index in range(27, refst.ncols):
    dicList.append(buildDict(attribute_index, refst, 's'))

## Get composition information
- function `getComp` returns a list of the atoms at one type of site, each with its elementary properties
- function `findNum` parses the name of the perovskite to get the number of atoms of an element
- for every compound in the spreadsheet, generate a list of attribute values

In [17]:
def getComp (row_index, sheetname, col_index, compositionname, dicList):
    """Collect the entries found in up to three adjacent cells of one row.

    The cells at columns ``col_index``, ``col_index + 1`` and
    ``col_index + 2`` of row ``row_index`` are inspected, stopping early at
    the sheet's last column. Cells whose value is ``''`` are skipped.

    Returns a list with one entry per non-empty cell, each of the form
    ``[name, count, value_0, value_1, ...]`` where ``count`` is
    ``findNum(compositionname, name)`` and ``value_i`` is ``dicList[i][name]``.
    A name missing from any dictionary in ``dicList`` raises ``KeyError``.
    """
    collected = []
    for column in range(col_index, col_index + 3):
        if column >= sheetname.ncols:
            break
        element = sheetname.cell(row_index, column).value
        if element == '':
            continue
        count = findNum(compositionname, element)
        entry = [element, count] + [table[element] for table in dicList]
        collected.append(entry)
    return collected

In [18]:
def findNum (composition, element):
    """Return the integer written directly after ``element`` in ``composition``.

    Parameters
    ----------
    composition : str
        Formula string such as ``'Ba4Sr4Fe8O24'``.
    element : str
        Element symbol to look for.

    Returns
    -------
    int
        The run of digits that immediately follows the first occurrence of
        ``element`` that is not the prefix of a longer symbol. Returns 0 when
        ``element`` is empty, when it does not occur in ``composition``, or
        when no digit follows it.
    """
    if element == '':
        return 0
    start = composition.find(element)
    while start != -1:
        pos = start + len(element)
        # A lowercase letter right after the match means we hit a longer
        # symbol (e.g. 'S' inside 'Sr'); look for the next occurrence instead.
        if pos < len(composition) and composition[pos].islower():
            start = composition.find(element, start + 1)
            continue
        num = 0
        while (pos < len(composition) and composition[pos].isdigit()):
            num = num*10 + int(composition[pos])
            pos += 1
        return num
    # Element not present: previously find() == -1 made the digit scan start
    # at an unrelated position and could return a bogus count.
    return 0

In [57]:
from xlrd import open_workbook
# Build one descriptor row per compound in the stability spreadsheet.
book = open_workbook('PV_stability_data_forWei.xlsx')
sheet = book.sheet_by_index(0)
examples = []
for row_index in range(1, sheet.nrows):  # skip header line
    composition = sheet.cell(row_index, 0).value
    stability = sheet.cell(row_index, 6).value
    bandgap = sheet.cell(row_index, 2).value
    charge_transfer_gap = sheet.cell(row_index, 3).value
    O_p_band_center = sheet.cell(row_index, 4).value
    logK = sheet.cell(row_index, 5).value
    # Each site type occupies up to three adjacent columns: A at 8, B at 11, X at 14.
    Asites = getComp(row_index, sheet, 8, composition, dicList)
    Bsites = getComp(row_index, sheet, 11, composition, dicList)
    if (len(Bsites) == 0):
        # No B-site entries in this row: reuse the A-site entries.
        Bsites = Asites
    Xsites = getComp(row_index, sheet, 14, composition, dicList)
    # Drop the element name (column 0) and keep columns 1-26 of every entry:
    # the atom count followed by the next 25 property values.
    Astatics = np.array(Asites)[:,1:27].astype(float)
    Bstatics = np.array(Bsites)[:,1:27].astype(float)
    Xstatics = np.array(Xsites)[:,1:27].astype(float)
    # Atot / Btot were referenced below but never defined in this notebook
    # (NameError on a fresh kernel). They are defined here as the total atom
    # count on each site; one scalar each is what the recorded output width of
    # 345 columns requires (2 + 12*26 + 26 + 5).
    # NOTE(review): confirm this matches the original definition.
    Atot = np.sum(Astatics[:,0])
    Btot = np.sum(Bstatics[:,0])
    # Per-atom fractions used as weights for the weighted averages.
    # A site whose counts sum to 0 yields NaN weights here.
    Acoef = Astatics[:,0] / Atot
    Bcoef = Bstatics[:,0] / Btot
    Xcoef = Xstatics[:,0] / np.sum(Xstatics[:,0], axis=0)
    Asite_avg = np.mean(Astatics, axis=0) 
    Asite_weighted = np.dot(Acoef, Astatics)
    Asite_std = np.std(Astatics, axis=0)
    Asite_max = np.amax(Astatics, axis=0)
    Asite_min = np.amin(Astatics, axis=0)
    Asite_ptp = np.ptp(Astatics, axis=0)
    Bsite_avg = np.mean(Bstatics, axis=0) 
    Bsite_weighted = np.dot(Bcoef, Bstatics)
    Bsite_std = np.std(Bstatics, axis=0)
    Bsite_max = np.amax(Bstatics, axis=0)
    Bsite_min = np.amin(Bstatics, axis=0)
    Bsite_ptp = np.ptp(Bstatics, axis=0)
    Xsite_avg = np.mean(Xstatics, axis=0)
    # Row layout: A-site block, B-site block, X-site average, then the five
    # spreadsheet values with stability last.
    example = np.r_[Atot, Asite_avg, Asite_weighted, Asite_std, Asite_max, Asite_min, Asite_ptp,
               Btot, Bsite_avg, Bsite_weighted, Bsite_std, Bsite_max, Bsite_min, Bsite_ptp,
               Xsite_avg, bandgap, charge_transfer_gap, O_p_band_center, logK, stability]
    examples.append(example)



## Write to a CSV file

In [58]:
# Save the collected descriptor rows as comma-separated text.
np.savetxt(fname="stability_output_with_descriptors.csv", X=np.array(examples), delimiter=',')

In [59]:
# Sanity check: (number of rows collected, number of values per row).
np.asarray(examples).shape


(2040, 345)

[]