In [1]:
# ---------------- converting Element% to Oxide mass%


# --- import modules

import os
import glob
import pandas as pd
import numpy as np
import re # 're' stands for regular expressions.

# --- set working directory
# Folder that holds the EDS .txt exports. The EDS_DATA_DIR environment variable,
# when set, takes precedence so the notebook can run on another machine without
# editing this cell; otherwise the original location is used.

base_dir = os.environ.get("EDS_DATA_DIR", "C:/Users/u0125722/Documents/Python_Scripts/_DATA")

In [2]:
 #--- look into working directory and create a list with selected files - .txt
# Later cells read the files by bare name and write their CSV next to them,
# so the data folder is made the current working directory.
os.chdir(base_dir)

# glob returns its matches in arbitrary order; sorting keeps the file order
# (and therefore the row order of every table built from it) reproducible.
txt_files = sorted(glob.glob('*.txt'))

print(txt_files)
len(txt_files)

['BU19TS-Area10_.txt', 'BU19TS-Area11b_.txt', 'BU19TS-Area11_.txt', 'BU19TS-Area12detail_EDS.txt', 'BU19TS-Area12_.txt', 'BU19TS-Area13_EDS.txt', 'BU19TS-Area14_.txt', 'BU19TS-Area15_.txt', 'BU19TS-Area1_.txt', 'BU19TS-Area2_.txt', 'BU19TS-Area3_.txt', 'BU19TS-Area5detalhe_.txt', 'BU19TS-Area8_.txt', 'BU19TS-Area9_.txt', 'BU19TS-Coltan_.txt', 'BU19TS-Map2_.txt', 'BU19TS-Map3_.txt']


17

In [3]:
        # - EDS data processing function made in collaboration with Bram Paredis

def EDS(i, files=None):
    """Parse one EDS text export into a table with one row per point measurement.

    The file is read as whitespace-separated text into 30 positional columns.
    A line whose first token looks like ``pt<number>`` starts a new point
    measurement. Within each point, the line directly after the label is
    skipped; for every remaining line the first token becomes a column name
    (with any ``*`` stripped from its ends) and the second token becomes the
    value. Lines that appear before the first point label are ignored.

    Parameters
    ----------
    i : int
        Position of the file to read within ``files``.
    files : sequence of str, optional
        File names or paths to index into. When omitted, the notebook-level
        ``txt_files`` list is used.

    Returns
    -------
    pandas.DataFrame
        One row per point label found in the file, in file order. Measured
        values are left exactly as pandas parsed them (no numeric conversion
        happens here). Three columns are appended: ``Analysis`` (the point
        label read from the file), ``Sample`` and ``Area`` (first and second
        part of the file name when split on ``-`` or ``_``).

    Raises
    ------
    IndexError
        If the file name has fewer than two ``-``/``_`` separated parts.
    """
    source = txt_files if files is None else files
    path = source[i]

    # -- Create a dataframe from the txt file (raw string: '\s' is a regex
    # -- token, not a Python string escape)
    raw = pd.read_csv(path, header=None, sep=r'\s+', names=range(30))

    # -- Get filename info to use for further auto df construction; only the
    # -- file name itself is split so directory names cannot leak into it
    filename_info = re.split("-|_", os.path.basename(path))

    # -- Flag the rows that carry a point label and number the rows by the
    # -- point they belong to (0 = rows before the first label)
    is_label = raw[0].astype(str).str.match(r"pt\d+$")
    group_ids = is_label.cumsum()
    in_point = group_ids > 0

    labels = []
    rows = []
    for _, block in raw[in_point].groupby(group_ids[in_point]):
        labels.append(block.iloc[0, 0])

        # Drop the label row and the columns that are empty for this point
        body = block.iloc[1:].dropna(axis=1, how='all')

        # Skip one more row, keep the first two columns and lay them out
        # horizontally: row 0 holds the names, row 1 the values
        pair = body.iloc[1:, :2].T
        pair.columns = pair.iloc[0, :].str.strip("*").tolist()

        # Drop the now redundant row with the column names
        rows.append(pair.drop(0))

    # -- Stack all points at once; columns appear in order of first occurrence
    df_area = pd.concat(rows, sort=False) if rows else pd.DataFrame()

    # -- Add additional info to df; labels come from the file itself so they
    # -- stay correct even when point numbers are not contiguous
    df_area["Analysis"] = labels
    df_area["Sample"] = filename_info[0]
    df_area["Area"] = filename_info[1]

    return df_area

In [55]:
# Parse every export; EDS() is addressed by position in txt_files.
frames = []
for i, file in enumerate(txt_files):
    print(file)
    frames.append(EDS(i))

if not frames:
    raise FileNotFoundError("No .txt files found to process in the working directory.")

# One concat over the collected tables instead of growing a frame inside the
# loop, which re-copies everything accumulated so far on each iteration.
b = pd.concat(frames, sort=False, ignore_index=True)

# Fix the column order. reindex (rather than b[[...]]) keeps this cell working
# when one of the listed elements was never measured: that column is then
# created empty instead of raising a KeyError. Columns not listed here are
# dropped.
b = b.reindex(columns=["Sample", "Area", "Analysis", "O", "F", "Na", "Mg", "Al", "Si", "P", "Cl",
                       "Ca", "Mn", "Fe", "Ti", "Sr", "Nb", "Ba", "Ta", "C", "Total"])

#b.to_csv('out_spectra.csv', index=False)
b

BU19TS-Area10_.txt
BU19TS-Area11b_.txt
BU19TS-Area11_.txt
BU19TS-Area12detail_EDS.txt
BU19TS-Area12_.txt
BU19TS-Area13_EDS.txt
BU19TS-Area14_.txt
BU19TS-Area15_.txt
BU19TS-Area1_.txt
BU19TS-Area2_.txt
BU19TS-Area3_.txt
BU19TS-Area5detalhe_.txt
BU19TS-Area8_.txt
BU19TS-Area9_.txt
BU19TS-Coltan_.txt
BU19TS-Map2_.txt
BU19TS-Map3_.txt


Unnamed: 0,Sample,Area,Analysis,O,F,Na,Mg,Al,Si,P,...,Ca,Mn,Fe,Ti,Sr,Nb,Ba,Ta,C,Total
0,BU19TS,Area10,pt1,50.07,,,,,49.93,,...,,,,,,,,,,100.00
1,BU19TS,Area10,pt2,50.50,,,,25.02,,24.48,...,,,,,,,,,,100.00
2,BU19TS,Area10,pt3,50.07,,,,,49.93,,...,,,,,,,,,,100.00
3,BU19TS,Area10,pt4,50.07,,,,,49.93,,...,,,,,,,,,,100.00
4,BU19TS,Area10,pt5,51.34,,,,24.53,,24.13,...,,,,,,,,,,100.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,BU19TS,Map3,pt14,38.72,,,,,,20.97,...,38.09,1.50,,,,,,,,100.00
190,BU19TS,Map3,pt15,47.24,,,,11.84,,16.32,...,0.96,17.66,5.98,,,,,,,100.00
191,BU19TS,Map3,pt16,45.56,,,1.12,16.87,,22.43,...,,0.79,13.23,,,,,,,100.00
192,BU19TS,Map3,pt17,45.91,,,1.19,17.11,,22.21,...,,0.80,12.78,,,,,,,100.00


In [66]:
    # --- Separate data from attributes and fill NA values with 0.00 to allow calculations
# The attribute columns, together with O, C and Total, are removed; what is
# left is filled with 0.0 where empty and then cast to float so that it can be
# used in arithmetic.
b_data = (
    b.drop(columns=["Sample", "Area", "Analysis", "O", "C", "Total"])
    .fillna(0.0000)
    .astype(float)
)

print(b_data)

       F   Na    Mg     Al     Si      P    Cl     Ca     Mn     Fe   Ti   Sr  \
0    0.0  0.0  0.00   0.00  49.93   0.00  0.00   0.00   0.00   0.00  0.0  0.0   
1    0.0  0.0  0.00  25.02   0.00  24.48  0.00   0.00   0.00   0.00  0.0  0.0   
2    0.0  0.0  0.00   0.00  49.93   0.00  0.00   0.00   0.00   0.00  0.0  0.0   
3    0.0  0.0  0.00   0.00  49.93   0.00  0.00   0.00   0.00   0.00  0.0  0.0   
4    0.0  0.0  0.00  24.53   0.00  24.13  0.00   0.00   0.00   0.00  0.0  0.0   
..   ...  ...   ...    ...    ...    ...   ...    ...    ...    ...  ...  ...   
189  0.0  0.0  0.00   0.00   0.00  20.97  0.72  38.09   1.50   0.00  0.0  0.0   
190  0.0  0.0  0.00  11.84   0.00  16.32  0.00   0.96  17.66   5.98  0.0  0.0   
191  0.0  0.0  1.12  16.87   0.00  22.43  0.00   0.00   0.79  13.23  0.0  0.0   
192  0.0  0.0  1.19  17.11   0.00  22.21  0.00   0.00   0.80  12.78  0.0  0.0   
193  0.0  0.0  0.00  12.04   0.00  16.31  0.00   0.90  17.65   5.85  0.0  0.0   

      Nb   Ba   Ta  
0    0

In [67]:
# --- Multiply wt% ELEMENT by numerical value below for equivalent expressed as OXIDE.
# Each element maps to (output column label, multiplication factor). F and Cl
# keep their own label and a factor of 1.
element_to_oxide = {
    "F": ("F", 1.0000),
    "Na": ("Na2O", 1.3480),
    "Mg": ("MgO", 1.6582),
    "Al": ("Al2O3", 1.8895),
    "Si": ("SiO2", 2.1392),
    "P": ("P2O5", 2.2916),
    "Cl": ("Cl", 1.0000),
    "Ca": ("CaO", 1.3992),
    "Mn": ("MnO", 1.2912),
    "Fe": ("FeO", 1.2865),
    "Ti": ("TiO2", 1.6681),
    "Sr": ("SrO", 1.1826),
    "Nb": ("Nb2O5", 1.4305),
    "Ba": ("BaO", 1.1165),
    "Ta": ("Ta2O5", 1.2211),
}

# Fail loudly instead of silently applying a factor to the wrong column.
unknown = [el for el in b_data.columns if el not in element_to_oxide]
if unknown:
    raise KeyError(f"No oxide conversion factor defined for: {unknown}")

# Factors are looked up per column name, so the result no longer depends on
# b_data having its columns in one particular order.
conversion = np.array([element_to_oxide[el][1] for el in b_data.columns])

oxides = b_data.mul(conversion, axis=1)

total_sum = oxides.sum(axis=1)
print(total_sum)

oxides.columns = [element_to_oxide[el][0] for el in b_data.columns]

oxides
#oxides.to_csv('out_oxides.csv', index=False)

0      106.810256
1      103.373658
2      106.810256
3      106.810256
4      101.645743
          ...    
189    104.007180
190     91.609686
191    103.174080
192    102.673469
193     91.700561
Length: 194, dtype: float64


Unnamed: 0,F,Na2O,MgO,Al2O3,SiO2,P2O5,Cl,CaO,MnO,FeO,TiO2,SrO,Nb2O5,BaO,Ta2O5
0,0.0,0.0,0.000000,0.000000,106.810256,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.000000,47.275290,0.000000,56.098368,0.00,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.000000,0.000000,106.810256,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.000000,0.000000,106.810256,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.000000,46.349435,0.000000,55.296308,0.00,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,0.0,0.0,0.000000,0.000000,0.000000,48.054852,0.72,53.295528,1.936800,0.000000,0.0,0.0,0.0,0.0,0.0
190,0.0,0.0,0.000000,22.371680,0.000000,37.398912,0.00,1.343232,22.802592,7.693270,0.0,0.0,0.0,0.0,0.0
191,0.0,0.0,1.857184,31.875865,0.000000,51.400588,0.00,0.000000,1.020048,17.020395,0.0,0.0,0.0,0.0,0.0
192,0.0,0.0,1.973258,32.329345,0.000000,50.896436,0.00,0.000000,1.032960,16.441470,0.0,0.0,0.0,0.0,0.0


In [59]:
def normalize(data, total=None):
    """Rescale each row of ``data`` to percentages of a row total.

    ``total`` is the divisor applied along the rows; when it is left as
    ``None`` the sum of each row of ``data`` is used, so every row of the
    result adds up to 100.
    """
    row_totals = data.sum(axis=1) if total is None else total
    fractions = data.div(row_totals, axis=0)
    return fractions * 100

In [71]:
# Normalise every row of the oxide table and append the row sum as a final
# 'total' column.
ox_normalized = normalize(oxides).assign(total=lambda frame: frame.sum(axis=1))
ox_normalized



Unnamed: 0,F,Na2O,MgO,Al2O3,SiO2,P2O5,Cl,CaO,MnO,FeO,TiO2,SrO,Nb2O5,BaO,Ta2O5,total
0,0.0,0.0,0.000000,0.000000,100.0,0.000000,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
1,0.0,0.0,0.000000,45.732434,0.0,54.267566,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
2,0.0,0.0,0.000000,0.000000,100.0,0.000000,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
3,0.0,0.0,0.000000,0.000000,100.0,0.000000,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
4,0.0,0.0,0.000000,45.598993,0.0,54.401007,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,0.0,0.0,0.000000,0.000000,0.0,46.203399,0.69226,51.242162,1.862179,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
190,0.0,0.0,0.000000,24.420649,0.0,40.824190,0.00000,1.466255,24.891027,8.397878,0.0,0.0,0.0,0.0,0.0,100.0
191,0.0,0.0,1.800049,30.895226,0.0,49.819284,0.00000,0.000000,0.988667,16.496774,0.0,0.0,0.0,0.0,0.0,100.0
192,0.0,0.0,1.921877,31.487535,0.0,49.571166,0.00000,0.000000,1.006063,16.013358,0.0,0.0,0.0,0.0,0.0,100.0


In [73]:
# Remove every measured column from `b`, place the normalised oxide table to
# the right of what remains, then write the combined table to disk.
el_to_ox = pd.concat(
    [
        b.drop(columns=["O", "F", "Na", "Mg", "Al", "Si", "P", "Cl",
                        "Ca", "Mn", "Fe", "Ti", "Sr", "Nb", "Ba", "Ta", "C", "Total"]),
        ox_normalized,
    ],
    axis=1,
)

el_to_ox.to_csv('oxides.csv', index=False)
el_to_ox

Unnamed: 0,Sample,Area,Analysis,F,Na2O,MgO,Al2O3,SiO2,P2O5,Cl,CaO,MnO,FeO,TiO2,SrO,Nb2O5,BaO,Ta2O5,total
0,BU19TS,Area10,pt1,0.0,0.0,0.000000,0.000000,100.0,0.000000,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
1,BU19TS,Area10,pt2,0.0,0.0,0.000000,45.732434,0.0,54.267566,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
2,BU19TS,Area10,pt3,0.0,0.0,0.000000,0.000000,100.0,0.000000,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
3,BU19TS,Area10,pt4,0.0,0.0,0.000000,0.000000,100.0,0.000000,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
4,BU19TS,Area10,pt5,0.0,0.0,0.000000,45.598993,0.0,54.401007,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,BU19TS,Map3,pt14,0.0,0.0,0.000000,0.000000,0.0,46.203399,0.69226,51.242162,1.862179,0.000000,0.0,0.0,0.0,0.0,0.0,100.0
190,BU19TS,Map3,pt15,0.0,0.0,0.000000,24.420649,0.0,40.824190,0.00000,1.466255,24.891027,8.397878,0.0,0.0,0.0,0.0,0.0,100.0
191,BU19TS,Map3,pt16,0.0,0.0,1.800049,30.895226,0.0,49.819284,0.00000,0.000000,0.988667,16.496774,0.0,0.0,0.0,0.0,0.0,100.0
192,BU19TS,Map3,pt17,0.0,0.0,1.921877,31.487535,0.0,49.571166,0.00000,0.000000,1.006063,16.013358,0.0,0.0,0.0,0.0,0.0,100.0
