Convert existing ASTER spectral library files to ECOSTRESS spectral library files.
Owner: Susan Meerdink   
Creation Date: 10/9/17   
https://github.com/susanmeerdink/ASTER-Spectral-Library   
This file converts existing ASTER spectral library files to the new ECOSTRESS spectral library format. The existing ASTER files have many different format types including: 
1. No leading space and one space between wavelength and spectrum
2. Leading space and tab in between wavelength and spectrum
3. No leading space and tab in between wavelength and spectrum

In [1]:
# Import Functions
import numpy as np
import os
import glob
from shutil import copyfile

In [2]:
def read_aster_file(filename):
    """
    Read one existing ASTER spectral library text file.

    The first 26 lines of the file are treated as the ASTER metadata header and
    are rearranged into the 20-line ECOSTRESS metadata layout. Every line after
    the header is split on whitespace and returned as-is; the spectra are not
    reformatted here.

    Parameters:
    -----------
    1) filename: The path of one ASTER spectral library text file

    Returns:
    --------
    1) arrayNewMeta: list of 20 strings, one per ECOSTRESS metadata line
    2) arrayNewSpec: list with one entry per line after the header; each entry is
       the list of whitespace-separated tokens on that line (an empty list for a
       blank line)

    Raises:
    -------
    IndexError: if the file contains fewer than 26 lines
    """
    headerLength = 26  # number of leading lines treated as ASTER metadata

    # Read everything up front so the handle is closed even if parsing fails
    with open(filename, 'r') as inFile:
        lines = [line.rstrip('\n') for line in inFile]

    arrayOrig = lines[:headerLength]
    if len(arrayOrig) < headerLength:
        raise IndexError('%s has %i lines; expected at least %i header lines'
                         % (filename, len(arrayOrig), headerLength))
    arrayNewSpec = [line.split() for line in lines[headerLength:]]

    # The first eight header lines are carried over unchanged
    arrayNewMeta = list(arrayOrig[0:8])

    # Some files have an additional line of metadata to ignore that is called Collected By:
    if 'Collected' in arrayOrig[9]:
        arrayNewMeta.append(arrayOrig[8])
    else:
        arrayNewMeta.append(arrayOrig[8] + arrayOrig[9])  # Combine the two origin lines into one

    arrayNewMeta.append('Collection Date: N/A')  # New line that has no ASTER equivalent

    # Header lines 10-15 are merged into a single metadata line
    arrayNewMeta.append(''.join(arrayOrig[10:16]))

    # Header lines 16-23 are carried over unchanged.
    # NOTE(review): header line 24 is not copied (the previous implementation
    # overwrote it with the entry appended below) - confirm it holds nothing needed.
    arrayNewMeta.extend(arrayOrig[16:24])

    # Some files do not have the appropriate ancillary data filename, replace with accurate filename.
    # The check is a plain substring test against the path exactly as it was passed in.
    if filename in arrayOrig[25]:
        arrayNewMeta.append(arrayOrig[25])
    else:
        newname = os.path.basename(filename)
        arrayNewMeta.append('Additional Information: ' + newname.replace('spectrum', 'ancillary'))

    return arrayNewMeta, arrayNewSpec

In [3]:
def _format_fixed_width(value):
    """
    Round one numeric token to three decimals and pad it to a six-character field.

    Numbers of 10 or more are zero-padded on the right to six characters.
    Smaller numbers are zero-padded to five characters and given one leading space.
    Text that is already longer than the field is returned unshortened.
    """
    number = round(float(value), 3)
    text = str(number)
    if number >= 10:
        prefix, width = '', 6
    else:
        prefix, width = ' ', 5
    # str() of a float is never shorter than three characters, so at most three zeros are added
    padding = width - len(text)
    if padding > 0:
        text += '0' * padding
    return prefix + text


def format_spec(origSpec):
    """
    This function formats the aster spectral library spectra into the ecostress
    spectral library format.

    Parameters:
    -----------
    1) origSpec: list of rows, each row holding a wavelength token followed by a
       reflectance token; an empty row (from a blank line) is allowed

    Returns:
    --------
    1) arraySpec: a numpy character array of shape (len(origSpec), 2) holding text
       strings of at most six characters, values rounded to three decimal places.
       Rows that were empty in origSpec are returned as empty strings.

    Raises:
    -------
    ValueError: if a token cannot be converted to a float
    IndexError: if a non-empty row has fewer than two tokens
    """
    # unicode=True makes the elements text strings so they can be joined with
    # '\t' and '\n' by the caller; np.char.chararray is the same class as np.chararray.
    arraySpec = np.char.chararray((len(origSpec), 2), itemsize=6, unicode=True)
    # The array starts out uninitialised; blank it so skipped rows are well defined
    arraySpec[:] = ''

    # Loop through columns (wavelengths then reflectance)
    for c in (0, 1):
        # Loop through rows, skipping rows that came from blank lines
        for i, row in enumerate(origSpec):
            if len(row) > 0:
                # Anything longer than six characters is cut off by the array's item size
                arraySpec[i, c] = _format_fixed_width(row[c])

    return arraySpec

In [4]:
def write_ecostress_file(currentDir, outDir, filename):
    """
    This function will process a single ASTER text file and output a single
    ECOSTRESS text file with the same file name.

    Parameters:
    -----------
    1) currentDir: a file path for the folder that the input file is located
    2) outDir: a file path for the folder that the output file will be located
    3) filename: The filename for one ASTER spectral library text file

    Output File:
    ------------
    1) Outputs a text file containing the reformatted metadata lines, one empty
    line, and then one line per spectrum row with the two formatted values
    separated by a tab.
    """
    # os.path.join gives the same path as plain concatenation when the folder
    # already ends in a separator, and also works when it does not
    meta, specOrig = read_aster_file(os.path.join(currentDir, filename))
    spec = format_spec(specOrig)

    # The with-block closes the output file even if a write fails part way through
    with open(os.path.join(outDir, filename), 'w') as outFile:
        # Metadata, one field per line
        for metaLine in meta:
            outFile.write(metaLine + '\n')

        outFile.write('\n')  # Add one empty line between metadata and spectra

        # Spectra, first column then second column separated by a tab
        for j in range(len(spec)):
            outFile.write(spec[j, 0] + '\t' + spec[j, 1] + '\n')

In [5]:
def convert_aster_files(currentDir, outDir):
    """
    This function finds all the ASTER text files in a folder and calls the
    write_ecostress_file to format them into ECOSTRESS text files. Ancillary
    files are copied to the output folder unchanged.

    A spectrum file that fails to convert is copied into an 'Error Files'
    folder inside currentDir (created if missing) and processing continues with
    the next file. A summary of the three counts is printed at the end.

    Note that this changes the process working directory to currentDir.

    Parameters:
    -----------
    1) currentDir: a file path for the folder that input file is located
    2) outDir: a file path for the folder that the output file will be located
    """
    os.chdir(currentDir)  # Set this to the current directory so the glob patterns below search it
    count = 0
    error = 0
    copy = 0
    errorDir = os.path.join(currentDir, 'Error Files')

    # Find all the ASTER text files in directory
    for name in glob.glob("*.spectrum.txt"):
        try:
            write_ecostress_file(currentDir, outDir, name)
        except Exception:
            # Exception (not a bare except) so Ctrl-C and interpreter exit still stop the run.
            # Create the folder on first use so the copy itself cannot abort the batch.
            if not os.path.isdir(errorDir):
                os.makedirs(errorDir)
            copyfile(name, os.path.join(errorDir, name))
            print('Error processing %s' % name)
            error += 1
        else:
            count += 1

    # Find all the ASTER ancillary text files in directory
    for name in glob.glob("*.ancillary.txt"):
        copyfile(name, os.path.join(outDir, name))
        copy += 1

    print("%i ASTER spectrum files formatted to ECOSTRESS files" % count)
    print("%i ASTER ancillary files copied to new directory" % copy)
    print("%i errors while formatting ASTER files" % error)

In [6]:
# Run the conversion on the example input folder
directory = "C:\\Users\\Susan\\Documents\\GitHub\\ASTER-Spectral-Library\\"
outDir = directory + "Example Outputs\\"  # folder that receives the converted files
inDir = directory + "Example Inputs\\"  # folder that is searched for ASTER files

convert_aster_files(currentDir=inDir, outDir=outDir)

4 ASTER spectrum files formatted to ECOSTRESS files
2 ASTER ancillary files copied to new directory
0 errors while formatting ASTER files


In [7]:
# Run the conversion on the ASTER spectral library folder
directory = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\"
outDir = directory + "ECOSTRESS Spectral Library Files\\"  # folder that receives the converted files
inDir = directory + "ASTER Spectral Library Files\\"  # folder that is searched for ASTER files

convert_aster_files(currentDir=inDir, outDir=outDir)

2445 ASTER spectrum files formatted to ECOSTRESS files
2145 ASTER ancillary files copied to new directory
0 errors while formatting ASTER files
