Convert to ECOSTRESS Spectral Library   
Susan Meerdink   
12/8/2016   
https://github.com/susanmeerdink/ASTER-Spectral-Library    
This code converts a csv of spectra and metadata into ECOSTRESS spectral library format. This project contains example spectra and metadata in the repo.    
This code takes five inputs:  
1. Metadata for spectra csv - no commas in metadata fields
2. ASD Spectra csv - must have the same order as metadata
3. Nicolet Spectra csv - must have the same order as metadata
4. Ancillary data csv - must have the same order as metadata
5. Output directory for ECOSTRESS spectral library files

In [1]:
# Import Functions
import numpy as np
import os

In [2]:
def read_file(filename):
    """
    Function for reading in metadata, ASD, Nicolet, or ancillary data from csv.

    The file is split on commas only (no quoting support), so fields must not
    contain commas themselves.

    Parameters:
    -----------
    1) filename: The filename of the csv to read.

    Returns:
    --------
    1) array: list of rows, one per line after the first; each row is a list
       of strings. The last field of every data row keeps its trailing newline.
    2) header: list of strings holding the fields of the first line, with the
       trailing newline removed from the last field. Empty list if the file
       has no lines.
    """
    header = []  # stays empty when the file has no lines at all
    array = []  # holds every row after the header

    # The context manager closes the file even if reading fails part-way
    with open(filename, 'r') as inFile:
        for numRow, line in enumerate(inFile):
            strLine = line.split(",")

            if numRow == 0:  # first row holds the column names
                strLine[-1] = strLine[-1].rstrip('\n')  # removing newline character from last field
                header = strLine
            else:  # data rows are stored untouched
                array.append(strLine)

    return array, header

In [3]:
def _pad_spec_value(text):
    """Round one numeric string to three decimals and render it as a 7-character field."""
    number = round(float(text), 3)
    digits = str(number)
    if number >= 10:
        field = digits
    else:
        # Values below 10 get a leading space so the decimal points line up
        field = " " + digits
    # Right-pad with zeros to the fixed width; anything longer is cut to 7 characters
    return field.ljust(7, "0")[:7]


def format_spec(header, spec):
    """
    This function formats one sample's spectrum into the fixed-width text
    columns used by the ecostress spectral library format.

    The first entry of both header and spec is skipped (the loop starts at
    index 1). Every remaining wavelength/value pair is rounded to three
    decimal places and written as a seven character string: values below 10
    get a leading space, and the text is right-padded with zeros
    (e.g. "0.35" -> " 0.3500", "12.5" -> "12.5000", "350" -> "350.000").
    Inputs are converted at full length, so long or scientific-notation
    strings are not truncated before conversion.

    Parameters:
    -----------
    1) header: sequence of strings, first row of the spectra csv (wavelengths)
    2) spec: sequence of strings, one row of the spectra csv (values); must be
       the same length as header. A value of "-9999" (ignoring trailing
       whitespace) marks a missing measurement.

    Returns:
    --------
    1) arraySpec: numpy array of strings with shape (len(spec) - 1, 2); column
       0 is the formatted wavelength and column 1 the formatted value. Rows
       whose value is "-9999" contain two empty strings.

    Raises:
    -------
    ValueError: if header and spec differ in length, or a field is not numeric.
    """
    if len(header) != len(spec):
        raise ValueError("header has %d fields but spec has %d" % (len(header), len(spec)))

    rows = []
    for wavelength, value in zip(header[1:], spec[1:]):
        if value.rstrip() == "-9999":  # sentinel for a missing measurement
            rows.append(["", ""])
        else:
            rows.append([_pad_spec_value(wavelength), _pad_spec_value(value)])

    # Explicit reshape keeps the (n, 2) shape even when there are no rows
    arraySpec = np.array(rows, dtype=str).reshape(len(rows), 2)
    return arraySpec

In [4]:
def write_file(outDir, meta, asd, nicolet, anc, 
               headersMeta, headersASD, headersNicolet, headersAnc):
    """
    Function for writing an ECOSTRESS Spectral Library file.
    
    Parameters:
    -----------
    1) outDir: a file path for the folder that the output file will be located.
       It is prepended to the file name as-is, so it must end with a path separator.
    2) meta: sequence of strings holding the metadata fields for one sample
    3) asd: sequence of strings holding the asd spectra for one sample
    4) nicolet: sequence of strings holding the nicolet spectra for one sample
    5) anc: sequence of strings holding the ancillary data for one sample
    6) headersMeta: a string array containing the first row of meta csv
    7) headersASD: a string array containing the first row of ASD csv (wavelengths)
    8) headersNicolet: a string array containing the first row of Nicolet csv (wavelengths)
    9) headersAnc: a string array containing the first row of ancillary csv
    
    Output File:
    ------------
    1) Outputs a text file containing the metadata, ASD, and Nicolet spectra for one sample.
    2) If the last metadata field contains 'TRUE', also outputs an ancillary text
       file whose name is the spectrum file name with 'spectrum' replaced by 'ancillary'.
    See readme for formatting information.
    """
    # File name: metadata fields 1, 2, 3, 4, 7, 5, 6 joined by '.', lower-cased,
    # followed by '.asdnicolet.spectrum.txt'. Any field 1 containing 'non' is
    # written out as 'nonphotosyntheticvegetation'.
    category = 'nonphotosyntheticvegetation' if 'non' in meta[1] else meta[1]
    nameFields = [category, meta[2], meta[3], meta[4], meta[7], meta[5], meta[6]]
    outFileName = outDir + ('.'.join(nameFields) + '.asdnicolet.spectrum.txt').lower()
    lastMeta = len(headersMeta) - 1  # index of the ancillary-flag column

    # Context managers close both files even if a write or format_spec raises
    with open(outFileName, 'w') as outFile:
        # Output Metadata
        for i in range(len(meta)):
            if i != lastMeta:
                outFile.write(headersMeta[i] + ': ' + meta[i] + '\n')
                continue

            if 'TRUE' in meta[i]:
                # The last column is set to True: reference and create the ancillary file
                addinfoLine = os.path.basename(outFileName).replace('spectrum', 'ancillary')
                outFile.write(headersMeta[i] + ': ' + addinfoLine + '\n')
                outFile.write('\n')

                with open(outDir + addinfoLine, 'w') as ancFile:
                    for j in range(1, len(anc)):  # the first ancillary column is skipped
                        ancFile.write(headersAnc[j] + ': ' + anc[j] + '\n')
            else:
                # The last column is not set to True: leave the entry blank
                outFile.write(headersMeta[i] + ': \n')
                outFile.write('\n')

        # Output ASD data, then Nicolet data; rows without a value are skipped
        for headers, values in ((headersASD, asd), (headersNicolet, nicolet)):
            formatted = format_spec(headers, values)
            for j in range(formatted.shape[0]):
                if formatted[j, 0] != "":
                    outFile.write(formatted[j, 0] + "\t" + formatted[j, 1] + '\n')

In [5]:
def create_spectral_library_files(metaFilename, asdFilename, nicoletFilename, ancFilename, outDir):
    """
    Function for converting csv inputs into ECOSTRESS spectral library files.

    Reads the four csv files, then writes one output file per metadata row
    using the row at the same position in the ASD, Nicolet, and ancillary
    files. A message with the number of converted samples is printed at the end.
    
    Parameters:
    -----------
    1) metaFilename: file path for csv containing metadata
    2) asdFilename: file path for csv containing asd spectra
    3) nicoletFilename: file path for csv containing nicolet spectra
    4) ancFilename: file path for csv containing ancillary data 
    5) outDir: file path for the output location of spectra library files
    
    Returns:
    --------
    None

    Raises:
    -------
    IndexError: if the ASD, Nicolet, or ancillary csv has fewer data rows than the metadata csv.
    """
    # Read in files and grab data/headers
    arrayMeta, headersMeta = read_file(metaFilename)
    arrayASD, headersASD = read_file(asdFilename)
    arrayNicolet, headersNicolet = read_file(nicoletFilename)
    arrayAnc, headersAnc = read_file(ancFilename)
    
    # Rows are matched by position, so all four files must list samples in the same order
    for row, meta in enumerate(arrayMeta):
        write_file(outDir, meta, arrayASD[row], arrayNicolet[row], arrayAnc[row],
                   headersMeta, headersASD, headersNicolet, headersAnc)
    
    # Parenthesised single-argument form prints identically on Python 2 and Python 3
    print('Finished converting %s samples to ECOSTRESS spectral library files' % len(arrayMeta))

In [6]:
# Convert the example inputs that ship with the repository
directory = "C:\\Users\\Susan\\Documents\\GitHub\\ASTER-Spectral-Library\\"
inMetaFileName = "".join([directory, "Example Inputs\\Example_Metadata.csv"])
inASDFileName = "".join([directory, "Example Inputs\\Example_Spectra_ASD.csv"])
inNicoletFileName = "".join([directory, "Example Inputs\\Example_Spectra_Nicolet.csv"])
inAncFilename = "".join([directory, "Example Inputs\\Example_Ancillary.csv"])
outDir = "".join([directory, "Example Outputs\\"])  # trailing separator is required

create_spectral_library_files(
    metaFilename=inMetaFileName,
    asdFilename=inASDFileName,
    nicoletFilename=inNicoletFileName,
    ancFilename=inAncFilename,
    outDir=outDir,
)

Finished converting 14 samples to ECOSTRESS spectral library files


In [12]:
# Convert the Huntington Gardens spectra
directory = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\Input Spectral Library Files\\"
inMetaFileName = "".join([directory, "Huntington_Gardens_Metadata.csv"])
inASDFileName = "".join([directory, "Huntington_Gardens_Spectra_ASD.csv"])
inNicoletFileName = "".join([directory, "Huntington_Gardens_Spectra_Nicolet.csv"])
inAncFilename = "".join([directory, "Huntington_Gardens_Ancillary.csv"])
# Output folder; trailing separator is required
outDir = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\ECOSTRESS Spectral Library Files\\"

create_spectral_library_files(
    metaFilename=inMetaFileName,
    asdFilename=inASDFileName,
    nicoletFilename=inNicoletFileName,
    ancFilename=inAncFilename,
    outDir=outDir,
)

Finished converting 206 samples to ECOSTRESS spectral library files


In [11]:
# Convert the HyspIRI spectra
directory = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\Input Spectral Library Files\\"
inMetaFileName = "".join([directory, "HyspIRI_Metadata.csv"])
inASDFileName = "".join([directory, "HyspIRI_Spectra_ASD.csv"])
inNicoletFileName = "".join([directory, "HyspIRI_Spectra_Nicolet.csv"])
inAncFilename = "".join([directory, "HyspIRI_Ancillary.csv"])
# Output folder; trailing separator is required
outDir = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\ECOSTRESS Spectral Library Files\\"

create_spectral_library_files(
    metaFilename=inMetaFileName,
    asdFilename=inASDFileName,
    nicoletFilename=inNicoletFileName,
    ancFilename=inAncFilename,
    outDir=outDir,
)

Finished converting 292 samples to ECOSTRESS spectral library files


In [14]:
# Convert the Harvard spectra
directory = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\Input Spectral Library Files\\"
inMetaFileName = "".join([directory, "Harvard_Metadata.csv"])
inASDFileName = "".join([directory, "Harvard_Spectra_ASD.csv"])
inNicoletFileName = "".join([directory, "Harvard_Spectra_Nicolet.csv"])
inAncFilename = "".join([directory, "Harvard_Ancillary.csv"])
# Output folder; trailing separator is required
outDir = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\ECOSTRESS Spectral Library Files\\"

create_spectral_library_files(
    metaFilename=inMetaFileName,
    asdFilename=inASDFileName,
    nicoletFilename=inNicoletFileName,
    ancFilename=inAncFilename,
    outDir=outDir,
)

Finished converting 39 samples to ECOSTRESS spectral library files


In [10]:
# Convert the miscellaneous vegetation spectra
directory = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\Input Spectral Library Files\\"
inMetaFileName = "".join([directory, "Veg_Misc_Metadata.csv"])
inASDFileName = "".join([directory, "Veg_Misc_Spectra_ASD.csv"])
inNicoletFileName = "".join([directory, "Veg_Misc_Spectra_Nicolet.csv"])
inAncFilename = "".join([directory, "Veg_Misc_Ancillary.csv"])
# Output folder; trailing separator is required
outDir = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\ECOSTRESS Spectral Library Files\\"

create_spectral_library_files(
    metaFilename=inMetaFileName,
    asdFilename=inASDFileName,
    nicoletFilename=inNicoletFileName,
    ancFilename=inAncFilename,
    outDir=outDir,
)

Finished converting 55 samples to ECOSTRESS spectral library files
