Code that corrects files after they have been converted to ECOSTRESS files.
Right now this only handles files with duplicate wavelengths, which happens when the original files do not have enough decimal places.

In [1]:
import os
import glob
import numpy as np
from collections import Counter

In [2]:
def format_spec(spectra):
    """
    This function formats the aster spectral library spectra into the ecostress
    spectral library format. 
    
    Parameters:
    -----------
    1) origSpec: string array that contains two columns, the first being wavelength and the second reflectance
    
    Returns:
    --------
    1) arraySpec: a numpy array of strings that have only three decimal points for values
    """ 
    arraySpec = np.chararray([len(spectra)-1, 2], itemsize=7)
    origSpec = np.chararray([len(spectra), 2], itemsize=15)
    origSpec[:, 0] = spectra[:, 0]
    origSpec[:, 1] = spectra[:, 1]
    
    # Loop through rows
    for i in range(1, len(origSpec)):
        if origSpec[i, 1] != "-9999":  # Some samples don't have ASD & Nicolet and the spaces are left blank            
            # Loop through columns (wavelengths then reflectance)
            for c in [0, 1]: 
                tempNum = round(float(origSpec[i, c]), 3)  # Convert to float and round to three decimal places
                # If the number is greater than 9 don't add leading space
                if tempNum >= 10:  
                    if len(str(tempNum)) < 7:
                        if 7 - len(str(tempNum)) == 4:
                            tempStr = str(tempNum) + "0000"
                        elif 7 - len(str(tempNum)) == 3:
                            tempStr = str(tempNum) + "000"
                        elif 7 - len(str(tempNum)) == 2:
                            tempStr = str(tempNum) + "00"
                        else:
                            tempStr = str(tempNum) + "0"
                    else:
                        tempStr = str(tempNum)
                 # If the number is less than 10 add a leading space
                else: 
                    if len(str(tempNum)) < 6:
                        if 6 - len(str(tempNum)) == 4:
                            tempStr = " " + str(tempNum) + "0000"
                        elif 6 - len(str(tempNum)) == 3:
                            tempStr = " " + str(tempNum) + "000"
                        elif 6 - len(str(tempNum)) == 2:
                            tempStr = " " + str(tempNum) + "00"
                        else:
                            tempStr = " " + str(tempNum) + "0"
                    else: 
                        tempStr = " " + str(tempNum)
                arraySpec[i-1, c] = tempStr
        else:
            arraySpec[i-1, [0, 1]] = ""
        
    return arraySpec

In [6]:
def _to_text(value):
    """Return value as a native text string, decoding byte strings as ASCII."""
    if isinstance(value, str):
        return value
    return value.decode('ascii')


def fix_files(currentDir, listFiles):
    """
    Average duplicate wavelengths in spectrum files and rewrite the files in place.

    For every file the first 21 lines are kept as the header and every later
    non-blank line is parsed as "wavelength reflectance". Rows that share a
    wavelength are replaced by one row holding the mean reflectance, the rows
    are sorted by ascending wavelength, and the result is formatted with
    format_spec. Header lines 16-18 (zero-based; first X value, last X value
    and number of X values) are regenerated before the file is overwritten.

    Parameters:
    -----------
    1) currentDir: directory containing the files; the process working
       directory is changed to it
    2) listFiles: iterable of file names, relative to currentDir

    Returns:
    --------
    None. The files are modified on disk.

    Raises:
    -------
    ValueError if a file has no spectrum rows after the header or a row
    cannot be parsed as floats.
    """
    header_len = 21  # Lines before the spectrum data; written back as-is except 16-18

    os.chdir(currentDir)  # File names in listFiles are relative to this directory

    for f in listFiles:
        print('Processing ' + f)
        # The context manager closes the file even if parsing fails part-way.
        with open(f, 'r+') as inFile:
            lines = inFile.readlines()

            rows = []
            for line in lines[header_len:]:
                fields = line.split()
                if not fields:
                    continue  # Tolerate blank (e.g. trailing) lines
                rows.append([float(fields[0]), float(fields[1])])
            if not rows:
                raise ValueError(f + ' has no spectrum rows after the header')
            spectra = np.array(rows)

            # Collapse every group of rows sharing a wavelength into one mean row.
            counts = Counter(spectra[:, 0])
            duplicates = [wave for wave, count in counts.items() if count > 1]
            for d in duplicates:
                dup_rows = np.where(spectra[:, 0] == d)[0]
                mean = np.mean(spectra[dup_rows, 1])
                spectra = np.delete(spectra, dup_rows, axis=0)
                spectra = np.insert(spectra, dup_rows[0], [d, mean], axis=0)

            # Fancy indexing returns a new array, so the result must be assigned.
            spectra = spectra[spectra[:, 0].argsort()]

            # format_spec skips the first row it is given, so prepend a
            # placeholder row to avoid losing the first spectrum point.
            formatted_spec = format_spec(np.vstack([np.zeros((1, 2)), spectra]))

            header = lines[:header_len]
            header[16] = 'First X Value: ' + _to_text(formatted_spec[0, 0]) + '\n'
            header[17] = 'Last X Value: ' + _to_text(formatted_spec[-1, 0]) + '\n'
            header[18] = 'Number of X Values: ' + str(len(formatted_spec)) + '\n'

            # Overwrite existing spectra
            inFile.seek(0)
            inFile.writelines(header)
            inFile.writelines(
                _to_text(formatted_spec[j, 0]) + '\t' + _to_text(formatted_spec[j, 1]) + '\n'
                for j in range(len(formatted_spec))
            )
            # The rewritten file can be shorter than the original; drop leftovers.
            inFile.truncate()
        

In [8]:
# Files to be corrected by fix_files in this run.
problemFiles = ['mineral.borate.none.coarse.all.nmnh102876-2.usgs.perknic.spectrum.txt',
                'mineral.borate.none.coarse.all.nmnh123943.usgs.perknic.spectrum.txt',
                'mineral.borate.none.coarse.all.nmnh137833.usgs.perknic.spectrum.txt',
                'mineral.borate.none.coarse.all.nmnh160075.usgs.perknic.spectrum.txt',
                'mineral.borate.none.coarse.all.nmnh170164.usgs.perknic.spectrum.txt',
                'mineral.borate.none.coarse.all.nmnh96075-3.usgs.perknic.spectrum.txt',
                'mineral.borate.none.coarse.all.none.usgs.perknic.spectrum.txt',
                'mineral.borate.none.coarse.all.ulexite.usgs.perknic.spectrum.txt',
                'mineral.carbonate.none.coarse.all.nmnh105598.usgs.perknic.spectrum.txt',
                'mineral.carbonate.none.coarse.all.traonac.usgs.perknic.spectrum.txt',
                'mineral.chloride.none.coarse.all.nmnh83904.usgs.perknic.spectrum.txt',
                'mineral.chloride.none.coarse.all.nmnh98011.usgs.perknic.spectrum.txt',
                'mineral.sulfate.none.coarse.all.eugstec.usgs.perknic.spectrum.txt',
                'mineral.sulfate.none.coarse.all.gds147.usgs.perknic.spectrum.txt',
                'mineral.sulfate.none.coarse.all.gypsumc.usgs.perknic.spectrum.txt',
                'mineral.sulfate.none.coarse.all.nmnh92669-4.usgs.perknic.spectrum.txt',
                'mineral.sulfate.none.coarse.all.none.usgs.perknic.spectrum.txt',
                'rock.igneous.felsic.solid.all.cup1.usgs.perknic.spectrum.txt',
                'rock.igneous.felsic.solid.all.cup2.usgs.perknic.spectrum.txt',
                'rock.igneous.felsic.solid.all.cup3.usgs.perknic.spectrum.txt',
                'rock.igneous.felsic.solid.all.cup4.usgs.perknic.spectrum.txt',
                'rock.igneous.felsic.solid.all.cup5.usgs.perknic.spectrum.txt',
                'rock.igneous.felsic.solid.all.cup6.usgs.perknic.spectrum.txt',
                'rock.igneous.felsic.solid.all.cup7.usgs.perknic.spectrum.txt',
                'rock.igneous.felsic.solid.all.cup8.usgs.perknic.spectrum.txt',
                'rock.igneous.intermediate.fine.all.andesite_h1.jhu.becknic.spectrum.txt',
                'rock.igneous.mafic.solid.all.cup9.usgs.perknic.spectrum.txt',
                'rock.metamorphic.schist.coarse.all.schist9.jhu.becknic.spectrum.txt',
                'rock.sedimentary.travertine.fine.all.travertine_1.jhu.becknic.spectrum.txt',
                'soil.inceptisol.ustocrept.none.all.82p2671.jhu.becknic.spectrum.txt',
                'water.frost.none.fine.all.frost_snw_.jhu.becknic.spectrum.txt',
                'water.snow.coarsegranular.coarse.all.coarse_snw_.jhu.becknic.spectrum.txt',
                'water.snow.finegranular.fine.all.fine_snw_.jhu.becknic.spectrum.txt',
                'water.snow.mediumgranular.medium.all.medgran_snw_.jhu.becknic.spectrum.txt']
# Every backslash is escaped: the original "\A" was an invalid escape sequence
# that only worked because Python leaves unknown escapes untouched.
inDir = "F:\\Dropbox\\Analysis\\ECOSTRESS Spectral Library\\ECOSTRESS Spectral Library Files\\"
fix_files(inDir, problemFiles)

Processing mineral.borate.none.coarse.all.nmnh102876-2.usgs.perknic.spectrum.txt
Processing mineral.borate.none.coarse.all.nmnh123943.usgs.perknic.spectrum.txt
Processing mineral.borate.none.coarse.all.nmnh137833.usgs.perknic.spectrum.txt
Processing mineral.borate.none.coarse.all.nmnh160075.usgs.perknic.spectrum.txt
Processing mineral.borate.none.coarse.all.nmnh170164.usgs.perknic.spectrum.txt
Processing mineral.borate.none.coarse.all.nmnh96075-3.usgs.perknic.spectrum.txt
Processing mineral.borate.none.coarse.all.none.usgs.perknic.spectrum.txt
Processing mineral.borate.none.coarse.all.ulexite.usgs.perknic.spectrum.txt
Processing mineral.carbonate.none.coarse.all.nmnh105598.usgs.perknic.spectrum.txt
Processing mineral.carbonate.none.coarse.all.traonac.usgs.perknic.spectrum.txt
Processing mineral.chloride.none.coarse.all.nmnh83904.usgs.perknic.spectrum.txt
Processing mineral.chloride.none.coarse.all.nmnh98011.usgs.perknic.spectrum.txt
Processing mineral.sulfate.none.coarse.all.eugstec.us