In [3]:
#! /usr/bin/env python3
# 
# This program reads a tab-delimited file containing several columns of data
# and writes a new file containing the coordinates converted to decimal
# degrees along with selected data fields.
# The process is: read each line of the input file, split it into
# separate components, and write the selected output to a separate file.

import re # Load regular expression module


# Functions must be defined before they are used
def decimalat(DegString):
    """Convert a degrees/decimal-minutes coordinate to signed decimal degrees.

    This function requires that the re module is loaded.

    Args:
        DegString: Text containing a coordinate in the format "34 56.78 N":
            whole degrees, decimal minutes and a compass letter, each
            separated by a single space.

    Returns:
        The coordinate as a float in decimal degrees. The value is negative
        when the compass letter is S or W (upper or lower case).

    Raises:
        ValueError: If DegString does not contain a coordinate in the
            expected format, or the minutes field is not a valid number.
    """
    # Raw string: \d, \. and \w are regex escapes, not Python string escapes
    SearchStr = r'(\d+) ([\d\.]+) (\w)'
    Result = re.search(SearchStr, DegString)  # (pattern, stringToSearchIn)

    # re.search returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on Result.group
    if Result is None:
        raise ValueError(
            "Coordinate %r is not in the format '34 56.78 N'" % (DegString,))

    # Get the captured character groups, as defined by the parentheses
    # in the regular expression, convert the numbers to floats, and
    # assign them to variables with meaningful names
    Degrees = float(Result.group(1))
    Minutes = float(Result.group(2))
    Compass = Result.group(3).upper()  # make sure it is capital too

    # Calculate the decimal degrees
    DecimalDegree = Degrees + Minutes / 60

    # If the compass direction indicates the coordinate is South or
    # West, make the sign of the coordinate negative
    if Compass in ('S', 'W'):
        DecimalDegree = -DecimalDegree
    return DecimalDegree
# End of the function definition

# Set the input file name
InFileName = 'Marrus_claudanielis.txt'

# Derive the output file name from the input file name
OutFileName = 'dec_' + InFileName

# Give the option to write to a file or just print to screen
WriteOutFile = True

HeaderLine = 'dive\tdepth\tlatitude\tlongitude\tdate\tcomment'

# Open the input file in a with-block so it is closed even if a line
# fails to parse part-way through the file
with open(InFileName, 'r') as InFile:
    print(HeaderLine)

    # Open the output file once, outside the loop.
    # OutFile stays None when only printing to the screen.
    OutFile = open(OutFileName, 'w') if WriteOutFile else None
    try:
        if OutFile is not None:
            OutFile.write(HeaderLine + '\n')

        # Loop over each line in the file, keeping track of line numbers
        for LineNumber, Line in enumerate(InFile):
            # The first line is the input file's own header; don't process it
            if LineNumber == 0:
                continue

            # Remove the line ending characters
            Line = Line.strip('\n')

            # Skip blank lines (e.g. a trailing empty line); they have no
            # fields and would otherwise fail when indexing ElementList
            if not Line.strip():
                continue

            # Split the line into a list of elements, using tab as a delimiter.
            # Returns a list in this format:
            # ['Tiburon 596', '19-Jul-03', '36 36.12 N', '122 22.48 W', '1190', 'holotype']
            ElementList = Line.split('\t')

            Dive = ElementList[0]
            Date = ElementList[1]
            Depth = ElementList[4]
            Comment = ElementList[5]

            LatDegrees = decimalat(ElementList[2])
            LonDegrees = decimalat(ElementList[3])

            # Create string with coordinates to 5 decimal places, padded to
            # 10 total characters
            OutString = "%s\t%4s\t%10.5f\t%10.5f\t%9s\t%s" % (
                Dive, Depth, LatDegrees, LonDegrees, Date, Comment)
            print(OutString)
            if OutFile is not None:
                OutFile.write(OutString + '\n')  # remember the line feed!
    finally:
        # Close the output file whether or not the loop completed, so
        # anything already written is flushed to disk
        if OutFile is not None:
            OutFile.close()

dive	depth	latitude	longitude	date	comment
Tiburon 596	1190	  36.60200	-122.37467	19-Jul-03	holotype
JSL II 1411	 518	  39.94000	 -70.23833	16-Sep-86	paratype
JSL II 930	 686	  40.08383	 -69.05017	18-Aug-84	Youngbluth (1989)
Ventana 1575	 767	  36.70400	-122.04200	11-Mar-99	
Ventana 1777	 934	  36.71000	-122.04500	16-Jun-00	
Ventana 2243	1001	  36.70800	-122.06400	 9-Sep-02	
Tiburon 515	1156	  36.70000	-122.03300	24-Nov-02	
Tiburon 531	1144	  24.31700	-109.20300	13-Mar-03	
Tiburon 547	1126	  24.23400	-109.66700	31-Mar-03	
JSL II 3457	 862	  40.29617	 -68.11133	26-Sep-03	Francesc Pages (pers.comm)
