Commit

added week1 code. Run _test_functions_wk1.py to run
nmatzke committed Jun 2, 2009
1 parent acadc1b commit 4d963a6
Showing 9 changed files with 49,049 additions and 0 deletions.
124 changes: 124 additions & 0 deletions Bio/Geography/_test_functions_wk1.py
@@ -0,0 +1,124 @@
#!/usr/bin/python
import os
from geogUtils import readshpfile, summarize_shapefile, point_inside_polygon, shapefile_points_in_poly

print ""
print ""
print "Running _test_shp.py..."
print ""



# =====================================
# Processing shapefiles
# =====================================

print "1. Opening and summarizing shapefiles."

# Get path/name of shapefile
fn = 'placept_shp/placept.shp'

# Get the records
shpRecords = readshpfile(fn)

# now do whatever you want with the resulting data
print "Shapefile name: ", fn
print "Length of shapefile dictionary: ", len(shpRecords)

# print out the first feature in this shapefile
print shpRecords[0]['dbf_data']
for part in shpRecords[0]['shp_data']:
    print part, shpRecords[0]['shp_data'][part]

# get list of coordinates for each feature and print them out
shplist = summarize_shapefile(fn, 'tolist', 'temp.txt')
print shplist

# print coordinates directly to screen
summarize_shapefile(fn, 'toscreen', 'temp.txt')

# print coordinates to file
printfn = summarize_shapefile(fn, 'tofile', 'temp.txt')
os.system('head ' + printfn)






# =====================================
# Point-in-polygon operation
# =====================================

print ""
print "2. Point-in-polygon operation."

# Set up the polygon (vertices listed in order around the rectangle,
# so the polygon does not self-intersect)
ul = (-80, 80)
ur = (0, 80)
ll = (-80, 40)
lr = (0, 40)
poly = [ul, ur, lr, ll]

# Check which of the shapefile's records fall inside the polygon
shapefile_points_in_poly(shpRecords, poly)






# =====================================
# Read & parse a GBIF XML file (DarwinCore format)
# =====================================
print ""
print "3. Extracting lat/longs from XML file (DarwinCore format)."

from xml.etree import ElementTree as ET
from geogUtils import print_subelements, extract_latlong

xml_file = 'utric_search_v3.xml'

# Name of file to output lats/longs to
outfilename = 'latlongs.txt'

try:
    xmltree = ET.parse(xml_file)
except Exception, inst:
    print "Unexpected error opening %s: %s" % (xml_file, inst)
    raise   # cannot continue without a parsed tree

# Extract the lat/longs from each record element and write them to file
outfh = open(outfilename, 'w')
for element in xmltree.getroot():
    extract_latlong(element, outfh)
outfh.close()


# Show the first and last lines of the lat/long output file
os.system('head ' + outfilename)
os.system('tail ' + outfilename)







# =====================================
# Classify GBIF records as to whether or not they fall inside region of interest
# =====================================
print ""
print "4. Do the GBIF records fall inside the region of interest?"

from geogUtils import tablefile_points_in_poly

# Open the lat/longs table
outfh = open(outfilename, 'r')

tablefile_points_in_poly(outfh, 0, 1, 2, poly)

outfh.close()
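
The point-in-polygon step in section 2 above uses the classic ray-casting (even-odd crossing) test: shoot a horizontal ray from the point and count how many polygon edges it crosses. Below is a minimal standalone sketch of that test, run on the same rectangle used in the script; the function name and signature are a hypothetical illustration of the technique, not necessarily how geogUtils.point_inside_polygon is implemented.

def ray_cast_point_in_poly(x, y, poly):
    """Even-odd ray-casting test: is the point (x, y) inside poly?

    poly is a list of (x, y) vertex tuples; the polygon is closed
    implicitly (the last vertex connects back to the first).
    """
    n = len(poly)
    inside = False
    p1x, p1y = poly[0]
    for i in range(n + 1):
        p2x, p2y = poly[i % n]
        # Does a horizontal ray from (x, y) cross the edge p1 -> p2?
        if min(p1y, p2y) < y <= max(p1y, p2y) and x <= max(p1x, p2x):
            xints = (y - p1y) * (p2x - p1x) / float(p2y - p1y) + p1x
            if p1x == p2x or x <= xints:
                inside = not inside
        p1x, p1y = p2x, p2y
    return inside

# Same rectangle as in the script: 80W to 0 longitude, 40N to 80N latitude
rect = [(-80, 80), (0, 80), (0, 40), (-80, 40)]
print ray_cast_point_in_poly(-40, 60, rect)    # True  (inside)
print ray_cast_point_in_poly(10, 60, rect)     # False (outside)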





113 changes: 113 additions & 0 deletions Bio/Geography/dbfUtils.py
@@ -0,0 +1,113 @@
import struct, datetime, decimal, itertools

def dbfreader(f):
    """Returns an iterator over records in an Xbase DBF file.

    The first row returned contains the field names.
    The second row contains field specs: (type, size, decimal places).
    Subsequent rows contain the data records.
    If a record is marked as deleted, it is skipped.

    File should be opened for binary reads.
    """
    # See DBF format spec at:
    # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    numfields = (lenheader - 33) // 32

    fields = []
    for fieldno in xrange(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = name.replace('\0', '')       # eliminate NULs from string
        fields.append((name, typ, size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    assert terminator == '\r'

    fields.insert(0, ('DeletionFlag', 'C', 1, 0))
    fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
    fmtsiz = struct.calcsize(fmt)
    for i in xrange(numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != ' ':
            continue                        # deleted record
        result = []
        for (name, typ, size, deci), value in itertools.izip(fields, record):
            if name == 'DeletionFlag':
                continue
            if typ == "N":
                value = value.replace('\0', '').lstrip()
                if value == '':
                    value = 0
                elif deci:
                    value = decimal.Decimal(value)
                else:
                    value = int(value)
            elif typ == 'D':
                y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                value = datetime.date(y, m, d)
            elif typ == 'L':
                value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
            result.append(value)
        yield result


def dbfwriter(f, fieldnames, fieldspecs, records):
    """Writes fieldnames, fieldspecs, and records to f as a binary DBF file.

    File f should be open for writing in a binary mode.

    Fieldnames should be no longer than ten characters and not include \x00.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime.date objects
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records should be a sequence of records (each a sequence of field values).
    """
    # header info
    ver = 3
    now = datetime.datetime.now()
    yr, mon, day = now.year - 1900, now.month, now.day
    numrec = len(records)
    numfields = len(fieldspecs)
    lenheader = numfields * 32 + 33
    lenrecord = sum(field[1] for field in fieldspecs) + 1
    hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
    f.write(hdr)

    # field specs
    for name, (typ, size, deci) in itertools.izip(fieldnames, fieldspecs):
        name = name.ljust(11, '\x00')
        fld = struct.pack('<11sc4xBB14x', name, typ, size, deci)
        f.write(fld)

    # terminator
    f.write('\r')

    # records
    for record in records:
        f.write(' ')                        # deletion flag
        for (typ, size, deci), value in itertools.izip(fieldspecs, record):
            if typ == "N":
                value = str(value).rjust(size, ' ')
            elif typ == 'D':
                value = value.strftime('%Y%m%d')
            elif typ == 'L':
                value = str(value)[0].upper()
            else:
                value = str(value)[:size].ljust(size, ' ')
            assert len(value) == size
            f.write(value)

    # End of file
    f.write('\x1A')
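
The two functions above are complementary: dbfreader yields the field names first, then the field specs, then the data records, and dbfwriter takes exactly those three pieces plus an open output file. A minimal round-trip sketch under that assumption is below; the file names are hypothetical and are not part of this commit.

# Round-trip sketch: copy the contents of one DBF file into another.
fin = open('some_table.dbf', 'rb')
db = dbfreader(fin)
fieldnames = db.next()     # first yield: list of field names
fieldspecs = db.next()     # second yield: list of (type, size, deci) tuples
records = list(db)         # remaining yields: the data records
fin.close()

fout = open('copy_of_table.dbf', 'wb')
dbfwriter(fout, fieldnames, fieldspecs, records)
fout.close()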
