Commit

added week1 code. Run _test_functions_wk1.py to run
nmatzke committed Jun 2, 2009
1 parent acadc1b commit 4d963a6
Showing 9 changed files with 49,049 additions and 0 deletions.
124 changes: 124 additions & 0 deletions Bio/Geography/_test_functions_wk1.py
@@ -0,0 +1,124 @@
#!/usr/bin/python
import os
from geogUtils import readshpfile, summarize_shapefile, point_inside_polygon, shapefile_points_in_poly

print ""
print ""
print "Running _test_shp.py..."
print ""



# =====================================
# Processing shapefiles
# =====================================

print "1. Opening and summarizing shapefiles."

# Get path/name of shapefile
fn = 'placept_shp/placept.shp'

# Get the records
shpRecords = readshpfile(fn)

# now do whatever you want with the resulting data
print "Shapefile name: ", fn
print "Length of shapefile dictionary: ", len(shpRecords)

# print out the first feature in this shapefile
print shpRecords[0]['dbf_data']
for part in shpRecords[0]['shp_data']:
    print part, shpRecords[0]['shp_data'][part]

# get list of coordinates for each feature and print them out
shplist = summarize_shapefile(fn, 'tolist', 'temp.txt')
print shplist

# print coordinates directly to screen
summarize_shapefile(fn, 'toscreen', 'temp.txt')

# print coordinates to file
printfn = summarize_shapefile(fn, 'tofile', 'temp.txt')
os.system('head ' + printfn)






# =====================================
# Point-in-polygon operation
# =====================================

print ""
print "2. Point-in-polygon operation."

# Set up the polygon (vertices listed in order around the rectangle,
# so the polygon does not self-intersect)
ul = (-80, 80)
ur = (0, 80)
ll = (-80, 40)
lr = (0, 40)
poly = [ul, ur, lr, ll]

# Check which of the shapefile's records fall inside the polygon
shapefile_points_in_poly(shpRecords, poly)






# =====================================
# Read & parse a GBIF XML file (DarwinCore format)
# =====================================
print ""
print "3. Extracting lat/longs from XML file (DarwinCore format)."

from xml.etree import ElementTree as ET
from geogUtils import print_subelements, extract_latlong

xml_file = 'utric_search_v3.xml'

# Name of file to output lats/longs to
outfilename = 'latlongs.txt'

try:
    xmltree = ET.parse(xml_file)
except Exception, inst:
    print "Unexpected error opening %s: %s" % (xml_file, inst)
    raise   # cannot continue without a parsed tree

# Extract the lat/longs from each record element and write them to file
outfh = open(outfilename, 'w')
for element in xmltree.getroot():
    extract_latlong(element, outfh)
outfh.close()


# Show the first and last lines of the lat/long output file
os.system('head ' + outfilename)
os.system('tail ' + outfilename)







# =====================================
# Classify GBIF records as to whether or not they fall inside region of interest
# =====================================
print ""
print "4. Do the GBIF records fall inside the region of interest?"

from geogUtils import tablefile_points_in_poly

# Open the lat/longs table
outfh = open(outfilename, 'r')

tablefile_points_in_poly(outfh, 0, 1, 2, poly)

outfh.close()
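
The point-in-polygon step in section 2 above uses the classic ray-casting (even-odd crossing) test: shoot a horizontal ray from the point and count how many polygon edges it crosses. Below is a minimal standalone sketch of that test, run on the same rectangle used in the script; the function name and signature are a hypothetical illustration of the technique, not necessarily how geogUtils.point_inside_polygon is implemented.

def ray_cast_point_in_poly(x, y, poly):
    """Even-odd ray-casting test: is the point (x, y) inside poly?

    poly is a list of (x, y) vertex tuples; the polygon is closed
    implicitly (the last vertex connects back to the first).
    """
    n = len(poly)
    inside = False
    p1x, p1y = poly[0]
    for i in range(n + 1):
        p2x, p2y = poly[i % n]
        # Does a horizontal ray from (x, y) cross the edge p1 -> p2?
        if min(p1y, p2y) < y <= max(p1y, p2y) and x <= max(p1x, p2x):
            xints = (y - p1y) * (p2x - p1x) / float(p2y - p1y) + p1x
            if p1x == p2x or x <= xints:
                inside = not inside
        p1x, p1y = p2x, p2y
    return inside

# Same rectangle as in the script: 80W to 0 longitude, 40N to 80N latitude
rect = [(-80, 80), (0, 80), (0, 40), (-80, 40)]
print ray_cast_point_in_poly(-40, 60, rect)    # True  (inside)
print ray_cast_point_in_poly(10, 60, rect)     # False (outside)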





113 changes: 113 additions & 0 deletions Bio/Geography/dbfUtils.py
@@ -0,0 +1,113 @@
import struct, datetime, decimal, itertools

def dbfreader(f):
    """Returns an iterator over records in an Xbase DBF file.

    The first row returned contains the field names.
    The second row contains field specs: (type, size, decimal places).
    Subsequent rows contain the data records.
    If a record is marked as deleted, it is skipped.

    File should be opened for binary reads.
    """
    # See DBF format spec at:
    # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    numfields = (lenheader - 33) // 32

    fields = []
    for fieldno in xrange(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = name.replace('\0', '')       # eliminate NULs from string
        fields.append((name, typ, size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    assert terminator == '\r'

    fields.insert(0, ('DeletionFlag', 'C', 1, 0))
    fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
    fmtsiz = struct.calcsize(fmt)
    for i in xrange(numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != ' ':
            continue                        # deleted record
        result = []
        for (name, typ, size, deci), value in itertools.izip(fields, record):
            if name == 'DeletionFlag':
                continue
            if typ == "N":
                value = value.replace('\0', '').lstrip()
                if value == '':
                    value = 0
                elif deci:
                    value = decimal.Decimal(value)
                else:
                    value = int(value)
            elif typ == 'D':
                y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                value = datetime.date(y, m, d)
            elif typ == 'L':
                value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
            result.append(value)
        yield result


def dbfwriter(f, fieldnames, fieldspecs, records):
    """Writes fieldnames, fieldspecs, and records to f as a binary DBF file.

    File f should be open for writing in a binary mode.

    Fieldnames should be no longer than ten characters and not include \x00.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime.date objects
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records should be a sequence of records (each a sequence of field values).
    """
    # header info
    ver = 3
    now = datetime.datetime.now()
    yr, mon, day = now.year - 1900, now.month, now.day
    numrec = len(records)
    numfields = len(fieldspecs)
    lenheader = numfields * 32 + 33
    lenrecord = sum(field[1] for field in fieldspecs) + 1
    hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
    f.write(hdr)

    # field specs
    for name, (typ, size, deci) in itertools.izip(fieldnames, fieldspecs):
        name = name.ljust(11, '\x00')
        fld = struct.pack('<11sc4xBB14x', name, typ, size, deci)
        f.write(fld)

    # terminator
    f.write('\r')

    # records
    for record in records:
        f.write(' ')                        # deletion flag
        for (typ, size, deci), value in itertools.izip(fieldspecs, record):
            if typ == "N":
                value = str(value).rjust(size, ' ')
            elif typ == 'D':
                value = value.strftime('%Y%m%d')
            elif typ == 'L':
                value = str(value)[0].upper()
            else:
                value = str(value)[:size].ljust(size, ' ')
            assert len(value) == size
            f.write(value)

    # End of file
    f.write('\x1A')
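
The two functions above are complementary: dbfreader yields the field names first, then the field specs, then the data records, and dbfwriter takes exactly those three pieces plus an open output file. A minimal round-trip sketch under that assumption is below; the file names are hypothetical and are not part of this commit.

# Round-trip sketch: copy the contents of one DBF file into another.
fin = open('some_table.dbf', 'rb')
db = dbfreader(fin)
fieldnames = db.next()     # first yield: list of field names
fieldspecs = db.next()     # second yield: list of (type, size, deci) tuples
records = list(db)         # remaining yields: the data records
fin.close()

fout = open('copy_of_table.dbf', 'wb')
dbfwriter(fout, fieldnames, fieldspecs, records)
fout.close()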
