forked from biopython/biopython
Commit
added week1 code. Run _test_functions_wk1.py to run
Showing 9 changed files with 49,049 additions and 0 deletions.

@@ -0,0 +1,124 @@
#!/usr/bin/python
import os
from geogUtils import readshpfile, summarize_shapefile, point_inside_polygon, shapefile_points_in_poly

print ""
print ""
print "Running _test_shp.py..."
print ""

# =====================================
# Processing shapefiles
# =====================================

print "1. Opening and summarizing shapefiles."

# Get path/name of shapefile
fn = 'placept_shp/placept.shp'

# Get the records
shpRecords = readshpfile(fn)

# now do whatever you want with the resulting data
print "Shapefile name: ", fn
print "Length of shapefile dictionary: ", len(shpRecords)

# print out the first feature in this shapefile
print shpRecords[0]['dbf_data']
for part in shpRecords[0]['shp_data']:
    print part, shpRecords[0]['shp_data'][part]

# get list of coordinates for each feature and print them out
shplist = summarize_shapefile(fn, 'tolist', 'temp.txt')
print shplist

# print coordinates directly to screen
summarize_shapefile(fn, 'toscreen', 'temp.txt')

# print coordinates to file
printfn = summarize_shapefile(fn, 'tofile', 'temp.txt')
os.system('head ' + printfn)
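
# summarize_shapefile itself lives in geogUtils and is not part of this file.
# From the three calls above, a plausible shape for it is sketched below --
# an assumption for illustration, not the real implementation:
def _summarize_shapefile_sketch(fn, mode, outfn):
    records = readshpfile(fn)
    lines = []
    for rec in records:
        # one summary entry per feature; the real field layout may differ
        lines.append(str(rec['shp_data']))
    if mode == 'tolist':
        return lines                # caller prints the returned list
    elif mode == 'toscreen':
        for line in lines:
            print line
    elif mode == 'tofile':
        outfh = open(outfn, 'w')
        outfh.write('\n'.join(lines) + '\n')
        outfh.close()
        return outfn                # caller runs 'head' on the returned name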

# =====================================
# Point-in-polygon operation
# =====================================

print ""
print "2. Point-in-polygon operation."

# Set up polygon; vertices must trace the perimeter in order
# (ul -> ur -> lr -> ll), otherwise the rectangle self-intersects
ul = (-80, 80)
ur = (0, 80)
ll = (-80, 40)
lr = (0, 40)
poly = [ul, ur, lr, ll]

shapefile_points_in_poly(shpRecords, poly)
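
# point_inside_polygon is imported above but its body is not in this commit.
# The conventional implementation is even-odd ray casting; a minimal sketch
# (an assumption about geogUtils, shown here for reference):
def _point_inside_polygon_sketch(x, y, poly):
    """Return True if (x, y) is inside poly, a list of (x, y) vertices."""
    n = len(poly)
    inside = False
    p1x, p1y = poly[0]
    for i in range(1, n + 1):
        p2x, p2y = poly[i % n]
        # count crossings of a horizontal ray extending right from (x, y)
        if min(p1y, p2y) < y <= max(p1y, p2y) and x <= max(p1x, p2x):
            # p1y != p2y is guaranteed here, so the intercept is well defined
            xinters = (y - p1y) * (p2x - p1x) / float(p2y - p1y) + p1x
            if p1x == p2x or x <= xinters:
                inside = not inside
        p1x, p1y = p2x, p2y
    return inside

# e.g. _point_inside_polygon_sketch(-40, 60, poly) is True for the box above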

# =====================================
# Read & parse a GBIF XML file (DarwinCore format)
# =====================================
print ""
print "3. Extracting lat/longs from XML file (DarwinCore format)."

from xml.etree import ElementTree as ET
from geogUtils import print_subelements, extract_latlong

xml_file = 'utric_search_v3.xml'

# Name of file to output lats/longs to
outfilename = 'latlongs.txt'

try:
    xmltree = ET.parse(xml_file)
except Exception, inst:
    print "Unexpected error opening %s: %s" % (xml_file, inst)
    raise    # without the file there is no xmltree to iterate below

outfh = open(outfilename, 'w')
for element in xmltree.getroot():
    extract_latlong(element, outfh)
outfh.close()

# Peek at the first and last lines of the extracted lat/longs
os.system('head latlongs.txt')
os.system('tail latlongs.txt')
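
# extract_latlong is likewise imported from geogUtils and not shown here.
# A plausible sketch, assuming DarwinCore's decimalLatitude/decimalLongitude
# terms and one tab-delimited name/lat/long row per record (an illustration,
# not the real code):
def _extract_latlong_sketch(element, outfh):
    name, lat, lon = None, None, None
    for child in element.getiterator():    # walk all subelements (py2 API)
        tag = child.tag.split('}')[-1]     # drop any namespace prefix
        if tag == 'scientificName':
            name = child.text
        elif tag == 'decimalLatitude':
            lat = child.text
        elif tag == 'decimalLongitude':
            lon = child.text
    if lat is not None and lon is not None:
        outfh.write('%s\t%s\t%s\n' % (name, lat, lon))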

# =====================================
# Classify GBIF records by whether they fall inside the region of interest
# =====================================
print ""
print "4. Do the GBIF records fall inside the region of interest?"

from geogUtils import tablefile_points_in_poly

# Open the lat/longs table
outfh = open(outfilename, 'r')

tablefile_points_in_poly(outfh, 0, 1, 2, poly)

outfh.close()
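
# tablefile_points_in_poly is not in this commit either; judging from the
# call above -- (file handle, name column, lat column, long column, polygon)
# is one plausible reading of the 0, 1, 2 arguments -- a minimal sketch:
def _tablefile_points_in_poly_sketch(fh, namecol, latcol, loncol, poly):
    for line in fh:
        words = line.rstrip('\n').split('\t')
        lat = float(words[latcol])
        lon = float(words[loncol])
        # note the (x, y) order: x is longitude, y is latitude
        if _point_inside_polygon_sketch(lon, lat, poly):
            print words[namecol], lat, lon, "inside"
        else:
            print words[namecol], lat, lon, "outside"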

@@ -0,0 +1,113 @@
import struct, datetime, decimal, itertools

def dbfreader(f):
    """Returns an iterator over records in a Xbase DBF file.

    The first row returned contains the field names.
    The second row contains field specs: (type, size, decimal places).
    Subsequent rows contain the data records.
    If a record is marked as deleted, it is skipped.

    File should be opened for binary reads.
    """
    # See DBF format spec at:
    # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    # header: skip the version byte and 3-byte date, then read the record
    # count (uint32) and header length (uint16), skipping the remainder
    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    numfields = (lenheader - 33) // 32

    # one 32-byte descriptor per field
    fields = []
    for fieldno in xrange(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = name.replace('\0', '')       # eliminate NULs from string
        fields.append((name, typ, size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    assert terminator == '\r'

    fields.insert(0, ('DeletionFlag', 'C', 1, 0))
    fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
    fmtsiz = struct.calcsize(fmt)
    for i in xrange(numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != ' ':
            continue                        # deleted record
        result = []
        for (name, typ, size, deci), value in itertools.izip(fields, record):
            if name == 'DeletionFlag':
                continue
            if typ == "N":
                value = value.replace('\0', '').lstrip()
                if value == '':
                    value = 0
                elif deci:
                    value = decimal.Decimal(value)
                else:
                    value = int(value)
            elif typ == 'D':
                y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                value = datetime.date(y, m, d)
            elif typ == 'L':
                value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
            result.append(value)
        yield result
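
# A minimal usage sketch for dbfreader (assuming the .dbf that accompanies
# the placept shapefile used by the test script; kept as a comment so the
# module stays import-safe):
#
#     f = open('placept_shp/placept.dbf', 'rb')
#     db = dbfreader(f)
#     fieldnames = db.next()      # row 1: field names
#     fieldspecs = db.next()      # row 2: (type, size, deci) specs
#     for record in db:           # remaining rows: the data
#         print zip(fieldnames, record)
#     f.close()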

def dbfwriter(f, fieldnames, fieldspecs, records):
    """Write the given records out as a binary Xbase DBF file.

    File f should be open for writing in a binary mode.

    Fieldnames should be no longer than ten characters and not include \x00.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime objects
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records should be a sequence of records (sequences of field values);
    len() is taken for the header, so a plain iterator is not enough.
    """
    # header info
    ver = 3
    now = datetime.datetime.now()
    yr, mon, day = now.year - 1900, now.month, now.day
    numrec = len(records)
    numfields = len(fieldspecs)
    lenheader = numfields * 32 + 33
    lenrecord = sum(field[1] for field in fieldspecs) + 1   # +1 for deletion flag
    hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
    f.write(hdr)

    # field specs
    for name, (typ, size, deci) in itertools.izip(fieldnames, fieldspecs):
        name = name.ljust(11, '\x00')
        fld = struct.pack('<11sc4xBB14x', name, typ, size, deci)
        f.write(fld)

    # terminator
    f.write('\r')

    # records
    for record in records:
        f.write(' ')                        # deletion flag
        for (typ, size, deci), value in itertools.izip(fieldspecs, record):
            if typ == "N":
                value = str(value).rjust(size, ' ')
            elif typ == 'D':
                value = value.strftime('%Y%m%d')
            elif typ == 'L':
                value = str(value)[0].upper()
            else:
                value = str(value)[:size].ljust(size, ' ')
            assert len(value) == size
            f.write(value)

    # End of file
    f.write('\x1A')
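
# A round-trip sketch exercising both functions (hypothetical demo.dbf and
# field layout, added for illustration; run the module directly to try it):
if __name__ == '__main__':
    fieldnames = ['SITEID', 'NAME']
    fieldspecs = [('N', 6, 0), ('C', 20, 0)]
    records = [[1, 'Kew Gardens'], [2, 'Darwin Harbour']]
    f = open('demo.dbf', 'wb')
    dbfwriter(f, fieldnames, fieldspecs, records)
    f.close()
    f = open('demo.dbf', 'rb')
    for row in dbfreader(f):
        print row                   # field names, then specs, then data rows
    f.close()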