@@ -809,6 +809,8 @@ def prctile(x, p = (0.0, 25.0, 50.0, 75.0, 100.0)):
809809 If p is a scalar, the largest value of x less than or equal
810810 to the p percentage point in the sequence is returned.
811811 """
812+
813+
812814 x = npy .ravel (x )
813815 x .sort ()
814816 Nx = len (x )
@@ -1282,7 +1284,10 @@ def csv2rec(fname, comments='#', skiprows=0, checkrows=5, delimiter=',',
12821284 converterd, if not None, is a dictionary mapping column number or
12831285 munged column name to a converter function
12841286
1285- See examples/loadrec.py
1287+ names, if not None, is a list of header names. In this case, no
1288+ header will be read from the file
1289+
1290+ if no rows are found, None is returned. See examples/loadrec.py
12861291 """
12871292
12881293 if converterd is None :
@@ -1291,9 +1296,42 @@ def csv2rec(fname, comments='#', skiprows=0, checkrows=5, delimiter=',',
12911296 import dateutil .parser
12921297 parsedate = dateutil .parser .parse
12931298
1299+
12941300 fh = cbook .to_filehandle (fname )
1295- reader = csv .reader (fh , delimiter = delimiter )
12961301
1302+
class FH:
    """
    Thin wrapper around a file handle used for space delimited files.

    For space delimited files, we want different behavior than comma
    or tab.  Generally, we want multiple spaces to be treated as a
    single separator, whereas with comma and tab we want multiple
    commas to return multiple (empty) fields.  Every line handed out
    by this wrapper is therefore normalized by fix().
    """
    def __init__(self, fh):
        # fh is the underlying line-oriented file handle being wrapped
        self.fh = fh

    def close(self):
        'close the wrapped file handle'
        self.fh.close()

    def seek(self, arg):
        'reposition the wrapped file handle to offset arg'
        self.fh.seek(arg)

    def fix(self, s):
        """
        Return s with leading/trailing whitespace removed and every
        internal run of whitespace collapsed to a single space.
        """
        return ' '.join(s.split())

    def next(self):
        """
        Return the next normalized line; StopIteration propagates
        from the wrapped handle at end of file.
        """
        # The builtin next() works for both Python 2 (fh.next) and
        # Python 3 (fh.__next__) file objects, unlike calling
        # fh.next() directly.
        return self.fix(next(self.fh))

    # Python 3 spelling of the iterator protocol method
    __next__ = next

    def __iter__(self):
        'iterate over the wrapped handle, yielding normalized lines'
        for line in self.fh:
            yield self.fix(line)
1330+
1331+ if delimiter == ' ' :
1332+ fh = FH (fh )
1333+
1334+ reader = csv .reader (fh , delimiter = delimiter )
12971335 def process_skiprows (reader ):
12981336 if skiprows :
12991337 for i , row in enumerate (reader ):
@@ -1388,9 +1426,131 @@ def get_converters(reader):
13881426 rows .append ([func (val ) for func , val in zip (converters , row )])
13891427 fh .close ()
13901428
1429+ if not len (rows ):
1430+ return None
13911431 r = npy .rec .fromrecords (rows , names = names )
13921432 return r
13931433
1434+
def rec2csv(r, fname, delimiter=','):
    """
    Save the data from numpy record array r into a comma/space/tab
    delimited file.  The record array dtype names will be used for
    column headers, and every value is written as str(value).

    fname - can be a filename or a file handle.  Support for gzipped
    files is automatic, if the filename ends in .gz

    delimiter - the single character field separator handed to
    csv.writer

    The handle obtained from cbook.to_filehandle is always closed
    before returning, even if writing a row fails.
    """
    fh = cbook.to_filehandle(fname, 'w')
    try:
        writer = csv.writer(fh, delimiter=delimiter)
        # header row: the field names of the record array
        writer.writerow(r.dtype.names)
        for row in r:
            writer.writerow([str(val) for val in row])
    finally:
        # do not leak the handle when a write raises
        fh.close()
1452+
1453+ # some record array helpers
def rec_append_field(rec, name, arr, dtype=None):
    """
    Return a new record array with field name populated with data
    from array arr.

    rec - the source record array; it is not modified

    name - the name of the field to append

    arr - the data for the new field; converted with npy.asarray and
    assigned to the new field

    dtype - the dtype of the new field; if None, the dtype of arr
    (after conversion to an array) is used

    The result has the same shape as rec, holds copies of all the
    fields of rec followed by the new field, and is returned as a
    npy.recarray view.
    """
    arr = npy.asarray(arr)
    if dtype is None:
        dtype = arr.dtype

    # Build the dtype from the named fields rather than dtype.descr:
    # descr includes unnamed padding entries for aligned/offset
    # layouts, which would turn into spurious fields in the copy.
    oldnames = rec.dtype.names
    newdtype = npy.dtype([(field, rec.dtype[field]) for field in oldnames]
                         + [(name, dtype)])

    newrec = npy.empty(rec.shape, dtype=newdtype)
    for field in oldnames:
        newrec[field] = rec[field]
    newrec[name] = arr
    return newrec.view(npy.recarray)
1465+
1466+
def rec_drop_fields(rec, names):
    """
    Return a new numpy record array with fields in names dropped.

    rec - the source record array; it is not modified

    names - an iterable of field names to leave out; names that are
    not fields of rec are ignored

    The remaining fields keep their order and dtypes, the result has
    the same shape as rec, and it is returned as a npy.recarray view.
    """
    names = set(names)

    kept = [name for name in rec.dtype.names if name not in names]
    newdtype = npy.dtype([(name, rec.dtype[name]) for name in kept])

    # Allocate with rec.shape (not len(rec)) so that multi-dimensional
    # record arrays keep their shape and the per-field copies line up.
    newrec = npy.empty(rec.shape, dtype=newdtype)
    for field in kept:
        newrec[field] = rec[field]

    return newrec.view(npy.recarray)
1481+
1482+
def rec_join(key, r1, r2):
    """
    Join record arrays r1 and r2 on key; key is a tuple of field
    names (a single field name given as a string is also accepted).
    If r1 and r2 have equal values on all the keys in the key tuple,
    then their fields will be merged into a new record array
    containing the union of the fields of r1 and r2.

    The fields of the result are ordered as: the key fields, the
    remaining fields of r1, then the fields of r2 that r1 does not
    have.  For a field name present in both, the values of r1 are
    used.  Rows are returned in the order in which their keys occur
    in r1.  If a key value occurs more than once within r1 (or r2),
    only its last occurrence takes part in the join.

    If a key field is a string in both arrays, the wider of the two
    string dtypes is used in the result.

    Raises ValueError if a key field is missing from r1 or r2.
    """
    if isinstance(key, str):
        # a bare string would otherwise be iterated character by character
        key = (key,)

    r1names = r1.dtype.names
    r2names = r2.dtype.names

    for name in key:
        if name not in r1names:
            raise ValueError('r1 does not have key field %s' % name)
        if name not in r2names:
            raise ValueError('r2 does not have key field %s' % name)

    def makekey(row):
        return tuple([row[name] for name in key])

    # map key value -> row index; later duplicates overwrite earlier ones
    r1d = dict([(makekey(row), i) for i, row in enumerate(r1)])
    r2d = dict([(makekey(row), i) for i, row in enumerate(r2)])

    # keys present in both arrays, in a deterministic (r1 row) order
    common = [k for k in r1d if k in r2d]
    common.sort(key=r1d.__getitem__)

    # explicit integer index arrays so that an empty join also works
    r1ind = npy.array([r1d[k] for k in common], dtype=int)
    r2ind = npy.array([r2d[k] for k in common], dtype=int)

    r1sel = r1[r1ind]
    r2sel = r2[r2ind]

    def key_desc(name):
        'if name is a string key, use the larger size of r1 or r2 before merging'
        dt1 = r1.dtype[name]
        dt2 = r2.dtype[name]
        # dtype.kind is 'S' for byte strings and 'U' for unicode
        # strings; itemsize is what tells a wide string from a narrow one
        if (dt1.kind in ('S', 'U') and dt2.kind == dt1.kind
                and dt2.itemsize > dt1.itemsize):
            return (name, dt2)
        return (name, dt1)

    keydesc = [key_desc(name) for name in key]

    # fields of r2 that r1 does not already provide
    taken = set(r1names)
    r2only = [name for name in r2names if name not in taken]

    newdtype = npy.dtype(
        keydesc +
        [(name, r1.dtype[name]) for name in r1names if name not in key] +
        [(name, r2.dtype[name]) for name in r2only])

    newrec = npy.empty(len(r1sel), dtype=newdtype)
    for field in r1names:
        newrec[field] = r1sel[field]

    for field in r2only:
        newrec[field] = r2sel[field]

    return newrec.view(npy.recarray)
1553+
13941554def slopes (x ,y ):
13951555 """
13961556 SLOPES calculate the slope y'(x) Given data vectors X and Y SLOPES
0 commit comments