Skip to content
This repository

BUG: GH3493 fix Cannot append DataFrames with uint dtypes to HDFStore #3494

Merged
merged 2 commits into from 12 months ago

2 participants

Jeff Mellen jreback
Jeff Mellen

Fix for the self-reported uint bug in HDFStore (#3493).
Travis results (pass): https://travis-ci.org/jmellen/pandas/builds/6764554

jreback
Collaborator

Looking good. Yes, uint64 is a bit wonky, and I cannot think of a use case for it anyhow.

jreback
Collaborator

Please add a note to RELEASE.rst that references this bug and its issue number.

Jeff Mellen

Sorry for the delay this afternoon; here's the release note update. Thanks!

jreback merged commit 75b5a8e
jreback closed this
jreback
Collaborator
jreback commented

thanks for the PR!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
1  RELEASE.rst
Source Rendered
@@ -58,6 +58,7 @@ pandas 0.12.0
58 58
   - Duplicate indexes with getitem will return items in the correct order (GH3455_, GH3457_)
59 59
   - Fix sorting in a frame with a list of columns which contains datetime64[ns] dtypes (GH3461_)
60 60
   - DataFrames fetched via FRED now handle '.' as a NaN. (GH3469_)
  61
+  - Fix issue when storing uint dtypes in an HDFStore. (GH3493_)
61 62
 
62 63
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
63 64
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
13  pandas/io/pytables.py
@@ -1284,8 +1284,17 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep):
1284 1284
     def convert_string_data(self, data, itemsize):
1285 1285
         return data.astype('S%s' % itemsize)
1286 1286
 
  1287
+    def get_atom_coltype(self):
  1288
+        """ return the PyTables column class for this column """
  1289
+        if self.kind.startswith('uint'):
  1290
+            col_name = "UInt%sCol" % self.kind[4:]
  1291
+        else:
  1292
+            col_name = "%sCol" % self.kind.capitalize()
  1293
+
  1294
+        return getattr(_tables(), col_name)
  1295
+
1287 1296
     def get_atom_data(self, block):
1288  
-        return getattr(_tables(), "%sCol" % self.kind.capitalize())(shape=block.shape[0])
  1297
+        return self.get_atom_coltype()(shape=block.shape[0])
1289 1298
 
1290 1299
     def set_atom_data(self, block):
1291 1300
         self.kind = block.dtype.name
@@ -1383,7 +1392,7 @@ def get_atom_string(self, block, itemsize):
1383 1392
         return _tables().StringCol(itemsize=itemsize)
1384 1393
 
1385 1394
     def get_atom_data(self, block):
1386  
-        return getattr(_tables(), "%sCol" % self.kind.capitalize())()
  1395
+        return self.get_atom_coltype()()
1387 1396
 
1388 1397
     def get_atom_datetime64(self, block):
1389 1398
         return _tables().Int64Col()
15  pandas/io/tests/test_pytables.py
@@ -458,6 +458,21 @@ def test_append(self):
458 458
             store.append('df', df)
459 459
             tm.assert_frame_equal(store['df'], df)
460 460
 
  461
+            # uints - test storage of uints
  462
+            uint_data = DataFrame({'u08' : Series(np.random.random_integers(0, high=255, size=5), dtype=np.uint8),
  463
+                                   'u16' : Series(np.random.random_integers(0, high=65535, size=5), dtype=np.uint16),
  464
+                                   'u32' : Series(np.random.random_integers(0, high=2**30, size=5), dtype=np.uint32),
  465
+                                   'u64' : Series([2**58, 2**59, 2**60, 2**61, 2**62], dtype=np.uint64)},
  466
+                                  index=np.arange(5))
  467
+            _maybe_remove(store, 'uints')
  468
+            store.append('uints', uint_data)
  469
+            tm.assert_frame_equal(store['uints'], uint_data)
  470
+
  471
+            # uints - test storage of uints in indexable columns
  472
+            _maybe_remove(store, 'uints')
  473
+            store.append('uints', uint_data, data_columns=['u08','u16','u32']) # 64-bit indices not yet supported
  474
+            tm.assert_frame_equal(store['uints'], uint_data)
  475
+
461 476
     def test_append_some_nans(self):
462 477
 
463 478
         with ensure_clean(self.path) as store:
Commit_comment_tip

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.