Skip to content
This repository has been archived by the owner on Apr 17, 2023. It is now read-only.

Commit

Permalink
pandas 0.25 compatibility (#84)
Browse files Browse the repository at this point in the history
* Replace df.__bytes__() with bytes()

* Update pandas versions for automated testing

* Remove duplicate tox dep

* Use pandas.util hash function

* clean

* update check if hash_init was set

* Make df hash a single int

* hash on object

* pandas >= 0.25 fix

* clean

* update ndarray only if needed

* Ignore notebook checkpoints

* Update version and changelog

* Update df hashing

* Remove default from hash call
  • Loading branch information
Spayralbe committed Oct 17, 2019
1 parent 5958501 commit a4a8b7b
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,6 @@ to_pip.sh
*.ipynb
*.log
~$*.xlsx
.ipynb_checkpoints/
!tmtk/arborist/static/jstree/dist
!examples/*.ipynb
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ env:
- TOX_ENV=pandas0.22
- TOX_ENV=pandas0.23
- TOX_ENV=pandas0.24
- TOX_ENV=pandas0.25

install:
- pip install tox-travis codecov coverage
Expand Down
4 changes: 4 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ Changelog
=========


.. topic:: Version 0.5.7

* Make compatible with pandas 0.25

.. topic:: Version 0.5.6

* Add dimension_type and sort_order columns to the dimension_description
Expand Down
4 changes: 4 additions & 0 deletions tmtk/toolbox/template_reader/sheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ def get_level_columns(self, column_name):
column_index = self.df.columns.get_loc(column_name)
lower_columns = self.df.columns[:column_index].str.lower()
level_columns = lower_columns.str.contains('level') & ~lower_columns.str.contains('metadata')
# Pad the boolean array with False values so its length matches the number of columns in the full df
missing_false_values = len(self.df.columns) - len(level_columns)
if missing_false_values > 0:
level_columns = pd.np.append(level_columns, [False] * missing_false_values)
return level_columns


Expand Down
17 changes: 12 additions & 5 deletions tmtk/utils/filebase.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import os
from hashlib import sha256

import pandas as pd
from pandas.util import hash_pandas_object

from . import file2df, df2file, cached_property, Message


def hash_df_to_single_int(df) -> int:
    """
    Collapse a dataframe into a single integer fingerprint.

    Row values and the index are hashed with pandas' ``hash_pandas_object``.
    That function does not take the column labels into account, so the labels
    are hashed separately and fed into the same SHA-256 digest; otherwise
    renaming a column would leave the fingerprint unchanged.

    :param df: the pd.DataFrame to fingerprint.
    :return: the SHA-256 digest interpreted as a big-endian integer.
    """
    digest = sha256(hash_pandas_object(df).values)
    # Column labels are not part of the per-row hashes computed above.
    digest.update(hash_pandas_object(df.columns).values)
    return int.from_bytes(digest.digest(), 'big')


class FileBase:
"""
Super class with shared utilities for file objects.
Expand All @@ -23,7 +30,7 @@ def _df(self):
Message.okay("Creating dataframe for: {}".format(self))
df = self.create_df()
df = self._df_processing(df)
self._hash_init = hash(df.__bytes__())
self._hash_init = hash_df_to_single_int(df)
return df

@property
Expand All @@ -36,7 +43,7 @@ def df(self, value):
if not isinstance(value, pd.DataFrame):
raise TypeError('Expected pd.DataFrame object.')
value = self._df_processing(value)
self._hash_init = self._hash_init or 1
self._hash_init = self._hash_init if self._hash_init is not None else 1
self._df = value

def _df_processing(self, df):
Expand All @@ -57,14 +64,14 @@ def _df_processing(self, df):
return df

def __hash__(self):
return hash(self.df.__bytes__())
return hash_df_to_single_int(self.df)

@property
def df_has_changed(self):
if not self._hash_init:
if self._hash_init is None:
return False
else:
return hash(self) != self._hash_init
return hash_df_to_single_int(self.df) != self._hash_init

@property
def header(self):
Expand Down
2 changes: 1 addition & 1 deletion tmtk/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
(see accompanying file LICENSE).
"""

version_info = (0, 5, 6)
version_info = (0, 5, 7)
__version__ = '.'.join(map(str, version_info))
5 changes: 2 additions & 3 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
[tox]
envlist=pandas{0.20,0.21,0.22,0.23,0.24}
envlist=pandas{0.22,0.23,0.24,0.25}

[testenv]
commands = coverage run --branch --omit={envdir}/*,tests/*.py -m unittest discover tests "*_tests.py"
deps =
coverage
pandas0.20: pandas>=0.20.0,<0.21.0
pandas0.21: pandas>=0.21.0,<0.22.0
pandas0.22: pandas>=0.22.0,<0.23.0
pandas0.23: pandas>=0.23.0,<0.24.0
pandas0.24: pandas>=0.24.0,<0.25.0
pandas0.25: pandas>=0.25.0,<0.26.0
-rrequirements-dev.txt

0 comments on commit a4a8b7b

Please sign in to comment.