Skip to content

Commit

Permalink
Fix hashing if the object has a name but the name is not a string. (#117)
Browse files Browse the repository at this point in the history

* Fix hashing if the object has a name but it's not a string.

* Add test

* Use pandas hashing for series
  • Loading branch information
domoritz committed Sep 17, 2019
1 parent c66b548 commit ee4c4fa
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
6 changes: 5 additions & 1 deletion lib/streamlit/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ def _to_bytes(self, obj, context):
return b"bool:1"
elif obj is False:
return b"bool:0"
elif util.is_type(obj, "pandas.core.frame.Series"):
import pandas as pd
return pd.util.hash_pandas_object(obj).sum()
elif util.is_type(obj, "pandas.core.frame.DataFrame"):
import pandas as pd

Expand All @@ -251,7 +254,8 @@ def _to_bytes(self, obj, context):
elif inspect.isbuiltin(obj):
return self.to_bytes(obj.__name__)
elif hasattr(obj, "name") and (
isinstance(obj, io.IOBase) or os.path.exists(obj.name)
isinstance(obj, io.IOBase) or
(isinstance(obj.name, string_types) and os.path.exists(obj.name))
):
# Hash files as name + last modification date + offset.
h = hashlib.new(self.name)
Expand Down
10 changes: 9 additions & 1 deletion lib/tests/streamlit/hashing_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,22 @@ def test_builtins(self):
self.assertEqual(get_hash(abs), get_hash(abs))
self.assertNotEqual(get_hash(abs), get_hash(type))

def test_pandas(self):
def test_pandas_dataframe(self):
# DataFrames built from identical data (df1, df3) must produce the same
# hash; a DataFrame holding a different value (df2) must not.
df1 = pd.DataFrame({"foo": [12]})
df2 = pd.DataFrame({"foo": [42]})
df3 = pd.DataFrame({"foo": [12]})

# Equality is by content, not identity: df1 and df3 are distinct objects.
self.assertEqual(get_hash(df1), get_hash(df3))
self.assertNotEqual(get_hash(df1), get_hash(df2))

def test_pandas_series(self):
# Series built from identical data (series1, series3) must produce the
# same hash; a Series with a different element (series2) must not.
# NOTE(review): the hashing.py hunk in this commit matches on the type
# name "pandas.core.frame.Series"; pandas appears to define Series in
# pandas.core.series, so confirm this test actually exercises the new
# Series branch rather than a fallback path.
series1 = pd.Series([1, 2])
series2 = pd.Series([1, 3])
series3 = pd.Series([1, 2])

# Equality is by content, not identity: series1 and series3 are distinct objects.
self.assertEqual(get_hash(series1), get_hash(series3))
self.assertNotEqual(get_hash(series1), get_hash(series2))

def test_numpy(self):
np1 = np.zeros(10)
np2 = np.zeros(11)
Expand Down

0 comments on commit ee4c4fa

Please sign in to comment.