Skip to content
This repository has been archived by the owner on Apr 17, 2023. It is now read-only.

Commit

Permalink
Update df hashing
Browse files Browse the repository at this point in the history
  • Loading branch information
Spayralbe committed Oct 14, 2019
1 parent 352cad2 commit 91662c5
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions tmtk/utils/filebase.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import os
from hashlib import sha256

import pandas as pd
from pandas.util import hash_pandas_object

from . import file2df, df2file, cached_property, Message


def hash_df_to_single_int(df) -> int:
    """
    Reduce a pandas object to a single integer fingerprint.

    The per-row hashes produced by ``pd.util.hash_pandas_object`` (index
    included) are fed as one byte buffer into SHA-256, and the resulting
    digest is interpreted as a big-endian unsigned integer.

    :param df: pandas object to fingerprint.
    :return: the SHA-256 digest of the row hashes, as an int.
    """
    row_hashes = pd.util.hash_pandas_object(df, index=True)
    # Hash the raw bytes of the row-hash array into one fixed-size digest.
    digest = sha256(row_hashes.values).digest()
    return int.from_bytes(digest, byteorder='big')


class FileBase:
"""
Super class with shared utilities for file objects.
Expand All @@ -25,7 +30,7 @@ def _df(self):
Message.okay("Creating dataframe for: {}".format(self))
df = self.create_df()
df = self._df_processing(df)
self._hash_init = int(hash_pandas_object(df).sum())
self._hash_init = hash_df_to_single_int(df)
return df

@property
Expand Down Expand Up @@ -59,14 +64,14 @@ def _df_processing(self, df):
return df

def __hash__(self):
return int(hash_pandas_object(self.df).sum())
return hash_df_to_single_int(self.df)

@property
def df_has_changed(self):
if self._hash_init is None:
return False
else:
return int(hash_pandas_object(self.df).sum()) != self._hash_init
return hash_df_to_single_int(self.df) != self._hash_init

@property
def header(self):
Expand Down

0 comments on commit 91662c5

Please sign in to comment.