In [6]:
pip install -q mmh3

Note: you may need to restart the kernel to use updated packages.


In [7]:
from SessionHandler import Session
import polars as pl

In [8]:
import mmh3
import polars as pl

def hash_pl_dataframe(df: pl.DataFrame, seed=42) -> str:
    """Fingerprint a polars DataFrame as a hexadecimal digest.

    Each row is reduced to an integer with ``pl.DataFrame.hash_rows`` and the
    resulting integers are streamed, in row order, through an
    ``mmh3.mmh3_x64_128`` hasher. Because the digest is derived from
    ``hash_rows``, it is only stable within a single polars version.

    Args:
        df (pl.DataFrame): Frame to fingerprint.
        seed (int, optional): Seed handed to both ``hash_rows`` and the
            mmh3 hasher. Defaults to 42.

    Returns:
        str: Hex string of the hasher's final digest.
    """
    per_row = df.hash_rows(seed=seed)
    digest_state = mmh3.mmh3_x64_128(seed=seed)
    for value in per_row:
        # Serialise every row hash as a 64-byte little-endian integer.
        encoded = value.to_bytes(64, "little")
        digest_state.update(encoded)
    raw = digest_state.digest()
    return raw.hex()

In [9]:
# Create the Session under test. max_versions=3 presumably caps how many
# past values are kept per key — TODO confirm against SessionHandler.
session = Session(max_versions=3)

# Store two plain integer values and print the session contents.
session.set('a', 1)
session.set('b', 2)
print(session.show())  # {'a': 1, 'b': 2}

# Overwrite 'a'; show() now reports the new value while 'b' is unchanged.
session.set('a', 10)
print(session.show())  # {'a': 10, 'b': 2}


{'a': 1, 'b': 2}
{'a': 10, 'b': 2}


In [16]:
# Small three-column frame (two integer columns, one string column) used as
# the DataFrame value stored in the session.
dummy = dict(
    a=[1, 2, 3],
    b=[3, 4, 5],
    c=['gg', 'ez', 'go0g'],
)
dummy_df = pl.DataFrame(dummy)
print(dummy_df)

shape: (3, 3)
┌─────┬─────┬──────┐
│ a   ┆ b   ┆ c    │
│ --- ┆ --- ┆ ---  │
│ i64 ┆ i64 ┆ str  │
╞═════╪═════╪══════╡
│ 1   ┆ 3   ┆ gg   │
│ 2   ┆ 4   ┆ ez   │
│ 3   ┆ 5   ┆ go0g │
└─────┴─────┴──────┘


In [11]:
# Inspect the runtime class of dummy_df; pl.DataFrame is the data_type that
# gets a custom hash function registered on the session.
type(dummy_df)

polars.dataframe.frame.DataFrame

In [23]:
# Register hash_pl_dataframe as the hash function the session uses for
# values of type pl.DataFrame.
session.set_hash_func(data_type=pl.DataFrame, hash_func=hash_pl_dataframe)

Hash function for <class 'polars.dataframe.frame.DataFrame'> has been added/modified.


In [24]:
# Store the DataFrame under the key 'data'.
session.set('data', dummy_df)

In [18]:
# Hash the frame directly (default seed 42) so the digest can be compared
# with the value the session records in _hashes['data'].
hash_pl_dataframe(dummy_df)

'e60190ded320cc3105b8ec6736bdabad'

In [25]:
# Debug view of the session's instance attributes (_data, _hashes,
# _key_history). NOTE(review): this reads private state and dumps full
# frames; consider removing it or using a public accessor before sharing.
session.__dict__

{'_data': {'a': 10,
  'b': 2,
  'data': shape: (3, 3)
  ┌─────┬─────┬──────┐
  │ a   ┆ b   ┆ c    │
  │ --- ┆ --- ┆ ---  │
  │ i64 ┆ i64 ┆ str  │
  ╞═════╪═════╪══════╡
  │ 1   ┆ 3   ┆ gg   │
  │ 2   ┆ 4   ┆ ez   │
  │ 3   ┆ 5   ┆ go0g │
  └─────┴─────┴──────┘},
 '_hashes': {'a': '1366d1c7cf5b1c1bf1470af568d02537',
  'b': '007d8c6c24a357e859f4873cf1e8a84a',
  'data': 'e60190ded320cc3105b8ec6736bdabad'},
 '_key_history': {'a': [None, 1],
  'b': [None],
  'data': [shape: (3, 3)
   ┌─────┬─────┬─────┐
   │ a   ┆ b   ┆ c   │
   │ --- ┆ --- ┆ --- │
   │ i64 ┆ i64 ┆ str │
   ╞═════╪═════╪═════╡
   │ 1   ┆ 3   ┆ gg  │
   │ 2   ┆ 4   ┆ ez  │
   │ 3   ┆ 5   ┆ gog │
   └─────┴─────┴─────┘,
   shape: (3, 3)
   ┌─────┬─────┬──────┐
   │ a   ┆ b   ┆ c    │
   │ --- ┆ --- ┆ ---  │
   │ i64 ┆ i64 ┆ str  │
   ╞═════╪═════╪══════╡
   │ 1   ┆ 3   ┆ gg   │
   │ 2   ┆ 4   ┆ ez   │
   │ 3   ┆ 5   ┆ go0g │
   └─────┴─────┴──────┘,
   shape: (3, 3)
   ┌─────┬─────┬──────┐
   │ a   ┆ b   ┆ c    │
   │ --- ┆ -

In [40]:
# Store dummy_df under 'data' again.
# NOTE(review): the execution counts jump from 25 to 40, so cells not shown
# here were run in between; results may differ after Restart & Run All.
session.set('data', dummy_df)

In [41]:
# Read back the value currently stored under 'data'.
session.get('data')

a,b,c
i64,i64,str
1,3,"""gg"""
2,4,"""ez"""
3,5,"""go0g"""


In [46]:
# Ask the session to undo the latest change to 'data'.
# NOTE(review): the recorded run (execution count 46) printed that no more
# undos were available, so this cell appears to have been run repeatedly.
session.undo('data')

No more undos available for key: data


In [47]:
# Read 'data' again after the undo; in the recorded run this shows an
# earlier version of the frame ('gog' instead of 'go0g').
session.get('data')

a,b,c
i64,i64,str
1,3,"""gg"""
2,4,"""ez"""
3,5,"""gog"""
