In [23]:
import pandas as pd
import os
import pickle
from datetime import datetime

class GuepardDataFrame(pd.DataFrame):
    def __init__(self, *args, **kwargs):
        version_dir = kwargs.pop('version_dir', './versions')
        super().__init__(*args, **kwargs)
        self.current_version_path = os.path.join(version_dir, 'current_version.pkl')
        self.version_dir = version_dir
        self.versions_meta_file = os.path.join(version_dir, 'versions_meta.pkl')
        if not os.path.exists(self.version_dir):
            os.makedirs(self.version_dir)
        if 'data' in kwargs:
            self._load_data(kwargs['data'])
        else:
            self._load_current_version()
    
    def _load_data(self, data):
        super().__init__(data)
    
    def _load_current_version(self):
        if os.path.exists(self.current_version_path):
            with open(self.current_version_path, 'rb') as f:
                df = pickle.load(f)
            super().__init__(df)
    
    def commit(self, message=""):
        version_id = self._generate_version_id()
        self._save_current_version()
        self._store_version_meta(version_id, message)
        return version_id
    
    def _save_current_version(self):
        with open(self.current_version_path, 'wb') as f:
            pickle.dump(self, f)
    
    def _store_version_meta(self, version_id, message):
        versions_meta = self._load_versions_meta()
        versions_meta.append({'version_id': version_id, 'message': message, 'timestamp': datetime.now()})
        with open(self.versions_meta_file, 'wb') as f:
            pickle.dump(versions_meta, f)
    
    def _load_versions_meta(self):
        if os.path.exists(self.versions_meta_file):
            with open(self.versions_meta_file, 'rb') as f:
                return pickle.load(f)
        return []
    
    def list_versions(self):
        versions_meta = self._load_versions_meta()
        return [{'version_id': meta['version_id'], 'message': meta['message'], 'timestamp': meta['timestamp']} for meta in versions_meta]
    
    def rollback(self, version_id):
        version_path = os.path.join(self.version_dir, f"{version_id}.pkl")
        if not os.path.exists(version_path):
            raise ValueError("Version ID not found")
        with open(version_path, 'rb') as f:
            df = pickle.load(f)
        self._load_data(df)
        self._save_current_version()
    
    def save_version(self, version_id):
        version_path = os.path.join(self.version_dir, f"{version_id}.pkl")
        with open(version_path, 'wb') as f:
            pickle.dump(self, f)
    
    def _generate_version_id(self):
        return datetime.now().strftime("%Y%m%d_%H%M%S")

    def get_current_version(self):
        if os.path.exists(self.current_version_path):
            with open(self.current_version_path, 'rb') as f:
                df = pickle.load(f)
            return df
        else:
            return None

In [None]:

df = GuepardDataFrame(pd.read_csv("data.csv"), version_dir="./versions")

print("Initial DataFrame:")
print(df)

initial_version_id = df.commit("Initial version")
df.save_version(initial_version_id)

new_rows = pd.DataFrame({
    'id': [3, 4],
    'nom': ['alice', 'bob']
})



Initial DataFrame:
   id    nom
0   1   nour
1   2  kobbi


In [None]:
df=pd.concat([df, new_rows], ignore_index=True)
df = GuepardDataFrame(data=df, version_dir="./versions")
print(df)

   id    nom
0   1   nour
1   2  kobbi
2   3  alice
3   4    bob
   id    nom
0   1   nour
1   2  kobbi
2   3  alice
3   4    bob


In [26]:
df

Unnamed: 0,id,nom
0,1,nour
1,2,kobbi
2,3,alice
3,4,bob


In [27]:
# Commit the changes
new_version_id = df.commit("Added specific rows")
df.save_version(new_version_id)

In [28]:
# List versions
print("\nAvailable versions:")
for version in df.list_versions():
    print(version)


Available versions:
{'version_id': '20250328_033906', 'message': 'Initial version', 'timestamp': datetime.datetime(2025, 3, 28, 3, 39, 6, 470004)}
{'version_id': '20250328_033915', 'message': 'Added specific rows', 'timestamp': datetime.datetime(2025, 3, 28, 3, 39, 15, 927749)}


In [33]:
# Rollback to the initial version
df.rollback(version_id='20250328_033915')
print(f"\nDataFrame after rollback to version {initial_version_id}:")
print(df)


DataFrame after rollback to version 20250328_033906:
   id    nom
0   1   nour
1   2  kobbi
2   3  alice
3   4    bob


In [34]:
current_version = df.get_current_version()
print("\nCurrent Version DataFrame:")
print(current_version)



Current Version DataFrame:
None
