### Extend the DataFrame Class

In [5]:
import pandas as pd
import pickle

class SuperDataFrame(pd.DataFrame):
    """A ``pandas.DataFrame`` that carries ``version``, ``author`` and ``notes``.

    The three attributes start out as ``None`` and are filled in with the
    ``add_*`` methods. They are included in the pickle state, and frames
    derived from this one (``copy()``, ``head()``, slicing, ...) remain
    ``SuperDataFrame`` instances that keep the same metadata.
    """

    # Attribute names pandas should treat as instance metadata, not columns.
    _metadata = ['version', 'author', 'notes']

    def __init__(self, *args, **kwargs):
        """Create the frame; all arguments are forwarded to ``pd.DataFrame``."""
        self.version = None
        self.author = None
        self.notes = None
        super().__init__(*args, **kwargs)

    @property
    def _constructor(self):
        """Class pandas uses to build results of operations on this frame.

        Without this override pandas falls back to plain ``DataFrame`` and the
        metadata is lost on the first operation. ``type(self)`` is used rather
        than the class name so the result stays correct even if the name
        ``SuperDataFrame`` is later rebound to a different class.
        """
        return type(self)

    def add_version(self, version):
        """Set the ``version`` metadata attribute."""
        self.version = version

    def add_author(self, author):
        """Set the ``author`` metadata attribute."""
        self.author = author

    def add_notes(self, notes):
        """Set the ``notes`` metadata attribute."""
        self.notes = notes

    def __getstate__(self):
        """Return the parent's pickle state with the metadata merged in."""
        state = super().__getstate__()
        state.update({attr: getattr(self, attr) for attr in self._metadata})
        return state

    def __setstate__(self, state):
        """Restore metadata from ``state``, then let the parent restore the rest.

        Metadata keys are popped from ``state`` before it is handed to the
        parent; a key that is absent restores as ``None``.
        """
        for attr in self._metadata:
            setattr(self, attr, state.pop(attr, None))
        super().__setstate__(state)

In [6]:
# Build a small two-column SuperDataFrame
df = SuperDataFrame(data={'A': [1, 2, 3], 'B': [4, 5, 6]})

# Attach the descriptive metadata
df.add_version('1.0')
df.add_author('John Doe')
df.add_notes('Sample dataframe')

# Serialize the frame (metadata included) to disk
with open('mydataframe.pkl', 'wb') as sink:
    pickle.dump(df, sink)

# Load it back into a separate object for comparison
with open('mydataframe.pkl', 'rb') as source:
    df_restored = pickle.load(source)

In [7]:
df_restored

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [8]:
df_restored.version

'1.0'

### Make some attributes required (e.g. for posting to Dataverse)

In [9]:
import pandas as pd
import pickle

class SuperDataFrame(pd.DataFrame):
    """A ``pandas.DataFrame`` whose ``version``, ``author`` and ``notes`` are mandatory.

    All three must be passed as keyword arguments at construction time;
    omitting any of them (or passing ``None``) raises ``ValueError``. The
    attributes are included in the pickle state, and frames derived from this
    one (``copy()``, ``head()``, slicing, ...) keep the class and the metadata.
    """

    # Attribute names pandas should treat as instance metadata, not columns.
    _metadata = ['version', 'author', 'notes']

    def __init__(self, *args, version=None, author=None, notes=None, **kwargs):
        """Validate the required metadata, then build the underlying frame.

        Args:
            *args, **kwargs: forwarded unchanged to ``pd.DataFrame``.
            version, author, notes: required metadata values (keyword-only).

        Raises:
            ValueError: if any of ``version``, ``author`` or ``notes`` is ``None``.
        """
        if version is None or author is None or notes is None:
            raise ValueError("version, author, and notes are required attributes")
        self.version = version
        self.author = author
        self.notes = notes
        super().__init__(*args, **kwargs)

    @property
    def _constructor(self):
        """Callable pandas uses to build results of operations on this frame.

        pandas invokes the constructor with the data only, so the required
        keywords are re-supplied from this instance. If any metadata value is
        missing (for example an instance restored from state that lacked it),
        fall back to a plain ``DataFrame`` instead of raising mid-operation.
        """
        meta = {name: getattr(self, name, None) for name in self._metadata}
        if any(value is None for value in meta.values()):
            return pd.DataFrame
        cls = type(self)

        def _rebuild(*args, **kwargs):
            # Same class, same metadata, new data.
            return cls(*args, **meta, **kwargs)

        return _rebuild

    def __getstate__(self):
        """Return the parent's pickle state with the metadata merged in."""
        state = super().__getstate__()
        state.update({attr: getattr(self, attr) for attr in self._metadata})
        return state

    def __setstate__(self, state):
        """Restore metadata from ``state``, then let the parent restore the rest.

        Metadata keys are popped from ``state`` before it is handed to the
        parent; a key that is absent restores as ``None``.
        """
        for attr in self._metadata:
            setattr(self, attr, state.pop(attr, None))
        super().__setstate__(state)

In [10]:
# Try to create a SuperDataFrame without `notes`: the constructor is expected
# to reject it. The error is caught and printed so the demonstration is
# visible without aborting a Restart & Run All.
try:
    df = SuperDataFrame(data={'A': [1, 2, 3], 'B': [4, 5, 6]},
                        version='1.0',
                        author='John Doe')
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: version, author, and notes are required attributes