### serialization
        python objects - > flattened content  -- string/file

            dump  -- into a file
            dumps -- into a python string

    De-serialization
        string/file --> unflattened content -> python objects

            load  -- from a file
            loads -- from a python string



    Marshalling
    Pickle
    shelve
    xml
    json
    yaml

## Pickle

In [1]:
import pickle

In [2]:
help(pickle)

Help on module pickle:

NAME
    pickle - Create portable serialized representations of Python objects.

MODULE REFERENCE
    https://docs.python.org/3.10/library/pickle.html
    
    The following documentation is automatically generated from the Python
    source files.  It may be incomplete, incorrect or include features that
    are considered implementation detail and may vary between Python
    implementations.  When in doubt, consult the module reference at the
    location listed above.

DESCRIPTION
    See module copyreg for a mechanism for registering custom picklers.
    See module pickletools source for extensive comments.
    
    Classes:
    
        Pickler
        Unpickler
    
    Functions:
    
        dump(object, file)
        dumps(object) -> string
        load(file) -> object
        loads(bytes) -> object
    
    Misc variables:
    
        __version__
        format_version
        compatible_formats

CLASSES
    builtins.Exception(builtins.BaseException)


###  Pickle to string object

In [3]:
# Serialize an object
data = {"name": "John", "age": 30, "married": True}

str({"name": "John", "age": 30, "married": True})

"{'name': 'John', 'age': 30, 'married': True}"

In [4]:
serialized_data = pickle.dumps(data)

serialized_data

b'\x80\x04\x95&\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x04name\x94\x8c\x04John\x94\x8c\x03age\x94K\x1e\x8c\x07married\x94\x88u.'

In [5]:
# Deserialize the object
deserialized_data = pickle.loads(serialized_data)

deserialized_data

{'name': 'John', 'age': 30, 'married': True}

     Asignment: try all data types(basic and data structures) in python for pickling and unpickling

### Pickle to file

In [6]:
data = {"name": "John", "age": 30, "married": True}


# Write to a pickle file
with open("data.pickle", "wb") as fh:  # .pickle/ .pkl
    pickle.dump(data, fh)

In [7]:
! cat data.pickle

��&       }�(�name��John��age�K�married��u.

In [8]:
# Read from a pickle file
with open("data.pickle", "rb") as file:
    loaded_data = pickle.load(file)


loaded_data

{'name': 'John', 'age': 30, 'married': True}

### Pickling Multiple Objects

In [9]:
# Pickle multiple objects
data1 = {"name": "John", "age": 30}
data2 = {"name": "Alice", "age": 25}

serialized_data = pickle.dumps((data1, data2))
print(f"{serialized_data =}")

serialized_data =b'\x80\x04\x95/\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x04name\x94\x8c\x04John\x94\x8c\x03age\x94K\x1eu}\x94(h\x01\x8c\x05Alice\x94h\x03K\x19u\x86\x94.'


In [10]:
# Unpickle multiple objects
deserialized_data = pickle.loads(serialized_data)
obj1, obj2 = deserialized_data

print(f"{obj1 =}")
print(f"{obj2 =}")

obj1 ={'name': 'John', 'age': 30}
obj2 ={'name': 'Alice', 'age': 25}


### Pickling and Unpickling with Compression

In [11]:
import pickle
import gzip

data = {"name": "John", "age": 30, "married": True}

# Pickle with compression
with gzip.open("data.pickle.gz", "wb") as file:
    pickle.dump(data, file)

# Unpickle with compression
with gzip.open("data.pickle.gz", "rb") as file:
    loaded_data = pickle.load(file)

print(f"{loaded_data =}")

loaded_data ={'name': 'John', 'age': 30, 'married': True}


In [12]:
! cat data.pickle.gz

O�ld�data.pickle k`��� �S4zX�sS���x�g�M�aNLO��-�Þ�XT���2��T �w�Y1   

### Pickling and Unpickling Objects with External Dependencies

In [13]:
import pickle
import numpy as np

data = {"array": np.array([1, 2, 3])}

# Pickle an object with NumPy array
serialized_data = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
print("serialized_data =", serialized_data)

# Unpickle an object with NumPy array
deserialized_data = pickle.loads(serialized_data)
print("deserialized_data =", deserialized_data)

serialized_data = b'\x80\x05\x95\x96\x00\x00\x00\x00\x00\x00\x00}\x94\x8c\x05array\x94\x8c\x12numpy.core.numeric\x94\x8c\x0b_frombuffer\x94\x93\x94(\x96\x18\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x94\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02i8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01<\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x03\x85\x94\x8c\x01C\x94t\x94R\x94s.'
deserialized_data = {'array': array([1, 2, 3])}


### Handling Unpickling Errors

In [14]:
import pickle

# Try to unpickle an object, handle errors
try:
    with open("data.pickle", "rb") as file:
        loaded_data = pickle.load(file)
except (pickle.UnpicklingError, FileNotFoundError) as e:
    print(f"Error loading pickle file: {e}")

### Pickling and Unpickling Large Objects Incrementally

In [15]:
import pickle
import io

data = [1, 2, 3] * int(1e6)
chunk_size = 1000

# Pickle large object incrementally
serialized_data = io.BytesIO()
with pickle.Pickler(serialized_data) as pickler:
    for i in range(0, len(data), chunk_size):
        pickler.dump(data[i : i + chunk_size])

# Unpickle large object incrementally
serialized_data.seek(0)
with pickle.Unpickler(serialized_data) as unpickler:
    deserialized_data = []

AttributeError: __enter__

In [16]:
n1 = 212312323123

pickle_str = pickle.dumps(n1)
print(f"pickle_str      :{pickle_str}")

retrieved_obj = pickle.loads(pickle_str)
print(f"retrieved_obj   :{retrieved_obj} {type(retrieved_obj)}")

pickle_str      :b'\x80\x04\x95\x08\x00\x00\x00\x00\x00\x00\x00\x8a\x053\xf4\xccn1.'
retrieved_obj   :212312323123 <class 'int'>


In [17]:
n1 = {"asd", "cat", "bat", "rat"}

pickle_str = pickle.dumps(n1)
print(f"pickle_str      :{pickle_str}")

retrieved_obj = pickle.loads(pickle_str)
print(f"retrieved_obj   :{retrieved_obj} {type(retrieved_obj)}")

pickle_str      :b'\x80\x04\x95\x1d\x00\x00\x00\x00\x00\x00\x00\x8f\x94(\x8c\x03bat\x94\x8c\x03cat\x94\x8c\x03asd\x94\x8c\x03rat\x94\x90.'
retrieved_obj   :{'bat', 'cat', 'asd', 'rat'} <class 'set'>


In [18]:
pickle_str[-12:]

b'asd\x94\x8c\x03rat\x94\x90.'