In [5]:
import pickle

# Example payload: a dictionary holding two lists of equal length
data = dict(
    x=[6.28318, 2.71828, 1],
    y=[2, 3, 5],
)

# Serialize the dictionary to disk. pickle emits bytes, so the file
# has to be opened in binary write mode ('wb').
with open('data.p', 'wb') as handle:
    pickle.dump(data, handle)

Pickle files are binary, so they can't be opened and viewed directly in the notebook. Use `pickle.load` to read the contents back:

In [6]:
# Deserialize the dictionary from disk; 'rb' opens the file for
# binary reading (as opposed to 'wb', which is used for writing).
# pickle.load can execute arbitrary code, so only load files you trust.
with open('data.p', 'rb') as handle:
    data = pickle.load(handle)

print(data)

{'x': [6.28318, 2.71828, 1], 'y': [2, 3, 5]}


The loaded object is an ordinary Python dictionary, so it can now be edited with regular Python code.

In [7]:
# Extend each of the two lists by one value
for key, extra in (('x', 0), ('y', 7)):
    data[key].append(extra)

In [8]:
# Persist the modified dictionary; 'wb' overwrites data.p if it already exists
with open('data.p', 'wb') as handle:
    pickle.dump(data, handle)

**Numpy npy files**

Unlike pickle, the `.npy` format doesn't need to duplicate the data in memory when loading or saving a file, which is very convenient for large datasets.
It also supports memory-mapping (`np.load(..., mmap_mode='r')`), which allows reading small parts of a large dataset without loading the entire file into memory.

In [9]:
import numpy as np

# Build a one-dimensional half-precision (float16) array from the sample values
data = np.array([6.28318, 2.71828, 1], dtype='float16')

In [10]:
# Write the array to disk in NumPy's .npy format
np.save(file='data.npy', arr=data)

There is no need to open the file beforehand: NumPy opens and closes it automatically when given a filename.

In [11]:
# Load the array back from disk; as the last expression of the cell,
# the result is displayed below
np.load(file='data.npy')

array([6.28 , 2.719, 1.   ], dtype=float16)

In [12]:
# Dictionary holding one NumPy array per key
data = dict(
    x=np.array([6.28318, 2.71828, 1], dtype='float16'),
    y=np.array([2, 3, 5]),
)

# A dictionary is not an array, so NumPy wraps it in an array of
# type `object` and uses pickle to store that object in the .npy file
np.save(file='data.npy', arr=data)

# Reading pickled content back has to be enabled explicitly
np.load(file='data.npy', allow_pickle=True)

array({'x': array([6.28 , 2.719, 1.   ], dtype=float16), 'y': array([2, 3, 5])},
      dtype=object)

Here the file has a `.npy` extension but contains a pickled object. By default, `np.load` refuses to read pickled data for security reasons, which is why `allow_pickle=True` is required above.

**Numpy npz format**

This format makes it possible to store multiple arrays in a single file.

In [13]:
# Two separate arrays: half-precision floats and integers
x = np.array([6.28318, 2.71828, 1], dtype='float16')
y = np.array([2, 3, 5])

In [15]:
# Bundle both arrays into a single .npz archive; the keyword names
# (features, targets) become the keys used to retrieve each array later
np.savez(file='data.npz', features=x, targets=y)

In [16]:
# Open the archive with pickle support disabled
with np.load('data.npz', allow_pickle=False) as archive:
    # The archive is dictionary-like: list the names of the stored arrays
    print(archive.files)

    # Look up each array by its key and print it with a short label
    for label, key in (('x:', 'features'), ('y:', 'targets')):
        print(label, archive[key])

['features', 'targets']
x: [6.28  2.719 1.   ]
y: [2 3 5]
