# Plain binary files

This file type requires the user to know the file content and data types beforehand. Numerical values and text are dumped into the file "as the computer sees them", i.e. as raw bytes without any header describing the shape or data type. This can lead to incompatibilities, because some (mostly older) computers store data types longer than 1 byte "big endian" (most significant byte first), while most modern computers use "little endian" (least significant byte first).


In [1]:
import numpy as np

In [2]:
# Build the 3x4 demo array: small values, values at the 16-bit integer
# limits (32767, 65535) and small fractions.
a = np.array([
    [1.0, 0.0, 5.5, 3.0],
    [-1.0, 3.14159, 32767, 65535],
    [0.0001, 0.001, 0.01, 0.1],
])
print(a)

[[ 1.00000e+00  0.00000e+00  5.50000e+00  3.00000e+00]
 [-1.00000e+00  3.14159e+00  3.27670e+04  6.55350e+04]
 [ 1.00000e-04  1.00000e-03  1.00000e-02  1.00000e-01]]


In [3]:
def write_array(file, a, format='float'):
    """
    Write a NumPy array into a plain binary file.

    Only the raw element bytes are written (as produced by
    ``ndarray.tobytes()``); neither the shape nor the dtype is stored,
    so the reader has to know both.

    Parameters:
        file: str, path-like or file-like object
            The filename or fully qualified path of the file to create, or an
            already opened binary file-like object (anything with a
            ``write`` method). A file-like object is not closed here.
        a: numpy.ndarray
            The NumPy array to be written.
        format: str, optional
            The dtype the array is converted to before writing.
            Default is 'float'='f8'.
            Numpy dtypes (e.g. f4, f8, i1, i2, i4, i8, u2, U) or Python standard types (int, float, str) can be used.

    Returns:
        None
    """
    a_bytes = a.astype(format).tobytes()

    # Honour the documented "file-like object" option: write directly to
    # objects that expose write(), otherwise treat the argument as a path.
    if hasattr(file, 'write'):
        file.write(a_bytes)
        return

    with open(file, 'wb') as f:
        f.write(a_bytes)

            
def read_array(file, shape, format='float'):
    """
    Read a NumPy array from a plain binary file.

    The whole file content is interpreted as a flat sequence of elements of
    the given dtype and then reshaped. Nothing in the file says which dtype
    or shape is correct, so a wrong ``format`` silently yields wrong values.

    Parameters:
        file: str, path-like or file-like object
            The filename or fully qualified path of the file to read, or an
            already opened binary file-like object (anything with a
            ``read`` method). A file-like object is not closed here.
        shape: tuple
            The shape of the NumPy array to be read.
        format: str, optional
            The dtype used to interpret the bytes. Default is 'float'='f8'.
            The dtype needs a fixed, non-zero item size (e.g. 'U32' rather
            than plain 'str').

    Returns:
        numpy.ndarray
            The NumPy array read from the file. It is a read-only view on the
            bytes that were read; call ``.copy()`` to get a writable array.

    Raises:
        ValueError
            If the number of bytes is not a multiple of the dtype's item size,
            or if the number of elements does not fit ``shape``.
    """
    # Honour the documented "file-like object" option: read directly from
    # objects that expose read(), otherwise treat the argument as a path.
    if hasattr(file, 'read'):
        a_bytes = file.read()
    else:
        with open(file, 'rb') as f:
            a_bytes = f.read()

    a = np.frombuffer(a_bytes, dtype=format)
    return a.reshape(shape)

def bytes_from_file(file):
    """
    Return the complete raw content of a file as a bytes object.

    The file is opened in binary mode, so no decoding takes place.
    """
    with open(file, mode='rb') as stream:
        return stream.read()


In [4]:
# Write the array as 8-byte floats and show the raw file content as hex.
# The variable is called "fmt" rather than "format" so that the built-in
# format() function is not shadowed for the following cells.
fmt = "float"
filename = f"test_{fmt}.dat"
write_array(filename, a, format=fmt)
print(f"{fmt}: {bytes_from_file(filename).hex()}")

float: 000000000000f03f000000000000000000000000000016400000000000000840000000000000f0bf6e861bf0f921094000000000c0ffdf4000000000e0ffef402d431cebe2361a3ffca9f1d24d62503f7b14ae47e17a843f9a9999999999b93f


In [None]:
# Write the array as 4-byte floats and show the raw file content as hex.
# The variable is called "fmt" rather than "format" so that the built-in
# format() function is not shadowed for the following cells.
fmt = "f4"
filename = f"test_{fmt}.dat"
write_array(filename, a, format=fmt)
print(f"{fmt}: {bytes_from_file(filename).hex()}")

In [None]:
# Write the array as signed 2-byte integers and show the file content as hex.
# NOTE: fractional parts are lost by the integer cast, and 65535 is outside
# the signed 16-bit range, so its stored value is not meaningful.
# The variable is called "fmt" rather than "format" so that the built-in
# format() function is not shadowed for the following cells.
fmt = "i2"
filename = f"test_{fmt}.dat"
write_array(filename, a, format=fmt)
print(f"{fmt}: {bytes_from_file(filename).hex()}")

In [None]:
# Write the array as unsigned 2-byte integers and show the file content as hex.
# NOTE: fractional parts are lost by the integer cast, and -1.0 is outside
# the unsigned range, so its stored value is not meaningful.
# The variable is called "fmt" rather than "format" so that the built-in
# format() function is not shadowed for the following cells.
fmt = "u2"
filename = f"test_{fmt}.dat"
write_array(filename, a, format=fmt)
print(f"{fmt}: {bytes_from_file(filename).hex()}")

In [None]:
# Convert every element to its text representation, write that and show the
# raw file content as hex.
# The variable is called "fmt" rather than "format" so that the built-in
# format() function is not shadowed for the following cells.
fmt = "str"
filename = f"test_{fmt}.dat"
write_array(filename, a, format=fmt)
print(f"{fmt}: {bytes_from_file(filename).hex()}")

In [None]:
# The array was written with shape (3, 4); read it back with the same shape
# and dtype so that the original layout is recovered.
afloat = read_array("test_float.dat", (3, 4), format='float')
print(afloat)

In [None]:
# The array was written with shape (3, 4); read the 16-bit integer file back
# with the same shape so that the original layout is recovered.
aint16 = read_array("test_i2.dat", (3, 4), format='i2')
print(aint16)

In [5]:
# Now read the 8-byte float file, but mistakenly assume it contains 4-byte
# floats (f4): the file then holds twice as many elements as the shape allows.
afloat32 = read_array("test_float.dat", shape=(4, 3), format="f4")
print(afloat32)

ValueError: cannot reshape array of size 24 into shape (4,3)

In [6]:
# Pretend the file holds an 8x3 array so the reshape succeeds, and look at
# the values that result from interpreting 8-byte floats as 4-byte floats.
afloat32 = read_array("test_float.dat", shape=(8, 3), format="f4")
print(afloat32)

[[ 0.0000000e+00  1.8750000e+00  0.0000000e+00]
 [ 0.0000000e+00  0.0000000e+00  2.3437500e+00]
 [ 0.0000000e+00  2.1250000e+00  0.0000000e+00]
 [-1.8750000e+00 -1.9253077e+29  2.1426985e+00]
 [ 0.0000000e+00  6.9999695e+00  0.0000000e+00]
 [ 7.4999847e+00 -1.8890966e+26  6.0239995e-01]
 [-5.1896949e+11  8.1399995e-01  8.9128961e+04]
 [ 1.0350000e+00 -1.5881868e-23  1.4499999e+00]]
