Life of ndArray

/*
 * C-level layout behind every ndarray: a raw block of bytes plus the
 * metadata (element type, shape, strides) needed to interpret it.
 */
typedef struct PyArrayObject {
        PyObject_HEAD

        /* Block of memory */
        char *data;             /* address of the data block (cf. __array_interface__['data']) */

        /* Data type descriptor */
        PyArray_Descr *descr;   /* describes a single item: type, itemsize, byte order */

        /* Indexing scheme */
        int nd;                 /* presumably the number of dimensions, i.e. the length of the two arrays below */
        npy_intp *dimensions;   /* extent along each axis (the Python-level .shape) */
        npy_intp *strides;      /* bytes to step along each axis (the Python-level .strides) */

        /* Other stuff */
        PyObject *base;         /* object the memory is borrowed from (Python-level .base); unset when the array owns its data */
        int flags;              /* presumably the bit field behind .flags (C_CONTIGUOUS, OWNDATA, WRITEABLE, ...) */
        PyObject *weakreflist;  /* NOTE(review): looks like weak-reference bookkeeping -- confirm against the NumPy C-API docs */
} PyArrayObject;

### Block of memory

In [3]:
import numpy as np

In [4]:
# Three 64-bit integers: 3 items x 8 bytes = 24 bytes of data.
x = np.array([1, 2, 3], dtype="int64")

In [5]:
x

array([1, 2, 3])

In [6]:
x.data

<memory at 0x10691c1c8>

In [8]:
bytes(x.data)

b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00'

#### Memory address of the data:

In [11]:
x.__array_interface__['data'][0] 

140555522232128

In [12]:
x.__array_interface__

{'data': (140555522232128, False),
 'strides': None,
 'descr': [('', '<i8')],
 'typestr': '<i8',
 'shape': (3,),
 'version': 3}

#### Memory does not need to be owned by an ndarray:

In [22]:
x = b'1234'      # The 'b' is for "bytes", necessary in Python 3

In [23]:
x

b'1234'

In [25]:
# x is a bytes object (a plain str in Python 2). np.frombuffer wraps its
# existing buffer as an array of int8 values, one per byte (ASCII '1' is 49).

y=np.frombuffer(x, dtype=np.int8)
y

array([49, 50, 51, 52], dtype=int8)

In [26]:
y.data

<memory at 0x10691c348>

In [27]:
y.base is x

True

In [28]:
y.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : False
  WRITEABLE : False
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [29]:
y.__array_interface__

{'data': (4405374640, True),
 'strides': None,
 'descr': [('', '|i1')],
 'typestr': '|i1',
 'shape': (4,),
 'version': 3}

### Data types

dtype: describes a single item in the array: <br>

type	: scalar type of the data, one of: int8, int16, float64, et al. (fixed size); str, unicode, void (flexible size)<br>
itemsize :	size of the data block<br>
byteorder:	byte order: big-endian `>` / little-endian `<` / native `=` / not applicable `|`<br>
fields:	sub-dtypes, if it’s a structured data type<br>
shape:	shape of the array, if it’s a sub-array<br>

In [30]:
np.dtype(int).type

numpy.int64

In [31]:
np.dtype(int).itemsize

8

In [32]:
np.dtype(int).byteorder

'='

In [33]:
# Structured dtype for the fixed-size part of a WAV file header (44 bytes).
# Every multi-byte integer field is pinned to little-endian ("<") so the
# layout is the same no matter which byte order the host machine uses.
wav_header_dtype = np.dtype([
    ("chunk_id", (bytes, 4)),    # flexible-sized scalar type, item size 4
    ("chunk_size", "<u4"),       # little-endian unsigned 32-bit integer
    ("format", "S4"),            # 4-byte string
    ("fmt_id", "S4"),
    ("fmt_size", "<u4"),
    ("audio_fmt", "<u2"),        # little-endian unsigned 16-bit integer
    ("num_channels", "<u2"),     # .. more of the same ...
    ("sample_rate", "<u4"),
    ("byte_rate", "<u4"),
    ("block_align", "<u2"),
    ("bits_per_sample", "<u2"),
    ("data_id", ("S1", (2, 2))), # sub-array, just for fun!
    ("data_size", "<u4"),        # explicit "<" like the other integer fields
    #
    # the sound data itself cannot be represented here:
    # it does not have a fixed size
])

In [34]:
wav_header_dtype

dtype([('chunk_id', 'S4'), ('chunk_size', '<u4'), ('format', 'S4'), ('fmt_id', 'S4'), ('fmt_size', '<u4'), ('audio_fmt', '<u2'), ('num_channels', '<u2'), ('sample_rate', '<u4'), ('byte_rate', '<u4'), ('block_align', '<u2'), ('bits_per_sample', '<u2'), ('data_id', 'S1', (2, 2)), ('data_size', '<u4')])

In [37]:
np.dtype([("abc",int),("def",int)])

dtype([('abc', '<i8'), ('def', '<i8')])

#### Casting and re-interpretation/views

**casting**
- on assignment
- on array construction
- on arithmetic
- etc.
- and manually: .astype(dtype)
<br>

**data re-interpretation**
- manually: .view(dtype)

#### Casting
Casting in general copies data:

In [38]:
# np.float was only an alias for the builtin float and was removed in
# NumPy 1.24; np.float64 names the same 8-byte type explicitly.
x = np.array([1, 2, 3, 4], dtype=np.float64)

In [40]:
# astype converts every value to the new type and stores the result in a
# new array, so y does not share memory with x.
y = x.astype(np.int8)

In [41]:
# y + 1.5 is a float array; assigning it into y[:] casts back to int8,
# dropping the fractional part (2.5 -> 2, 3.5 -> 3, ...).
y[:] = y + 1.5

In [42]:
y

array([2, 3, 4, 5], dtype=int8)

#### Re-interpretation / viewing

In [None]:
#How to switch from one to another?
#1. Switch the dtype:

In [64]:
# Four one-byte items: the data block holds the bytes 01 02 03 04.
x = np.array([1, 2, 3, 4], dtype=np.uint8)
# Assigning to .dtype re-labels those same bytes in place as two
# little-endian int16 items: 0x0201 == 513 and 0x0403 == 1027.
x.dtype="<i2"
x

array([ 513, 1027], dtype=int16)

In [67]:
0x0201, 0x0403

(513, 1027)

In [66]:
0x01, 0x02

(1, 2)

In [49]:
# Note: little-endian means the least significant byte comes first (at the lowest address) in memory

In [50]:
#2.Create a new view:

In [71]:
# Reinterpret the same 4 bytes as one little-endian int32: 0x04030201.
y = x.view("<i4")
y

array([67305985], dtype=int32)

In [72]:
0x04030201

67305985

In [73]:
0x01020304

16909060

**Note**
<br>
.view() makes views: it does not copy (or alter) the memory block;
it only changes the dtype (and adjusts the array shape):

In [54]:
# Writing through x changes the bytes that y also looks at:
# 01 02 05 00 read as a little-endian int32 is 0x00050201 == 328193.
x[1] = 5

In [55]:
y

array([328193], dtype=int32)

In [57]:
y.base is x

True

### Indexing scheme: strides

In [79]:
# 3x3 grid of one-byte integers, stored row after row.
x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int8")

In [80]:
str(x.data)

'<memory at 0x10faae048>'

In [82]:
x.strides

(3, 1)

In [83]:
# C and Fortran order
# C order keeps each row contiguous: moving to the next row skips
# 3 items * 2 bytes = 6 bytes, moving to the next column skips 2 bytes.
x = np.array([[1, 2, 3], [4, 5, 6]], dtype="int16", order="C")
x.strides



(6, 2)

In [89]:

str(x.data)  

'<memory at 0x10fabb048>'

In [85]:
# Same values laid out in Fortran (column-major) order: each column is
# contiguous, so the row stride is 2 bytes and the column stride is
# 2 items * 2 bytes = 4 bytes.
y = np.array(x, order='F')
y.strides



(2, 4)

In [90]:
# NOTE(review): the address in a memoryview repr identifies the memoryview
# object itself, not the array's data block, so matching values here do not
# mean x and y share memory -- compare __array_interface__['data'] instead.
str(y.data)  

'<memory at 0x10fabb048>'

#### Slicing with integers

##### Everything can be represented by changing only shape, strides, and possibly adjusting the data pointer!
##### Never makes copies of the data

In [92]:
# np.float was removed in NumPy 1.24; np.float64 is the same 8-byte type,
# so the C-order strides are (10*10*8, 10*8, 8) bytes.
x = np.zeros((10, 10, 10), dtype=np.float64)
print(x.strides)

# Transposing only reverses shape and strides; the data block is reused.
print(x.T.strides)

(800, 80, 8)
(8, 80, 800)


### Example: fake dimensions with strides

In [93]:
from numpy.lib.stride_tricks import as_strided

In [94]:
# Four 2-byte integers; consecutive items are 2 bytes apart.
x = np.array([1, 2, 3, 4], dtype="int16")
x

array([1, 2, 3, 4], dtype=int16)

In [98]:
as_strided(x,strides=(2*2,),shape=(2,))

array([1, 3], dtype=int16)

In [99]:
x[::2]

array([1, 3], dtype=int16)

array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4]], dtype=int16)

### Broadcasting

In [106]:
as_strided(x,strides=(0,2),shape=(4,4))

array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4]], dtype=int16)

In [107]:
as_strided(x,strides=(2,0),shape=(4,4))

array([[1, 1, 1, 1],
       [2, 2, 2, 2],
       [3, 3, 3, 3],
       [4, 4, 4, 4]], dtype=int16)