# Focus on the ndarray type

In [1]:
import numpy as np
import numpy.typing as npt

## Typing ndarrays
- `np.integer` is the supertype of `int8`, ..., `uint64`
- `np.floating` is the supertype of `float32`, ...

In [10]:
# Seeded generator so the draw, and every statistic derived from it in later
# cells, is reproducible under Restart & Run All. Outputs recorded before the
# seed was introduced came from an unseeded draw and differ until re-run.
rng = np.random.default_rng(seed=42)
# 10_000 x 10_000 samples from a normal distribution (mean 10, std 5).
# Stored as float64, this is 10**8 * 8 bytes, i.e. about 800 MB.
data: npt.NDArray[np.floating] = rng.normal(loc=10, scale=5, size=(10_000, 10_000))
data

array([[ 3.5778984 ,  9.36779126,  9.79377196, ..., 16.52432954,
        10.55792896,  9.59925685],
       [ 6.67892914,  2.59447695, 12.44393702, ...,  3.58069913,
        10.59004597,  0.87098387],
       [12.17709143,  8.44923574, 13.24505018, ..., 19.51648038,
        16.40344909,  4.13341405],
       ...,
       [ 5.61888806, 12.11251555, 11.14663522, ..., -2.90689405,
         4.70426458, 17.63071517],
       [11.72364193,  2.78377919,  7.10779272, ..., 15.85948528,
         6.92047002, 12.59690456],
       [ 7.8358548 , 13.64722859,  7.80314739, ...,  9.92595322,
        10.7512723 , 13.46745655]])

In [5]:
# Sample mean and standard deviation over all elements; they should land close
# to the distribution parameters (10 and 5) used to generate `data`.
(
    np.mean(data),
    np.std(data),
)

(9.999912836856355, 5.000010793848019)

In [6]:
# Concrete element type of the array (a subtype of the np.floating annotation).
data.dtype

dtype('float64')

## Slots and properties
https://docs.python.org/3/library/functions.html#property

In [12]:
# An ndarray rejects assignment to an attribute it does not define.
# Uncommenting the line below raises:
# AttributeError: 'numpy.ndarray' object has no attribute 'city'
# data.city = "Pau"

In [14]:
# NB: the `data` attribute is a memory buffer object exposing the block of
# memory that holds the array's numbers (see the `<memory at ...>` repr).
data.data

<memory at 0x00000169650F1560>

In [16]:
# The `data` attribute cannot be deleted.
# Uncommenting the line below raises:
# AttributeError: Cannot delete array data
# del data.data

In [18]:
# Assignment to the `data` attribute is type-checked: a plain int is rejected.
# Uncommenting the line below raises:
# TypeError: a bytes-like object is required, not 'int'
# data.data = 2

In [20]:
# `shape` is writable: assigning a new tuple changes the shape of this same
# array object. Here the 10_000 x 10_000 matrix becomes a flat array of 10**8
# elements, so every later cell sees `data` as 1-D.
data.shape = (10**8,)
data

array([ 3.5778984 ,  9.36779126,  9.79377196, ...,  9.92595322,
       10.7512723 , 13.46745655])

In [22]:
# The new shape must account for exactly the same number of elements
# (1000**3 = 10**9, but the array holds 10**8).
# Uncommenting the line below raises:
# ValueError: cannot reshape array of size 100000000 into shape (1000,1000,1000)
# data.shape = (1000, 1000, 1000)

## ndarray and subclasses: MaskedArray

In [23]:
# Smallest and largest sample values over the whole array.
data.min(), data.max()

(-17.155860217579438, 40.99584751893339)

In [29]:
# Boolean-mask selection: keep only the samples strictly below -10.
# The result is a 1-D array, so its first dimension is the number of matches.
negative_values = data[np.less(data, -10)]
nb = negative_values.shape[0]
negative_values

array([-11.14894038, -10.32070857, -10.76475524, ..., -10.1758062 ,
       -10.89142882, -10.48040674])

In [30]:
# Number of samples below -10 versus the total number of samples.
nb, len(data)

(3186, 100000000)

In [36]:
# Build a MaskedArray in which every sample strictly below -10 is masked
# (hidden); all other entries stay visible.
data_aboveM10 = np.ma.masked_where(data < -10, data)
data_aboveM10

masked_array(data=[3.5778984013407324, 9.367791261979862,
                   9.793771961439928, ..., 9.925953222977899,
                   10.751272301716293, 13.467456554688457],
             mask=[False, False, False, ..., False, False, False],
       fill_value=1e+20)

In [37]:
# A MaskedArray is at once an instance of MaskedArray, ndarray and object.
# isinstance() with a tuple means "any of these", and `object` matches every
# value, so the tuple form is always True. Checking each class individually
# makes the result actually demonstrate the subclass relationship.
all(
    isinstance(data_aboveM10, cls)
    for cls in (np.ma.MaskedArray, np.ndarray, object)
)

True

In [38]:
# Summary statistics of the masked array. The recorded minimum is just above
# -10 (instead of about -17 for the raw data), i.e. the masked samples are
# left out of these reductions.
data_aboveM10.min(), data_aboveM10.max(), data_aboveM10.mean(), data_aboveM10.std()

(-9.999928749564273, 40.99584751893339, 10.001144490201932, 4.998947767826213)