**1.3.3 [More elaborate arrays](http://scipy-lectures.org/intro/numpy/elaborate_arrays.html)**

本节内容：
- More data types
- Structured data types
- maskedarray: dealing with (propagation of) missing data

# 1.3.3.1. More data types

**Casting**

In [2]:
# The "bigger" type wins in mixed-type operations: adding a Python float to an
# integer array produces a floating-point array, so the .5 is not lost.

import numpy as np

np.array([1, 2, 3]) + 1.5

array([2.5, 3.5, 4.5])

In [3]:
# Assignment into an existing array never changes its dtype.
# The array is built from Python ints, so NumPy picks its default integer
# dtype (int64 in the recorded output; NOTE: this default can differ on other
# platforms / NumPy versions).

a = np.array([1, 2, 3])
a.dtype

dtype('int64')

In [5]:
# `a` keeps its integer dtype, so the assigned float is cast to fit the array
# rather than the array being converted to float.
a[0] = 1.9     # <-- float is truncated to integer: 1.9 is stored as 1
a

array([1, 2, 3])

In [8]:
# Forced casts: astype() builds a new array with the requested dtype.
# The fractional part is dropped, not rounded (1.7 -> 1, 1.6 -> 1).

a = np.array([1.7, 1.2, 1.6])
b = a.astype(int)  # <-- truncates to integer
b

array([1, 1, 1])

In [9]:
# Rounding to the nearest integer value.
# Values exactly halfway between two integers are rounded to the nearest EVEN
# value, so 1.5 -> 2., 2.5 -> 2., 3.5 -> 4. and 4.5 -> 4. (see output below).
a = np.array([1.2, 1.5, 1.6, 2.5, 3.5, 4.5])
b = np.around(a)
b                    # still floating-point


array([1., 2., 2., 2., 4., 4.])

In [10]:
# Round first, then cast: gives integers that are rounded instead of truncated.
c = np.around(a).astype(int)
c

array([1, 2, 2, 2, 4, 4])

**Different data type sizes**

In [11]:
# dtype=int requests NumPy's default integer type; .dtype shows which sized
# integer that resolves to on this machine.
np.array([1], dtype=int).dtype


dtype('int64')

In [12]:
# Largest value a signed 32-bit integer can hold, shown next to 2**31 - 1.
np.iinfo(np.int32).max, 2**31 - 1


(2147483647, 2147483647)

In [13]:
# Largest value an unsigned 32-bit integer can hold, shown next to 2**32 - 1.
np.iinfo(np.uint32).max, 2**32 - 1


(4294967295, 4294967295)

In [14]:
# Machine epsilon of float32: the gap between 1.0 and the next larger
# representable float32 value.
np.finfo(np.float32).eps

1.1920929e-07

In [15]:
# Machine epsilon of float64: the gap between 1.0 and the next larger
# representable float64 value (much smaller than for float32).
np.finfo(np.float64).eps


2.220446049250313e-16

In [16]:
# 1e-8 is well below float32's epsilon (~1.19e-07), so adding it to 1 is lost
# to rounding and the sum still compares equal to 1.
np.float32(1e-8) + np.float32(1) == 1


True

In [17]:
# 1e-8 is above float64's epsilon (~2.22e-16), so the sum is distinguishable
# from 1 in double precision.
np.float64(1e-8) + np.float64(1) == 1


False

# 1.3.3.2. Structured data types

In [25]:
# Structured array of 6 records. Each record has three named fields:
#   sensor_code - byte string of at most 4 bytes ('S4')
#   position    - float
#   value       - float
# np.zeros fills every field with its zero value (b'' for bytes, 0. for floats).
samples = np.zeros(
    (6,), dtype=[('sensor_code', 'S4'),    ('position', float), ('value', float)])

samples

array([(b'', 0., 0.), (b'', 0., 0.), (b'', 0., 0.), (b'', 0., 0.),
       (b'', 0., 0.), (b'', 0., 0.)],
      dtype=[('sensor_code', 'S4'), ('position', '<f8'), ('value', '<f8')])

In [28]:
# The array was created with shape (6,), so it has a single axis; the fields
# inside each record do not count as an extra dimension.
samples.ndim

1

In [29]:
# One entry per record; the three fields do not appear in the shape.
samples.shape

(6,)

In [30]:
# Field names, in the order they were declared in the dtype.
samples.dtype.names


('sensor_code', 'position', 'value')

In [33]:
# Fill all 6 records in place from a list of tuples: one tuple per record,
# with values given in field order (sensor_code, position, value).
# The str codes end up stored as bytes and the int positions as floats
# (see the output below).
samples[:] = [('ALFA',   1, 0.37), ('BETA', 1, 0.11), ('TAU', 1,   0.13),
              ('ALFA', 1.5, 0.37), ('ALFA', 3, 0.11), ('TAU', 1.2, 0.13)]

samples

array([(b'ALFA', 1. , 0.37), (b'BETA', 1. , 0.11), (b'TAU', 1. , 0.13),
       (b'ALFA', 1.5, 0.37), (b'ALFA', 3. , 0.11), (b'TAU', 1.2, 0.13)],
      dtype=[('sensor_code', 'S4'), ('position', '<f8'), ('value', '<f8')])

In [34]:
# Field access works by indexing with field names: the result is an array
# holding that one field for every record (here a bytes array, dtype '|S4').
samples['sensor_code']


array([b'ALFA', b'BETA', b'TAU', b'ALFA', b'ALFA', b'TAU'], dtype='|S4')

In [35]:
# Same field access for a numeric field: returns an ordinary float array.
samples['value']


array([0.37, 0.11, 0.13, 0.37, 0.11, 0.13])

In [36]:
# Integer indexing returns one whole record, with all of its fields.
samples[0]


(b'ALFA', 1., 0.37)

In [37]:
# A single field of a single record can be assigned through the record.
# This writes into `samples` itself, so record 0 stays b'TAU' in later cells.
samples[0]['sensor_code'] = 'TAU'
samples[0]

(b'TAU', 1., 0.37)

In [38]:
# Multiple fields at once: index with a LIST of field names (note the double
# brackets). The resulting dtype keeps the original offsets and itemsize of
# the selected fields, as shown in the output below.

samples[['position', 'value']]


array([(1. , 0.37), (1. , 0.11), (1. , 0.13), (1.5, 0.37), (3. , 0.11),
       (1.2, 0.13)],
      dtype={'names':['position','value'], 'formats':['<f8','<f8'], 'offsets':[4,12], 'itemsize':20})

In [39]:
# Boolean mask built from one field selects whole records.
# The comparison uses a bytes literal (b'ALFA') because the field is 'S4'.
# Record 0 was changed to b'TAU' earlier, so only two ALFA records remain.
samples[samples['sensor_code'] == b'ALFA']


array([(b'ALFA', 1.5, 0.37), (b'ALFA', 3. , 0.11)],
      dtype=[('sensor_code', 'S4'), ('position', '<f8'), ('value', '<f8')])

# 1.3.3.3. maskedarray: dealing with (propagation of) missing data

In [40]:
# For floats one could use NaNs, but masks work for all types.
# A truthy mask entry (1) marks the element at that position as masked
# (missing); masked entries are displayed as `--`.
x = np.ma.array([1, 2, 3, 4], mask=[0, 1, 0, 1])
x


masked_array(data=[1, --, 3, --],
             mask=[False,  True, False,  True],
       fill_value=999999)

In [41]:
# Masks propagate through arithmetic: a result element is masked when it is
# masked in either operand, so only position 0 (unmasked in both) survives.
y = np.ma.array([1, 2, 3, 4], mask=[0, 1, 1, 1])
x + y

masked_array(data=[2, --, --, --],
             mask=[False,  True,  True,  True],
       fill_value=999999)

In [42]:
# Masking versions of common functions: np.ma.sqrt masks the entries whose
# input is outside the function's domain (the negative numbers here).
np.ma.sqrt([1, -1, 2, -2])


masked_array(data=[1.0, --, 1.4142135623730951, --],
             mask=[False,  True, False,  True],
       fill_value=1e+20)