In [1]:
import numpy as np

In [2]:

# More Data Types


In [3]:
# Casting
np.array([1,2,3]) + 1.5

array([ 2.5,  3.5,  4.5])

In [5]:
# assignment never changes the type, though
a = np.array([4,2,3])
print(a.dtype)
a[0] = 1.9             # <-- Float gets truncated (not rounded) to Int to match the array type
a

int64


array([1, 2, 3])

In [7]:
# But you CAN force a cast to a new type
a = np.array([1,2, 1.5, 1.6, 2.5, 3.5, 4.5])
b = np.around(a)
print(b)
c = np.around(a).astype(int)
c

[ 1.  2.  2.  2.  2.  4.  4.]


array([1, 2, 2, 2, 2, 4, 4])

In [9]:
# Different Data Type Sizes

In [10]:
np.array([1], dtype=int).dtype

dtype('int64')

In [11]:
np.iinfo(np.int32).max, 2**31 - 1

(2147483647, 2147483647)

In [12]:
np.iinfo(np.uint32).max, 2**32 - 1

(4294967295, 4294967295)

In [13]:
np.finfo(np.float32).eps

1.1920929e-07

In [14]:
np.finfo(np.float64).eps

2.2204460492503131e-16

In [15]:
np.float32(1e-8) + np.float32(1) == 1

True

In [16]:
np.float64(1e-8) + np.float64(1) == 1

False

In [20]:
# Don't use special datatypes if you don't know you need them.

a = np.zeros((1e6,), dtype=np.float64)
b = np.zeros((1e6,), dtype=np.float32)

%timeit a*a
%timeit b*b

  app.launch_new_instance()


1000 loops, best of 3: 704 µs per loop
The slowest run took 6.21 times longer than the fastest. This could mean that an intermediate result is being cached.
1000 loops, best of 3: 274 µs per loop


In [21]:

# Structured Data Types


In [23]:
samples = np.zeros((6,), dtype=[('sensor_code', 'S4'),
                                ('position', float),
                                ('value', float)])
print(samples.ndim)
print(samples.shape)
print(samples.dtype.names)
print(samples)

samples[:] = [('ALFA',   1, 0.37), ('BETA', 1, 0.11), ('TAU', 1, 0.13),
              ('ALFA', 1.5, 0.37), ('BETA', 3, 0.11), ('TAU', 1.2, 0.13)]
samples

1
(6,)
('sensor_code', 'position', 'value')
[(b'', 0.0, 0.0) (b'', 0.0, 0.0) (b'', 0.0, 0.0) (b'', 0.0, 0.0)
 (b'', 0.0, 0.0) (b'', 0.0, 0.0)]


array([(b'ALFA', 1.0, 0.37), (b'BETA', 1.0, 0.11), (b'TAU', 1.0, 0.13),
       (b'ALFA', 1.5, 0.37), (b'BETA', 3.0, 0.11), (b'TAU', 1.2, 0.13)], 
      dtype=[('sensor_code', 'S4'), ('position', '<f8'), ('value', '<f8')])

In [24]:
# Field access works by indexing by the field names.  For example...
samples['sensor_code']

array([b'ALFA', b'BETA', b'TAU', b'ALFA', b'BETA', b'TAU'], 
      dtype='|S4')

In [25]:
samples['value']

array([ 0.37,  0.11,  0.13,  0.37,  0.11,  0.13])

In [26]:
samples['position']

array([ 1. ,  1. ,  1. ,  1.5,  3. ,  1.2])

In [27]:
samples[0]

(b'ALFA', 1.0, 0.37)

In [28]:
samples[0]['sensor_code']

b'ALFA'

In [29]:
samples[0]['sensor_code'] = 'TAU'
samples[0]

(b'TAU', 1.0, 0.37)

In [31]:
# Multiple simultaneous field access
samples[['sensor_code', 'value']]

array([(b'TAU', 0.37), (b'BETA', 0.11), (b'TAU', 0.13), (b'ALFA', 0.37),
       (b'BETA', 0.11), (b'TAU', 0.13)], 
      dtype=[('sensor_code', 'S4'), ('value', '<f8')])

In [32]:
# And the fancy indexing still works
samples[samples['sensor_code'] == 'ALFA']

  from ipykernel import kernelapp as app


(b'TAU', 1.0, 0.37)

In [33]:

# maskedarray: dealing with (propogation of) missing data


In [36]:
# For floats, one could use NaN, but masks work for all types
x = np.ma.array([1,2,3,4], mask=[0,1,0,1])
x

masked_array(data = [1 -- 3 --],
             mask = [False  True False  True],
       fill_value = 999999)

In [37]:
y = np.ma.array([1,2,3,4], mask=[0,1,1,1])
y

masked_array(data = [1 -- -- --],
             mask = [False  True  True  True],
       fill_value = 999999)

In [38]:
x + y

masked_array(data = [2 -- -- --],
             mask = [False  True  True  True],
       fill_value = 999999)

In [39]:
# Masking versions of common functions
np.ma.sqrt([1,-1,2,-2])

masked_array(data = [1.0 -- 1.4142135623730951 --],
             mask = [False  True False  True],
       fill_value = 1e+20)