In [1]:
import numpy as np

## Introducing Strides

In [2]:
x = np.arange(8, dtype = np.int8)
x

array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int8)

In [3]:
x.strides

(1,)

In [4]:
str(x.data)

'<memory at 0x7fd131e64100>'

In [5]:
x.shape = 2,4
x

array([[0, 1, 2, 3],
       [4, 5, 6, 7]], dtype=int8)

In [6]:
x.strides

(4, 1)

In [7]:
x.shape = 1,4,2
x.strides

(8, 2, 1)

In [8]:
x = np.ones((10000,))
y = np.ones((10000 * 100,))[::100]
x.shape, y.shape

((10000,), (10000,))

In [9]:
x == y

array([ True,  True,  True, ...,  True,  True,  True])

In [10]:
x.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [11]:
y.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [12]:
x.strides, y.strides

((8,), (800,))

In [13]:
%timeit x.sum()

6.02 µs ± 106 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [14]:
%timeit y.sum()

23.4 µs ± 334 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


## Structured arrays

In [15]:
x = np.empty((2,), dtype = ('i4,f4,a10'))
x[:] = [(1,0.5, 'NumPy'), (10,-0.5, 'Essential')]
x

array([( 1,  0.5, b'NumPy'), (10, -0.5, b'Essential')],
      dtype=[('f0', '<i4'), ('f1', '<f4'), ('f2', 'S10')])

In [16]:
x[0]

(1, 0.5, b'NumPy')

In [17]:
x['f2']

array([b'NumPy', b'Essential'], dtype='|S10')

In [18]:
y = x['f0']
y

array([ 1, 10], dtype=int32)

In [19]:
y[:] = y * 10
y

array([ 10, 100], dtype=int32)

In [20]:
y[:] = y + 0.5
y

array([ 10, 100], dtype=int32)

In [21]:
x

array([( 10,  0.5, b'NumPy'), (100, -0.5, b'Essential')],
      dtype=[('f0', '<i4'), ('f1', '<f4'), ('f2', 'S10')])

In [22]:
z = np.ones((2,), dtype=('3i4, (2,3)f4'))
z

array([([1, 1, 1], [[1., 1., 1.], [1., 1., 1.]]),
       ([1, 1, 1], [[1., 1., 1.], [1., 1., 1.]])],
      dtype=[('f0', '<i4', (3,)), ('f1', '<f4', (2, 3))])

In [23]:
x.dtype.names

('f0', 'f1', 'f2')

In [24]:
x.dtype.names = ('id', 'value', 'note')
x

array([( 10,  0.5, b'NumPy'), (100, -0.5, b'Essential')],
      dtype=[('id', '<i4'), ('value', '<f4'), ('note', 'S10')])

In [25]:
list_ex = np.zeros((2,), dtype = [('id', 'i4'), ('value', 'f4', (2,))])
list_ex

array([(0, [0., 0.]), (0, [0., 0.])],
      dtype=[('id', '<i4'), ('value', '<f4', (2,))])

In [26]:
dict_ex = np.zeros((2,), dtype = {'names':['id', 'value'], 'formats': ['i4', '2f4']})
dict_ex

array([(0, [0., 0.]), (0, [0., 0.])],
      dtype=[('id', '<i4'), ('value', '<f4', (2,))])

In [27]:
x[['id', 'note']]

array([( 10, b'NumPy'), (100, b'Essential')],
      dtype={'names':['id','note'], 'formats':['<i4','S10'], 'offsets':[0,8], 'itemsize':18})

### Dates and time in NumPy

In [28]:
x = np.datetime64('2015-04-01')
y = np.datetime64('2015-04')
x.dtype, y.dtype

(dtype('<M8[D]'), dtype('<M8[M]'))

In [29]:
y = np.datetime64('2015-04', 'D')
y, y.dtype

(numpy.datetime64('2015-04-01'), dtype('<M8[D]'))

In [30]:
x = np.arange('2015-01', '2015-04', dtype = 'datetime64[M]')
x

array(['2015-01', '2015-02', '2015-03'], dtype='datetime64[M]')

In [31]:
y = np.datetime64('2015-01-01')
x - y

array([ 0, 31, 59], dtype='timedelta64[D]')

In [33]:
np.datetime64('2015') + np.timedelta64(12, 'M')

numpy.datetime64('2016-01')

In [34]:
np.timedelta64(1, 'W') / np.timedelta64(1, 'D')

7.0

In [35]:
x

array(['2015-01', '2015-02', '2015-03'], dtype='datetime64[M]')

In [36]:
x.tolist()

[datetime.date(2015, 1, 1),
 datetime.date(2015, 2, 1),
 datetime.date(2015, 3, 1)]

In [37]:
[element.item() for element in x]

[datetime.date(2015, 1, 1),
 datetime.date(2015, 2, 1),
 datetime.date(2015, 3, 1)]

### File I/O and NumPy

In [40]:
id = np.arange(1000)
value = np.random.random(1000)
day = np.random.random_integers(0, 365, 1000) * np.timedelta64(1, 'D')
date = np.datetime64('2014-01-01') + day
rec_array = np.core.records.fromarrays([id, value, date], names='id, value, date', formats='i4, f4, a10')
rec_array[:5]

  day = np.random.random_integers(0, 365, 1000) * np.timedelta64(1, 'D')


rec.array([(0, 0.5592625 , b'2014-05-13'), (1, 0.72924864, b'2014-11-26'),
           (2, 0.5727561 , b'2014-09-03'), (3, 0.30992693, b'2014-04-22'),
           (4, 0.08592469, b'2014-03-11')],
          dtype=[('id', '<i4'), ('value', '<f4'), ('date', 'S10')])

In [43]:
np.savetxt('./record.csv', rec_array, fmt='%i,%.4f,%s')

In [48]:
read_array = np.genfromtxt('./record.csv', dtype='i4,f4,a10', delimiter=',', skip_header=0)
read_array[:5]

array([(0, 0.5593, b"b'2014-05-"), (1, 0.7292, b"b'2014-11-"),
       (2, 0.5728, b"b'2014-09-"), (3, 0.3099, b"b'2014-04-"),
       (4, 0.0859, b"b'2014-03-")],
      dtype=[('f0', '<i4'), ('f1', '<f4'), ('f2', 'S10')])

In [49]:
read_array.dtype.names = ('id', 'value', 'date')
mask = read_array['value'] >= 0.5
from numpy.lib.recfunctions import append_fields
read_array = append_fields(read_array, 'mask', data=mask, dtypes='i1')
read_array[:5]

masked_array(data=[(0, 0.5593000054359436, b"b'2014-05-", 1),
                   (1, 0.729200005531311, b"b'2014-11-", 1),
                   (2, 0.5727999806404114, b"b'2014-09-", 1),
                   (3, 0.3098999857902527, b"b'2014-04-", 0),
                   (4, 0.08590000122785568, b"b'2014-03-", 0)],
             mask=[(False, False, False, False),
                   (False, False, False, False),
                   (False, False, False, False),
                   (False, False, False, False),
                   (False, False, False, False)],
       fill_value=(999999, 1.e+20, b'N/A', 63),
            dtype=[('id', '<i4'), ('value', '<f4'), ('date', 'S10'), ('mask', 'i1')])