# Numpy tips

In [2]:
import numpy as np

## Indexing, array/nd-array construction

Basic operations

In [3]:
x = np.array([1, 2, 3, 4])

NumPy uses a zero-based indexing system:

In [4]:
print("First element of the array is: ", x[0])

First element of the array is:  1


## Attributes of a NumPy array

In [5]:
x.ndim # the number of dimensions

1

In [6]:
x.size # the number of items in array

4

In [7]:
x.shape # shape of an array

(4,)

In [8]:
x.itemsize # the number of bytes occupied by each element

8

In [9]:
x.dtype # underlying data type of the elements (NumPy ships with many useful data types)

dtype('int64')

In [10]:
x.data # memoryview of the underlying data

<memory at 0x7fb10d30a948>

## Array creation

There are several commonly used functions to create arrays using numpy:
  * np.zeros  - creates an array filled with zeros;
  * np.ones   - creates an array filled with ones;
  * np.arange - creates an array of equally spaced values with a given step, e.g. 1,2,3,4,5 ...
  * np.linspace - creates an array of n equally spaced points spanning a specified interval [a,..,b] (both endpoints are included by default);
  * np.empty   - creates an uninitialized array of the desired shape
  * np.ones_like - creates an array of ones based on the provided template array
  * np.zeros_like - creates an array of zeros based on the provided template array
  * np.empty_like - creates an uninitialized array whose shape is taken from the provided template array
  * np.full_like - creates an array using a template array and fills it with the specified value
  * np.full - creates an array of the specified shape and fills it with the specified value
  * np.fromfunction - creates an array by calling the specified function on the index coordinates

In [11]:
zeros = np.zeros((3, 3)) # make 3x3 array of zeros
print(zeros)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [12]:
ones = np.ones((3, 3, 3)) #make 3x3x3 array of ones
print(ones)

[[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]


In [13]:
np.full((3, 4), 10) # creates array of 3(rows)x4(columns) shape and fills it with 10

array([[10, 10, 10, 10],
       [10, 10, 10, 10],
       [10, 10, 10, 10]])

In [14]:
np.empty_like([3,4]) # Note: the function returns an uninitialized array, so the values shown are arbitrary!

array([-9093133594791772939,      140399231909784])

In [15]:
np.empty_like(ones) # No guarantee that the result is filled with ones: the contents are uninitialized

array([[[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]])

In [16]:
np.full_like(ones, 3) # Make an array using template `ones` (shape=3x3x3) and fill it with 3

array([[[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]],

       [[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]],

       [[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]]])

In [17]:
np.empty((4,5)) # just empty (uninitialized) array; it filled with arbitrary values of floats

array([[6.93664372e-310, 1.52984384e-316, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 4.85560926e-033,
        3.21142670e-322, 1.52116014e-316],
       [1.53211891e-316, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000]])

###  Building arrays of equally spaced values

In [18]:
np.arange(1, 100) # values from 1 to 99 

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [19]:
np.arange(1, 10, 0.2) # equally spaced values with specified step

array([1. , 1.2, 1.4, 1.6, 1.8, 2. , 2.2, 2.4, 2.6, 2.8, 3. , 3.2, 3.4,
       3.6, 3.8, 4. , 4.2, 4.4, 4.6, 4.8, 5. , 5.2, 5.4, 5.6, 5.8, 6. ,
       6.2, 6.4, 6.6, 6.8, 7. , 7.2, 7.4, 7.6, 7.8, 8. , 8.2, 8.4, 8.6,
       8.8, 9. , 9.2, 9.4, 9.6, 9.8])

In [20]:
# Note: no error is raised, the result is just an empty array, because the start value is greater than the end value while the step is positive
np.arange(10, 1, 0.2) 

array([], dtype=float64)

In [21]:
np.linspace(1, 10, 10) # ten values between [1, 10] including bounding values; this is useful when plotting graphs

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [22]:
np.fromfunction(lambda x, y: x + y, (2,2))

array([[0., 1.],
       [1., 2.]])

In [23]:
np.fromfunction(lambda x, y, z: x + y + z, shape=(3, 2, 3))

array([[[0., 1., 2.],
        [1., 2., 3.]],

       [[1., 2., 3.],
        [2., 3., 4.]],

       [[2., 3., 4.],
        [3., 4., 5.]]])

### Randomly generated arrays

NumPy has a random submodule that provides facilities to fill multidimensional arrays with randomly (with specified distribution) generated values.

This submodule includes:

   * np.random.rand  - generates uniformly distributed values of specified shape
   * np.random.randn - generates normally distributed values of specified shape
   * np.random.seed - seeds the random number generator (this is very important to get reproducible results)


In [24]:
np.random.rand(3, 3) # creates 3x3 matrix of random values sampled (uniform distribution is used) from [0, 1) 

array([[0.10546843, 0.73293819, 0.42881775],
       [0.30081699, 0.60815635, 0.85251351],
       [0.9564448 , 0.32030944, 0.7882488 ]])

In [25]:
np.random.randn(3, 3) # creates 3x3 matrix of random values sampled (standard normal distribution is used)

array([[ 1.72415299,  0.32855192, -1.03990431],
       [-2.31055624,  0.5703617 , -0.27302127],
       [ 0.69664634, -0.27071803,  1.52216203]])

In [26]:
np.random.seed(23) # sets state of random generator; this makes further results reproducible

## Universal functions

Universal functions operate elementwise and can be applied to NumPy arrays. These functions produce arrays of the same shape as the input ones.

    * np.sin
    * np.cos
    * np.tan
    * np.arcsin
    * np.arccos
    * np.arctan
    * np.sqrt
    * np.power
    * np.exp
    
    * np.frompyfunc

In [27]:
data = np.array([1, 2, 3])

In [28]:
np.sin(data)

array([0.84147098, 0.90929743, 0.14112001])

In [29]:
np.cos(data)

array([ 0.54030231, -0.41614684, -0.9899925 ])

In [30]:
# np.frompyfunc wraps a plain Python callable (here: 1 input, 1 output) into a
# ufunc, which is a handy way to get a vectorized version of a function
z = np.frompyfunc(lambda value: value ** 2, 1, 1)

### Array advanced indexing / Block manipulation

    * np.c_
    * np.r_
    * np.column_stack
    * np.hstack
    * np.row_stack
    * np.vstack
    * np.dstack

In [31]:
z = np.random.rand(3,10)

In [32]:
z.shape

(3, 10)

In [33]:
z.resize(10,3) # .resize modifies the array itself (in place)

In [34]:
z.shape

(10, 3)

In [35]:
z.reshape(3, 10).shape # .reshape returns a reshaped array; it does not change the shape of the input array

(3, 10)

In [36]:
z.shape

(10, 3)

In [37]:
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])

In [38]:
np.hstack([x, y])

array([1, 2, 3, 4, 5, 6])

In [39]:
np.vstack([x, y])

array([[1, 2, 3],
       [4, 5, 6]])

In [40]:
np.c_[x, y]

array([[1, 4],
       [2, 5],
       [3, 6]])

In [41]:
np.r_[x, y]

array([1, 2, 3, 4, 5, 6])

In [42]:
np.column_stack([x, y])

array([[1, 4],
       [2, 5],
       [3, 6]])

In [43]:
np.row_stack([x, y]) # gives the same result as np.vstack([x, y]); NOTE(review): row_stack is reportedly deprecated since NumPy 2.0 in favour of vstack - confirm for your version

array([[1, 2, 3],
       [4, 5, 6]])

In [44]:
np.dstack([x, y, x])

array([[[1, 4, 1],
        [2, 5, 2],
        [3, 6, 3]]])

In [45]:
np.concatenate([x, y], axis=0)

array([1, 2, 3, 4, 5, 6])

In [46]:
w = np.random.rand(3, 4)
v = np.random.rand(3, 4)

In [47]:
np.dstack([v,w]).shape 

(3, 4, 2)

In [48]:
np.stack([v, w]).shape # See the difference in shapes when applying dstack and stack

(2, 3, 4)

In [49]:
np.concatenate([v, w], axis=1).shape

(3, 8)

In [50]:
np.concatenate([v, w], axis=0).shape  # Be careful with the axis argument: axis=0 joins along rows, axis=1 joins along columns

(6, 4)

In [51]:
np.concatenate([v, w], axis=None).shape  # when axis=None, arrays are flattened before concatenation

(24,)

In [52]:
z = np.arange(100)

In [53]:
np.split(v, 2, axis=1)  # This function raises an exception when the array cannot be split into equal parts

[array([[0.65584546, 0.55770676],
        [0.50605463, 0.3967457 ],
        [0.90160219, 0.50575932]]), array([[0.17824727, 0.24058353],
        [0.48305519, 0.95573984],
        [0.82070149, 0.82771593]])]

In [54]:
np.array_split(v, 2)  # This function does not raise an exception when the array cannot be split into equal parts

[array([[0.65584546, 0.55770676, 0.17824727, 0.24058353],
        [0.50605463, 0.3967457 , 0.48305519, 0.95573984]]),
 array([[0.90160219, 0.50575932, 0.82070149, 0.82771593]])]

In [58]:
# reshaping with unknown dimension
data = np.random.rand(10, 10)
data.reshape(2, -1).shape  # 50 is automatically determined

(2, 50)

In [60]:
# Note: only ONE dimension may be left unknown (-1). Asking NumPy to infer two
# (or more) is ambiguous, so reshape raises ValueError. The error is caught and
# printed so the notebook still runs top to bottom.
try:
    data.reshape(2, -1, -1)
except ValueError as err:
    print("ValueError:", err)

ValueError: can only specify one unknown dimension

## Working with dates and times (np.datetime64 type)

In [1]:
import numpy as np 
np.busday_count(np.datetime64('2011-07-11'), np.datetime64('2011-07-18'))

5

In [4]:
np.busday_count(np.datetime64('2019-04-01'), np.datetime64('2019-04-30')) #  the end date is excluded: 21 business days from April 1 up to (not including) April 30, 2019; pass '2019-05-01' as the end date to count the whole month

21

In [12]:
np.is_busday(np.datetime64('2019-04-06'))  # Return False if Sat or Sun

False

## Constants

Some constants are predefined in NumPy; some of them are listed below:


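   * np.inf
   * np.NINF   # Negative infinity (removed in NumPy 2.0; use -np.inf)
   * np.nan
   * np.NZERO  # Negative zero (removed in NumPy 2.0; use -0.0)
   * np.PZERO  # Positive zero (removed in NumPy 2.0; use 0.0)
   * np.e
   * np.pi
   * np.euler_gamma
   * np.infty   # alias for inf (removed in NumPy 2.0; use np.inf)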


### Assigning to flat array

In [57]:
x = np.empty(shape=(3,3))

In [58]:
x.flat = list(range(9))

In [59]:
x

array([[0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.]])

In [61]:
x.flat = range(10,19)

## Broadcasting example

In [77]:
x = np.array([[1], [2], [3]])
y = np.array([4, 5, 6])

In [64]:
x

array([[1],
       [2],
       [3]])

In [65]:
y

array([4, 5, 6])

In [67]:
for item in np.broadcast(x, y):
    print(item)

(1, 4)
(1, 5)
(1, 6)
(2, 4)
(2, 5)
(2, 6)
(3, 4)
(3, 5)
(3, 6)


In [69]:
bb = np.broadcast(x, y)

In [70]:
z = np.empty(bb.shape)

In [71]:
# Fill z element by element with the product of each broadcast pair; the loop
# variables are named differently from the arrays x and y to avoid confusion
z.flat = [left * right for left, right in bb]

In [74]:
z == x * y

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [76]:
bb.reset(); next(bb)  # You need to reset bc-iterator to use it again

(1, 4)

### ufunc creation


In [93]:
myufunc = np.frompyfunc(lambda x, y: x * y, 2, 1)

In [94]:
for j in [x for x in dir(myufunc) if not x.startswith('__')]:
    print("Examining ufunc par: ", j, " val: ", getattr(myufunc,j))

Examining ufunc par:  accumulate  val:  <built-in method accumulate of numpy.ufunc object at 0x2cc8df0>
Examining ufunc par:  at  val:  <built-in method at of numpy.ufunc object at 0x2cc8df0>
Examining ufunc par:  identity  val:  None
Examining ufunc par:  nargs  val:  3
Examining ufunc par:  nin  val:  2
Examining ufunc par:  nout  val:  1
Examining ufunc par:  ntypes  val:  1
Examining ufunc par:  outer  val:  <built-in method outer of numpy.ufunc object at 0x2cc8df0>
Examining ufunc par:  reduce  val:  <built-in method reduce of numpy.ufunc object at 0x2cc8df0>
Examining ufunc par:  reduceat  val:  <built-in method reduceat of numpy.ufunc object at 0x2cc8df0>
Examining ufunc par:  signature  val:  None
Examining ufunc par:  types  val:  ['OO->O']


In [96]:
myufunc.outer(x, y)

array([[[4, 5, 6]],

       [[8, 10, 12]],

       [[12, 15, 18]]], dtype=object)

In [2]:
import numpy as np
# Compare the memory layout of a default (C-ordered) array with its
# Fortran-ordered counterpart.
x = np.arange(6).reshape(2,3)
y = np.asfortranarray(x)
# x is laid out row by row, so it is NOT Fortran-contiguous ...
assert not x.flags['F_CONTIGUOUS']
# ... whereas y is column-major; this last expression is the cell's output.
y.flags['F_CONTIGUOUS']

True

In [11]:
x,y = np.ogrid[:6,:6]

In [17]:
np.diag(np.diagflat([1,2,3]))

array([1, 2, 3])

In [19]:
np.vstack([[1,2,3], [3,4,5]]).mean(axis=1)

array([2., 4.])

## Moving axis

In [128]:
x = np.arange(9).reshape(3,3)

In [129]:
x.swapaxes(0,-1)

array([[0, 3, 6],
       [1, 4, 7],
       [2, 5, 8]])

In [130]:
np.moveaxis(x, 1, 0)

array([[0, 3, 6],
       [1, 4, 7],
       [2, 5, 8]])

In [131]:
x

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [132]:
data = np.arange(3*4*5).reshape(3, 4, 5)

In [133]:
data.swapaxes(1, 2)

array([[[ 0,  5, 10, 15],
        [ 1,  6, 11, 16],
        [ 2,  7, 12, 17],
        [ 3,  8, 13, 18],
        [ 4,  9, 14, 19]],

       [[20, 25, 30, 35],
        [21, 26, 31, 36],
        [22, 27, 32, 37],
        [23, 28, 33, 38],
        [24, 29, 34, 39]],

       [[40, 45, 50, 55],
        [41, 46, 51, 56],
        [42, 47, 52, 57],
        [43, 48, 53, 58],
        [44, 49, 54, 59]]])

In [134]:
np.moveaxis(data, 0, 2)

array([[[ 0, 20, 40],
        [ 1, 21, 41],
        [ 2, 22, 42],
        [ 3, 23, 43],
        [ 4, 24, 44]],

       [[ 5, 25, 45],
        [ 6, 26, 46],
        [ 7, 27, 47],
        [ 8, 28, 48],
        [ 9, 29, 49]],

       [[10, 30, 50],
        [11, 31, 51],
        [12, 32, 52],
        [13, 33, 53],
        [14, 34, 54]],

       [[15, 35, 55],
        [16, 36, 56],
        [17, 37, 57],
        [18, 38, 58],
        [19, 39, 59]]])