In [1]:
import numpy as np
import pandas as pd

# NumPy

In [2]:
# Creating a one dimensional array
a = np.array([2,4,6])
display(a)

array([2, 4, 6])

In [3]:
# Creating a two dimensional array
b = np.array([[1,2],
              [3,4],
              [5,6]])
print(b)


[[1 2]
 [3 4]
 [5 6]]


In [4]:
c = np.array([[[1,2,10],
               [2,3,11]],
              [[4,5,12],
               [6,7,13]]])

print(c.shape)

(2, 2, 3)


NumPy distinguishes (one-dimensional) vectors and 2-dimensional matrices with 1 width or height:

In [5]:
a = np.array([1,2,3])
print(a.shape)
b = np.array([[1,2,3]])
print(b.shape)

(3,)
(1, 3)


Modifying the original object will not modify the array:

In [6]:
a = [1,2,3]
b = np.array(a)
print(a)
print(b)
a[2] = 10
print(a)
print(b)

[1, 2, 3]
[1 2 3]
[1, 2, 10]
[1 2 3]


#### `np.arange`
- Generate evenly spaced values within a given [start, end) half-open interval (= like range(), start is included, end is excluded). 
- Most important parameters: [start] end [step]

In [7]:
a = np.arange(3)
b = np.arange(1,4)
c = np.arange(0,21,4)
print(a)
print(b)
print(c)

[0 1 2]
[1 2 3]
[ 0  4  8 12 16 20]


#### `np.linspace`
- Generate evenly spaced values within closed interval [start, end]. (end can be excluded with endpoint=False)
- Most important parameters:    start    end    [num]

In [8]:
np.linspace(0,20,num=5)

array([ 0.,  5., 10., 15., 20.])

#### Random arrays with `np.random`
* `rand` = random numbers from a Uniform(low=0, high=1) distribution (-> mean around 0.5)
* `randn` = random numbers from a Normal(mean=0, variance=1) distribution. (-> mean around 0)
* `randint` = random integer by default between 0 and the specified number.
* `permutation` = random permutation of numbers.

When not specifying dimensions, `rand`, `randn` and `randint` return a single random number.

rand

In [9]:
# A random float between 0 and 1
print(np.random.rand())

0.8362700251938371


In [10]:
# A random array with uniform distribution over [0,1)
np.random.rand(100)

array([0.44329777, 0.95834991, 0.55344831, 0.36812165, 0.82375293,
       0.06601073, 0.32386298, 0.51623212, 0.62979718, 0.04733893,
       0.86282576, 0.66167053, 0.40915178, 0.97156896, 0.64360727,
       0.88074579, 0.94336234, 0.02352873, 0.39606023, 0.09429231,
       0.55246428, 0.00214997, 0.0931024 , 0.0193509 , 0.38481638,
       0.34063557, 0.58899588, 0.37965849, 0.19208272, 0.78632162,
       0.18088651, 0.38166723, 0.92818444, 0.39720802, 0.71100254,
       0.19733093, 0.57626168, 0.75220759, 0.34569183, 0.6945031 ,
       0.6288408 , 0.12518495, 0.89739339, 0.73450573, 0.40482207,
       0.61776924, 0.40530786, 0.25977594, 0.37974974, 0.68484932,
       0.28851335, 0.54039261, 0.07927197, 0.85055284, 0.48963692,
       0.3799561 , 0.52697007, 0.33698597, 0.07941819, 0.01581213,
       0.39732789, 0.96605735, 0.69691867, 0.43700668, 0.3854167 ,
       0.33588584, 0.9133245 , 0.81856005, 0.31205934, 0.97193267,
       0.00899141, 0.02178379, 0.2417586 , 0.05727865, 0.34393

randn

In [11]:
print(np.random.randn())

1.4410900936765725


In [12]:
# A random array sampled from standard normal distribution
np.random.randn(100)

array([ 1.617285  ,  1.23848729,  1.86617925,  0.95122227, -0.54137515,
        0.39097818,  0.16872785, -0.49633292, -0.78525284,  0.86040213,
       -0.58754853,  1.22691717,  1.36699776,  0.02539124, -0.98321273,
       -0.77609018,  0.9419592 , -2.18034899,  0.03993631, -0.49908261,
        0.22306932, -0.88656815, -1.8166429 ,  0.3765828 ,  1.83305652,
        1.8241934 , -0.73347114, -0.22724347, -1.92867525, -0.24122329,
        0.10057997,  2.27033858, -0.25600732,  0.0975169 , -1.62783638,
        1.46241   , -0.09198657, -2.42600337, -1.15646146,  0.61993068,
       -0.89040749,  2.03797804,  0.04929783,  1.61407809,  1.16537744,
       -0.30086097, -1.82658396, -0.32369486, -0.28164602,  0.75195616,
        0.09232619,  1.41981045,  0.5414754 ,  0.42811127, -1.80960709,
       -0.64065989,  0.27944128, -1.69846967, -0.71277478,  0.34439434,
       -0.05549831, -0.30895541, -0.23886265, -2.17107949, -0.14853166,
        0.38385683,  0.30596062,  0.39477566, -0.54349472, -2.49

randint

In [13]:
# Generates random integer between 0 and 10
np.random.randint(10)

5

In [14]:
# Array of specified shape with random integer
np.random.randint(10,size=(2,3))

array([[7, 9, 9],
       [9, 3, 6]])

In [15]:
# Array of specified shape with random integer between 10 and 100
np.random.randint(10,100,size=4)

array([19, 88, 88, 43])

Permutation

In [16]:
np.random.permutation(10)

array([3, 7, 0, 6, 2, 9, 1, 8, 4, 5])

In [17]:
np.random.permutation(["one","two","three","four","five"])

array(['three', 'one', 'four', 'five', 'two'], dtype='<U5')

#### `Seeding`
* For reproducible work, it is good practice to generate **random numbers that can be reproduced**.
* This is not a contradiction, since computer-generated random numbers are in fact "pseudo-random" (generated using an algorithm, hence, reproducible).
* Solution: "**seeding**". In numpy, this has for a long time been done using "random states", although these are now being replaced with random generators.
* For a quick overview, see, e.g.: https://towardsdatascience.com/stop-using-numpy-random-seed-581a9972805f

In [18]:
for _ in range(3):
    print(np.random.randint(100))

92
88
0


In [19]:
for _ in range(3):
    np.random.seed(42)
    print(np.random.randint(100))

51
51
51


#### `np.ones`, `np.zeros`, `np.full`

In [20]:
print(np.ones(4))
print(np.ones((4,3)))
print(np.ones(4, dtype='bool'))

[1. 1. 1. 1.]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[ True  True  True  True]


In [21]:
print(np.zeros(4))
print(np.zeros((4,3)))
print(np.zeros(4, dtype='bool'))

[0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[False False False False]


In [22]:
np.full((2,3),'rabbit')

array([['rabbit', 'rabbit', 'rabbit'],
       ['rabbit', 'rabbit', 'rabbit']], dtype='<U6')

What is `dtype='<U6'`? The type of the values in the array:

* < = Little Endian
* U = Unicode
* 6 = length of longest string

So caution:

In [23]:
a = np.full(4, "rabbit")
a[0] = "capybara"
a

array(['capyba', 'rabbit', 'rabbit', 'rabbit'], dtype='<U6')

#### dtype of numpy arrays

A numpy array is **homogeneous** = contains elements of the same type: all elements are either floats or integers or strings etc. 

The most importanty NumPy types we will meet are

- unsigned integer types: uint8,...,uint64
- integer types: int8,..., int64
- float types: float8,...,float32,
- the boolean type: bool
- string types for strings of various length. e.g. <U5 for unicode string shorter than 6 chars.
- a general "object" type for arrays that can contain any Python object.


-> The `dtype` of an array can be specified in its constructor.

In [24]:
c = np.array([1,2,3], np.uint8)
c.dtype

dtype('uint8')

In [25]:
c = np.array([1,2,3], np.int16)
c.dtype

dtype('int16')

In [26]:
e = np.array([[1,2,3]])
print(e.dtype)
e = np.array([[1.,2,3]])
print(e.dtype)
e = np.array([["1.",2,3]])
print(e.dtype)

int32
float64
<U11


A `list` can be heterogeneous but a `ndarray` is homogeneous

In [27]:
mylist = [1, "one", 1.0]
for item in mylist:
    print("\t", item, "\t", type(item))

	 1 	 <class 'int'>
	 one 	 <class 'str'>
	 1.0 	 <class 'float'>


In [28]:
myarray = np.array([1, "one", 1.0])
for item in myarray:
    print("\t", item, "\t", type(item))

	 1 	 <class 'numpy.str_'>
	 one 	 <class 'numpy.str_'>
	 1.0 	 <class 'numpy.str_'>


#### Setting types -> `dtype` in creation and `astype` to change type

In [29]:
a = np.arange(0,5)
print(a)
print(a.dtype)

print()

b = np.arange(0,5, dtype="float")
print(b)
print(b.dtype)

[0 1 2 3 4]
int32

[0. 1. 2. 3. 4.]
float64


In [30]:
a = np.ones(5)
print(a)
print(a.dtype)

print()

b = a.astype("bool")
print(b)
print(b.dtype)

[1. 1. 1. 1. 1.]
float64

[ True  True  True  True  True]
bool


In [31]:
a = np.full(4,'rabbit')
a

array(['rabbit', 'rabbit', 'rabbit', 'rabbit'], dtype='<U6')

In [32]:
a = np.full(4,'rabbit',dtype='str')
a

array(['r', 'r', 'r', 'r'], dtype='<U1')

In [33]:
a = np.full(4,'rabbit')
a[0] = 'capybara'
a

array(['capyba', 'rabbit', 'rabbit', 'rabbit'], dtype='<U6')

In [34]:
a = np.full(4,'rabbit',dtype='<U100')
a[0] = 'capybara'
a

array(['capybara', 'rabbit', 'rabbit', 'rabbit'], dtype='<U100')

In [35]:
a[1] = 1
a

array(['capybara', '1', 'rabbit', 'rabbit'], dtype='<U100')

In [36]:
try:
    a[2] = ["kangaroo", "koala"]
except Exception as e:
    print(f"Exception: ({type(e).__name__}) {e}")

Exception: (ValueError) setting an array element with a sequence


In [37]:
a = np.full(4,'rabbit',dtype='object')
a[0] = 'capybara'
a[1] = ["kangaroo", "koala"]
a

array(['capybara', list(['kangaroo', 'koala']), 'rabbit', 'rabbit'],
      dtype=object)

An array is N-dimensional (1$\leq$N). 
* Its dimensions are called **axes**
* The number of axes is called the **rank** of the array.  -> `array.ndim`
* The array's **shape** indicates the size of the array in each dimension. -> `array.shape`
* The array's **size** is the total number of its elements. -> `array.size`

An example:

```
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
```
* 2 axes -> rank = 2
* shape = (3,4) 
    * 3 items on the initial axis (axis 0)
    * 4 items (for each item on axis 0) on the next axis (axis 1)
* size = 12 -> the number of elements, or cells, in the array

In [38]:
a = np.array([1,2,3])
b = np.random.rand(2,3)

In [39]:
for i,array in enumerate([a,b]):
    print(f"Array {['a','b'][i]}")
    display(array)
    print('Rank :',array.ndim)
    print('Shape :',array.shape)
    print('Size :',array.size)
    print('------------------')
    

Array a


array([1, 2, 3])

Rank : 1
Shape : (3,)
Size : 3
------------------
Array b


array([[0.95071431, 0.73199394, 0.59865848],
       [0.15601864, 0.15599452, 0.05808361]])

Rank : 2
Shape : (2, 3)
Size : 6
------------------


#### Reshaping Arryas

* CAUTION: reshaping doesn't create a copy = **modifications** of the original array are **reflected** in the new one and vice versa!
* Most reshapers are available as functions and methods alike.

In [40]:
a = np.arange(12)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [41]:
a = a.reshape((3,4))
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

-1 means inferring size for dimension from length of array

In [42]:
b = a.reshape(-1)
b

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [43]:
b = a.reshape(-1,1)
b

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11]])

In [44]:
b = a.reshape(3,2,-1)
b

array([[[ 0,  1],
        [ 2,  3]],

       [[ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11]]])

In [45]:
c = b.reshape(3,-1)
c

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [46]:
print("Modification in one array...")
c[0,0] = 10

print(c)

print("\n...is reflected in the others connected through reshaping:")
print(b)
print("\n...all others back to the original:")
print(a)

Modification in one array...
[[10  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

...is reflected in the others connected through reshaping:
[[[10  1]
  [ 2  3]]

 [[ 4  5]
  [ 6  7]]

 [[ 8  9]
  [10 11]]]

...all others back to the original:
[[10  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


`ravel`: return 1D-array

In [47]:
d = c.ravel()
d

array([10,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

`np.flatten`: like ravel, but returns a copy

In [48]:
e = c.flatten()
print(e)
print("Shape:", e.shape)

[10  1  2  3  4  5  6  7  8  9 10 11]
Shape: (12,)


Modification does not carry across flattened arrays:

In [49]:
print("Modified original:")
c[0,0] = 500
print(c)
print("Shape:", c.shape)

print("\n...flattened array not affected:")

print(e)
print("Shape:", e.shape)

Modified original:
[[500   1   2   3]
 [  4   5   6   7]
 [  8   9  10  11]]
Shape: (3, 4)

...flattened array not affected:
[10  1  2  3  4  5  6  7  8  9 10 11]
Shape: (12,)


#### Transposition

*   List item
*   List item



Switch rows and columns with .T or .transpose()

* Caution: `.T` is not a method, and is thus _not_ followed by parentheses
* As such, if arguments are to be specified (in the case of higher-rank arrays), better to use the .`transpose()` method

In [50]:
a = np.arange(6).reshape(2,3)
print(a)
b = a.T
print(b)
c = a.transpose()
print(c)

[[0 1 2]
 [3 4 5]]
[[0 3]
 [1 4]
 [2 5]]
[[0 3]
 [1 4]
 [2 5]]


For arrays with >2 dimensions, the desired **reshuffled axes** can be specified **using a permutation of the indices of the old axes** (simply integers in `.transpose()` method, list of integers for the `axes` kwarg in `np.transpose()`)

* `np.transpose(a, axes=(2, 3, 0, 1))`
    * 0th axis should be axis 2 of array `a`
    * 1st axis should be axis 3 of array `a`
    * 2nd axis should be axis 0 of array `a`
    * 3rd axis should be axis 1 of array `a`

In [51]:
a = np.arange(2*3*4).reshape(2,3,4)
a

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [52]:
b = a.transpose(2,0,1)
# b = np.transpose(a,axes=2,0,1)

#### Indexing

In [53]:
a = np.arange(1,10).reshape(3,3)
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [54]:
a[0]

array([1, 2, 3])

In [55]:
a[-1]

array([7, 8, 9])

In [56]:
for row in a: print(row)

[1 2 3]
[4 5 6]
[7 8 9]


In [57]:
for elem in a.flatten(): print(elem)

1
2
3
4
5
6
7
8
9


In [58]:
a[[0,2]]

array([[1, 2, 3],
       [7, 8, 9]])

`Slices` can be given a 'step' parameter after a second colon.

In [59]:
v = [0,1,2,3,4,5,6,7,8,9,10]
display(v[2::2])
v = np.array(v)
display(v[2::2])

[2, 4, 6, 8, 10]

array([ 2,  4,  6,  8, 10])

Lists: only reach elements of embedded lists via iterated indexing:

In [60]:
mylist = [list(range(5)), list(range(5,10))]
print(mylist)

print("\nmylist[0][1]:")
print(mylist[0][1])

print("\nmylist[0,1]:")
try:
    print(mylist[0,1])
except Exception as e:
    print(f"Exception ({type(e).__name__}):",   e)

print("\nmylist[[0,1]]:")
try:
    print(mylist[[0,1]])
except Exception as e:
    print(f"Exception ({type(e).__name__}):",   e)

[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]

mylist[0][1]:
1

mylist[0,1]:
Exception (TypeError): list indices must be integers or slices, not tuple

mylist[[0,1]]:
Exception (TypeError): list indices must be integers or slices, not list


Numpy arrays: also indexing scheme `[rows, columns]`

In [61]:
myarray = np.arange(10).reshape(2,-1)
print(myarray)

print("\nmyarray[0][1]:")
print(myarray[0][1])    ## <- column 1 of row 0

print("\nmyarray[0,1]:")
print(myarray[0,1]) ## <- row 0, column 1

print("\nmyarray[[0,1]]:")
print(myarray[[0,1]])   ## <- rows 0 and 1

[[0 1 2 3 4]
 [5 6 7 8 9]]

myarray[0][1]:
1

myarray[0,1]:
1

myarray[[0,1]]:
[[0 1 2 3 4]
 [5 6 7 8 9]]


Also works with slices

In [62]:
myarray = np.arange(20).reshape(5, 4)
myarray

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [63]:
i = 2
j = 1

print(f"Specifying only rows :{i}")
print(myarray[:i])
print()


print(f"Rows :{i}, Column {j}")
print(myarray[:i,j])
print()

print(f"Rows :{i}, Column :{j}")
print(myarray[:i,:j])
print()

i2 = 5
j2 = 3
print(f"Rows {i}:{i2}, Column {j}:{j2}")
print(myarray[i:i2,j:j2])
print()

Specifying only rows :2
[[0 1 2 3]
 [4 5 6 7]]

Rows :2, Column 1
[1 5]

Rows :2, Column :1
[[0]
 [4]]

Rows 2:5, Column 1:3
[[ 9 10]
 [13 14]
 [17 18]]



Pay attention to the shapes!
* single index -> rank reduction
* slice -> no rank reduction

#### Assigning new values

In [64]:
mylist = list(range(5))
myarray = np.arange(5)

print(mylist)
print(myarray)

[0, 1, 2, 3, 4]
[0 1 2 3 4]


In [65]:
try:
    mylist[1:3] = 100
except Exception as e:
    print(f"Exception ({type(e).__name__}):",   e)
print(mylist)

Exception (TypeError): can only assign an iterable
[0, 1, 2, 3, 4]


In [66]:
myarray[1:3] = 100
print(myarray)

[  0 100 100   3   4]


Regular lists: modifying slice leaves original intact

In [67]:
list_A = [1, 2, 3, 4]

list_B = list_A[:3]
list_B[0] = 100

print(list_A)
print(list_B)

[1, 2, 3, 4]
[100, 2, 3]


Numpy arrays: **modifying slice modifies original**

In [68]:
array_A = np.array([1, 2, 3, 4])

array_B = array_A[:3]
array_B[0] = 100

print(array_A)
print(array_B)

[100   2   3   4]
[100   2   3]


In [69]:
array_A = np.array([1, 2, 3, 4])

print("When making a copy...\n")
array_B = array_A[:3].copy()
array_B[0] = 100

print(array_A)
print(array_B)

When making a copy...

[1 2 3 4]
[100   2   3]


#### Operations

In [70]:
# Adding of list = Concatenation
[1,1,1] + [1,2,3]

[1, 1, 1, 1, 2, 3]

In [71]:
# Adding of NumPy Arrays = Elementwise addition
np.array([1,1,1]) + np.array([1,2,3])

array([2, 3, 4])

In [72]:
# To concatenate NumPy arrays:
np.concatenate((np.array([1,1,1]),
               np.array([1,2,3])))

array([1, 1, 1, 1, 2, 3])

In [73]:
a = np.array([1,1,1,2,2,2]).reshape(2,3)
b = np.array([3,3,3,4,4,4]).reshape(2,3)

for axis in [0,1]:
    print(f"Concatenate along the axis {axis}")
    print(np.concatenate((a,b),axis=axis))

Concatenate along the axis 0
[[1 1 1]
 [2 2 2]
 [3 3 3]
 [4 4 4]]
Concatenate along the axis 1
[[1 1 1 3 3 3]
 [2 2 2 4 4 4]]


Addition of an element to a list

In [74]:
a = [1, 1, 1]
print(a, "\t = the original list")
print("\n----------")
print("trying + 3...")
try:
    print(a+3)
except Exception as e:
    print(f"Exception ({type(e).__name__}):",   e)
print(a, "\t = the original")

print("\n----------")
print("append 3")
a.append(3)
print(a, "\t = the original")

[1, 1, 1] 	 = the original list

----------
trying + 3...
Exception (TypeError): can only concatenate list (not "int") to list
[1, 1, 1] 	 = the original

----------
append 3
[1, 1, 1, 3] 	 = the original


Appending an element to a numpy array doesn't update original array:

In [75]:
a = np.array([1,1,1])
print(a, "\t = the original")
print(np.append(a, 3), "\t = appending 3")
print(a, "\t = the original")

[1 1 1] 	 = the original
[1 1 1 3] 	 = appending 3
[1 1 1] 	 = the original


Addition to a NumPy array is elementwise addition

In [76]:
a = np.array([1,1,1])
print(a+3)

[4 4 4]


Multilication likewise

In [77]:
a = np.array([1,1,1])
print(a*3)

[3 3 3]


Multiplication vs. matrix multiplication/dot product:

In [78]:
a = np.array([1,2,3])
print(a * a)

[1 4 9]


In [79]:
a = np.array([1,2,3])
print(a.dot(a))
print((a * a).sum())

14
14


Broadcasting Example

In [80]:
a = np.arange(12).reshape(3,4)
b = np.array([10**x for x in range(4)])

print(f"a (shape {a.shape}):")
print(a)
print(f"\nb (shape {b.shape}):")
print(b)
print("\na + b:")
print(a + b)

a (shape (3, 4)):
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

b (shape (4,)):
[   1   10  100 1000]

a + b:
[[   1   11  102 1003]
 [   5   15  106 1007]
 [   9   19  110 1011]]


In [81]:
a = np.arange(12).reshape(4,3)
b = np.array([10**x for x in range(4)])

print(f"a (shape {a.shape}):")
print(a)
print(f"\nb (shape {b.shape}):")
print(b)
try:
    print(a+b)
except Exception as e:
    print(f'\nException ({type(e).__name__}) : {e}')

a (shape (4, 3)):
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]

b (shape (4,)):
[   1   10  100 1000]

Exception (ValueError) : operands could not be broadcast together with shapes (4,3) (4,) 


In [82]:
a = np.arange(12).reshape(4,3)
b = np.array([10**x for x in range(4)]).reshape(-1,1)

print(f"a (shape {a.shape}):")
print(a)
print(f"\nb (shape {b.shape}):")
print(b)
print("\na + b:")
print(a + b)

a (shape (4, 3)):
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]

b (shape (4, 1)):
[[   1]
 [  10]
 [ 100]
 [1000]]

a + b:
[[   1    2    3]
 [  13   14   15]
 [ 106  107  108]
 [1009 1010 1011]]


#### Local Operators

~, |, &, ^ are elementwise, just like arithmetic operations.

In [83]:
a = np.array([1,0,0]).astype('bool')
b = np.array([1,1,0]).astype('bool')

print('a :',a)
print('b :',b)
print()
print("~ a as NOT:\t", ~ a)
print("~ b as NOT:\t", ~ b)
print("a & b as AND:\t", a & b)
print("a | b as OR:\t", a | b)
print("a ^ b as XOR:\t", a ^ b)

a : [ True False False]
b : [ True  True False]

~ a as NOT:	 [False  True  True]
~ b as NOT:	 [False False  True]
a & b as AND:	 [ True False False]
a | b as OR:	 [ True  True False]
a ^ b as XOR:	 [False  True False]


Caution: the usual "not", "or", "and" **cannot perform elementwise** operations, but expect single values (ValueError for size >1 arrays)!

In [84]:
a = np.array([1, 0, 0]).astype("bool")
b = np.array([1, 1, 0]).astype("bool")

print("a:", a)
print("b:", b)
print()

print("not a:\n")
try:
    print(not a)    
except Exception as e:
    print(f"Exception ({type(e).__name__}):",   e)

print("\na and b:\n")
try:
    print(a and b)    
except Exception as e:
    print(f"Exception ({type(e).__name__}):",   e)

a: [ True False False]
b: [ True  True False]

not a:

Exception (ValueError): The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

a and b:

Exception (ValueError): The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()


Other Comparisons

In [85]:
a = np.arange(6).reshape(2,3)
b = np.array([[0, 1, 4], [3, 3, 5]])

print(a, "\ta\n")
print(b, "\tb\n")
print(a==b, "\ta == b\n")
print(a>b, "\ta > b\n")
print(a>=b, "\ta >= b\n")

[[0 1 2]
 [3 4 5]] 	a

[[0 1 4]
 [3 3 5]] 	b

[[ True  True False]
 [ True False  True]] 	a == b

[[False False False]
 [False  True False]] 	a > b

[[ True  True False]
 [ True  True  True]] 	a >= b



`.all()` and `.any()`

In [86]:
print('All Elements The Same :', (a==b).all())
print('Any Elements The Same :', (a==b).any())

All Elements The Same : False
Any Elements The Same : True


Checking along specific axes: rows or columns in a 2D-array. As we have seen:

* axis 0 = rows
* axis 1 = columns

In [87]:
# Rows
print('All Elements The Same :', (a==b).all(0))
print('All Elements The Same :', (a==b).all(1))


# Columns
print('Any Elements The Same :', (a==b).any(0))
print('Any Elements The Same :', (a==b).any(1))

All Elements The Same : [ True False False]
All Elements The Same : [False False]
Any Elements The Same : [ True  True  True]
Any Elements The Same : [ True  True]


Element-wise comparisons -> make sure the arrays are of the same shape (or broadcastable to the same shape)!
-> Will raise a ValueError (for ==, in future versions) otherwise.

#### Comparison along dtypes


As in ndarray creation, numpy will try to force elements into a common type, _if possible_.

In [88]:
a = np.ones(4).astype("int")
b = np.ones(4).astype("float")
c = np.ones(4).astype("bool")

print(f"a ({a.dtype}):\t", a)
print(f"b ({b.dtype}):\t", b)
print(f"c ({c.dtype}):\t", c)

print()

print("a == b:", a == b)
print("a == c:", a == c)
print("b == c:", b == c)

a (int32):	 [1 1 1 1]
b (float64):	 [1. 1. 1. 1.]
c (bool):	 [ True  True  True  True]

a == b: [ True  True  True  True]
a == c: [ True  True  True  True]
b == c: [ True  True  True  True]


In [89]:
a = np.full(4,2).astype("int")
b = np.ones(4).astype("float")
c = np.ones(4).astype("bool")

print(f"a ({a.dtype}):\t", a)
print(f"b ({b.dtype}):\t", b)
print(f"c ({c.dtype}):\t", c)

print()

print("a == b:", a == b)
print("a == c:", a == c)
print("b == c:", b == c)

a (int32):	 [2 2 2 2]
b (float64):	 [1. 1. 1. 1.]
c (bool):	 [ True  True  True  True]

a == b: [False False False False]
a == c: [False False False False]
b == c: [ True  True  True  True]


In [90]:
a = np.array(["True"]*4, dtype="object")
b = np.array(["True"]*4, dtype="<U4")
c = np.ones(4).astype("bool")

print(f"a ({a.dtype}):\t", a)
print(f"b ({b.dtype}):\t", b)
print(f"c ({c.dtype}):\t", c)

print()

print("a == b:", a == b)
print("a == c:", a == c)
print("b == c:", b == c)

a (object):	 ['True' 'True' 'True' 'True']
b (<U4):	 ['True' 'True' 'True' 'True']
c (bool):	 [ True  True  True  True]

a == b: [ True  True  True  True]
a == c: [False False False False]
b == c: False


  print("b == c:", b == c)


#### Array-Level equality

In [91]:
a = np.arange(6).reshape(2,3)
b = np.arange(8).reshape(2,4)

print(a, "\ta\n")
print(b, "\tb\n")

np.array_equal(a,b)

[[0 1 2]
 [3 4 5]] 	a

[[0 1 2 3]
 [4 5 6 7]] 	b



False

#### Aggregations

In [92]:
a = np.arange(10,15)
print("Summation ->\t", a.sum())
print('Mean (Med) ->\t', a.mean())
print('Product ->\t', a.prod())
print('StdDev ->\t', a.std())
print('Variance ->\t', a.var())
print('Minimum ->\t', a.min())
print('Min Idx ->\t', a.argmin())
print('Maximum ->\t', a.max())
print('Max Idx->\t', a.argmax())

Summation ->	 60
Mean (Med) ->	 12.0
Product ->	 240240
StdDev ->	 1.4142135623730951
Variance ->	 2.0
Minimum ->	 10
Min Idx ->	 0
Maximum ->	 14
Max Idx->	 4


#### Boolean Indexing
array B can be used as a mask on an array A of the same shape such that values in True positions are kept, while values in False positions are not.

In [93]:
a = np.arange(10,14)
b = np.array([True,False,True,True])
print(a[b])

[10 12 13]


In [94]:
a = np.arange(1,6)
print(a[a >= 3])

[3 4 5]


Results are 1D arrays:

In [95]:
a = np.arange(6).reshape(2, 3)
b = np.array([[True, True, False],
              [True, True, False]])

print("a:\n", a)
print("\nb:\n", b)

print("\na[b]:")
print(a[b])

print("\na[a < 5]:")
print(a[b])

a:
 [[0 1 2]
 [3 4 5]]

b:
 [[ True  True False]
 [ True  True False]]

a[b]:
[0 1 3 4]

a[a < 5]:
[0 1 3 4]


We can also **index** arrays with boolean arrays:
* **0th** axis of the boolean array **indexes rows** (0th axis)
* **1st** axis of the boolean array **indexes columns** (1st axis)

In [96]:
print("Indexing rows...\n")
b = np.array([True, True, False])
print(f"b (shape {b.shape}) as an indexer:\n", b)

for a in [np.arange(6).reshape(3,2),
          np.arange(6).reshape(2,3)]:
    print("\n------------")
    print(f"a (shape {a.shape}):\n", a)

    print("\nIndexing rows --- a[b]:")
    try:
        print(a[b])
    except Exception as e:
        print(f"Exception ({type(e).__name__}):",   e)

Indexing rows...

b (shape (3,)) as an indexer:
 [ True  True False]

------------
a (shape (3, 2)):
 [[0 1]
 [2 3]
 [4 5]]

Indexing rows --- a[b]:
[[0 1]
 [2 3]]

------------
a (shape (2, 3)):
 [[0 1 2]
 [3 4 5]]

Indexing rows --- a[b]:
Exception (IndexError): boolean index did not match indexed array along dimension 0; dimension is 2 but corresponding boolean dimension is 3


In [97]:
print("Indexing columns...\n")
b = np.array([True, True, False])
print(f"b (shape {b.shape}) as an indexer:\n", b)

for a in [np.arange(6).reshape(3,2),
          np.arange(6).reshape(2,3)]:
    print("\n------------")
    print(f"a (shape {a.shape}):\n", a)

    print("\nIndexing columns --- a[:, b]:")
    try:
        print(a[:, b])
    except Exception as e:
        print(f"Exception ({type(e).__name__}):",   e)


Indexing columns...

b (shape (3,)) as an indexer:
 [ True  True False]

------------
a (shape (3, 2)):
 [[0 1]
 [2 3]
 [4 5]]

Indexing columns --- a[:, b]:
Exception (IndexError): boolean index did not match indexed array along dimension 1; dimension is 2 but corresponding boolean dimension is 3

------------
a (shape (2, 3)):
 [[0 1 2]
 [3 4 5]]

Indexing columns --- a[:, b]:
[[0 1]
 [3 4]]


#### Empty Values

When using the regular Python NoneType object None in an array:

In [98]:
np.array([1, None, 1])

array([1, None, 1], dtype=object)

The float type empty value is `np.NaN` ("not a number"):

In [99]:
np.array([1, np.NaN, 1])

array([ 1., nan,  1.])

Missing values when applying conditions...

In [100]:
print("Is np.NaN < 3?")
print(np.NaN < 3)
print("\nIs np.NaN >= 3?")
print(np.NaN >= 3)

Is np.NaN < 3?
False

Is np.NaN >= 3?
False


In [101]:
a = np.array([1, np.NaN, 1, 3])
print("a:\t", a, "\n")

print("a[a < 2]:\t", a[a < 2])
print("a[a >= 2]:\t", a[a >= 2])
print("a[~(a < 2)]:\t", a[~(a < 2)])

a:	 [ 1. nan  1.  3.] 

a[a < 2]:	 [1. 1.]
a[a >= 2]:	 [3.]
a[~(a < 2)]:	 [nan  3.]
