In [1]:
import numpy as np

# 1 Fancy Indexing
Can index a numpy array using another numpy array, a python list, or a sequence of integers

### With another numpy array

In [2]:
arr = np.arange(1,20)
print(arr)
indices = np.array([3,5,1])
print(arr[indices])

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[4 6 2]


### 1.1 Basic Slicing: 
In arr[idx], idx is the slice object, and it can be of the form start : stop : step, integer, or tuple of slice objects and integers.

In [3]:
# slice object of start: stop : step
print(arr[2:10:2])
print(arr[0:10:2])
print(arr[-8:17:1])

[3 5 7 9]
[1 3 5 7 9]
[12 13 14 15 16 17]


Using ellipsis: ... serves as however many : are needed for a selection tuple matching the dimensions of the array

In [4]:
print(arr[...])
arr_3d = np.array([[[1,2,3], [4,5,6]], [[11,12,13], [14,15,16]], 
                  [[21,22,23],[24,25,26]], [[31,32,344], [34,35,36]]])
print(arr_3d[2,...])

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[[21 22 23]
 [24 25 26]]


In [5]:
print(arr[::-1]) # negative index reverses order
print(arr[-1:0:-2])
print(arr[2:8:-1])
print(arr[8:2:-1])

[19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1]
[19 17 15 13 11  9  7  5  3]
[]
[9 8 7 6 5 4]


### 1.2 Advanced indexing
In arr[idx], wher idx is an integer or boolean ndarray, a tuple containing a sequence object, or a non-tuple sequence object.

Returns copy of data, not a view of it

##### 1.2.1 Purely integer indexing

All the first dim, all the second dim, etc.

In [6]:
arr3d = np.array([[[111, 112, 113, 114], [121, 122, 123, 124]],
                  [[211, 212, 213, 214], [221, 222, 223, 224]],
                  [[311, 312, 313, 314], [321, 322, 323, 324]],
                  [[411, 412, 413, 414], [421, 422, 423, 244]],
                  [[511, 512, 513, 514], [521, 522, 523, 524]]])
print(arr3d.shape)
print('arr3d[[2], [1]]:\t', arr3d[[2], [1]])
print('ar3d[[2], [1], [0]]:\t', arr3d[[2], [1], [0]])
print('arr_3d[[2], [1], [0]]:\t', arr3d[[2,], [1], [0]])
print('arr_3d[[2, 0, 0], [1,0,3], [4,0,2]]:\t', arr3d[[2, 0, 4], [1,0,0], [3,3,2]])


try: 
    print('arr_3d[[2, 0, 0], [1,0,3], [4,0,2], [0,0,0]]:\t', arr3d[[2, 0, 4], [1,0,0], [3,3,2], [0,0,0]])
except IndexError:
    print('Should only have one [] for each dimension, \
but you can have unlimited integers inside that []')

print('arr3d[[2,0,4,4,4], [1,0,0,1,2,3], [3,3,2,2,1,1]]:\t', arr3d[[2,0,4,4,4,4], [1,0,0,1,0,1], [3,3,2,2,1,1]])


(5, 2, 4)
arr3d[[2], [1]]:	 [[321 322 323 324]]
ar3d[[2], [1], [0]]:	 [321]
arr_3d[[2], [1], [0]]:	 [321]
arr_3d[[2, 0, 0], [1,0,3], [4,0,2]]:	 [324 114 513]
Should only have one [] for each dimension, but you can have unlimited integers inside that []
arr3d[[2,0,4,4,4], [1,0,0,1,2,3], [3,3,2,2,1,1]]:	 [324 114 513 523 512 522]


##### 1.2.2 Boolean indexing
Used for filtering, the index is a boolean expression. Only elements that satisfy that expression are returned.


In [7]:
print(arr[arr%2==1]) # select odd values
# breaking it down
arr_bools = arr%2==1 # gives array of T/F values depending on if each element meets the condition
print(arr_bools)
print(arr[arr_bools]) # just returns elements corresponding to True in the condition array

[ 1  3  5  7  9 11 13 15 17 19]
[ True False  True False  True False  True False  True False  True False
  True False  True False  True False  True]
[ 1  3  5  7  9 11 13 15 17 19]


In [8]:
# get only the rows (of 3rd dim) with a sum >1500
# print(arr3d)
print(arr3d.shape)
sums = arr3d.sum(axis=2)
print(sums.shape)
print(sums)
print(arr3d[sums>=1500])

(5, 2, 4)
(5, 2)
[[ 450  490]
 [ 850  890]
 [1250 1290]
 [1650 1510]
 [2050 2090]]
[[411 412 413 414]
 [421 422 423 244]
 [511 512 513 514]
 [521 522 523 524]]


In [9]:
# only the rows with a sum < 1200
print(arr3d[sums<=1200])

[[111 112 113 114]
 [121 122 123 124]
 [211 212 213 214]
 [221 222 223 224]]


In [10]:
# (1st dim) columns with sums < val (for all of 3rd dim)
sums2 = arr3d.sum(axis=0)
print(sums2.shape)
print(sums2)
print(arr3d[:,sums2<=1565])

(2, 4)
[[1555 1560 1565 1570]
 [1605 1610 1615 1440]]
[[111 112 113 124]
 [211 212 213 224]
 [311 312 313 324]
 [411 412 413 244]
 [511 512 513 524]]


# 2 Shaping multidimensional arrays

### 2.1 np.reshape

In [11]:
arr_r = np.arange(20,40,2)
print(arr_r)


[20 22 24 26 28 30 32 34 36 38]


In [12]:
try: 
    print(arr_r.reshape(5,3))
except ValueError:
    print("new shape needs to have same total elements")
print('(5,2):\n', arr_r.reshape(5,2))
print('(2,5):\n', arr_r.reshape(2,5))

new shape needs to have same total elements
(5,2):
 [[20 22]
 [24 26]
 [28 30]
 [32 34]
 [36 38]]
(2,5):
 [[20 22 24 26 28]
 [30 32 34 36 38]]


In [13]:
print('(2,5):\n', arr_r.reshape(2,5)) 
print('(1,1,1,2,5):\n', arr_r.reshape(1,1,1,2,5)) 
# print('(2,1,1,5):\n', arr_r.reshape(2,1,5))
# but a 1 in the middle add's an extra axis

(2,5):
 [[20 22 24 26 28]
 [30 32 34 36 38]]
(1,1,1,2,5):
 [[[[[20 22 24 26 28]
    [30 32 34 36 38]]]]]


In [14]:
print('(2,5,1,1):\n', arr_r.reshape(2,5,1,1)) 

(2,5,1,1):
 [[[[20]]

  [[22]]

  [[24]]

  [[26]]

  [[28]]]


 [[[30]]

  [[32]]

  [[34]]

  [[36]]

  [[38]]]]


In [15]:

print('(5,1,2):\n', arr_r.reshape(2,1,5))
print('(2,5,1):\n', arr_r.reshape(2,1,5))

(5,1,2):
 [[[20 22 24 26 28]]

 [[30 32 34 36 38]]]
(2,5,1):
 [[[20 22 24 26 28]]

 [[30 32 34 36 38]]]


unpack existing shape with *

In [16]:
print(arr_3d.reshape(1, *arr_3d.shape))
print(*arr_3d.shape) # just takes it out of parentheses
print(arr_3d.shape)

[[[[  1   2   3]
   [  4   5   6]]

  [[ 11  12  13]
   [ 14  15  16]]

  [[ 21  22  23]
   [ 24  25  26]]

  [[ 31  32 344]
   [ 34  35  36]]]]
4 2 3
(4, 2, 3)


In [17]:
print(arr_3d.shape[0])
try:
    print(*arr_3d.shape[0]) # but no longer iterable
except TypeError:
    print('* makes shape not iterable')

4
* makes shape not iterable


### np.newaxis
does same as reshaping with a 1 where the new axis is (and other dims same)

In [18]:
print(arr_r.reshape(1,10))
print((arr_r.reshape(10,1)))

[[20 22 24 26 28 30 32 34 36 38]]
[[20]
 [22]
 [24]
 [26]
 [28]
 [30]
 [32]
 [34]
 [36]
 [38]]


In [19]:
print(arr_r[np.newaxis,:]) # same as reshape starting with 1
print(arr_r[:,np.newaxis]) # same as reshape ending with 1

[[20 22 24 26 28 30 32 34 36 38]]
[[20]
 [22]
 [24]
 [26]
 [28]
 [30]
 [32]
 [34]
 [36]
 [38]]


In [20]:
print(arr_3d.shape)
print(arr_3d)

(4, 2, 3)
[[[  1   2   3]
  [  4   5   6]]

 [[ 11  12  13]
  [ 14  15  16]]

 [[ 21  22  23]
  [ 24  25  26]]

 [[ 31  32 344]
  [ 34  35  36]]]


In [21]:
print(np.array_equal(arr_3d.reshape(4,2,1,3), arr_3d[:,:,np.newaxis,:]))

True


In [22]:
print(arr_3d[0].reshape(6))

[1 2 3 4 5 6]


### 2.3 np.expand_dims()
np.expand_dims(array, index of new dim)

In [23]:
print(np.array_equal(np.expand_dims(arr_3d, 2), 
                     arr_3d[:,:,np.newaxis,:]))
# expand_dims and newaxis don't require specifying size

True


### 2.4 Flattening 
##### np.ravel() / arr.ravel()
* Returns a view of the array in 1d (making it faster than flatten)
* Can specify order of flatten or ravel with 'A', 'C', 'F', K' 
* All those work for reshape too except 'K'.

In [24]:
print(arr_3d.ravel())
print(np.ravel(arr_3d))

[  1   2   3   4   5   6  11  12  13  14  15  16  21  22  23  24  25  26
  31  32 344  34  35  36]
[  1   2   3   4   5   6  11  12  13  14  15  16  21  22  23  24  25  26
  31  32 344  34  35  36]


##### reshape(-1)
returns a view (if possible)

In [25]:
print(arr_3d.reshape(-1))
print(np.reshape(arr_3d, -1))

[  1   2   3   4   5   6  11  12  13  14  15  16  21  22  23  24  25  26
  31  32 344  34  35  36]
[  1   2   3   4   5   6  11  12  13  14  15  16  21  22  23  24  25  26
  31  32 344  34  35  36]


##### flatten()
always returns a copy, this also makes it slower because it needs to allocate new space

In [26]:
print(arr_3d.flatten())

[  1   2   3   4   5   6  11  12  13  14  15  16  21  22  23  24  25  26
  31  32 344  34  35  36]


In [27]:
flat_3d = arr_3d.flatten()
print(flat_3d)
print(arr_3d)

[  1   2   3   4   5   6  11  12  13  14  15  16  21  22  23  24  25  26
  31  32 344  34  35  36]
[[[  1   2   3]
  [  4   5   6]]

 [[ 11  12  13]
  [ 14  15  16]]

 [[ 21  22  23]
  [ 24  25  26]]

 [[ 31  32 344]
  [ 34  35  36]]]


use ndarray.base to determine if view or copy

In [28]:
shape_3d  = arr_3d.reshape(-1)
# print(shape_3d)
print(shape_3d.base)
print(arr_3d.base)
print(flat_3d.base)


[[[  1   2   3]
  [  4   5   6]]

 [[ 11  12  13]
  [ 14  15  16]]

 [[ 21  22  23]
  [ 24  25  26]]

 [[ 31  32 344]
  [ 34  35  36]]]
None
None


# 3 Views vs Copies
Views: 
* reshape()
* ravel()
* basic index of range
* view()

Copies: 
* flatten()
* fancy indexing 
* copy()
* basic index of one values

In [29]:
original = np.array([1,2,3])
reshaped = np.reshape(original, -1)
raveled = original.ravel()
flattened = original.flatten()
basicindexedrange = original[0:2]
basicindexedcolon = original[:]
basicindex1 = original[0]
fancyindexed = original[original%1==0]

original[0] = 5
print('reshaped', reshaped) # reshaped is just a view
print('flattened', flattened) # flattened is a copy
print('raveled', raveled)
print('basicindexedrange', basicindexedrange)
print('basicindexedcolon', basicindexedcolon)
print('basicindex1', basicindex1)
print('fancyindexed', fancyindexed) # returned a copy

reshaped [5 2 3]
flattened [1 2 3]
raveled [5 2 3]
basicindexedrange [5 2]
basicindexedcolon [5 2 3]
basicindex1 1
fancyindexed [1 2 3]


### Determine if something is a copy using np.shares_memory()
shares = True if view

In [30]:
print('reshaped', np.shares_memory(original, reshaped))
print('flattened', np.shares_memory(original, flattened))
print('raveled', np.shares_memory(original, raveled))
print('basicindexedrange', np.shares_memory(original, basicindexedrange))
print('basicindexedcolon', np.shares_memory(original, basicindexedcolon))
print('basicindex1', np.shares_memory(original, basicindex1))
print('fancyindexed', np.shares_memory(original, fancyindexed))

reshaped True
flattened False
raveled True
basicindexedrange True
basicindexedcolon True
basicindex1 False
fancyindexed False


2 views of same thing also return True

In [31]:
print(np.shares_memory(basicindexedrange, raveled))

True


may_share_memory() checks if memory address ranges overlap, not if elemetns reference the same memory

In [32]:
print(np.may_share_memory(original[::2], original[1::2]))
print(np.shares_memory(original[::2], original[1::2]))
print(original[::2]) # double colons indicates start:stop:step indexing
print(original[1::2])
print(original)

True
False
[5 3]
[2]
[5 2 3]


### Determine if something is a copy using ndarray.base
None if it owns its memory

In [33]:
print('reshaped', reshaped.base) # reshaped is just a view
print('flattened', flattened.base) # flattened is a copy
print('raveled', raveled.base)
print('basicindexedrange', basicindexedrange.base)
print('basicindexedcolon', basicindexedcolon.base)
print('basicindex1', basicindex1.base)
print('fancyindexed', fancyindexed.base) # returned a copy

reshaped [5 2 3]
flattened None
raveled [5 2 3]
basicindexedrange [5 2 3]
basicindexedcolon [5 2 3]
basicindex1 None
fancyindexed None


# 4 Math with ndarrays

math function of a (1xN) array and a (Mx1) array produces an NxM array

In [35]:
x_arr = np.array([[1,2,3]])
print('x_arr', x_arr.shape)
y_arr = np.array([[1],[10],[100],[1000]])
print('y_arr', y_arr.shape)
print(x_arr*y_arr, '\nshape', (y_arr*x_arr).shape)


x_arr (1, 3)
y_arr (4, 1)
[[   1    2    3]
 [  10   20   30]
 [ 100  200  300]
 [1000 2000 3000]] 
shape (4, 3)
