# Indexing and slicing and iterating in NumPy

In [1]:
# import numpy
import numpy as np

In [2]:
# build a 1-D array holding 0.0, 1.0, ..., 9.0
x = np.linspace(0, 9, 10)

x[1]                     # single element: index 1 is the second entry (indexing is 0-based)

# slice with start:stop -- the stop index is excluded
x[0:2]                   # entries at index 0 and 1 only, i.e. 0 <= N < 2

# overwrite the 2nd through 4th elements (index 1, 2, 3) with 100
x[1:4] = 100
print(x[1:4])

# slice with start:stop:step
print(x[0:8:2])

# a negative step walks the array backwards, i.e. reverses x
print(x[::-1])

# looping over a 1-D array hands back its elements one at a time
for value in x:
    print(value * 3)

[ 100.  100.  100.]
[   0.  100.    4.    6.]
[   9.    8.    7.    6.    5.    4.  100.  100.  100.    0.]
0.0
300.0
300.0
300.0
12.0
15.0
18.0
21.0
24.0
27.0


## Multidimensional array indexing, slicing, etc.

In [6]:
# 10x5 matrix of random values scaled to the range 0-10 and rounded to whole numbers
x = np.round(10 * np.random.rand(10, 5))
print(x)

x[0, 0]    # row 1, column 1
x[2, 3]    # row 3, column 4

x[:, 3]    # the whole 4th column
x[3, :]    # the whole 4th row
x[0:2, 4]  # first two entries of the 5th column
x[6, 2:4]  # 3rd and 4th entries of the 7th row

# dimensions left out of the index are treated as complete slices,
# so each of these three spellings selects the entire 7th row
x[6]
x[6,]
x[6, :]

# trick: negative indices count back from the end
print('last row: ', x[-1, :])
print('last column: ', x[:, -1])
print('last entry: ', x[-1, -1])

# a plain for-loop walks along the first dimension, giving one row per step
for row in x:
    print(row)

# x.flat visits every single entry instead:
# all of the 1st row, then the 2nd row, and so on
for entry in x.flat:
    print(entry, end=' ')

[[  3.   6.   9.   9.   9.]
 [  6.   3.   0.   5.   4.]
 [  3.   2.   6.   6.   2.]
 [  6.   4.  10.   7.   8.]
 [  9.   4.   7.   1.   1.]
 [  5.   5.   1.   5.   0.]
 [  3.   8.   6.   3.   3.]
 [  7.   9.  10.   4.   2.]
 [  3.   2.   3.   3.   9.]
 [  1.   6.   3.   6.   5.]]
last row:  [ 1.  6.  3.  6.  5.]
last column:  [ 9.  4.  2.  8.  1.  0.  3.  2.  9.  5.]
last entry:  5.0
[ 3.  6.  9.  9.  9.]
[ 6.  3.  0.  5.  4.]
[ 3.  2.  6.  6.  2.]
[  6.   4.  10.   7.   8.]
[ 9.  4.  7.  1.  1.]
[ 5.  5.  1.  5.  0.]
[ 3.  8.  6.  3.  3.]
[  7.   9.  10.   4.   2.]
[ 3.  2.  3.  3.  9.]
[ 1.  6.  3.  6.  5.]
3.0 6.0 9.0 9.0 9.0 6.0 3.0 0.0 5.0 4.0 3.0 2.0 6.0 6.0 2.0 6.0 4.0 10.0 7.0 8.0 9.0 4.0 7.0 1.0 1.0 5.0 5.0 1.0 5.0 0.0 3.0 8.0 6.0 3.0 3.0 7.0 9.0 10.0 4.0 2.0 3.0 2.0 3.0 3.0 9.0 1.0 6.0 3.0 6.0 5.0 

## pull out subset of rows and columns

In [7]:
# 4x3 matrix of random numbers drawn from [0, 1)
x = np.random.rand(4, 3)
print(x)

# first two rows: the column index may be omitted, and the stop value 2 is
# exclusive, so this selects rows 0 and 1 (not 0 through 2!)
y = x[:2]
print('\n', y)

# last two rows, written the same way: rows 2 and 3 (the 3rd and 4th rows)
y = x[2:]
print('\n', y)

# 1st column of the first two rows
y = x[:2, 0]
print('\n', y)

# last two rows, last two columns (column index 1 through the end)
y = x[2:, 1:]
print('\n', y)

[[ 0.70082374  0.01064899  0.02062295]
 [ 0.81786869  0.25950106  0.07827977]
 [ 0.30824358  0.20858188  0.47875572]
 [ 0.40931346  0.9388113   0.84343883]]

 [[ 0.70082374  0.01064899  0.02062295]
 [ 0.81786869  0.25950106  0.07827977]]

 [[ 0.30824358  0.20858188  0.47875572]
 [ 0.40931346  0.9388113   0.84343883]]

 [ 0.70082374  0.81786869]

 [[ 0.20858188  0.47875572]
 [ 0.9388113   0.84343883]]


<div class="alert alert-info">
Important: slicing an array creates a view of it! If you change the view, you will also change the original data!
</div>

In [13]:
# z is a slice of x (its first two rows), so it is a view onto x, not a copy
z = x[:2, :]
print(z.shape)

# writing through the view also overwrites the matching rows of x
z[:] = 100

print(x)

(2, 3)
[[ 100.  100.  100.]
 [ 100.  100.  100.]
 [ 100.  100.  100.]
 [ 100.  100.  100.]]


## Fancy indexing...using arrays to index arrays - used all the time in data analysis...

<div class="alert alert-info">
fancy indexing always makes a COPY of the data (unlike slicing which creates a view)!!!
</div>

In [14]:
# a 3x4 array of random numbers to index into
x = np.random.rand(3, 4)

# a tuple used as an index is read as one (row, column) position,
# so x[y] below is the same as x[2, 3]: a single element
y = (2, 3)

# show the array, then the element picked out by y
print(x)
print('\n x indexed at tuple y: ', x[y])

[[ 0.84383997  0.41700925  0.28863538  0.52646956]
 [ 0.91219572  0.08924574  0.18205183  0.29823305]
 [ 0.03647543  0.09029493  0.14901326  0.43216867]]

 x indexed at tuple y:  0.432168666868


In [19]:
# fancy indexing can pull out elements in whatever order you ask for
print(x)

# a list of row indices returns the 3rd row, then the 2nd, then the 1st ...
x[[2, 1, 0]]
# ... which, for this 3-row array, gives the same values as reversing the rows with a slice
x[::-1]

# all columns from the 3rd row, then the 1st row, then the 2nd row:
# x[[2,0,1]]

# all rows from the 2nd, then the 3rd, then the 1st column:
# x[:,[1,2,0]]

[[ 0.84383997  0.41700925  0.28863538  0.52646956]
 [ 0.91219572  0.08924574  0.18205183  0.29823305]
 [ 0.03647543  0.09029493  0.14901326  0.43216867]]


array([[ 0.03647543,  0.09029493,  0.14901326,  0.43216867],
       [ 0.91219572,  0.08924574,  0.18205183,  0.29823305],
       [ 0.84383997,  0.41700925,  0.28863538,  0.52646956]])

In [20]:
# passing one index list per dimension pairs the lists up element-wise:
# here that selects positions (1,1) and (2,2) and returns them as a 1D array
print(x)
x[[1, 2], [1, 2]]

[[ 0.84383997  0.41700925  0.28863538  0.52646956]
 [ 0.91219572  0.08924574  0.18205183  0.29823305]
 [ 0.03647543  0.09029493  0.14901326  0.43216867]]


array([ 0.08924574,  0.14901326])

<div class="alert alert-info">
As opposed to selecting a set of tuples, you can also select a block of indices from a matrix. 
</div>

In [26]:
# show the full array, then a rectangular block taken out of it
print(x, '\n')

# rows 0-1 and columns 2-3, i.e. the upper-right 2x2 block
print(x[:2, 2:4])

# fancy-indexing version for rows 1-2, columns 2-3 (the lower-right block);
# the row selection must be closed with ']' before the column selection is applied:
# print(x[[1,2]][:,[2,3]])

[[ 0.84383997  0.41700925  0.28863538  0.52646956]
 [ 0.91219572  0.08924574  0.18205183  0.29823305]
 [ 0.03647543  0.09029493  0.14901326  0.43216867]] 

[[ 0.28863538  0.52646956]
 [ 0.18205183  0.29823305]]


TypeError: list indices must be integers or slices, not tuple