## Task
Explore indexing and slicing in NumPy

## Notebook Summary
* Basic indexing
* Assigning values
* Boolean indexing
* Fancy indexing
* `take`, `put`

## References
* *Python for Data Analysis*, Wes McKinney, O'Reilly, 2012
* *Numerical Python*, Robert Johansson, APress, 2015
* *Python Data Science Handbook*, Jake VanderPlas, O'Reilly, 2016


In [2]:
# display output from all cmds just like Python shell
# (with ast_node_interactivity = "all" the notebook echoes the value of every
# expression statement in a cell, not only the last one; the cells below
# rely on this to show several results per cell)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Record the interpreter / library versions this notebook was run with.
# NOTE: `print` is used as a statement here, i.e. this is Python 2 syntax.
import platform
print 'python.version = ', platform.python_version()
import IPython
print 'ipython.version =', IPython.version_info

import numpy as np
print 'numpy.version = ', np.__version__


python.version =  2.7.10
ipython.version = (5, 1, 0, '')
numpy.version =  1.11.3


In [2]:
# Basic indexing & slicing

# --- 1-D array ---
myarr = np.arange(10)
myarr
myarr[1] # single integer index -> a single element
# a list of (negative) indices picks the last three elements
# (indexing with a list is fancy indexing, covered in its own cell)
myarr[[-3,-2,-1]]
myarr[1:6:2] # start:stop:step
myarr[::-1] # reverse elements

print '---'

myslice = myarr[4:6] # this is a view, not a copy
myslice
myslice[:] = -999 # writes through the view into myarr
myslice
myarr # original array is also modified

print '---'

myslice = myarr[4:6].copy() # explicit copy: independent of myarr
myslice[:] = -1
myslice
# original array is not modified by the write to the copy
# (it still holds the -999 values written through the view above)
myarr

print '---'

# --- 2-D array: 6 rows x 4 columns ---
myarr = np.arange(24).reshape(6,4)
myarr

# an integer row index drops a dimension: shape (4,)
myarr[5], myarr[5].shape
# a row slice keeps the result 2-D: shape (2, 4)
myarr[4:,:], myarr[4:,:].shape

print '---'

myarr[0,1] # element at row 0, column 1

# column selections
myarr[:,1] # one column, returned as a 1-D array
myarr[:,[2,3]] # columns 2 and 3 selected with a list
myarr[:,:3], myarr[:,:3].shape # first three columns via a slice

# assign to a column selection on a copy so that myarr itself is unchanged
myarr2 = myarr.copy()
myarr2[:,[2,3]] = -99
myarr2

# row selections
myarr[1,:] # one row, returned as a 1-D array
myarr[[2,3],:] # rows 2 and 3 selected with a list
myarr[[0,1,2],2:] # list of rows combined with a column slice
myarr[:2, 2:], myarr[:2, 2:].shape # slices on both axes

print '---'

# both forms return the same element
myarr[2,2]
myarr[2][2]

# Need to mix integer indices and slices to get lower-dimensional slices
# See Fig 4.2 on pg 90 in 'Python for Data Analysis'



array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

1

array([7, 8, 9])

array([1, 3, 5])

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

---


array([4, 5])

array([-999, -999])

array([   0,    1,    2,    3, -999, -999,    6,    7,    8,    9])

---


array([-1, -1])

array([   0,    1,    2,    3, -999, -999,    6,    7,    8,    9])

---


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

(array([20, 21, 22, 23]), (4,))

(array([[16, 17, 18, 19],
        [20, 21, 22, 23]]), (2, 4))

---


1

array([ 1,  5,  9, 13, 17, 21])

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15],
       [18, 19],
       [22, 23]])

(array([[ 0,  1,  2],
        [ 4,  5,  6],
        [ 8,  9, 10],
        [12, 13, 14],
        [16, 17, 18],
        [20, 21, 22]]), (6, 3))

array([[  0,   1, -99, -99],
       [  4,   5, -99, -99],
       [  8,   9, -99, -99],
       [ 12,  13, -99, -99],
       [ 16,  17, -99, -99],
       [ 20,  21, -99, -99]])

array([4, 5, 6, 7])

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

array([[ 2,  3],
       [ 6,  7],
       [10, 11]])

(array([[2, 3],
        [6, 7]]), (2, 2))

---


10

10

In [21]:
# Assigning values

# 3x3 integer array holding 0..8
arr = np.arange(9, dtype='int').reshape(3,3)
arr

arr[0,0] = 3.14 # assigning float to int array silently truncates float! (3 is stored)
arr

# slices are views of original array
sl = arr[:2, :2] # top-left 2x2 block
sl
sl[0,0] = 9999 # written through the view ...
sl

arr # ... so arr[0,0] is now 9999 as well

# create copies to avoid this behavior
sl = arr[:2,:2].copy()
sl
sl[0,0] = -1 # only the copy changes
sl
arr # not modified


array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

array([[3, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

array([[3, 1],
       [3, 4]])

array([[9999,    1],
       [   3,    4]])

array([[9999,    1,    2],
       [   3,    4,    5],
       [   6,    7,    8]])

array([[9999,    1],
       [   3,    4]])

array([[-1,  1],
       [ 3,  4]])

array([[9999,    1,    2],
       [   3,    4,    5],
       [   6,    7,    8]])

In [3]:
# Boolean indexing - index with arrays of True & False

myarr = np.arange(20)
myarr
idx = myarr % 4 == 0 # boolean array, True where the element is a multiple of 4
idx
myarr[idx] # keeps only the elements where idx is True
myarr[myarr % 4 == 0] # same selection with the mask written inline

print '---'

myarr = np.arange(32).reshape(8,4)
myarr

# With the NumPy version used for this notebook (1.11.3), the statement below
# does not return the 1st & 3rd cols: the Python list of True/False is
# interpreted as the integers 1, 0, resulting in myarr[:,[1,0,1,0]].
# NOTE: this is version-dependent; later NumPy releases treat a list of
# booleans as a boolean mask, so re-check this on the version you run.
myarr[:,[True, False, True, False]] 
# instead, do this (an explicit boolean array is always used as a mask)
myarr[:,np.array([True, False, True, False])]

print '---'

# random values (no seed is set, so the numbers differ from run to run)
myarr = np.random.randn(24).reshape(6,4)
myarr[myarr<0] = 0 # boolean-mask assignment: replace every negative entry with 0
myarr = myarr.round(2)
myarr[1] = 7 # a scalar assigned to a row index fills the whole row
myarr


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

array([ True, False, False, False,  True, False, False, False,  True,
       False, False, False,  True, False, False, False,  True, False,
       False, False], dtype=bool)

array([ 0,  4,  8, 12, 16])

array([ 0,  4,  8, 12, 16])

---


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])



array([[ 1,  0,  1,  0],
       [ 5,  4,  5,  4],
       [ 9,  8,  9,  8],
       [13, 12, 13, 12],
       [17, 16, 17, 16],
       [21, 20, 21, 20],
       [25, 24, 25, 24],
       [29, 28, 29, 28]])

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10],
       [12, 14],
       [16, 18],
       [20, 22],
       [24, 26],
       [28, 30]])

---


array([[ 0.  ,  0.75,  0.  ,  0.  ],
       [ 7.  ,  7.  ,  7.  ,  7.  ],
       [ 0.4 ,  0.  ,  0.49,  0.  ],
       [ 0.  ,  0.93,  0.19,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.25,  0.  ,  0.  ]])

In [4]:
# Fancy indexing - indexing using integer arrays / lists / seq of integers

myarr = np.arange(32).reshape(8,4)
myarr

# the next three expressions all select rows 0 and 1
myarr[[0,1],:]
myarr[[0,1]]
myarr[np.array([0,1])]
myarr[[1,0,1],:] # rows may be repeated and listed in any order

myslice = myarr[[0,1],:] # fancy indexing returns a copy, not a view
myslice[:] = 99 # only the copy changes
myslice
myarr # unchanged
myarr[[0,1],:] = 99 # assign values with fancy indexing (this does modify myarr)
myarr

print '---'

myarr[:,[0,1]] # first two columns
myarr[:,[-1, -2]] # last two columns, in reversed order

print '---'

# NOTE: rows 0 and 1 were overwritten with 99 above, so all three results
# below contain only 99; what differs between them is the shape of the result.
myarr[[0,1],[-3,1]] # values returned are those @ [0,-3] & [1,1] (index lists are paired; 1-D result)
# compare to (select rows first, then columns: a 2x2 block)
myarr[[0,1]][:,[-3,1]] 
# and see also (np.ix_ selects the same 2x2 block in a single indexing step)
myarr[np.ix_([0,1],[-3,1])]


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

array([[4, 5, 6, 7],
       [0, 1, 2, 3],
       [4, 5, 6, 7]])

array([[99, 99, 99, 99],
       [99, 99, 99, 99]])

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

array([[99, 99, 99, 99],
       [99, 99, 99, 99],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

---


array([[99, 99],
       [99, 99],
       [ 8,  9],
       [12, 13],
       [16, 17],
       [20, 21],
       [24, 25],
       [28, 29]])

array([[99, 99],
       [99, 99],
       [11, 10],
       [15, 14],
       [19, 18],
       [23, 22],
       [27, 26],
       [31, 30]])

---


array([99, 99])

array([[99, 99],
       [99, 99]])

array([[99, 99],
       [99, 99]])

In [10]:
# take, put

n = 1000
arr = np.arange(n)
idx = np.random.permutation(n)[:500] # first 500 entries of a random permutation of 0..n-1
# reading: fancy indexing and take() return the same elements
arr[idx][:5]
arr.take(idx)[:5]

# %timeit is an IPython magic, so this cell only runs inside IPython / Jupyter
%timeit arr[idx]
%timeit arr.take(idx)

print '-----'
# writing: fancy-index assignment vs put() (note the two write different values)
%timeit arr[idx] = 99
%timeit arr.put(idx, -1)

# In the run recorded below, arr[idx], arr.take(idx) and arr[idx] = 99 are
# all close (about 1.1-1.5 us per loop), while arr.put(idx, -1) is several
# times slower (about 7 us per loop).


array([108, 590, 808, 639, 987])

array([108, 590, 808, 639, 987])

The slowest run took 5.51 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.08 µs per loop
The slowest run took 15.42 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.17 µs per loop
-----
The slowest run took 4.81 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.49 µs per loop
100000 loops, best of 3: 6.99 µs per loop
