## Task
Explore searching arrays in NumPy

## Notebook Summary
* argmax, argmin
* where, argwhere
* nonzero, count_nonzero
* searchsorted
* extract

## References
* *Python for Data Analysis*, Wes McKinney, O'Reilly, 2012
* *Numerical Python*, Robert Johansson, APress, 2015
* *Python Data Science Handbook*, Jake VanderPlas, O'Reilly, 2016


In [1]:
# display output from all cmds just like Python shell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import platform
print 'python.version = ', platform.python_version()
import IPython
print 'ipython.version =', IPython.version_info

import numpy as np
print 'numpy.version = ', np.__version__


python.version =  2.7.10
ipython.version = (5, 1, 0, '')
numpy.version =  1.11.3


In [3]:
# where

myarr = np.round(np.random.randn(4,4),2)
myarr

np.where(myarr < 0, 0, 99) # replace -ve values with 0 and non-neg values with 99
np.where(myarr < 0, 0, myarr) # replace -ve values with 0; leave non-neg values as-is

print '---'

# select
np.select([myarr<0, myarr>= 0], [0, myarr]) # same as using where above
np.select([myarr<0, myarr<1, myarr>= 1], [0, 1, myarr]) # same as using where above

print '---'

# choose 
np.choose([0,0,1,1,2,2], [11,12,13])

# nonzero
np.nonzero(myarr)
myarr[np.nonzero(np.zeros_like(myarr))]
myarr[np.nonzero(np.ones_like(myarr))]


array([[ 0.34, -2.07,  0.27,  2.4 ],
       [-0.05,  0.97, -0.28, -0.77],
       [-0.48,  1.  , -0.32,  0.13],
       [-0.23, -0.26,  0.42, -0.71]])

array([[99,  0, 99, 99],
       [ 0, 99,  0,  0],
       [ 0, 99,  0, 99],
       [ 0,  0, 99,  0]])

array([[ 0.34,  0.  ,  0.27,  2.4 ],
       [ 0.  ,  0.97,  0.  ,  0.  ],
       [ 0.  ,  1.  ,  0.  ,  0.13],
       [ 0.  ,  0.  ,  0.42,  0.  ]])

---


array([[ 0.34,  0.  ,  0.27,  2.4 ],
       [ 0.  ,  0.97,  0.  ,  0.  ],
       [ 0.  ,  1.  ,  0.  ,  0.13],
       [ 0.  ,  0.  ,  0.42,  0.  ]])

array([[ 1. ,  0. ,  1. ,  2.4],
       [ 0. ,  1. ,  0. ,  0. ],
       [ 0. ,  1. ,  0. ,  1. ],
       [ 0. ,  0. ,  1. ,  0. ]])

---


array([11, 11, 12, 12, 13, 13])

(array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]),
 array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]))

array([], dtype=float64)

array([ 0.34, -2.07,  0.27,  2.4 , -0.05,  0.97, -0.28, -0.77, -0.48,
        1.  , -0.32,  0.13, -0.23, -0.26,  0.42, -0.71])

In [2]:
# searchsorted - find indices where new items must be inserted in a sorted array to maintain sorted-ness

arr = np.arange(10)
arr

arr.searchsorted(0) # return index to left of existing 0
arr.searchsorted(10)
arr.searchsorted([1,2,1,2])


# Bin data using searchsorted
values = np.round(np.random.randn(50),2)
values
values.min(), values.max()

bins = np.array([-5,-1,0,1,5])

bins.searchsorted(values)

# compare with np.digitize
np.digitize(values, bins)

# verify both methods generate the same result
np.array_equal(bins.searchsorted(values), np.digitize(values, bins))


array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

0

10

array([1, 2, 1, 2])

array([-0.07,  0.52,  0.51, -0.53,  1.21, -1.53,  1.45,  0.97, -0.24,
       -0.39,  0.4 ,  0.71, -0.88, -0.65, -0.05,  1.57,  1.14,  0.23,
        1.63, -0.73, -1.32, -0.33, -1.5 ,  1.  , -1.17,  1.41,  0.57,
       -1.46, -0.31, -0.02,  1.72,  0.01,  0.28,  0.77,  0.26,  0.5 ,
        0.54,  0.37,  0.48, -0.68,  0.27, -0.26, -0.06, -1.13,  0.11,
        0.7 , -1.08, -1.63, -1.13, -0.69])

(-1.6299999999999999, 1.72)

array([2, 3, 3, 2, 4, 1, 4, 3, 2, 2, 3, 3, 2, 2, 2, 4, 4, 3, 4, 2, 1, 2, 1,
       3, 1, 4, 3, 1, 2, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 1, 3, 3,
       1, 1, 1, 2])

array([2, 3, 3, 2, 4, 1, 4, 3, 2, 2, 3, 3, 2, 2, 2, 4, 4, 3, 4, 2, 1, 2, 1,
       4, 1, 4, 3, 1, 2, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 1, 3, 3,
       1, 1, 1, 2])

False