In [1]:
import numpy as np

# Initialize the numpy array

In [2]:
# build a 1-D ndarray from a plain Python list
arr = np.array([1, 2, 3])
print('arr:{values}, type:{kind}'.format(values=arr, kind=type(arr)))

arr:[1 2 3], type:<class 'numpy.ndarray'>


In [3]:
# a length-7 array filled with zeros
arr = np.zeros(shape=7)
print('arr:{}, type:{}'.format(arr, type(arr)))

arr:[ 0.  0.  0.  0.  0.  0.  0.], type:<class 'numpy.ndarray'>


In [9]:
# a 3x4 array of ones, obtained by adding 1 to every entry of a zero array
arr = 1 + np.zeros((3, 4))
print('arr:{}, type:{}, dtype:{}'.format(arr, type(arr), arr.dtype))

arr:[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]], type:<class 'numpy.ndarray'>, dtype:float64


In [10]:
# Request an integer dtype up front so that no intermediate float array is
# allocated and then copied by astype().
# Note: the recorded output shows int64, which is as wide as float64, so this
# alone does not reduce memory; choose a narrower dtype (e.g. np.int8) if a
# smaller footprint is the goal.
arr = np.zeros((3, 4), dtype=int) + 1
print('arr:{0}, type:{1}, dtype:{2}'.format(arr, type(arr), arr.dtype))

arr:[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]], type:<class 'numpy.ndarray'>, dtype:int64


In [14]:
# build the 3x4 array of ones directly; with no dtype given,
# np.ones produces float64 entries
arr = np.ones(shape=(3, 4))
print('arr:{}, type:{}, dtype:{}'.format(arr, type(arr), arr.dtype))

arr:[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]], type:<class 'numpy.ndarray'>, dtype:float64


In [11]:
# the integers 0, 1, ..., 9 as an array
arr = np.arange(0, 10)
print('arr:{}, type:{}, dtype:{}'.format(arr, type(arr), arr.dtype))

arr:[0 1 2 3 4 5 6 7 8 9], type:<class 'numpy.ndarray'>, dtype:int64


In [13]:
# values from 10 up to (but not including) 20 in steps of 0.5,
# stored as float16 by passing dtype explicitly
arr = np.arange(start=10, stop=20, step=0.5, dtype=np.float16)
print('arr:{}, type:{}, dtype:{}'.format(arr, type(arr), arr.dtype))

arr:[ 10.   10.5  11.   11.5  12.   12.5  13.   13.5  14.   14.5  15.   15.5
  16.   16.5  17.   17.5  18.   18.5  19.   19.5], type:<class 'numpy.ndarray'>, dtype:float16


# Selecting rows and columns

In [15]:
# a nested list (list of rows) becomes a 2-D array
alist = [
    [1, 2],
    [3, 4],
]
arr2d = np.array(alist)
print('arr:{}, type:{}, dtype:{}'.format(arr2d, type(arr2d), arr2d.dtype))

arr:[[1 2]
 [3 4]], type:<class 'numpy.ndarray'>, dtype:int64


In [16]:
# every row, column index 0 (the first column)
arr2d[:, 0]

array([1, 3])

In [17]:
# every row, column index 1 (the second column)
arr2d[:, 1]

array([2, 4])

In [18]:
# the single entry at row index 1, column index 0
arr2d[1, 0]

3

In [19]:
# a doubly nested list (two 2x2 blocks) becomes a 3-D array
alist = [
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
]
arr3d = np.array(alist)
print('arr:{}, type:{}, dtype:{}'.format(arr3d, type(arr3d), arr3d.dtype))

arr:[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]], type:<class 'numpy.ndarray'>, dtype:int64


In [21]:
# keep the first two axes whole and take index 1 along the last axis
arr3d[:, :, 1]

array([[2, 4],
       [6, 8]])

# Conditional selection

For example, suppose we want to select all the entries whose value is greater than 5.

### 1D case

In [48]:
# start from the integers 0..19
arr = np.arange(20)
print(arr)

# Boolean mask: True wherever the entry is greater than 5
idx = arr > 5
print('index: {0}'.format(idx))

# indexing with the mask returns only the entries where it is True
print('selected part: {0}'.format(arr[idx]))

# add 100 to the selected entries in place
# (the print below shows the whole array, not only the selection)
arr[idx] += 100
print('selected part: {0}'.format(arr))

# overwrite the selected entries with a single constant
# (again the whole array is printed)
arr[idx] = -10
print('selected part: {0}'.format(arr))

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
index: [False False False False False False  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
selected part: [ 6  7  8  9 10 11 12 13 14 15 16 17 18 19]
selected part: [  0   1   2   3   4   5 106 107 108 109 110 111 112 113 114 115 116 117
 118 119]
selected part: [  0   1   2   3   4   5 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
 -10 -10]


### 2D case

In [None]:
# lay the integers 1..9 out as a 3x3 grid
arr = np.arange(1, 10)
arr2d = arr.reshape(3, 3)
print('arr2d: {}'.format(arr2d))

# Boolean mask with the same shape as arr2d: True where the entry exceeds 5
idx = arr2d > 5
print('index: {}'.format(idx))

# masking a 2-D array yields the matching entries as a flat 1-D array
print('selected part: {}'.format(arr2d[idx]))

### Using np.where()

In [58]:
# np.where() given only a condition is an alternative to a Boolean mask:
# it returns a tuple of integer index arrays (one per dimension) that
# locate the entries satisfying the condition.
idx2 = np.where(arr2d > 5)
print('index: {}'.format(idx2))

# indexing with that tuple picks out the same matching entries
print('selected part: {}'.format(arr2d[idx2]))

arr2d: [[1 2 3]
 [4 5 6]
 [7 8 9]]
index: [[False False False]
 [False False  True]
 [ True  True  True]]
selected part: [6 7 8 9]
index: (array([1, 2, 2, 2]), array([2, 0, 1, 2]))
selected part: [6 7 8 9]


# Type of elements in numpy array

The main differences between a Python list and a NumPy array are:
* A list can contain entries of different types, for example, `alist = [1, 'b', [1,2], {'a':True, 'b':False}]`
    * A list can only be iterated over; it does not support broadcasting
* An array can only contain entries of a single type, for instance, `arr = np.array([1, 2, 3])` or `arr = np.array(['a', 'b', 'c'])`
    * Array operations are much faster than list operations, and arrays support "broadcasting": https://numpy.org/doc/stable/user/basics.broadcasting.html

In [25]:
# one list may hold an int, a str, another list and a dict side by side
alist = [1, 'b', [1, 2], {'a': True, 'b': False}]
print('arr:{}, type:{}'.format(alist, type(alist)))

arr:[1, 'b', [1, 2], {'b': False, 'a': True}], type:<class 'list'>


In [29]:
# an array whose entries are all single-character strings
arr = np.array(list('abc'))
print('arr:{}, type:{}, dtype:{}'.format(arr, type(arr), arr.dtype))

arr:['a' 'b' 'c'], type:<class 'numpy.ndarray'>, dtype:<U1


In [30]:
# rows of mixed content (strings and integers): the result is a 2-D array
# with a single string dtype, so the integers end up stored as strings
arr = np.array([
    ['a', 'b', 'c'],
    [1, 2, 3],
])
print('arr:{}, type:{}, dtype:{}'.format(arr, type(arr), arr.dtype))

arr:[['a' 'b' 'c']
 ['1' '2' '3']], type:<class 'numpy.ndarray'>, dtype:<U1


In [34]:
# multiplying an array by a scalar broadcasts: every entry is multiplied by 3
np.array([1, 2, 3, 4]) * 3

array([ 3,  6,  9, 12])

In [35]:
# a list does not broadcast: multiplying it by 3 repeats the list three times
alist = [1, 2, 3, 4]
alist * 3

[1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]

# Generating random numbers

In [59]:
import numpy.random as rand

In [61]:
# a 3x4 array of random samples drawn from a Gaussian distribution
arr2d = rand.randn(3, 4)
print('arr2d: {}'.format(arr2d))

arr2d: [[ 0.04061861 -0.15869304  0.88563816  0.05971316]
 [-2.04072172 -0.14551835  0.10813258  1.79215372]
 [ 0.60823275  0.60746081 -1.03855834 -0.22705447]]


In [65]:
# a 3x4 array of random integers from a discrete uniform distribution;
# the lower bound 10 is included, the upper bound 15 is excluded
arr2d = rand.randint(low=10, high=15, size=(3, 4))
print('arr2d: {}'.format(arr2d))

arr2d: [[13 14 14 13]
 [12 10 13 13]
 [12 13 12 12]]
