In [1]:
import numpy as np

In [5]:
# The easiest way to create an array is to pass a sequence to np.array.
# The input mixes ints with one float, so the resulting array is float64.
data1 = [6, 7.5, 8, 9, 1]
arr1 = np.array(data1)
arr1

array([ 6. ,  7.5,  8. ,  9. ,  1. ])

In [8]:
# Nested sequences, like a list of equal-length lists,
# are converted into a multidimensional array.
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [9]:
# Number of dimensions (axes) of arr2.
arr2.ndim

2

In [10]:
# Size of arr2 along each axis, as a tuple (rows, columns).
arr2.shape

(2, 4)

In [11]:
# Element type NumPy inferred for arr1 when it was created.
arr1.dtype

dtype('float64')

In [12]:
# 1-D array of ten zeros; the values are displayed as floats.
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [14]:
# Passing a tuple sets the shape: 2 rows by 3 columns of zeros.
np.zeros((2,3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [15]:
# np.empty allocates a 3x3 array without initialising it, so the displayed
# values are arbitrary and will differ from run to run.
np.empty((3,3))

array([[  6.93491756e-310,   6.93491756e-310,   0.00000000e+000],
       [  0.00000000e+000,   0.00000000e+000,   0.00000000e+000],
       [  8.70018275e-313,   2.02566915e-322,   3.95252517e-322]])

In [17]:
# The integers 0 through 14 as an array.
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [18]:
# arr2 was built from Python ints only, so it has an integer dtype.
arr2.dtype

dtype('int64')

In [19]:
# astype returns a new array converted to the requested dtype;
# arr2 itself keeps its integer dtype.
arr2.astype('float64')

array([[ 1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.]])

## Operations between arrays and scalars 

1. Arrays are important because they let you express batch operations on data without writing any `for` loops. This is called *vectorization*.

In [22]:
# Redisplay arr2: it is the operand for the arithmetic examples in this section.
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [23]:
# Arithmetic between equal-shape arrays is applied element by element
# (each entry is squared here; this is not a matrix product).
arr2*arr2

array([[ 1,  4,  9, 16],
       [25, 36, 49, 64]])

In [24]:
# Element-wise subtraction: every entry cancels to 0.
arr2 - arr2

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [25]:
# A scalar is combined with every element; the division yields floats.
1 / arr2

array([[ 1.        ,  0.5       ,  0.33333333,  0.25      ],
       [ 0.2       ,  0.16666667,  0.14285714,  0.125     ]])

In [27]:
# Raising to the power 0.5 takes the element-wise square root.
arr2 ** 0.5

array([[ 1.        ,  1.41421356,  1.73205081,  2.        ],
       [ 2.23606798,  2.44948974,  2.64575131,  2.82842712]])

## Basic Indexing and Slicing

In [29]:
# Ten consecutive integers, 0 through 9, used for the indexing examples.
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [30]:
# A single integer index returns one element.
arr[5]

5

In [31]:
# Slice of positions 5, 6 and 7 (the stop index 8 is excluded).
arr[5:8]

array([5, 6, 7])

In [32]:
# Assigning a scalar to a slice writes it to every selected position,
# modifying arr in place.
arr[5:8] = 12

In [33]:
# Confirm that the slice assignment changed arr itself.
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [35]:
# Keep a handle on the slice. The following cells write through it to show
# that it is a view onto arr's data rather than a copy.
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [42]:
# Writing through the slice changes arr too: arr_slice[1] is arr[6].
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])

In [40]:
# A bare [:] assigns to every element of the slice, and therefore to arr[5:8].
arr_slice[:] = 64

In [41]:
# Positions 5 to 7 of arr reflect the assignment made through arr_slice.
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

## 2d Arrays

In [44]:
# 3x3 grid holding 1..9, written one row per line.
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [45]:
# Two axes: rows (axis 0) and columns (axis 1).
arr2d.ndim

2

In [59]:
# Slicing with a single range works along axis 0: keep the first two rows.
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [60]:
# One slice per axis: rows 0-1 along axis 0, columns 1 onward along axis 1.
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [47]:
# A single integer index on a 2-D array selects a whole row as a 1-D array.
arr2d[2]

array([7, 8, 9])

In [48]:
# Row 0, column 2. A comma-separated index selects the same element as
# indexing the row first and then the column.
arr2d[0, 2]

3

In [50]:
# Two stacked 2x3 blocks, giving shape (2, 2, 3).
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [53]:
# A tuple expression displays both values at once: number of axes and shape.
arr3d.ndim, arr3d.shape

(3, (2, 2, 3))

In [52]:
# Indexing the first axis of the 3-D array returns its first 2x3 block.
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

## Boolean Indexing

In [65]:
# One name per row of `data`; the boolean-indexing cells compare against this
# array to build row masks.
names = np.array(['Bob', 'joe', 'will', 'bob',
                  'will', 'joe', 'joe'])

# Seed the legacy global generator so the 7x4 matrix of standard-normal draws
# is identical on every Restart & Run All. Outputs recorded before the seed
# was added will differ until the dependent cells are re-run.
np.random.seed(12345)
data = np.random.randn(7, 4)

In [66]:
# Display the name array; note that it contains both 'Bob' and 'bob'.
names

array(['Bob', 'joe', 'will', 'bob', 'will', 'joe', 'joe'], 
      dtype='<U4')

In [67]:
# Show the random matrix together with its number of dimensions.
data, data.ndim

(array([[ 0.95966463,  0.10460413,  1.18129797, -1.62281603],
        [-0.3348651 , -0.02485407, -0.52169052,  0.4284356 ],
        [ 1.33714349, -0.59770291,  1.06138432, -0.39030719],
        [ 1.39365624,  0.66506695,  0.20732505,  0.30258038],
        [-1.22832671,  1.39889354, -0.61030678,  0.42173928],
        [-0.23496043,  0.29746641, -0.36625832,  0.6190873 ],
        [ 1.44838575,  0.94288428, -0.42198293,  1.14680355]]), 2)

In [68]:
# Comparing the array with a string is element-wise and yields a boolean
# array. The match is case-sensitive, so 'bob' gives False.
names == 'Bob'

array([ True, False, False, False, False, False, False], dtype=bool)

In [69]:
# A boolean array used as an index keeps the rows of data where it is True.
data[names == 'Bob']

array([[ 0.95966463,  0.10460413,  1.18129797, -1.62281603]])

In [72]:
# Rows named either 'Bob' or 'will': element-wise OR of the two comparisons.
mask = np.logical_or(names == 'Bob', names == 'will')
mask

array([ True, False,  True, False,  True, False, False], dtype=bool)

In [73]:
# Clamp negatives to zero: the mask `data < 0` has the same shape as data,
# so it selects individual elements, which are overwritten in place.
data[data < 0] = 0

In [75]:
# A 1-D mask over the names selects entire rows: every row whose name is not
# 'joe' is overwritten with 7 (in place).
data[names !='joe'] = 7
data

array([[ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.        ,  0.        ,  0.        ,  0.4284356 ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.        ,  0.29746641,  0.        ,  0.6190873 ],
       [ 1.44838575,  0.94288428,  0.        ,  1.14680355]])

## Fancy Indexing

In [81]:
# Build an 8x4 float array whose row i is filled with the value i.
# The column vector 0..7 is broadcast across the four columns, so no
# explicit Python loop over the rows is needed.
arr = np.empty((8, 4))
arr[:] = np.arange(8)[:, np.newaxis]

arr

array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])

In [83]:
# To select a subset of rows in a particular order, pass a list or
# ndarray of integers specifying the desired order.
arr[[4, 3, 0, 6]]

array([[ 4.,  4.,  4.,  4.],
       [ 3.,  3.,  3.,  3.],
       [ 0.,  0.,  0.,  0.],
       [ 6.,  6.,  6.,  6.]])

In [86]:
# 8x4 grid holding 0..31 in row-major order, shown with its number of axes.
arr = np.arange(32).reshape(8, 4)
arr, arr.ndim

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23],
        [24, 25, 26, 27],
        [28, 29, 30, 31]]), 2)

In [87]:
# One index list per axis: the lists are paired up element-wise, so this
# picks the elements at (1, 0), (5, 3), (7, 1) and (2, 2) and returns a
# 1-D array, not a rectangular block.
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [88]:
# Rectangular selection: rows 1, 5, 7, 2 crossed with columns 0, 3, 1, 2.
# np.ix_ builds the open mesh of row/column indices in a single step.
arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

## Transposing Arrays and Swapping Axes

In [90]:
# 3x5 grid holding 0..14 in row-major order, used for the transpose example.
arr = np.arange(15).reshape(3, 5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [91]:
# .T swaps the two axes: the 3x5 array is displayed as 5x3.
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])