### Chapter 4 notes/practice for Python for Data Analysis

In [1]:
import numpy as np

# Start from an ordinary Python list and wrap it in a one-dimensional ndarray.
data1 = list(range(1, 7))

arr1 = np.asarray(data1)

print(arr1)

[1 2 3 4 5 6]


#### Build some arrays

In [23]:
# 10x10 grid holding the integers 0..99, filled row by row

array100 = np.arange(0, 100).reshape((10, 10))

print(array100)

[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47 48 49]
 [50 51 52 53 54 55 56 57 58 59]
 [60 61 62 63 64 65 66 67 68 69]
 [70 71 72 73 74 75 76 77 78 79]
 [80 81 82 83 84 85 86 87 88 89]
 [90 91 92 93 94 95 96 97 98 99]]


In [25]:
# Allocate a 3x3 array without initialising it: np.empty returns whatever
# bytes already sit in the allocated memory, so the printed values are
# arbitrary (all zeros is a common outcome, but it is not guaranteed).

emptyCube = np.empty(shape=(3, 3))
print(emptyCube)

[[ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]


#### Playing with ndarray indexing and slicing


In [4]:
# Assigning a scalar to a slice broadcasts it to every selected element;
# the slice 2:4 covers indices 2 and 3 (the stop index is excluded).
arr1[2:4] = 10

print(arr1)

[ 1  2 10 10  5  6]


In [5]:
# A basic slice is a view onto the parent array's data -- nothing is copied --
# so writing through the slice also changes the original array.

arr2 = arr1[1:3]

arr2.fill(13)

print(arr1)



[ 1 13 13 10  5  6]


In [6]:
# .copy() gives the slice its own data, so edits to the copy leave arr1 alone

arr3 = arr1[1:3].copy()

arr3.fill(100)

print(arr1)

[ 1 13 13 10  5  6]


In [10]:
# a multi-dimensional array is indexed with one comma-separated index per axis

data2 = (
    ((1, 2, 3), (4, 5, 6)),
    ((7, 8, 9), (10, 11, 12)),
)

multiArray = np.array(data2)

multiArray[0, 0, 0]

1

In [11]:
#produce the indices to get 8
# one index per axis: outer block 1, then row 0 of that block, then column 1

multiArray[1, 0, 1]

8

In [16]:
# pull out 1, 2, 4 and 5: take block 0, then its first two rows and first two columns

multiArray[0, :2, :2]

array([[1, 2],
       [4, 5]])

#### Playing around with boolean indexing

In [10]:
# One breed label per row of dataSet (9 labels for 9 rows).
dogs = np.array(['terrier', 'dachshund', 'newfoundland', 'terrier', 'terrier', 'labrador', 'poodle', 'dachshund', 'corgi'])

# Seed NumPy's global generator so the random table below is identical on
# every Restart & Run All.
# NOTE: the outputs saved in this section were captured before the seed was
# added; re-run the cells to refresh them.
np.random.seed(42)

# 9x3 table of random integers drawn from [1, 100)
dataSet = np.random.randint(1, 100, (9,3))

print(dataSet)

[[57 74 68]
 [14 43 35]
 [17 41 36]
 [80 74 98]
 [67 21 81]
 [30 85 61]
 [76 89 23]
 [32 27 25]
 [29 83 17]]


In [11]:
# comparing the array with a string works elementwise: the result is a
# boolean mask with one True/False per entry of dogs
dogs == 'terrier'

array([ True, False, False,  True,  True, False, False, False, False], dtype=bool)

In [13]:
# rows for every dog that is not a terrier (same mask as ~(dogs == 'terrier'))
dataSet[dogs != 'terrier']

array([[14, 43, 35],
       [17, 41, 36],
       [30, 85, 61],
       [76, 89, 23],
       [32, 27, 25],
       [29, 83, 17]])

In [21]:
# a boolean row mask can be combined with an integer column index:
# keep the rows whose dog is 'corgi', then take column 0 of those rows
dataSet[dogs == 'corgi', 0]

array([29])

#### Fancy Indexing

In [26]:
# the integers 0..19 in sequence, laid out as ten rows of two columns

twoCol = np.arange(20).reshape(10, 2)
print(twoCol)

[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]]


In [40]:
#using fancy indexing, reorder so that the odd numbers precede the even numbers in each row
# the index list [1,0] picks column 1 first and column 0 second, for every row (:)

twoCol[:, [1,0]]

array([[ 1,  0],
       [ 3,  2],
       [ 5,  4],
       [ 7,  6],
       [ 9,  8],
       [11, 10],
       [13, 12],
       [15, 14],
       [17, 16],
       [19, 18]])

In [42]:
# using fancy indexing, produce two rows, [19,18] and [1,0]:
# np.ix_ pairs the row picks (9, 0) with the column picks (1, 0) in one step

twoCol[np.ix_([9, 0], [1, 0])]

array([[19, 18],
       [ 1,  0]])

#### Transposing arrays 

In [53]:
# 0..15 arranged as two rows of eight
arr = np.arange(16).reshape(2, 8)

arr

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])

In [54]:
# .T is the transpose: the axes are swapped, so rows become columns
arr.T

array([[ 0,  8],
       [ 1,  9],
       [ 2, 10],
       [ 3, 11],
       [ 4, 12],
       [ 5, 13],
       [ 6, 14],
       [ 7, 15]])

In [55]:
# matrix product of the transpose with the array itself
arr.T.dot(arr)

array([[ 64,  72,  80,  88,  96, 104, 112, 120],
       [ 72,  82,  92, 102, 112, 122, 132, 142],
       [ 80,  92, 104, 116, 128, 140, 152, 164],
       [ 88, 102, 116, 130, 144, 158, 172, 186],
       [ 96, 112, 128, 144, 160, 176, 192, 208],
       [104, 122, 140, 158, 176, 194, 212, 230],
       [112, 132, 152, 172, 192, 212, 232, 252],
       [120, 142, 164, 186, 208, 230, 252, 274]])

In [68]:
# rebind arr to the same 16 values arranged with shape (4, 4, 1)
arr = arr.reshape((4, 4, 1))

arr

array([[[ 0],
        [ 1],
        [ 2],
        [ 3]],

       [[ 4],
        [ 5],
        [ 6],
        [ 7]],

       [[ 8],
        [ 9],
        [10],
        [11]],

       [[12],
        [13],
        [14],
        [15]]])

In [73]:
# move axis 0 to the end, then lay the result out as a single row of 16
arr.transpose(1, 2, 0).reshape((1, 16))

array([[ 0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15]])

In [71]:
# reverse the order of the axes (axis 2 comes first, axis 0 last)
arr.transpose(2,1,0)

array([[[ 0,  4,  8, 12],
        [ 1,  5,  9, 13],
        [ 2,  6, 10, 14],
        [ 3,  7, 11, 15]]])

In [72]:
# swap the first two axes and leave the last axis where it is
arr.transpose(1,0,2)

array([[[ 0],
        [ 4],
        [ 8],
        [12]],

       [[ 1],
        [ 5],
        [ 9],
        [13]],

       [[ 2],
        [ 6],
        [10],
        [14]],

       [[ 3],
        [ 7],
        [11],
        [15]]])

#### ufuncs

In [74]:
# 0..9 arranged as two rows of five
arr = np.arange(10).reshape((2, 5))

arr

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [75]:
# np.sqrt is a unary ufunc: it takes the square root of every element
np.sqrt(arr)

array([[ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ],
       [ 2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ]])

In [82]:
# A ufunc can store its result in an existing array; naming the input as the
# output overwrites it in place.

floatArr = arr.astype(np.float64)

np.sqrt(floatArr, out=floatArr)

floatArr


array([[ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ],
       [ 2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ]])

In [85]:
#using conditional func
# np.where(cond, a, b) takes a where cond is True and b elsewhere, elementwise

np.where(floatArr > 1, 1, 0)


array([[0, 0, 1, 1, 1],
       [1, 1, 1, 1, 1]])

#### Linear Algebra

In [101]:
# `*` multiplies element by element; the matrix product needs .dot / np.dot

x = np.arange(-2, 2).reshape((1, 4))

y = np.arange(3, 7).reshape((1, 4))

print('x:{} \ny:{} \nx*y:{}'.format(x, y, np.multiply(x, y)))

np.dot(x, y.T)

x:[[-2 -1  0  1]] 
y:[[3 4 5 6]] 
x*y:[[-6 -4  0  6]]


array([[-4]])