In [5]:
import numpy as np
import numpy.random as rn

In [9]:
# Exam grades as a nested Python list: one inner list per student (10 students).
# Column 2 is used below as the final exam; columns 0 and 1 are presumably
# midterm 1 and midterm 2 -- TODO confirm.
grades = [[79, 95, 60],
 [95, 60, 61],
 [99, 67, 84],
 [76, 76, 97],
 [91, 84, 98],
 [70, 69, 96],
 [88, 65, 76],
 [67, 73, 80],
 [82, 89, 61],
 [94, 67, 88]]

In [10]:
# How to get final exam grade of student 0? 
grades[0][2]
# How to get grades of student 2?
grades[2]
#How to get grades of all students in midterm 1?
#How to get midterm grades of the first three students (or all female students, or those who failed final)?
#How to get mean grade of each exam?
#How to get (weighted) average exam grade for each student?


[99, 67, 84]

In [11]:
gArray = np.array(grades)
gArray

array([[79, 95, 60],
       [95, 60, 61],
       [99, 67, 84],
       [76, 76, 97],
       [91, 84, 98],
       [70, 69, 96],
       [88, 65, 76],
       [67, 73, 80],
       [82, 89, 61],
       [94, 67, 88]])

In [22]:
gArray[0,2]



60

In [12]:
gArray[2,:]



array([99, 67, 84])

In [9]:
gArray[:, 0]



array([79, 95, 99, 76, 91, 70, 88, 67, 82, 94])

In [23]:
gArray[:3, :2]


array([[79, 95],
       [95, 60],
       [99, 67]])

## ndarray is used for storage of homogeneous data
i.e., all elements must be the same type
Every array must have a shape
And a dtype
Supports convenient slicing, indexing and efficient vectorized computation
Vectorized operations avoid explicit for loops and are much more efficient

In [11]:
gArray.ndim

2

In [12]:
gArray.shape

(10, 3)

In [13]:
gArray.dtype

dtype('int32')

In [14]:
# Three ways to build a 2x3 array. Only the last expression of a cell is
# displayed, so the output below shows just the np.ones result.
np.array([[0,1,2],[2,3,4]])  # from a nested list
np.zeros((2,3))              # filled with 0.0
np.ones((2,3))               # filled with 1.0

array([[1., 1., 1.],
       [1., 1., 1.]])

In [15]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [16]:
rn.randint(0, 10, (3,3))

array([[9, 9, 1],
       [6, 8, 9],
       [9, 0, 8]])

In [17]:
gArray.astype(np.float64)

array([[79., 95., 60.],
       [95., 60., 61.],
       [99., 67., 84.],
       [76., 76., 97.],
       [91., 84., 98.],
       [70., 69., 96.],
       [88., 65., 76.],
       [67., 73., 80.],
       [82., 89., 61.],
       [94., 67., 88.]])

In [18]:
# Convert an array of numeric strings to floats.
# np.float was only an alias of the builtin float; it was deprecated in
# NumPy 1.20 and removed in 1.24, so name the concrete dtype instead.
num_string = np.array(['1.0', '2.05', '3'])
num_float = num_string.astype(np.float64)
num_float

array([1.  , 2.05, 3.  ])

## Array operations
Between arrays and scalars
Between equal-sized arrays: elementwise operation

In [25]:
arr = np.array([[0,1,2],[3,4,5]])

In [26]:
arr * 2

array([[ 0,  2,  4],
       [ 6,  8, 10]])

In [27]:
arr ** 2

array([[ 0,  1,  4],
       [ 9, 16, 25]], dtype=int32)

In [28]:
2 ** arr

array([[ 1,  2,  4],
       [ 8, 16, 32]], dtype=int32)

In [29]:
arr * arr

array([[ 0,  1,  4],
       [ 9, 16, 25]])

In [30]:
arr / (arr+1)

array([[0.        , 0.5       , 0.66666667],
       [0.75      , 0.8       , 0.83333333]])

## Speed difference between for loop and vectorized computation

In [2]:
# Needs the import cell at the top (import numpy as np) to have run first;
# the NameError recorded below presumably came from running this cell on a fresh kernel.
a = np.random.rand(1000000,1)  # one million random numbers, shape (1000000, 1)
# Vectorized: square the whole array in one NumPy call
%timeit a**2
# Python loop: square one row at a time inside a list comprehension
%timeit [a[i]**2 for i in range(1000000)]

NameError: name 'np' is not defined

In [3]:
from functools import reduce
def mySum(inputList):
    """Add up the elements of ``inputList`` with a plain Python loop.

    Starts from the integer 0 and accumulates with ``+=``, one element at a
    time; used here as the slow baseline against ``np.sum``.
    """
    total = 0
    for value in inputList:
        total += value
    return total
print('profiling mySum')
%timeit mySum(a)
print('profiling np.sum')
%timeit np.sum(a)
print('profiling lambda')
%timeit reduce(lambda x, y: x+y, a)

profiling mySum


NameError: name 'a' is not defined

## Array indexing and slicing
Somewhat similar to python list, but much more flexible
Basic slicing returns a view instead of a copy (contrary to python list slicing); indexing with a list of indices or a boolean mask returns a copy

In [34]:
gArray

array([[79, 95, 60],
       [95, 60, 61],
       [99, 67, 84],
       [76, 76, 97],
       [91, 84, 98],
       [70, 69, 96],
       [88, 65, 76],
       [67, 73, 80],
       [82, 89, 61],
       [94, 67, 88]])

In [33]:
gArray[0]

array([79, 95, 60])

In [13]:
gArray[:, 2]

array([60, 61, 84, 97, 98, 96, 76, 80, 61, 88])

In [None]:
gArray[1:3]


In [None]:
gArray[0][2]


In [None]:
gArray[0,2]

In [24]:
gArray[:, [2]]

array([[60],
       [61],
       [84],
       [97],
       [98],
       [96],
       [76],
       [80],
       [61],
       [88]])

In [25]:
gArray[:2, [0, 2]]

array([[79, 60],
       [95, 61]])

### Using two lists of indices to get sub-arrays

In [None]:
# Two index lists are paired element by element, so they must have the same
# length (or be broadcastable). Here they have 2 and 3 entries, which raises
# IndexError; it is caught and printed so the notebook keeps running.
try: 
    gArray[[0, 2], [0, 1, 2]]   # both indices are lists
except IndexError as err:
    print(err)

In [26]:
gArray[[0, 2], [0, 2]] # this works, but not what you may have wanted.

array([79, 84])

#### Correct way

In [None]:
gArray[[0,2]][:,[0,2]] # opt 1

In [None]:
gArray[np.ix_([0, 2], [0, 2])] # opt 2

### Array slices are views

In [27]:
a = np.array([1, 2, 3])
b = a[:2]
b[0]=10
a


array([10,  2,  3])

In [29]:
arr2 = gArray.copy()
arr2 is gArray



False

In [30]:
arr2[1,:]=100
gArray[1,:]


array([95, 60, 61])

### Boolean indexing as slices

In [None]:
female = [ True, False,  True,  True, False,  True, False, False, False, False]
gArray[female, :]

In [None]:
cat = np.array(['test', 'test', 'practice'])
gArray[:, cat=='test']

In [None]:
# Boolean-mask assignment: raise every grade below 70 up to 70.
# Done on a copy so gArray keeps the original grades that later cells use.
gFloored = gArray.copy()
gFloored[gFloored < 70] = 70
gFloored

In [20]:
gArray

array([[79, 95, 60],
       [95, 60, 61],
       [99, 67, 84],
       [76, 76, 97],
       [91, 84, 98],
       [70, 69, 96],
       [88, 65, 76],
       [67, 73, 80],
       [82, 89, 61],
       [94, 67, 88]])

In [None]:
# Select whole rows with a boolean mask: for every student whose column-2
# grade is below 70, set all of that student's grades to 70.
# Done on a copy so gArray keeps the original grades that later cells use.
gRowFloored = gArray.copy()
gRowFloored[gRowFloored[:, 2] < 70, :] = 70
gRowFloored

In [None]:
a = np.arange(4)
a[[1, 3]]

In [None]:
a[[1,3]] = [5, 6]
a

In [None]:
b = a[[1, 3]]
b

In [None]:
b[0] = 20
a

### Reshaping and transposing!

In [None]:
np.arange(6).reshape((2,3))

In [None]:
np.arange(6).reshape((2,3), order='F')

In [None]:
np.arange(6).reshape(2,3).T

### Reshape and transpose only change the view!


In [None]:
import numpy as np
a = np.arange(6)
b = a.reshape((2,3))
b


In [None]:
b[0,0]=10
a

In [None]:
c = b.transpose()
c[0,0]=6
b

### sum, mean, std, along one axis or in the whole array

In [None]:
gArray.sum(axis=0)

In [None]:
gArray.sum(0) # same as above

In [None]:
gArray.sum(1) # same as gArray.sum(axis=1)

In [None]:
gArray.sum(None) # same as gArray.sum(axis = None)

In [None]:
gArray.sum() # same as above

In [None]:
gArray.mean(axis=0)

In [None]:
gArray.mean(axis=1)

In [None]:
gArray.std(axis=0, ddof=0) # std without correction (population std)

In [None]:
gArray.std(axis=0, ddof=1) # std with Bessel's correction, dividing by N-1 (sample std)

### max, min, argmax, argmin

In [19]:
gArray.max(0) # highest score of each exam

array([99, 95, 98])

In [None]:
gArray.argmax(axis=0) # who has highest score in each exam?

In [None]:
gArray.min(axis=1)

In [None]:
gArray.argmin() # which element in the whole array is the smallest? (default axis = None.)

In [None]:
gArray

### Sort and argsort

In [None]:
gArray

In [None]:
gCopy = gArray.copy() # make a copy so we don't mess with the original data
gCopy.sort(axis = 1) # in-place sort; each row is sorted independently
gCopy

In [None]:
np.sort(gArray, axis = 1) # returns a sorted array and keep the original array unchanged

In [None]:
np.sort(gArray, 0) # each col is sorted independently

In [None]:
np.sort(gArray) # default is actually sorting rows

In [None]:
np.sort(gArray, axis = None) # sort everything, return a single list

In [None]:
# The in-place ndarray.sort() needs an integer axis, so axis=None is rejected.
# That is fine: a flattened sort of a 2-D array in place makes no sense;
# use np.sort(gArray, axis=None) for that. The error is caught and printed so
# Run All does not stop here, and a copy is used so gArray is never reordered.
try:
    gArray.copy().sort(axis=None)
except TypeError as err:
    print(err)

In [None]:
gArray.argsort(axis=0) # ascending order: who has the lowest, second lowest, ..., highest score in each exam

In [None]:
gArray

In [None]:
gArray.argsort(axis=1)

In [None]:
gArray.argsort()

In [None]:
gArray.argsort(None)

### any, all, where, nonzero

In [15]:
gArray < 65

array([[False, False,  True],
       [False,  True,  True],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False,  True],
       [False, False, False]])

In [16]:
np.any(gArray < 65, axis=1)

array([ True,  True, False, False, False, False, False, False,  True,
       False])

In [17]:
np.where(np.any(gArray < 65, axis=1))

(array([0, 1, 8], dtype=int64),)

In [None]:
np.any(gArray < 65, axis = 1).nonzero()

In [None]:
gArray > 75

In [18]:
np.all(gArray > 75, axis = 1)

array([False, False, False,  True,  True, False, False, False, False,
       False])

In [None]:
np.all(gArray > 75, axis = 1).nonzero()

In [None]:
(gArray > 75).nonzero() # returns a tuple of two arrays: row indices and col indices of the True entries

## Matrix

### dot product between two vectors

In [None]:
a = b = np.arange(5)
a.dot(b)


### Weighted average

In [None]:
gArray

In [None]:
gArray.dot([0.3, 0.3, 0.4])

### Rescaling each column by a different scaling factor

In [None]:
import numpy as np
# One scaling factor per column.
scaling = [1.1, 1.05, 1.03]
# Build the diagonal matrix once and name it; the original computed it twice
# and discarded the first result.
scaleMatrix = np.diag(scaling)
# Right-multiplying by a diagonal matrix multiplies column j by scaling[j].
gArray.dot(scaleMatrix)


### rescaling by max

In [None]:
maxInExam = gArray.max(axis=0) # max of each column
gArray.dot(np.diag(100/maxInExam)).round() # divide by the max, times 100, round to whole numbers (result stays float)