Numeric Computing - NumPy

In [1]:
import sys
import numpy as np

Basic NumPy arrays

In [2]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [3]:
a = np.array([1,2,3,4])

In [4]:
# NOTE(review): "1,5" is parsed as two separate elements (1 and 5), so b holds
# six values; 1.5 was probably intended -- confirm, then re-run the cells that display b.
b = np.array([0, .5, 1, 1,5, 2])

In [5]:
a[0], a[1]    # accessing index 0, and 1 of array a 

(1, 2)

In [6]:
a[0:]

array([1, 2, 3, 4])

In [7]:
a[1:3]  # index 1 (inclusive) - 3 (exclusive)

array([2, 3])

In [8]:
a[1:-1]   # index 1 (inclusive) - last element (exclusive) -> second to last element

array([2, 3])

In [10]:
a[::2]  # start and stop are both omitted, so the slice spans the whole array;
        # the 2 is the step / stride value, so every second element is selected.

array([1, 3])

In [11]:
b

array([0. , 0.5, 1. , 1. , 5. , 2. ])

In [12]:
b[0], b[1], b[2]

(0.0, 0.5, 1.0)

In [13]:
b[[0, 2, -1]]  # first, third , and last element

array([0., 1., 2.])

Array Types

In [14]:
a

array([1, 2, 3, 4])

In [15]:
a.dtype

dtype('int64')

In [16]:
b

array([0. , 0.5, 1. , 1. , 5. , 2. ])

In [18]:
b.dtype

dtype('float64')

In [20]:
np.array([1, 2, 3, 4], dtype=float) # store each value as a float instead of an int

array([1., 2., 3., 4.])

In [22]:
np.array([1, 2, 3, 4], dtype=np.int8) # convert to signed 8 bit

array([1, 2, 3, 4], dtype=int8)

In [23]:
c = np.array(['a', 'b', 'c'])

In [24]:
c.dtype  # returns U1 which is unicode string with a fixed length of 1 character

dtype('<U1')

Dimensions and Shapes

In [25]:
# 2-D array built from a list of rows
A = np.array([[1, 2, 3],
              [4, 5, 6]])

In [26]:
A.shape   # 2 rows, 3 columns

(2, 3)

In [27]:
A.ndim   # 2  -> 2D array

2

In [28]:
A.size  # total elements

6

In [29]:
# 3-dimensional array with shape (2, 2, 3):
# 2 layers (2-D arrays), each holding 2 rows and 3 columns
B = np.array([
    [[12, 11, 10], [9, 8, 7]],   # layer 0
    [[6, 5, 4], [3, 2, 1]],      # layer 1
])

In [30]:
B

array([[[12, 11, 10],
        [ 9,  8,  7]],

       [[ 6,  5,  4],
        [ 3,  2,  1]]])

In [31]:
B.shape   # 2 layers (2D arrays), each containing 2 rows and 3 columns

(2, 2, 3)

In [32]:
B.ndim   # 3-Dimensional array

3

In [33]:
B.size   # 12 total elements

12

Indexing and Slicing of Matrices

In [36]:
# Square matrix
A = np.array([
#    0  1  2   <- column index
    [1, 2, 3], # 0  <- row index
    [4, 5, 6], # 1
    [7, 8, 9]  # 2
])

In [37]:
A[1]  # access the second row 

array([4, 5, 6])

In [38]:
A[1][0] # access the second row, first column

4

In [None]:
# General form: A[d1, d2, d3, d4] -- one index (or slice) per dimension

In [39]:
A[1, 0]  # access the second row, first column

4

In [40]:
A[0:2]  # access first A[0] and second row A[1] excluding last row A[2]

array([[1, 2, 3],
       [4, 5, 6]])

In [41]:
A[:, :2] # all rows and the first two columns

array([[1, 2],
       [4, 5],
       [7, 8]])

In [42]:
A[:2, :2] #  first two rows and the first two columns, the top-left portion of the matrix

array([[1, 2],
       [4, 5]])

In [43]:
A[:2, 2:] # first two rows and the columns starting from the third column (index 2) and going to the end

array([[3],
       [6]])

In [44]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [45]:
# replace the second row of the matrix A with a new array [10, 10, 10]
A[1] = np.array([10, 10, 10])

In [46]:
A

array([[ 1,  2,  3],
       [10, 10, 10],
       [ 7,  8,  9]])

In [47]:
# replace the entire third row of the matrix A with 99
A[2] = 99

In [48]:
A

array([[ 1,  2,  3],
       [10, 10, 10],
       [99, 99, 99]])

Summary Statistics

In [49]:
a = np.array([1, 2, 3, 4])

In [50]:
a.sum() # 1 + 2 + 3 + 4 = 10

10

In [51]:
a.mean() # 10 / 4 = 2.5

2.5

In [52]:
# standard deviation 
a.std()

1.118033988749895

In [53]:
a.var() # variance of elements

1.25

In [54]:
# 3x3 matrix used for the whole-array and per-axis statistics that follow
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

In [55]:
A.sum()

45

In [56]:
A.mean()

5.0

In [57]:
A.std()

2.581988897471611

In [58]:
A.sum(axis=0)  # column-wise sum of the array (1 + 4 + 7 = 12, 2 + 5 + 8 = 15 ....)

array([12, 15, 18])

In [59]:
A.sum(axis=1)  # row-wise sum of the array (1 + 2 + 3 = 6, 4 + 5 + 6 = 15 ....)

array([ 6, 15, 24])

In [60]:
A.mean(axis=0)

array([4., 5., 6.])

In [61]:
A.mean(axis=1)

array([2., 5., 8.])

In [62]:
A.std(axis=0)

array([2.44948974, 2.44948974, 2.44948974])

In [63]:
A.std(axis=1)

array([0.81649658, 0.81649658, 0.81649658])

Broadcasting and Vectorized Operations

In [68]:
# creates a NumPy array a containing integers from 0 to 3
a = np.arange(4)   # arange() generates a sequence of numbers within a specified range

In [69]:
a

array([0, 1, 2, 3])

In [70]:
a + 10   # add 10 to each element

array([10, 11, 12, 13])

In [71]:
a * 10  # multiply each element by 10

array([ 0, 10, 20, 30])

In [72]:
a

array([0, 1, 2, 3])

In [73]:
a += 100 # add 100 to every element in place (same values as a = a + 100)

In [74]:
a

array([100, 101, 102, 103])

In [75]:
l = [0, 1, 2, 3]

In [76]:
[i * 10 for i in l] # iterates over each element i in the list l and multiplies by 10

[0, 10, 20, 30]

In [77]:
a = np.arange(4)

In [78]:
a

array([0, 1, 2, 3])

In [80]:
b = np.array([10, 10, 10, 10])

In [81]:
b

array([10, 10, 10, 10])

In [82]:
a + b   # (10 + 0 = 10, 10 + 1 = 11, 10 + 2 = 12, 10 + 3 = 13)

array([10, 11, 12, 13])

In [83]:
a * b

array([ 0, 10, 20, 30])

Boolean Arrays (Masks)

In [84]:
a = np.arange(4)

In [85]:
a

array([0, 1, 2, 3])

In [86]:
a[0], a[-1]

(0, 3)

In [87]:
a[[0, -1]]

array([0, 3])

In [88]:
# boolean mask to select specific elements
a[[True, False, False, True]] 

array([0, 3])

In [89]:
a

array([0, 1, 2, 3])

In [90]:
a >= 2  # only 2, 3 will be true

array([False, False,  True,  True])

In [91]:
a[a>=2]  # 2 and 3 are true

array([2, 3])

In [92]:
a.mean()

1.5

In [93]:
a[a > a.mean()] # values greater than the mean 

array([2, 3])

In [94]:
a[~(a > a.mean())] # ~ negates the boolean values, effectively selecting elements that are not greater than the mean

array([0, 1])

In [95]:
a[(a == 0) | (a == 1)]  # | is element-wise OR: elements equal to 0 or equal to 1

array([0, 1])

In [96]:
a[(a <= 2) & (a % 2 == 0)]  # & is element-wise AND: elements that are <= 2 and even (remainder 0 when divided by 2)

array([0, 2])

In [98]:
# 3x3 array filled w/ random integers between 0 (inclusive) and 100 (exclusive).
# NOTE: no seed is set, so the values change on every run; the outputs shown
# in the following cells come from one particular run.
A = np.random.randint(100, size=(3, 3))

In [99]:
A

array([[80, 16, 78],
       [19, 25, 51],
       [94, 53, 80]])

In [100]:
A[np.array([
    [True, False, True],
    [False, True, False],
    [True, False, True]
])]

array([80, 78, 25, 94, 80])

In [101]:
A > 30  # bool index for elements greater than 30

array([[ True, False,  True],
       [False, False,  True],
       [ True,  True,  True]])

In [102]:
A[A > 30]  # elements greater than 30

array([80, 78, 51, 94, 53, 80])

Linear Algebra

In [105]:
# 3 x 3 matrix (3 rows, 3 columns)
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

In [104]:
# 3 x 2 matrix (3 rows, 2 columns)
B = np.array([[6, 5],
              [4, 3],
              [2, 1]])

In [106]:
A.dot(B)  # Dot product / matrix multiplication 

# each element result[i, j] is computed as the dot product of the i-th row 
# of matrix A and the j-th column of matrix B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [107]:
# Another (preferred) way to perform matrix multiplication: the @ operator

A @ B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [108]:
B.T    # obtain the transpose of the matrix B.

# Get transpose of a matrix by interchanging its rows and columns

array([[6, 4, 2],
       [5, 3, 1]])

In [109]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [111]:
B.T @ A   # matrix multiplication between the transpose of matrix B and matrix A


# Valid operation since the number of columns in the transposed matrix B.T (3) is
# the same as the number of rows in matrix A (3); the result has shape (2, 3)

array([[36, 48, 60],
       [24, 33, 42]])

Size of Objects in Memory: Ints & Floats

In [112]:
# Even a small Python int is a full object that takes more than 24 bytes
sys.getsizeof(1)

28

In [113]:
# Larger integers take even more memory (Python ints grow with their magnitude)
sys.getsizeof(10**100)

72

In [114]:
# A NumPy integer element is much smaller: itemsize is the size in bytes of one element
np.dtype(int).itemsize

8

In [115]:
# An 8-bit integer element is smaller still: a single byte
np.dtype(np.int8).itemsize

1

In [116]:
np.dtype(float).itemsize

8

Size of Objects in Memory: Lists

In [117]:
# A one-element list (getsizeof measures only the list object itself,
# not the int object it refers to)
sys.getsizeof([1])

64

In [118]:
# An array of one element in numpy (nbytes counts only the element data,
# not the overhead of the array object itself)
np.array([1]).nbytes

8

Size of Objects in Memory: Performance

In [119]:
l = list(range(100000))

In [120]:
a = np.arange(100000)

In [121]:
%time np.sum(a ** 2)

CPU times: user 102 µs, sys: 153 µs, total: 255 µs
Wall time: 257 µs


333328333350000