# NumPy Introduction

In [1]:
import numpy as np

# Creating Arrays

## array()

In [2]:
# array() accepts any sequence-like object

np.array([17, 9, 3, -5, -1])

array([17,  9,  3, -5, -1])

## arange()

In [3]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [4]:
# arange(start, stop) returns a numpy array

np.arange(5,11)

array([ 5,  6,  7,  8,  9, 10])

In [5]:
# arange(start, stop, step_size) returns a numpy array

np.arange(5,11,2)

array([5, 7, 9])

## Random numbers

### rand()

In [6]:
# Returns a random float in the half-open interval [0, 1): 0 is included, 1 is excluded.

np.random.rand()

0.31511483633968373

In [7]:
# Optionally, give a shape.

np.random.rand(5,5)

array([[0.55357718, 0.35207896, 0.94546438, 0.1038947 , 0.52505351],
       [0.74428923, 0.35827487, 0.25531908, 0.68505037, 0.56475476],
       [0.20151129, 0.81622112, 0.91101268, 0.70237952, 0.29614193],
       [0.92253565, 0.93307836, 0.2662594 , 0.15805657, 0.84235039],
       [0.61095776, 0.57409032, 0.2984173 , 0.3523244 , 0.8406534 ]])

### randint()

In [8]:
# Returns a random integer from start (inclusive) up to stop (exclusive).
# randint(start, exclusive stop, (shape)) 

np.random.randint(1, 101)

20

In [9]:
# Optionally, give a shape (as tuple).
    
np.random.randint(1, 101, (5,5))

array([[28, 84, 19, 46, 88],
       [94, 60, 95, 57, 17],
       [11, 25, 62, 43, 41],
       [12, 19, 98, 94,  5],
       [37, 83, 63, 33, 74]])

### uniform()

In [10]:
# Returns a random float drawn uniformly from start (inclusive) up to stop (exclusive).
# uniform(start, exclusive stop, (shape)) 

np.random.uniform(1,10)

3.567066929796134

In [11]:
# Optionally, give a shape (as tuple).

np.random.uniform(1,10, (5,5))

array([[5.75748352, 4.22320336, 3.21387771, 6.61041958, 1.6721569 ],
       [3.75827812, 4.7103626 , 2.08907535, 5.6796849 , 4.73523768],
       [4.14154512, 9.79130929, 4.73888362, 8.81028252, 6.35240947],
       [4.20261175, 8.47697532, 6.16863552, 2.01061578, 6.41714991],
       [2.6836976 , 1.12979343, 2.05945192, 1.18596816, 1.91565495]])

### randn()

In [13]:
# Returns a random number from the standard normal distribution (mean 0, std 1); about 99.7% of values fall between -3 and 3 (68-95-99.7 rule).

np.random.randn()

-0.01742388663201233

In [14]:
# Optionally, give a shape.

np.random.randn(5,5)

array([[-0.5763517 , -1.28982403, -1.43558955,  1.53212456,  0.39517139],
       [-0.54843039, -0.53875795, -1.28718008, -0.30856232, -0.25192088],
       [ 0.59947606,  0.5730308 ,  0.31330857, -1.1172102 , -1.60146226],
       [-1.58988323, -0.94910372,  0.75897641,  0.62425324, -0.95076654],
       [ 0.64742752,  0.27555859, -1.01478917, -0.56835409,  0.05184789]])

## All of the elements in an array must be the same type.

In [15]:
np.array([17, 9, 3, -5, -1])

array([17,  9,  3, -5, -1])

## Multidimensional array (Matrix)

In [16]:
# A nested sequence of equal-length lists is converted to a 2-D array.

np.array([[1,2,3,4,5], [6,7,8,9,19], [11,12,13,14,15]])

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 19],
       [11, 12, 13, 14, 15]])

### reshape()

In [17]:
np.arange(1, 16)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [18]:
# reshape(n_rows, n_columns) changes the shape of a numpy array.

X = np.arange(1, 16).reshape(3,5)
X

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

## Get the shape of an array

In [19]:
# Shape is returned as a tuple (# rows, # columns).

X.shape

(3, 5)

## Get the data type of an array

In [21]:
X.dtype

dtype('int32')

# Arithmetic with NumPy Arrays
### Element-wise operations

In [22]:
# A Python List

a_list = [1,2,3,4,5]

In [23]:
# List: multiplying by 2 repeats the list; it does not multiply the elements.
a_list*2

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

In [24]:
# A NumPy array

arr = np.array([1,2,3,4,5])

In [25]:
# Array: multiplying by 2 multiplies every element.
arr*2

array([ 2,  4,  6,  8, 10])

In [26]:
# Element-wise power: every element is squared.
arr**2

array([ 1,  4,  9, 16, 25], dtype=int32)

In [27]:
# The scalar 100 is added to every element.
arr + 100

array([101, 102, 103, 104, 105])

In [28]:
arr

array([1, 2, 3, 4, 5])

In [29]:
arr2 = np.arange(6,11)
arr2

array([ 6,  7,  8,  9, 10])

In [30]:
# Element-wise addition 

arr + arr2

array([ 7,  9, 11, 13, 15])

In [31]:
# Element-wise multiplication

arr*arr2

array([ 6, 14, 24, 36, 50])

# Useful Array Methods

In [32]:
arr = np.array([17, 9, 3, -5, -1])

### max()

In [33]:
arr.max()

17

### min()

In [34]:
arr.min()

-5

### mean()

In [35]:
arr.mean()

4.6

### std()

In [36]:
arr.std()

7.735631842325487

### sort()

In [37]:
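# Python's built-in sorted() returns a new sorted list (not a NumPy array) and leaves the array unchanged; use np.sort(arr) to get a sorted array copy.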

sorted(arr)

[-5, -1, 3, 9, 17]

In [38]:
arr

array([17,  9,  3, -5, -1])

In [39]:
# Changes/sorts the array in-place.

arr.sort()

In [40]:
arr

array([-5, -1,  3,  9, 17])

## Array Axis

In [41]:
# Multidimensional array

X = np.arange(1, 16).reshape(3,5)
X

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [42]:
# Returns the mean of the full array.

X.mean()

8.0

In [43]:
# Sum all elements in the array

X.sum()

120

## axis=0

In [44]:
# Returns the mean of each column (computed down the rows).

X.mean(axis=0)

array([ 6.,  7.,  8.,  9., 10.])

In [45]:
# Sum each column (adding down the rows)

X.sum(axis=0)

array([18, 21, 24, 27, 30])

## axis=1

In [46]:
# Returns the mean of each row (computed across the columns).

X.mean(axis=1)

array([ 3.,  8., 13.])

# Array Indexing and Slicing

In [47]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [48]:
arr[3]

3

In [49]:
# Slice: elements from index 3 up to, but not including, index 8.
arr[3:8]

array([3, 4, 5, 6, 7])

In [50]:
# Everything except the last element.
arr[:-1]

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [51]:
# A negative index counts from the end: -1 is the last element.
arr[-1]

9

In [52]:
# Returns a reversed view of the array (basic slicing does not copy the data; call .copy() if an independent copy is needed)

arr[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

## 2d Array (Matrix) indexing

In [53]:
arr_2d = np.arange(12).reshape(4,3)
arr_2d

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

### Select a row

In [54]:
# Returns a row (a sample)

arr_2d[0]          

array([0, 1, 2])

### Select a column

In [55]:
# Returns a given column (a single feature)

arr_2d[:, 0]  

array([0, 3, 6, 9])

## Boolean (conditional) selection

In [56]:
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [57]:
arr > 5      # boolean array

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [58]:
arr[arr>5]      # you will use this A LOT in pandas!

array([ 6,  7,  8,  9, 10])

### Use a tilde to invert a condition

In [59]:
arr[~(arr>5)]

array([1, 2, 3, 4, 5])

# Methods for Boolean Arrays

### sum()

In [60]:
# Counts the elements greater than 5 (each True counts as 1).
(arr>5).sum()

5

### any()

In [61]:
# True if at least one element is greater than 5.
(arr>5).any()

True

### all()

In [62]:
# True only if every element is greater than 5.
(arr>5).all()

False

# Universal functions and other array-level functions

In [63]:
arr = np.arange(10, -1, -1)
arr

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])

## percentile()

In [64]:
np.percentile(arr, [25, 75])   # 25th and 75th percentiles (Q1, Q3); the interquartile range is Q3 - Q1

array([2.5, 7.5])

## min()

In [65]:
np.min(arr)

0

## argmin()

In [66]:
# index position of the smallest value

np.argmin(arr)

10

## max()

In [67]:
np.max(arr)

10

## argmax()

In [68]:
# index position of the largest value

np.argmax(arr)

0

## median()

In [69]:
arr

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])

In [70]:
np.median(arr)

5.0

## sqrt()

In [71]:
np.sqrt(arr)

array([3.16227766, 3.        , 2.82842712, 2.64575131, 2.44948974,
       2.23606798, 2.        , 1.73205081, 1.41421356, 1.        ,
       0.        ])

## argsort()

In [72]:
an_arr = np.array([12, 3, 2, 8])

In [73]:
# index position of the smallest to the largest values

np.argsort(an_arr)

array([2, 1, 3, 0], dtype=int64)

## maximum()

In [74]:
x = [5, 10, 15, 20]
y = [2, 4, 20, 40]

In [75]:
# Returns the element-wise maximum of the elements

np.maximum(x,y)

array([ 5, 10, 20, 40])

## minimum()

In [76]:
# Returns the element-wise minimum of the elements

np.minimum(x,y)

array([ 2,  4, 15, 20])

# Conditional Logic 
## where()

In [77]:
arr = np.array([-10, -15, -20, 10, 15, 20])

In [78]:
# A vectorized version of the ternary expression x if condition else y

np.where(arr > 0, 1, -1)

array([-1, -1, -1,  1,  1,  1])

In [79]:
# The replacement values may also be arrays: positive elements become 1, the rest keep their value from arr.
np.where(arr > 0, 1, arr)

array([-10, -15, -20,   1,   1,   1])