In [2]:
import numpy as np
import pandas as pd

## 1. DataTypes and attributes

NOTE: The main type in NumPy is `ndarray`; even seemingly different kinds of arrays are still `ndarray`s. This means an operation that works on one array will also work on another.

In [3]:
# 1-dimensional array, also referred to as a vector
a1 = np.array([1, 2, 3])

# 2-dimensional array, also referred to as a matrix
# (mixing ints and floats upcasts every element to float64)
a2 = np.array([
    [1, 2.0, 3.3],
    [4, 5, 6.5],
])

# 3-dimensional array (a stack of two 3x3 matrices), often called a tensor
a3 = np.array([
    [[1, 2, 3],
     [4, 5, 6],
     [7, 8, 9]],
    [[10, 11, 12],
     [13, 14, 15],
     [16, 17, 18]],
])

In [4]:
a1.shape, a1.ndim, a1.dtype, a1.size, type(a1)

((3,), 1, dtype('int32'), 3, numpy.ndarray)

In [5]:
a2.shape, a2.ndim, a2.dtype, a2.size, type(a2)

((2, 3), 2, dtype('float64'), 6, numpy.ndarray)

In [6]:
a3.shape, a3.ndim, a3.dtype, a3.size, type(a3)

((2, 3, 3), 3, dtype('int32'), 18, numpy.ndarray)

In [7]:
a1

array([1, 2, 3])

In [8]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [9]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [10]:
# Create a DataFrame from a NumPy array.
# pandas is imported once in the notebook's top import cell (as `pd`) rather than mid-notebook,
# so every dependency is visible up front.
# Rows and columns receive default integer labels because none are supplied.
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.3
1,4.0,5.0,6.5


## 2. Creating arrays

In [11]:
sample_array = np.array([1, 2, 3])
sample_array

array([1, 2, 3])

In [12]:
sample_array.dtype

dtype('int32')

In [13]:
ones = np.ones((2,3))
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [14]:

ones.dtype

dtype('float64')

In [15]:
type(ones)

numpy.ndarray

In [16]:

zeros = np.zeros((2, 3))

In [17]:
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [18]:

range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [19]:

random_array = np.random.randint(0, 10, size=(3, 5))
random_array

array([[6, 1, 5, 6, 8],
       [9, 1, 0, 4, 6],
       [6, 2, 9, 7, 3]])

In [20]:

random_array.size

15

In [21]:

random_array.shape

(3, 5)

In [22]:
random_array_2 = np.random.random((5, 3))
random_array_2

array([[0.7711143 , 0.0422414 , 0.03231472],
       [0.60786082, 0.35306312, 0.61454733],
       [0.71531423, 0.4088326 , 0.26995222],
       [0.01489003, 0.0794792 , 0.65387128],
       [0.10441907, 0.46461248, 0.23023715]])

In [23]:
random_array_2.shape


(5, 3)

In [24]:
random_array_3 = np.random.rand(5, 3)
random_array_3

array([[0.41447327, 0.87625883, 0.46503244],
       [0.08562813, 0.21469601, 0.80601659],
       [0.79614354, 0.19571371, 0.45567788],
       [0.31030731, 0.77456275, 0.97947525],
       [0.46407235, 0.99021882, 0.85179635]])

In [27]:
# Pseudo-random numbers: fixing the seed makes the stream reproducible,
# so the same seed always yields the same integers
np.random.seed(99999)
random_array_4 = np.random.randint(low=0, high=10, size=(5, 3))
random_array_4

array([[0, 3, 1],
       [8, 1, 3],
       [8, 5, 6],
       [0, 6, 0],
       [4, 0, 9]])

In [28]:
# Reseed the global generator, then draw a 5x3 array of floats
np.random.seed(seed=7)
random_array_5 = np.random.random(size=(5, 3))
random_array_5

array([[0.07630829, 0.77991879, 0.43840923],
       [0.72346518, 0.97798951, 0.53849587],
       [0.50112046, 0.07205113, 0.26843898],
       [0.4998825 , 0.67923   , 0.80373904],
       [0.38094113, 0.06593635, 0.2881456 ]])

In [29]:
# No reseed here: the generator continues from the state left by the previous draw,
# so this second draw overwrites random_array_5 with different values
random_array_5 = np.random.random((5, 3))
random_array_5

array([[0.90959353, 0.21338535, 0.45212396],
       [0.93120602, 0.02489923, 0.60054892],
       [0.9501295 , 0.23030288, 0.54848992],
       [0.90912837, 0.13316945, 0.52341258],
       [0.75040986, 0.66901324, 0.46775286]])

In [30]:
random_array_4

array([[0, 3, 1],
       [8, 1, 3],
       [8, 5, 6],
       [0, 6, 0],
       [4, 0, 9]])

## 3. Viewing arrays and matrices

In [31]:
np.unique(random_array_4)

array([0, 1, 3, 4, 5, 6, 8, 9])

In [32]:
a1

array([1, 2, 3])

In [33]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [34]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [35]:
a1[0]

1

In [36]:
a2[0]

array([1. , 2. , 3.3])

In [37]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [38]:
a3[:2, :2, :2]

array([[[ 1,  2],
        [ 4,  5]],

       [[10, 11],
        [13, 14]]])

In [39]:
a4 = np.random.randint(10, size=(2, 3, 4, 5))
a4

array([[[[6, 7, 7, 9, 3],
         [0, 7, 7, 7, 0],
         [5, 4, 3, 1, 3],
         [1, 3, 4, 3, 1]],

        [[9, 5, 9, 1, 2],
         [3, 2, 2, 5, 7],
         [3, 0, 9, 9, 3],
         [4, 5, 3, 0, 4]],

        [[8, 6, 7, 2, 7],
         [3, 8, 6, 6, 5],
         [6, 5, 7, 1, 5],
         [4, 4, 9, 9, 0]]],


       [[[6, 2, 6, 8, 2],
         [4, 1, 6, 1, 5],
         [1, 6, 9, 8, 6],
         [5, 9, 7, 5, 4]],

        [[9, 6, 8, 1, 5],
         [5, 8, 3, 7, 7],
         [9, 4, 7, 5, 9],
         [6, 2, 0, 5, 3]],

        [[0, 5, 7, 1, 8],
         [4, 9, 0, 2, 0],
         [7, 6, 2, 9, 9],
         [5, 1, 0, 0, 9]]]])

In [40]:
a4.shape, a4.ndim

((2, 3, 4, 5), 4)

In [41]:
# Get the first number of each of the innermost arrays
# (`:1` is a slice, so the last axis is kept with length 1)
a4[:, :, :, :1]

array([[[[6],
         [0],
         [5],
         [1]],

        [[9],
         [3],
         [3],
         [4]],

        [[8],
         [3],
         [6],
         [4]]],


       [[[6],
         [4],
         [1],
         [5]],

        [[9],
         [5],
         [9],
         [6]],

        [[0],
         [4],
         [7],
         [5]]]])

## 4. Manipulating & comparing arrays

#### Arithmetic

In [42]:
a1

array([1, 2, 3])

In [43]:
# Rebind `ones` to a 1-D array of three 1.0s so it lines up element-for-element with a1
ones = np.ones(3)
ones

array([1., 1., 1.])

In [44]:
a1 + ones

array([2., 3., 4.])

In [45]:
a1 - ones

array([0., 1., 2.])

In [46]:
a1 * ones

array([1., 2., 3.])

In [47]:
a1

array([1, 2, 3])

In [48]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [49]:
a1 * a2

array([[ 1. ,  4. ,  9.9],
       [ 4. , 10. , 19.5]])

In [50]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [51]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [52]:
# How can you reshape a2 to be compatible with a3?
# Search: "How to reshape numpy array"
# Shapes (2, 3) and (2, 3, 3) cannot be broadcast together, so this multiplication
# raises ValueError. Catch and display it so the rest of the notebook still runs
# on "Restart & Run All".
try:
    a2 * a3
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [53]:
a1 / ones

array([1., 2., 3.])

In [54]:
a2 / a1

array([[1.        , 1.        , 1.1       ],
       [4.        , 2.5       , 2.16666667]])

In [55]:
# Floor division removes the decimals (rounds down)
a2 // a1

array([[1., 1., 1.],
       [4., 2., 2.]])

In [56]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [57]:
a2 ** 2

array([[ 1.  ,  4.  , 10.89],
       [16.  , 25.  , 42.25]])

In [58]:
np.square(a2)

array([[ 1.  ,  4.  , 10.89],
       [16.  , 25.  , 42.25]])

In [59]:
a1 + ones

array([2., 3., 4.])

In [60]:
np.add(a1, ones)

array([2., 3., 4.])

In [61]:
a1 % 2

array([1, 0, 1], dtype=int32)

In [62]:
a1 / 2

array([0.5, 1. , 1.5])

In [63]:
a2 % 2

array([[1. , 0. , 1.3],
       [0. , 1. , 0.5]])

In [64]:
np.exp(a1)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [65]:
np.log(a1)

array([0.        , 0.69314718, 1.09861229])

#### Aggregation

Aggregation = performing the same operation on a number of things.

In [66]:
listy_list = [1, 2, 3]
type(listy_list)

list

In [67]:
sum(listy_list)

6

In [68]:
sum(a1)

6

In [69]:
np.sum(a1)

6

**Tip:** Use Python's built-in functions (`sum()`) on Python datatypes and NumPy's functions (`np.sum()`) on NumPy arrays.

In [70]:
# Create a massive NumPy array (100,000 random floats) to compare sum() against np.sum()
massive_array = np.random.random(size=100000)
massive_array.size

100000

In [71]:
massive_array[:10]

array([0.62342345, 0.62955693, 0.9099729 , 0.96251949, 0.5850998 ,
       0.16489774, 0.39159332, 0.94455493, 0.34339118, 0.70507037])

In [72]:
%timeit sum(massive_array) # Python's sum()
%timeit np.sum(massive_array) # NumPy's np.sum()

28.9 ms ± 6.65 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
65.5 µs ± 1.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [73]:
# How many times faster np.sum() was than sum() in the timing run above.
# Both timings must share a unit before dividing: 28.9 ms = 28,900 µs versus 65.5 µs.
# Slow time divided by fast time gives the speed-up factor.
speedup = round(28900 / 65.5)
speedup

441

In [74]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [75]:
np.mean(a2)

3.6333333333333333

In [76]:
np.max(a2)

6.5

In [77]:
np.min(a2)


1.0

Standard deviation and variance are measures of the 'spread' of data.

The higher the standard deviation and variance of the data, the more spread out the values are.

The lower the standard deviation and variance, the less spread out the values are.

In [79]:
# Standard deviation = a measure of how spread out a group of numbers is from the mean
np.std(a2)

1.8226964152656422

In [80]:
# Variance = measure of the average degree to which each number is different to the mean
# Higher variance = wider range of numbers
# Lower variance = lower range of numbers
np.var(a2)

3.3222222222222224

In [81]:
# Standard deviation = square root of variance
np.sqrt(np.var(a2))

1.8226964152656422

In [83]:
# Demo of std and var
high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

In [84]:
np.var(high_var_array), np.var(low_var_array)

(4296133.472222221, 8.0)