In [5]:
import numpy as np
import random

numbers = np.array([2, 3, 5, 7, 11])
twoDim = np.array([[2, 3, 5, 7, 11], [2, 3, 5, 7, 11]])
floats = np.array([1/i for i in range(1, 6)])

print(floats) # [1.         0.5        0.33333333 0.25       0.2       ]
print(type(numbers)) # <class 'numpy.ndarray'>
# Checking the type of the numpy array
print(numbers.dtype) #  dtype('int64'). printing will result to (int64)

# Get the number of dimensions
print(numbers.ndim) # 1
print(twoDim.ndim) # 2

# Get the shape of an array
print(numbers.shape) # (5,)
print(twoDim.shape) # (2, 5)

# Get the size of an array, which is the number of rows * the number of columns
print(numbers.size) # (5,) = 5 
print(twoDim.size) # (2, 5) = 10

# Get the amount of byte that is used to store an item in the array
print(numbers.itemsize) # 8
print(twoDim.itemsize) # 8

# Print the rows and columns in a two-dimensional array
for row in twoDim:
  for column in row:
    print(column, end=' ')
  print()
# Prints
# 2 3 5 7 11 
# 2 3 5 7 11

# flattening a multi-dimensional array into 1
for column in twoDim.flat:
  print(column, end=' ') # 2 3 5 7 11 2 3 5 7 11 

print('\n')

"""Filling arrays with specific values"""
zeros = np.zeros(5) #
print(zeros, end='\n') #[0. 0. 0. 0. 0.]

# with specific data types
ones = np.ones((2, 4), dtype=int)
print(ones, end='\n')
# [[1 1 1 1]
#  [1 1 1 1]]

values = np.full((3, 5), 13)
print(values, end='\n')
# [[13 13 13 13 13]
#  [13 13 13 13 13]
#  [13 13 13 13 13]]

"""Creating arrays from ranges"""
zero_to_five = np.arange(5)
print(zero_to_five, end='\n') # [0 1 2 3 4]
five_to_ten = np.arange(5, 10) # [5 6 7 8 9]
ten_to_one = np.arange(10, 1, -2) # [10, 8, 6, 4, 2]

# Creating a floating
floating_np = np.linspace(0.0, 1.0, num=10)
print(floating_np, end='\n') # [0.         0.11111111 0.22222222 0.33333333 0.44444444 0.55555556 0.66666667 0.77777778 0.88888889 1.        ]

# Creating a one-dimensional array and reshaping into n-dimensional array
reshape = np.arange(1, 21).reshape(4, 5)
print(reshape, end='\n')
# [[ 1  2  3  4  5]
#  [ 6  7  8  9 10]
#  [11 12 13 14 15]
#  [16 17 18 19 20]]


# If you have more than 1000 elements to in n-dimensions, numpy will summarize the array rather than print all items
large_data = np.arange(1, 100001).reshape(4, 25000)
print(large_data, end='\n')
# [[     1      2      3 ...  24998  24999  25000]
#  [ 25001  25002  25003 ...  49998  49999  50000]
#  [ 50001  50002  50003 ...  74998  74999  75000]
#  [ 75001  75002  75003 ...  99998  99999 100000]]
another_large_data = np.arange(1, 100001).reshape(100, 1000)
print(another_large_data, end='\n')
# ...
#  [ 97001  97002  97003 ...  97998  97999  98000]
#  [ 98001  98002  98003 ...  98998  98999  99000]
#  [ 99001  99002  99003 ...  99998  99999 100000]]


"""Performance using ipython magic"""
%timeit rolls_list = [random.randrange(1, 7) for i in range(6_000_000)]
# 4.24 s ± 28.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit rolls_array = np.random.randint(1, 7, 6_000_000)
# 61.4 ms ± 419 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
%timeit rolls_array = np.random.randint(1, 7, 60_000_000)
# 793 ms ± 14.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit rolls_array = np.random.randint(1, 7, 600_000_000)
# 7.54 s ± 99.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit -n3 -r2 rolls_array = np.random.randint(1, 7, 6_000_000)
# 65.3 ms ± 7.66 ms per loop (mean ± std. dev. of 2 runs, 3 loops each)



[1.         0.5        0.33333333 0.25       0.2       ]
<class 'numpy.ndarray'>
int64
1
2
(5,)
(2, 5)
5
10
8
8
2 3 5 7 11 
2 3 5 7 11 
2 3 5 7 11 2 3 5 7 11 

[0. 0. 0. 0. 0.]
[[1 1 1 1]
 [1 1 1 1]]
[[13 13 13 13 13]
 [13 13 13 13 13]
 [13 13 13 13 13]]
[0 1 2 3 4]
[0.         0.11111111 0.22222222 0.33333333 0.44444444 0.55555556
 0.66666667 0.77777778 0.88888889 1.        ]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]
[[     1      2      3 ...  24998  24999  25000]
 [ 25001  25002  25003 ...  49998  49999  50000]
 [ 50001  50002  50003 ...  74998  74999  75000]
 [ 75001  75002  75003 ...  99998  99999 100000]]
[[     1      2      3 ...    998    999   1000]
 [  1001   1002   1003 ...   1998   1999   2000]
 [  2001   2002   2003 ...   2998   2999   3000]
 ...
 [ 97001  97002  97003 ...  97998  97999  98000]
 [ 98001  98002  98003 ...  98998  98999  99000]
 [ 99001  99002  99003 ...  99998  99999 100000]]
4.69 s ± 114 ms per loop (mean ± std. dev. of 7 ru

Numpy Array Operators


In [7]:
import numpy as np

numbers = np.arange(1, 6)
new_numbers = numbers * 2 # [2 4 6 8 10]
# This is called broadcasting. It is a way to apply an operation to every element in an array
# This is equivalent to numbers = numbers * 2
# What this is effectively doing is multiplying each element in the array by 2
# i.e [1, 2, 3, 4, 5] * 2 = [2, 4, 6, 8, 10]
# As you can see, the original array is not modified
print(numbers) # [1 2 3 4 5]
print(new_numbers) # [ 2  4  6  8 10]
# You can also do augmented assignment
# numbers *= 2
# print(numbers) # [ 2  4  6  8 10]

# Doing this with a list will result to increasing the size of the list
numbers_without_numpy = [1, 2, 3, 4, 5]
numbers_without_numpy *= 2
# As you can see, the original list is modified
# This is because the list is mutable
print(numbers_without_numpy) # [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

# You can also do this with a two-dimensional array
twoDim = np.arange(1, 11).reshape(2, 5)
print(twoDim)
# [[ 1  2  3  4  5]
#  [ 6  7  8  9 10]]
twoDim *= 2
print(twoDim)
# [[ 2  4  6  8 10]
#  [12 14 16 18 20]]

numbers2 = np.linspace(1.1, 5.5, 5)
print(numbers2) # [1.1 2.2 3.3 4.4 5.5]
numbers * numbers2 # [ 1.1  4.4  9.9 17.6 27.5]
print(numbers * numbers2) # [ 1.1  4.4  9.9 17.6 27.5]

numbers >= 3 # [False False  True  True  True]
numbers2 < numbers # [ True  True False False False]

numbers2 == numbers # [False False  False False False]
print(numbers2 == numbers) # [False False  False False False]

[1 2 3 4 5]
[ 2  4  6  8 10]
[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
[[ 2  4  6  8 10]
 [12 14 16 18 20]]
[1.1 2.2 3.3 4.4 5.5]
[ 1.1  4.4  9.9 17.6 27.5]
[False False False False False]


Numpy Calculation Methods

In [17]:
# Descriptive statistics
grades = np.array([[87, 96, 70], [100, 87, 90], [94, 77, 90], [100, 81, 82]])
print(grades)
# [[ 87  96  70]
#  [100  87  90]
#  [ 94  77  90]
#  [100  81  82]]

grades.sum() # 1054
grades.min() # 70
grades.max() # 100
grades.mean() # 87.83333333333333
grades.std() # 88.792357792739987
grades.var() # 77.30555555555556

# You can also do this for a specific axis
# axis=0 means the columns
# axis=1 means the rows
grades.sum(axis=0) # array([381, 341, 332])
grades.mean(axis=0) # array([ 95.25,  85.25,  83.  ])

# Avg for each student
grades.mean(axis=1) # array([ 84.33333333,  92.33333333,  87.        ,  87.66666667])

# Self-check
# Using numpy random-number generation, to create an array of twelve random grades in the range 60-100, then reshape the array into a 3x4 array.
# Finally, use the array to print the average of all grades, the average of each test, and the average of each student.

student_grades = np.random.randint(60, 101, 12).reshape(3, 4)
print(student_grades)
student_grades.mean() # 82.25
column_grades = student_grades.mean(axis=0) # array([79.        , 87.        , 97.66666667, 87.66666667])
each_student_avh = student_grades.mean(axis=1) # array([71.75, 73.75, 81.75])

[[ 87  96  70]
 [100  87  90]
 [ 94  77  90]
 [100  81  82]]
[[ 85  92  98  68]
 [ 61  80  95 100]
 [ 91  89 100  95]]


array([79.        , 87.        , 97.66666667, 87.66666667])

Numpy Universal Function


In [18]:
numbers = np.array([1, 4, 9, 16, 25, 36,])

np.sqrt(numbers) # [ 1.  2.  3.  4.  5.  6.]

numbers2 = np.arange(1, 7) * 10
print(numbers2) # [10 20 30 40 50 60]
np.add(numbers, numbers2) # [11 24 39 56 75 96]

np.multiply(numbers2, 5) # [ 50 100 150 200 250 300]

numbers3 = numbers2.reshape(2, 3)
numbers4 = np.array([2, 4, 6])

# Broadcasting can work in multiple dimensions
np.multiply(numbers3, numbers4)
# array([[ 20,  80, 180],
#        [ 80, 200, 360]])

[10 20 30 40 50 60]


array([[ 20,  80, 180],
       [ 80, 200, 360]])

Indexing and Slicing in numpy

In [None]:
grades = np.array([[87, 96, 70], [100, 87, 90], [94, 77, 90], [100, 81, 82]])
print(grades)
# [[ 87  96  70]
#  [100  87  90]
#  [ 94  77  90]
#  [100  81  82]]

# Indexing
grades[0, 1] # 96
grades[1] # array([100,  87,  90])
# Slicing
# This will return the first two rows
grades[0:2] # array([[ 87,  96,  70], [100,  87,  90]])
# Row 1 and 2, column 1
grades[0:2, 1] # array([96, 87])
# Row 1 and 3
grades[[1, 3]] # array([[100,  87,  90], [100,  81,  82]])
# All the rows, column 1
grades[:, 1] # array([ 96,  87,  77,  81])
# All the rows, column 1 and 2
grades[:, 1:3] # array([[ 96,  70], [ 87,  90], [ 77,  90], [ 81,  82]])

Views: Shallow Copies

In [25]:
numbers = np.arange(1, 6)
numbers2 = numbers.view()
print(numbers) # [1 2 3 4 5]
print(numbers2) # [1 2 3 4 5]

id(numbers) # 140404516911312
id(numbers2) # 140404485650448

# Change the second element of numbers2
numbers2[1] *= 10
# As you can see, numbers is also changed
# This is because numbers2 is a view of numbers
# This means that numbers2 is a reference to numbers
print(numbers) # [ 1 20  3  4  5]
print(numbers2) # [ 1 20  3  4  5]

[1 2 3 4 5]
[1 2 3 4 5]
[ 1 20  3  4  5]
[ 1 20  3  4  5]


Deep Copies

In [27]:
numbers = np.arange(1, 6)
numbers2 = numbers.copy()
print(numbers) # [1 2 3 4 5]
print(numbers2) # [1 2 3 4 5]

numbers2[1] *= 10
print(numbers) # [ 1 2 3 4 5]
print(numbers2) # [ 1 20  3  4  5]

[1 2 3 4 5]
[1 2 3 4 5]
[1 2 3 4 5]
[ 1 20  3  4  5]
