In [2]:

import numpy as np

In [3]:
# Create a basic 1-D array from a Python list
arr = np.array([1, 2, 3])
print(arr)

[1 2 3]


In [4]:
# Array filled with 0's (the dtype defaults to float64)
np.zeros(2)

array([0., 0.])

In [5]:
# Array filled with 1's (the dtype defaults to float64)
np.ones(2)

array([1., 1.])

In [6]:
# np.empty creates a new array of the requested size and data type WITHOUT
# initializing its elements: the memory is allocated, but whatever values
# happen to be there are left untouched.
# Because it skips initialization it is faster than np.zeros or np.ones, which
# makes it useful when you will immediately fill the array with data, or when
# you do not care about the initial values.
# NOTE: the contents shown in the output are arbitrary and may differ between
# runs; never rely on them.
np.empty(2)

array([1., 1.])

In [7]:
# Create an array with a range of elements: np.arange(4) yields 0, 1, 2, 3
# (the stop value 4 itself is not included).
np.arange(4)

array([0, 1, 2, 3])

In [8]:
# np.arange can also produce evenly spaced values: pass the first number, the
# stop value, and the step size. The stop value (9 here) is not included.
np.arange(2,9,2)

array([2, 4, 6, 8])

In [9]:
# np.linspace(start, stop, num) creates `num` values spaced linearly over the
# interval; here 5 values from 0 to 10, with both endpoints included.
np.linspace(0,10,5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [10]:
# While the default data type is floating point (np.float64), you can
# explicitly specify which data type you want using the dtype keyword.
x = np.ones(2)  # no dtype given -> float64
print(x.dtype)
y = np.ones(2, dtype=np.int64)  # explicitly request 64-bit integers
print(y.dtype)

float64
int64


<h2>Adding, removing, and sorting elements</h2>

In [11]:
# Sort a 1-D array in ascending order. np.sort returns a sorted copy and
# leaves `arr` itself unchanged.
arr = np.array([2, 1, 5, 3, 7, 4, 6, 8])
np.sort(arr)

array([1, 2, 3, 4, 5, 6, 7, 8])

<h3>Argsort</h3>
<h4> returning the indices that would sort the array in ascending order</h4>

In [12]:
x = np.array([3, 1, 2])
sorted_indices = np.argsort(x)
# sorted_indices is [1, 2, 0]: indexing the original array [3, 1, 2] with it
# (x[sorted_indices]) gives the sorted array [1, 2, 3]
print(sorted_indices)

[1 2 0]


<h3>Lexsort</h3>
<h4>In this example, we have three arrays of keys (names, ages, and heights) that we want to sort lexicographically. We pass these arrays as a tuple to lexsort, and it returns an array of indices (idx) that can be used to sort the original arrays. Note that lexsort treats the last key in the tuple as the primary sort key, so here the data is ordered by name first, then by height, then by age. Finally, we use these indices to sort the original arrays and print the sorted results.
Lexicographical order is an order in which words or strings are arranged based on the alphabetical order of their component letters.</h4>

In [13]:

# define the keys to sort
names = np.array(['Alice', 'Bob', 'Charlie', 'Bob', 'Alice'])
ages = np.array([25, 35, 30, 20, 40])
heights = np.array([170, 180, 175, 165, 185])

# sort the keys lexicographically; np.lexsort treats the LAST key in the tuple
# as the primary sort key, so rows are ordered by name, then height, then age
idx = np.lexsort((ages, heights, names))
print(idx)
# use the indices to sort the original arrays
sorted_names = names[idx]
sorted_ages = ages[idx]
sorted_heights = heights[idx]

print(sorted_names)     # ['Alice' 'Alice' 'Bob' 'Bob' 'Charlie']
print(sorted_ages)      # [25 40 20 35 30]
print(sorted_heights)   # [170 185 165 180 175]


[0 4 3 1 2]
['Alice' 'Alice' 'Bob' 'Bob' 'Charlie']
[25 40 20 35 30]
[170 185 165 180 175]


<h3>Searchsorted</h3>
<h4>Returns the indices at which values could be inserted into a sorted array while keeping it sorted</h4>

In [14]:
# Sorted array that will be searched
sorted_array = np.array([1, 2, 3, 4, 5])

# Values whose insertion points we want
values = np.array([0, 3, 6])

# For each value, find the index at which it could be inserted into
# sorted_array while keeping it sorted
idx = sorted_array.searchsorted(values)

print(idx)   # [0 2 5]

[0 2 5]


<h3>Partition</h3>
<h4>np.partition is a function in the NumPy library of Python that performs a partial sort on an input array. After np.partition(a, k), the element at index k is in the position it would occupy in a fully sorted array; the k smallest elements come before it (in no particular order), and the remaining, larger elements come after it (also in no particular order).</h4>

In [15]:
# define the input array to partition
a = np.array([3, 1, 4, 2, 5])

# partition around index 2: the element at index 2 ends up where it would be
# in a sorted array, the two smallest elements come before it, and the larger
# elements come after it
partitioned_a = np.partition(a, 2)

# The order inside each part is not guaranteed; for this input the result
# happens to be fully sorted.
print(partitioned_a)   # [1 2 3 4 5]

[1 2 3 4 5]


In [16]:
# Concatenate two 1-D arrays end to end
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])
np.concatenate((a, b))

array([1, 2, 3, 4, 5, 6, 7, 8])

In [17]:
# Concatenate 2-D arrays along axis 0 (rows): y is appended as a new row below x
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6]])
np.concatenate((x, y), axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

<h3>How do you know the shape and size of an array?</h3>

In [18]:
# A 3-D example array made of three 2x4 blocks, i.e. shape (3, 2, 4)
array_example = np.array([[[0, 1, 2, 3],
                           [4, 5, 6, 7]],

                          [[0, 1, 2, 3],
                           [4, 5, 6, 7]],

                          [[0 ,1 ,2, 3],
                           [4, 5, 6, 7]]])

In [19]:
# ndim gives the number of dimensions (axes) of the array
array_example.ndim

3

In [20]:
# size gives the total number of elements in the array (3 * 2 * 4 = 24 here)
array_example.size

24

In [21]:
# shape gives a tuple with the number of elements along each dimension
array_example.shape

(3, 2, 4)

<h3>Can you reshape an array?</h3>

In [22]:
# Start from a 1-D array holding the six values 0..5
a = np.arange(6)
print(a)

[0 1 2 3 4 5]


In [23]:
# You can reshape this array to an array with three rows and two columns.
# The new shape must hold the same number of elements (3 * 2 = 6).
b = a.reshape(3, 2)
print(b)

[[0 1]
 [2 3]
 [4 5]]


In [24]:
# np.reshape takes the array, the new shape, and an optional index order:
#   order='C' (the default) reads/writes the elements in row-major (C-like)
#             index order, where the last axis index changes fastest;
#   order='F' uses column-major (Fortran-like) index order, where the first
#             axis index changes fastest.
# The shape is passed positionally: the `newshape` keyword is deprecated in
# recent NumPy releases (renamed to `shape`), while the positional form works
# on every version.
np.reshape(a, (1, 6), order='C')

array([[0, 1, 2, 3, 4, 5]])

<h3>How to convert a 1D array into a 2D array (how to add a new axis to an array)</h3>

In [25]:
# A 1-D array with six elements; its shape is the 1-tuple (6,)
a = np.array([1, 2, 3, 4, 5, 6])
a.shape

(6,)

In [26]:
# Use np.newaxis to add a new axis in front: the shape goes from (6,) to (1, 6)
a2 = a[np.newaxis, :]
a2.shape

(1, 6)

In [27]:
# You can explicitly convert a 1D array to either a row vector or a column
# vector using np.newaxis. For example, you can convert a 1D array to a row
# vector by inserting an axis along the first dimension:
row_vector = a[np.newaxis, :]
row_vector.shape

(1, 6)

In [28]:
# For a column vector, you can insert an axis along the second dimension,
# which turns shape (6,) into (6, 1):
col_vector = a[:, np.newaxis]
col_vector.shape

(6, 1)

In [29]:
# You can also expand an array by inserting a new axis at a specified position
# with np.expand_dims. To add an axis at index position 1 (giving the same
# (6, 1) shape as a[:, np.newaxis]):
b = np.expand_dims(a, axis=1)
b.shape

(6, 1)

In [30]:
# You can add an axis at index position 0 (giving shape (1, 6)) with:
c = np.expand_dims(a, axis=0)
c.shape

(1, 6)

<h3>Indexing and slicing</h3>

In [31]:
# You can index and slice NumPy arrays in the same ways you can slice Python lists.

data = np.array([1, 2, 3])
print(data[1])  # the element at index 1
print(data[0:2])  # elements at indices 0 and 1 (the stop index is excluded)
print(data[1:])  # from index 1 to the end
print(data[-2:])  # the last two elements

2
[1 2]
[2 3]
[2 3]


In [32]:
# Boolean (mask) indexing: a[condition] returns a 1-D array of the elements
# for which the condition is True.
a = np.array([[1 , 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(a[a < 5])  # values less than 5
print(a[a>=5])  # values greater than or equal to 5
print(a[a%2==0])  # values divisible by 2
print(a[a%3==0])  # values divisible by 3
print(a[(a>=5) & (a<=10)])  # values from 5 to 10 inclusive; & combines the two masks element-wise

[1 2 3 4]
[ 5  6  7  8  9 10 11 12]
[ 2  4  6  8 10 12]
[ 3  6  9 12]
[ 5  6  7  8  9 10]


In [33]:
# Combine conditions with | (element-wise OR). The result is a boolean array
# with the same shape as a, True wherever the element is greater than or
# equal to 5.
ab = (a>5) | (a==5)
print(ab)

[[False False False False]
 [ True  True  True  True]
 [ True  True  True  True]]


In [34]:
# You can use np.nonzero() to get the indices of elements that satisfy a
# condition, for example those that are less than 5. The result is a tuple
# with one index array per dimension of a.
b = np.nonzero(a < 5)
print(b)

(array([0, 0, 0, 0], dtype=int64), array([0, 1, 2, 3], dtype=int64))


In [35]:
# np.nonzero() returns a tuple of arrays: one for each dimension.
# For the 2-D array a, b[0] holds the row indices where the matching values
# are found, and b[1] holds the corresponding column indices.

# If you want to generate a list of coordinates where the elements exist, you can zip the arrays,
# iterate over the list of coordinates, and print them. For example:

list_of_coordinates= list(zip(b[0], b[1]))

for coord in list_of_coordinates:
    print(coord)

(0, 0)
(0, 1)
(0, 2)
(0, 3)


In [36]:
# The index tuple returned by np.nonzero() can be used directly to index the
# array, which gives the elements that are less than 5:

print(a[b])

[1 2 3 4]


In [37]:
# If the element you're looking for doesn't exist in the array, then the
# returned arrays of indices will be empty. For example:

not_there = np.nonzero(a == 42)
print(not_there)

(array([], dtype=int64), array([], dtype=int64))


<h4>How to create an array from existing data</h4>

In [38]:
# You can create a new array from a section of an existing array at any time
# by specifying where you want to slice it. Start from this array:
a = np.array([1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [39]:
# Take the elements at indices 3 through 7 (the stop index 8 is excluded)
arr1 = a[3:8]
arr1

array([4, 5, 6, 7, 8])

In [40]:
# Two 2x2 arrays that will be stacked
a1 = np.array([[1, 1],
               [2, 2]])

a2 = np.array([[3, 3],
               [4, 4]])

In [41]:
# Stack the arrays vertically: the rows of a2 are placed below the rows of a1
np.vstack((a1, a2))

array([[1, 1],
       [2, 2],
       [3, 3],
       [4, 4]])

In [42]:
# Stack the arrays horizontally: the columns of a2 are placed to the right of a1
np.hstack((a1, a2))

array([[1, 1, 3, 3],
       [2, 2, 4, 4]])

In [43]:
# You can split an array into several smaller arrays using hsplit.
# You can specify either the number of equally shaped arrays to return or the
# columns after which the division should occur.

x = np.arange(1, 25).reshape(2, 12)  # the values 1..24 arranged as 2 rows x 12 columns
x

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]])

In [44]:
# If you wanted to split this array into three equally shaped arrays
# (each with 4 of the 12 columns), you'd run:
np.hsplit(x, 3)

[array([[ 1,  2,  3,  4],
        [13, 14, 15, 16]]),
 array([[ 5,  6,  7,  8],
        [17, 18, 19, 20]]),
 array([[ 9, 10, 11, 12],
        [21, 22, 23, 24]])]

In [45]:
# If you wanted to split your array after the third and fourth column, you'd run
# the following; it returns columns [0:3], [3:4] and [4:] as three arrays:
np.hsplit(x, (3, 4))

[array([[ 1,  2,  3],
        [13, 14, 15]]),
 array([[ 4],
        [16]]),
 array([[ 5,  6,  7,  8,  9, 10, 11, 12],
        [17, 18, 19, 20, 21, 22, 23, 24]])]

<h4>Views</h4>

In [46]:
# A view is a new array object that looks at the same data as the original
# array (a shallow copy); one can be created explicitly with the view method.
# Views are an important NumPy concept! NumPy functions, as well as operations
# like indexing and slicing, will return views whenever possible. This saves
# memory and is faster (no copy of the data has to be made).
# However, it's important to be aware of this - modifying data in a view also
# modifies the original array!

a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

In [47]:
# Now we create an array b1 by slicing a (taking its first row). b1 is a view,
# so modifying an element of b1 will modify the corresponding element in a
# as well!

b1 = a[0, :]
b1

array([1, 2, 3, 4])

In [48]:
# Overwrite the first element of b1; because b1 is a view of a, this write
# goes to the data shared with a
b1[0] = 99
b1

array([99,  2,  3,  4])

In [49]:
# a reflects the change made through the view b1: a[0, 0] is now 99
a

array([[99,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [50]:
# Using the copy method will make a complete copy of the array and its data
# (a deep copy), so b2 does not share data with a.
b2 = a.copy()
b2

array([[99,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

<h4>Basic array operations</h4>

In [51]:
# Arithmetic operators work element-wise on arrays: you can add the arrays
# together with the plus sign, and likewise subtract, multiply and divide them.
data = np.array([1, 2])
ones = np.ones(2, dtype=int)
print(data + ones)
print(data - ones)
print(data * ones)
print(data / ones)  # true division gives a float result even for integer arrays

[2 3]
[0 1]
[1 2]
[1. 2.]


In [52]:
# Sum of all elements of a. Here a is still the 3x4 array from the Views
# section, whose first element was changed to 99 through the view b1.
a.sum()

176

In [53]:
# To add the rows or the columns in a 2D array, pass the axis argument
b = np.array([[1, 1], [2, 2]])

# axis=0 sums over the rows, giving one total per column
b.sum(axis=0)

array([3, 3])

In [54]:
# axis=1 sums over the columns, giving one total per row
b.sum(axis=1)

array([2, 4])

<h3>Broadcasting</h3>
<h4>When you combine arrays of different shapes in an arithmetic operation, NumPy understands that the operation should happen with each cell. That concept is called broadcasting. Broadcasting is a mechanism that allows NumPy to perform operations on arrays of different shapes. The dimensions of your arrays must be compatible, for example, when the dimensions of both arrays are equal or when one of them is 1. If the dimensions are not compatible, you will get a ValueError.</h4>

In [55]:
# a has shape (3,) and b has shape (3, 1); broadcasting stretches both to
# (3, 3), so c[i, j] == b[i, 0] + a[j]
a = np.array([1, 2, 3])
b = np.array([[4], [5], [6]])
c = a + b
c

array([[5, 6, 7],
       [6, 7, 8],
       [7, 8, 9]])

<h4>More useful array operations</h4>

In [56]:
# Without an axis argument, these aggregate over all elements of c
print(c.max())
print(c.min())
print(c.sum())

9
5
63


In [57]:
# c.min(axis=0) returns the minimum value of the array c along its first
# axis (axis 0). In other words, it computes the minimum value of each column
# of the array.
c.min(axis=0)

array([5, 6, 7])

<h3>Creating matrices</h3>

In [58]:
# A nested list creates a 2-D array (a matrix) with 3 rows and 2 columns
data = np.array([[1, 2], [3, 4], [5, 6]])
data

array([[1, 2],
       [3, 4],
       [5, 6]])

In [59]:
# Index a matrix as data[row, column]; slices work on each axis
print(data[0, 1])  # single element: row 0, column 1
print(data[1:3])  # rows 1 and 2, all columns
print(data[0:2, 0]) # rows 0 and 1 of column 0 (row index first, then column)

2
[[3 4]
 [5 6]]
[1 3]


In [60]:
# You can aggregate matrices the same way you aggregated vectors; without an
# axis argument the result is computed over all elements:

print(data.max())
print(data.min())
print(data.sum())

6
1
21


In [61]:
# With the axis argument you can aggregate across rows or columns instead
print(data.max(axis=0))  # maximum of each column
print(data.max(axis=1))  # maximum of each row

[5 6]
[2 4 6]


In [62]:
# Matrices of the same shape are added element by element
data = np.array([[1, 2], [3, 4]])
ones = np.array([[1, 1], [1, 1]])
data + ones

array([[2, 3],
       [4, 5]])

In [63]:
# The shapes differ here: data is (3, 2) and ones_row is (1, 2). Broadcasting
# adds the single row of ones_row to every row of data.
data = np.array([[1, 2], [3, 4], [5, 6]])
ones_row = np.array([[1, 1]])
data + ones_row

array([[2, 3],
       [4, 5],
       [6, 7]])

In [64]:
# A 3-D array of ones with shape (4, 3, 2); it is displayed as 4 blocks of
# 3 rows and 2 columns each
a = np.ones((4, 3, 2))
a

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [65]:
# There are often instances where we want NumPy to initialize the values of an array.
# NumPy offers functions like ones() and zeros(), and the random.Generator class for
# random number generation for that.
# All you need to do is pass in the number of elements you want it to generate.
# NOTE: default_rng() is called without a seed here, so the numbers differ on
# every run; pass a seed (e.g. np.random.default_rng(42)) for reproducible output.
rng = np.random.default_rng()  # the simplest way to generate random numbers
rng.random(3)  # three random floats from the half-open interval [0.0, 1.0)

array([0.66097857, 0.32467277, 0.33378759])

In [66]:
# You can also use ones(), zeros(), and random() to create a 2D array if you
# give them a tuple describing the dimensions of the matrix (here 3 rows by
# 2 columns):
print(np.ones((3, 2)))
print(np.zeros((3, 2)))
print(rng.random((3, 2)))

[[1. 1.]
 [1. 1.]
 [1. 1.]]
[[0. 0.]
 [0. 0.]
 [0. 0.]]
[[0.83290347 0.11023244]
 [0.18139704 0.911226  ]
 [0.40969434 0.2850167 ]]


In [67]:
# Random integers from 0 (inclusive) to 5 (exclusive), in an array of shape (2, 4)
print(rng.integers(5, size=(2, 4)) )

[[2 1 3 1]
 [4 1 3 0]]


<h3>How to get unique items and counts</h3>

In [68]:
# np.unique returns the distinct values of the array, sorted in ascending order
a = np.array([11, 11, 12, 13, 14, 15, 16, 17, 12, 13, 11, 14, 18, 19, 20])
unique_values = np.unique(a)
print(unique_values)

[11 12 13 14 15 16 17 18 19 20]


In [69]:
# With return_index=True, np.unique also returns, for each unique value, the
# index of its first occurrence in a
unique_values, indices_list = np.unique(a, return_index=True)
print(indices_list)

[ 0  2  3  4  5  6  7 12 13 14]


In [70]:
# Occurrence count: with return_counts=True, np.unique also returns how many
# times each unique value appears in a
unique_values, occurrence_count = np.unique(a, return_counts=True)
print(occurrence_count)

[3 2 2 2 1 1 1 1 1 1]


In [71]:
# This also works with 2D arrays! Without an axis argument the array is
# flattened, so the result is a 1-D array of the distinct values.
a_2d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]])
unique_values = np.unique(a_2d)
print(unique_values)

[ 1  2  3  4  5  6  7  8  9 10 11 12]


In [72]:
# Unique rows: with axis=0, whole rows are compared, so the duplicated
# row [1, 2, 3, 4] is kept only once
unique_rows = np.unique(a_2d, axis=0)
print(unique_rows)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [73]:
# Unique rows, indices and occurrence count.
# The extra outputs come back in a fixed order (first-occurrence indices, then
# counts), regardless of the order in which the keyword arguments are written.
unique_rows, indices, occurrence_count = np.unique(
     a_2d, axis=0, return_counts=True, return_index=True)
print(unique_rows)
print(indices)
print(occurrence_count)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[0 1 2]
[2 1 1]


<h3>Transposing and reshaping a matrix</h3>

In [74]:
# You may also need to switch the dimensions of a matrix. This can happen
# when, for example, you have a model that expects a certain input shape that
# is different from your dataset. This is where the reshape method can be
# useful: you simply pass in the new dimensions that you want for the matrix.
a = np.array([11, 11, 12, 13, 14, 15])
a.reshape(2, 3)  # 2 rows x 3 columns

array([[11, 11, 12],
       [13, 14, 15]])

In [75]:
# The same six elements arranged as 3 rows x 2 columns
a.reshape(3, 2)

array([[11, 11],
       [12, 13],
       [14, 15]])

In [76]:
# Build a 2x3 matrix holding the values 0..5, then show it and its shape
arr = np.arange(6).reshape(2, 3)
print(arr)
print(arr.shape)

[[0 1 2]
 [3 4 5]]
(2, 3)


In [77]:
# transpose() swaps the axes: the (2, 3) matrix becomes (3, 2), with rows
# turned into columns
arr1 = arr.transpose()
print(arr1)
print(arr1.shape)

[[0 3]
 [1 4]
 [2 5]]
(3, 2)


In [78]:
# You can also use arr.T, which gives the same transposed result:
arr.T

array([[0, 3],
       [1, 4],
       [2, 5]])

<h3>How to reverse an array</h3>