# Numpy Tutorial

## 1. Numpy Activation

In [2]:
import numpy as np

print(np.__version__)

1.19.2


## 2. Numpy Arrays

### Creating Arrays, Slicing Arrays, Arrays of 0s or 1s
### np.array(), np.zeros(), np.ones()

### Axis 0 = Rows, Axis 1 = Columns. Axis 0 is the "first" axis.

![image.png](attachment:ec2cdb6f-5e8f-4b6c-9846-70eed0b32b53.png)![image.png](attachment:319c1aeb-c5f2-46dd-b3be-052c92119075.png)

In [3]:
# Creates a 2-d array (6x6)

a = np.array([(14,94,40,70,76,22),(7,65,29,72,76,72), (68,6,59,27,64,55),
            (1,45,72,72,61,11),(77,6,35,58,37,66),(48,37,23,22,42,83)],
             dtype=np.int32)
a

array([[14, 94, 40, 70, 76, 22],
       [ 7, 65, 29, 72, 76, 72],
       [68,  6, 59, 27, 64, 55],
       [ 1, 45, 72, 72, 61, 11],
       [77,  6, 35, 58, 37, 66],
       [48, 37, 23, 22, 42, 83]])

In [8]:
# Slicing numpy array

# print(a[1][1])
print(a[1,1])

65


In [5]:
print(a[3])

[ 1 45 72 72 61 11]


In [10]:
# Create a 3x4 2-d array with zeroes

a = np.zeros((3,4))
a

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [12]:
# Create a 2x3x4 3-d array with ones

a = np.ones((2,3,4))
a

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

### Creating a range of values with Numpy
### np.arange()

In [13]:
# Create an array that starts from zero and ends at 8
# numpy.arange(start,stop,step)

a = np.arange(0,9)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [14]:
# Create array that starts from 10 and ends at 19 with intervals of 2

a = np.arange(10,20,2)
a

array([10, 12, 14, 16, 18])

In [15]:
# Create an array that contains the dates in month of Aug 2017

d = np.arange("2017-08-01", "2017-09-01", dtype="datetime64")
d

array(['2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04',
       '2017-08-05', '2017-08-06', '2017-08-07', '2017-08-08',
       '2017-08-09', '2017-08-10', '2017-08-11', '2017-08-12',
       '2017-08-13', '2017-08-14', '2017-08-15', '2017-08-16',
       '2017-08-17', '2017-08-18', '2017-08-19', '2017-08-20',
       '2017-08-21', '2017-08-22', '2017-08-23', '2017-08-24',
       '2017-08-25', '2017-08-26', '2017-08-27', '2017-08-28',
       '2017-08-29', '2017-08-30', '2017-08-31'], dtype='datetime64[D]')

### Linspace - Return evenly spaced numbers over a specified interval
### np.linspace()

In [16]:
# Create array that starts with 0 and ends at 2 with 9 samples in between

d = np.linspace(0,2,9)
d

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

### full() - Return a new array of given shape and type, filled with fill_value
### eye() - Return a 2-D array with ones on the diagonal and zeros elsewhere
### np.full(), np.eye()

In [18]:
# Creates a constant array with a specified value
# np.full(shape, fill_value)

e = np.full((2,2),7)
e

array([[7, 7],
       [7, 7]])

In [21]:
# Creates a 5x5 identity matrix

f = np.eye(5)
f

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### random() - Return random floats in the half-open interval
### randint() - Return random integers from low (inclusive) to high (exclusive)
### np.random.random(), np.random.randint()

In [22]:
# Creates a 3x3 array with random floats in the interval 0.0 to 1.0

g = np.random.random((3,3))
g

array([[0.42737672, 0.55418655, 0.58034527],
       [0.45143146, 0.17187591, 0.7742712 ],
       [0.95279704, 0.79916267, 0.36790602]])

In [23]:
# Create a 3x2x4 array with random numbers
# between 10 and 50 (not including 50)

h = np.random.randint(10,50,(3,2,4))
h

array([[[29, 24, 48, 33],
        [36, 29, 39, 27]],

       [[10, 43, 38, 20],
        [14, 41, 12, 37]],

       [[23, 27, 33, 35],
        [13, 36, 11, 19]]])

### empty() - Return a new array of given shape and type, without initializing entries
### np.empty()

In [29]:
# Create a 3x3 empty array

h = np.empty((3,3))
h

array([[0.42737672, 0.55418655, 0.58034527],
       [0.45143146, 0.17187591, 0.7742712 ],
       [0.95279704, 0.79916267, 0.36790602]])

### loadtxt() - Load data from a text file
### np.loadtxt() - Load data from a text file

### Both functions handle dtype from a Column perspective. However, it can't handle dtype if each Row has a different data type.

In [4]:
# Numpy dtype: https://www.w3schools.com/python/numpy/numpy_data_types.asp
# These's an issue with the code from the notes, can't handle "na" since it can't be casted to int / float

d = np.loadtxt("Data/situps.csv",
               delimiter=',',skiprows=1,
               dtype=[('StudentID','u8'),('NumberOfSitups','i8')]) # np.loadtxt is not great at handling missing or invalid data

d
d.shape

ValueError: invalid literal for int() with base 10: ''

### genfromtxt() - Load data from a text file, with missing values handled as specified
### np.genfromtxt()

In [7]:
d = np.genfromtxt("Data/situps.csv",
                 delimiter=",", skip_header=1,
                 dtype=[("StudentID","u8"),("NumberOfSitups","i8")], # Note that with "i" you need to follow the integer's bit values i2, i4, i8, etc.; but with "u" you can use any arbitrary number like u7, u8
                 missing_values=["na","-"], filling_values=0)

d

array([(1515576,  0), (1532576, 35), (1518169,  0), ..., (1501430, 59),
       (1598536, 14), (1578360, 33)],
      dtype=[('StudentID', '<u8'), ('NumberOfSitups', '<i8')])

In [8]:
d.shape

(10000,)

### reshape() - Gives a new shape to an array without changing its data
### np.reshape()

In [66]:
# Create 25 numbers from 1 to 25 as a 1-d array
# then resape this to a 2d array of shape 5x5

a = np.arange(1,26)
a

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25])

In [67]:
a.reshape(5,5)

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [71]:
a = np.arange(1,21).reshape(4,5)
a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20]])

### Put Different ndarrays into 1 Single ndarray
### np.array([])
### np.concatenate([])

In [73]:
c1 = np.array([1,2,3,4,5])
c2 = np.arange(2,21,2)
c3 = np.random.randint(1,100,10)

c = np.array([c1,c2,c3])
c

  c = np.array([c1,c2,c3])


array([array([1, 2, 3, 4, 5]),
       array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20]),
       array([44, 87, 56, 39, 45, 36,  9,  6,  7,  8])], dtype=object)

In [75]:
c = np.concatenate([c1,c2,c3])
c

array([ 1,  2,  3,  4,  5,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 44, 87,
       56, 39, 45, 36,  9,  6,  7,  8])

## 3. Numpy Data Types

### Integers, Floats, Booleans, Objects, Unicode

In [77]:
# https://numpy.org/devdocs/user/basics.types.html or https://www.w3schools.com/python/numpy/numpy_data_types.asp

# np.int32 # 32-bit integer
# np.int64 # 64-bit integer
# np.float64 # 64-bit decimal number
# np.bool # Boolean values as TRUE or FALSE
# np.object # Python object type
# np.unicode # Fixed-length Unicode string type

In [82]:
import math

# Assigning different data types to PI and evaluating its outputs

math.pi

3.141592653589793

In [86]:
# 32-bit integer

a = np.array([math.pi], dtype=np.int32)
print(a, a.dtype)

[3] int32


In [87]:
# 64-bit float

b = np.array([math.pi], dtype=np.float64)
print(b, b.dtype)

[3.14159265] float64


In [90]:
# Boolean

c = np.array([math.pi], dtype=np.bool)
print(c, c.dtype)

[ True] bool


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  c = np.array([math.pi], dtype=np.bool)


In [91]:
# Object

d = np.array([math.pi], dtype=np.object)
print(d, d.dtype)

[3.141592653589793] object


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  d = np.array([math.pi], dtype=np.object)


In [92]:
# Unicode

e = np.array([math.pi], dtype=np.unicode)
print(e, e.dtype)

['3.141592653589793'] <U17


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  e = np.array([math.pi], dtype=np.unicode)


## 4. Inspecting your Array

### shape, len, ndim, size
### df.shape, len(df), df.ndim, df.size

In [28]:
# shape: The elements of the shape tuple give the lengths of the corresponding array dimensions

a = np.array([1,2,3])
b = np.array([(1,2,3),(4,5,6)])
c = np.array([[(1,2,3,4),(5,6,7,8)], [(9,10,11,12),(13,14,15,16)]])

a.shape

(3,)

In [94]:
b.shape

(2, 3)

In [95]:
c.shape

(2, 2, 4)

In [29]:
# len: Returns number of rows along the first axis

print(len(a));print(len(b));print(len(c))

3
2
2


In [105]:
# ndim: Returns number of dimensions

print(a.ndim);print(b.ndim);print(c.ndim)

1
2
3


In [107]:
# size: Returns total number of elements across all dimensions

print(a.size);print(b.size);print(c.size)

3
6
16


## 5. Subsetting, Slicing and Indexing

### Slicing and Indexing a 1-Dimensional Array
### ndarray[x,y], where x represents the Row index and y the Column index
### E.g. ndarray[1:,:3]. Row 2 till end, Column 1 till Column 2

In [71]:
a = np.arange(9)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [74]:
# Select element at index 2

a[2]

2

In [110]:
# Select elements from index 0 to 7 with step 2

a[:7:2]

array([0, 2, 4, 6])

In [111]:
# Select and reverse elements from index 0 to the end

a[::-1]

array([8, 7, 6, 5, 4, 3, 2, 1, 0])

### Slicing and Indexing a 2-Dimensional Array

In [113]:
# Create an array with 0 to 12 and reshape it into a 4x3 array

b = np.arange(0,12).reshape(4,3)
b

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [114]:
# Select row 3

b[3]

array([ 9, 10, 11])

In [121]:
# Select rows 2 and 3

b[1:3]
# Or b[[1,2]]

array([[3, 4, 5],
       [6, 7, 8]])

In [123]:
# Select all rows, column 3 only

b[:,2]

array([ 2,  5,  8, 11])

In [126]:
# Select rows 2 till end, all columns

b[1:]
# Or b[1:,:3]

array([[ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

### Boolean Indexing

### Masking in python and data science is when you want manipulated data in a collection based on some criteria. The criteria you use is typically of a true or false nature, hence the boolean part. Boolean masking is typically the most efficient way to quantify a sub-collection in a collection.

In [86]:
# Create an array of 36 elements and reshape it to 6x6 array

a = np.arange(36).reshape(6,6)
a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [87]:
# Use boolean indexing to get even numbers only

b = a % 2 == 0
a[b] # Retrieve an array based on the boolean criteria above

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
       34])

In [94]:
even = a%2==0
# even # Retrieves boolean condition of the array
a%2==0

array([[ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False]])

## 6. Copying Arrays

### copy() - Return an array copy of the given object
### np.copy()

In [144]:
a = np.array([10,2,8,4,6,1,5,9,3,7])

npc = np.copy(a)
npc

array([10,  2,  8,  4,  6,  1,  5,  9,  3,  7])

In [145]:
npc.sort()
npc

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [146]:
len(npc)

10

## 7. Sorting Arrays

### sort() - Return a sorted copy of an array

### np.sort()

In [105]:
a = np.array([10,2,8,4,6,1,5,9,3,7])
b = np.array([("Red","Blue","Yellow"),("Green","Cyan","Magenta")])

print(a);print(b)

[10  2  8  4  6  1  5  9  3  7]
[['Red' 'Blue' 'Yellow']
 ['Green' 'Cyan' 'Magenta']]


In [106]:
a.sort()
a

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [110]:
b.sort(axis=1) # Goes across the array and sort for each Row
b

array([['Blue', 'Green', 'Magenta'],
       ['Cyan', 'Red', 'Yellow']], dtype='<U7')

In [111]:
b.sort(axis=0) # Goes down the array and sort for each Column
b

array([['Blue', 'Green', 'Magenta'],
       ['Cyan', 'Red', 'Yellow']], dtype='<U7')

## 8. Manipulating Array Shapes

### Change Array Shape

### flatten() - Convert your M-D array to a 1-D array and return a copy of the original array
### reshape() - Gives a new shape to an array without changing its data. It creates a new array and does not modify the original array itself
### shape() - The shape attribute an be used to get array dimensions or to change array dimensions
### resize() - The resize() method works just like the reshape() function, but modifies the array it operates on

In [12]:
b = np.arange(24).reshape(6,4)
b

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [13]:
# np.flatten()

c = b.flatten()
c

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [27]:
# np.reshape() - Does NOT modify the original array, unlike resize()

a = np.array([1,2,3,4,5,6])
a

array([1, 2, 3, 4, 5, 6])

In [19]:
b = a.reshape(3,2)
#Or b = np.reshape(a,(3,2))

b

array([[1, 2],
       [3, 4],
       [5, 6]])

In [20]:
# np.shape()

c = np.array([1,2,3,4,5,6])
c

array([1, 2, 3, 4, 5, 6])

In [25]:
# Retrieves the shape of the array

c.shape

(3, 2)

In [26]:
# Changes the shape of the array

c.shape = (3,2)
c

array([[1, 2],
       [3, 4],
       [5, 6]])

In [28]:
# np.resize() - Reshapes and modifies the array it operates on

a = np.array([[0,1],[2,3]]) # 2x2
a.shape

(2, 2)

In [30]:
a.resize(4,1) # Becomes a 4x1 after resizing
a

array([[0],
       [1],
       [2],
       [3]])

In [31]:
a.shape

(4, 1)

### Transpose Array

### transpose() - Reverse or permute the axes of an array; returns the modified array
### Can be called as a function - np.transpose(ndarray) , or an instance method - np.ndarray.transpose()

In [33]:
x = np.array(([[10,20,30,40], [50,60,70,80], [90,85,75,45]]))
x

array([[10, 20, 30, 40],
       [50, 60, 70, 80],
       [90, 85, 75, 45]])

In [38]:
x.shape

(3, 4)

In [44]:
y = x.transpose() # Calling it as an instance method
y

array([[10, 50, 90],
       [20, 60, 85],
       [30, 70, 75],
       [40, 80, 45]])

In [41]:
y.shape

(4, 3)

In [45]:
z = np.transpose(x) # Calling it as a function
z

array([[10, 50, 90],
       [20, 60, 85],
       [30, 70, 75],
       [40, 80, 45]])

In [43]:
z.shape

(4, 3)

### Combine Arrays

### concatenate() - Two or more arrays can be concatenated using concatenate() function along an axis
### The arrays must have the same shape, except in the dimension corresponding to axis (the first by default)

In [49]:
# Concatenating 1-D Arrays

x = np.arange(5)
y = np.arange(6,10)
z = np.arange(11,15)

print(x);print(y);print(z)

[0 1 2 3 4]
[6 7 8 9]
[11 12 13 14]


In [50]:
np.concatenate((x,y,z))

array([ 0,  1,  2,  3,  4,  6,  7,  8,  9, 11, 12, 13, 14])

In [65]:
# Concatenating 2-D Arrays on axis = 0 (the columns, vertical)

x = np.arange(1,5).reshape(2,2)
y = np.arange(6,12).reshape(3,2)
z = np.arange(8,16).reshape(4,2)

print(x);print(y);print(z)

[[1 2]
 [3 4]]
[[ 6  7]
 [ 8  9]
 [10 11]]
[[ 8  9]
 [10 11]
 [12 13]
 [14 15]]


In [57]:
a = np.concatenate((x,y,z))
a

array([[ 1,  2],
       [ 3,  4],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15]])

In [55]:
a.shape

(9, 2)

In [66]:
# Concatenating 2-D Arrays on axis = 1 (the rows, horizontal)

x = np.arange(1,5).reshape(2,2)
y = np.arange(6,12).reshape(2,3)
z = np.arange(8,16).reshape(2,4)

print(x);print(y);print(z)

[[1 2]
 [3 4]]
[[ 6  7  8]
 [ 9 10 11]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [60]:
a = np.concatenate((x,y,z),axis=1)
a

array([[ 1,  2,  6,  7,  8,  8,  9, 10, 11],
       [ 3,  4,  9, 10, 11, 12, 13, 14, 15]])

In [61]:
a.shape

(2, 9)

### Add / Remove Elements

### append() - Append values to end of an array, values must be same shape as array. If axis not specified, values can be any shape and flattened
### insert() - Insert values along the given axis before the given indices
### delete() - Return a new array with sub-arrays along an axis deleted. For a one dimensional array, this returns those entries not returned by arr[obj]

In [77]:
# append()

a = np.array([(1,2,3),(4,5,6)])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [69]:
a.shape

(2, 3)

In [71]:
b = np.append(a, np.array([(7,8,9)]), axis=0) # Maintain the same number of columns, add a row.
b

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [72]:
b.shape

(3, 3)

In [74]:
c = np.append(a, np.array([(7,8),(9,10)]), axis=1) # Maintain the same number of rows, add a column
c

array([[ 1,  2,  3,  7,  8],
       [ 4,  5,  6,  9, 10]])

In [75]:
c.shape

(2, 5)

In [78]:
# insert()

a = np.array([[1,1],[2,2],[3,3]])
print(a, a.shape)

[[1 1]
 [2 2]
 [3 3]] (3, 2)


In [81]:
b = np.insert(a, 1, 5) # Array is flattened; 5 is added to index position 1
b 

array([1, 5, 1, 2, 2, 3, 3])

In [84]:
c = np.insert(a, 1, 5, axis = 0) # Inserts value along given axis = 0 at index position 1 along the row
c

array([[1, 1],
       [5, 5],
       [2, 2],
       [3, 3]])

In [85]:
d = np.insert(a, 1, 5, axis = 1) # Inserts value along give axis = 1 at index position 1 along the column
d

array([[1, 5, 1],
       [2, 5, 2],
       [3, 5, 3]])

In [86]:
# delete()

arr = np.arange(12).reshape(4,3)
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [89]:
brr = np.delete(arr, 1, 0) # Deletes index position 1, along axis = 0, the row
brr

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [90]:
crr = np.delete(arr, 1, 1) # Deletes index position 1, along axis = 1, the column
crr

array([[ 0,  2],
       [ 3,  5],
       [ 6,  8],
       [ 9, 11]])

## 9. Splitting Arrays

### np.split() - Split an array into multiple sub-arrays
### np.hsplit() - Split an array into multiple sub-arrays horizontally (column-wise); Original Column / N - I.e. You control how many Columns your new sub-arrays get
### np.vsplit() - Split an array into multiple sub-arrays vertically (row-wise); Original Row / N - I.e. You control how many Rows your new sub-arrays get

In [165]:
# np.split() - numpy.split(ary, indices_or_sections, axis=0)
# If indices_or_sections is a 1-D array of sorted integers, the entries indicate where along axis the array is split.
# If indices_or_sections is an integer, N, the array will be divided into N equal arrays along axis

# So if you say (ndarray, 2, axis=0), you will get 2 sub-arrays along the Row 
# So it's Original Row / N = 4 / 2, thus each sub-array will get 2 Rows

# On the other hand if you say (ndarray, 3, axis=1), you will get 3 sub-arrays along the Column
# So it's Original Column / N = 6 / 3, thus each sub-array will get 2 Columns

a = np.arange(24).reshape(4,6)
b = np.split(a, 2, axis = 0)
c = np.split(a, 3, axis = 1)

print(a, a.shape);print(b);print(c)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]] (4, 6)
[array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]]), array([[12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])]
[array([[ 0,  1],
       [ 6,  7],
       [12, 13],
       [18, 19]]), array([[ 2,  3],
       [ 8,  9],
       [14, 15],
       [20, 21]]), array([[ 4,  5],
       [10, 11],
       [16, 17],
       [22, 23]])]


In [163]:
# np.hsplit() - numpy.hsplit(ary, indices_or_sections)
# Equivalent to split with axis=1
# Thus, it's Original Column / N = 4 / 2, thus each sub-array will get 2 Columns

x = np.arange(16.0).reshape(4,4)
y = np.hsplit(x, 2)

print(x);print(y)

[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]
 [12. 13. 14. 15.]]
[array([[ 0.,  1.],
       [ 4.,  5.],
       [ 8.,  9.],
       [12., 13.]]), array([[ 2.,  3.],
       [ 6.,  7.],
       [10., 11.],
       [14., 15.]])]


In [162]:
# np.vsplit() - numpy.vsplit(ary, indices_or_sections)
# Equivalent to split with axis=0
# So, it's Original Row / N = 4 / 2, thus each sub-array will get 2 Rows

x = np.arange(16.0).reshape(4,4)
y = np.vsplit(x, 2)

print(x);print(y)

[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]
 [12. 13. 14. 15.]]
[array([[0., 1., 2., 3.],
       [4., 5., 6., 7.]]), array([[ 8.,  9., 10., 11.],
       [12., 13., 14., 15.]])]


## 10. Converting Arrays

### tolist() - Return the array as a Python list
### astype() - Return a copy of the array, cast to a specified type

In [168]:
# np.tolist()

a = np.array([1,2])
a

array([1, 2])

In [169]:
a.tolist()

[1, 2]

In [172]:
b = np.array([[1,2], [3,4]])
print(b);print(b.tolist())

[[1 2]
 [3 4]]
[[1, 2], [3, 4]]


In [173]:
# np.astype()

a = np.array([1,2])
print(a);print(a.astype(int))

[1 2]
[1 2]


In [174]:
b = np.array([[1,2],[3,4]])
print(b);print(b.astype(float))

[[1 2]
 [3 4]]
[[1. 2.]
 [3. 4.]]


## 11. Arithmetic Operators & Logical Operators

### Arithmetic Operators: +, -, *, /, **, %
### These standard arithmetic operators such as: +, -, *, /, **, %, are applied on individual elements, so, the arrays have to be of the same size

### Logical Operators: >, < , ==
### They are also applied on individual elements, so arrays have to be of same size

In [178]:
# Arithmetic Operators

x = np.array(([10,20,30], [40,50,60]))
y = np.array(([1,2,3], [4,5,6]))

print(x);print(y)

[[10 20 30]
 [40 50 60]]
[[1 2 3]
 [4 5 6]]


In [177]:
print(x+y)
print(x-y)
print(x*y)
print(x/y)
print(x%y)

[[11 22 33]
 [44 55 66]]
[[ 9 18 27]
 [36 45 54]]
[[ 10  40  90]
 [160 250 360]]
[[10. 10. 10.]
 [10. 10. 10.]]
[[0 0 0]
 [0 0 0]]


In [180]:
# Logical Operators

x = np.array(([10,20,30], [40,50,60]))
y = np.array(([1,2,3], [4,5,6]))

print(x);print(y)

[[10 20 30]
 [40 50 60]]
[[1 2 3]
 [4 5 6]]


In [181]:
print(x>y)
print(x<y)
print(x==y)

[[ True  True  True]
 [ True  True  True]]
[[False False False]
 [False False False]]
[[False False False]
 [False False False]]


## 12. Mathematical and Statistical Methods

### np.sum() - Sum of all the elements in the array or along an axis. Zero-length arrays have sum 0

In [7]:
# numpy.sum(a, axis=None, dtype=None, out=None, keepdims=False)

a = np.arange(24).reshape(4,6)

a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [10]:
a.sum(axis=0) # Sum by Column

array([36, 40, 44, 48, 52, 56])

In [11]:
a.sum(axis=1) # Sum by Row

array([ 15,  51,  87, 123])

### np.mean() - Sum of all the elements in the array or along an axis. Zero-length arrays have sum 0

In [12]:
# numpy.mean(a, axis=None, dtype=None, out=None, keepdims=False)

a = np.arange(24).reshape(4,6)
a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [14]:
a.mean(axis=0) # Sums up the Column total and derives the mean

array([ 9., 10., 11., 12., 13., 14.])

In [15]:
a.mean(axis=1) # Sums up across the Row and calculate the average

array([ 2.5,  8.5, 14.5, 20.5])

### np.median() - Compute the median along the specified axis. Returns the median of the array elements

In [16]:
# numpy.median(a, axis=None, out=None, overwrite_input=False, keepdims=False)

a = np.arange(24).reshape(4,6)
a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [22]:
np.median(a) # (11 + 12) / 2 = 11.5

11.5

In [23]:
np.median(a, axis=0) # Gives the median of each Column (6 + 12) / 2

array([ 9., 10., 11., 12., 13., 14.])

In [24]:
np.median(a, axis=1) # Gives the median of each Row (2 + 3) / 2

array([ 2.5,  8.5, 14.5, 20.5])

### np.min() / np.max() - Return the minimum/max along a given axis

In [34]:
# ndarray.min(axis=None, out=None, keepdims=False)
# ndarray.max(axis=None, out=None, keepdims=False)

b = np.arange(24).reshape(8,3)
b

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23]])

In [35]:
b.min()

0

In [36]:
b.max()

23

In [37]:
b.min(axis=0) # Gives the min of each Column

array([0, 1, 2])

In [38]:
b.min(axis=1) # Gives the min of each Row

array([ 0,  3,  6,  9, 12, 15, 18, 21])

In [39]:
b.max(axis=0) # Gives the max of each Column

array([21, 22, 23])

In [40]:
b.max(axis=1) # Gives the max of each Row

array([ 2,  5,  8, 11, 14, 17, 20, 23])

### np.argmin() / np.argmax() - Returns the indices of the minimum values along an axis

In [42]:
# numpy.argmin(a, axis=None, out=None)
# numpy.argmax(a,axis=None, out=None)

b = np.arange(24).reshape(8,3)
b

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23]])

In [43]:
b.argmin()

0

In [45]:
b.argmin(axis=0) # Gives the min value's index position per Column

array([0, 0, 0], dtype=int64)

In [46]:
b.argmin(axis=1) # Gives the min value's index position per Row

array([0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [47]:
b.argmax()

23

In [48]:
b.argmax(axis=0) # Gives the max value's index position per Column

array([7, 7, 7], dtype=int64)

In [49]:
b.argmax(axis=1) # Gives the max value's index position per Row

array([2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

### np.cumsum() - Return the cumulative sum of the elements along a given axis

In [50]:
# numpy.cumsum(a, axis=None, dtype=None, out=None)

a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [51]:
np.cumsum(a)

array([ 1,  3,  6, 10, 15, 21], dtype=int32)

In [55]:
np.cumsum(a,axis=0) # Cumulative sum down each Column

array([[1, 2, 3],
       [5, 7, 9]], dtype=int32)

In [54]:
np.cumsum(a,axis=1) # Cumulative sum for across the Row

array([[ 1,  3,  6],
       [ 4,  9, 15]], dtype=int32)

### np.cumprod() - Return the cumulative product of the elements along a given axis

In [62]:
# numpy.cumprod(a, axis=None, dtype=None, out=None)

a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [63]:
np.cumprod(a)

array([  1,   2,   6,  24, 120, 720], dtype=int32)

In [64]:
np.cumprod(a,axis=0) # Goes down the array and derive the cumulative product for each Column

array([[ 1,  2,  3],
       [ 4, 10, 18]], dtype=int32)

In [65]:
np.cumprod(a,axis=1) # Goes across the array and derive the cumulative product for each Row

array([[  1,   2,   6],
       [  4,  20, 120]], dtype=int32)

### np.std() - Compute the standard deviation along the specified axis

In [77]:
# numpy.stdnumpy.std(a, axis=None,dtype=None,out=None)

a = np.array([[1,2],[3,4]])
a

array([[1, 2],
       [3, 4]])

In [78]:
np.std(a,dtype=np.float64)

1.118033988749895

In [79]:
np.std(a,axis=0,dtype=np.float64) # Goes down the array and calculates s.d. for the entire Column

array([1., 1.])

In [80]:
np.std(a,axis=1,dtype=np.float64) # Goes across the array and calculates s.d. for the entire Row

array([0.5, 0.5])

### np.var() - Compute the variance along the specified axis

In [81]:
# numpy.var(a, axis=None, dtype=None, out=None, ddof=0)

a = np.array([[1,2],[3,4]])
a

array([[1, 2],
       [3, 4]])

In [82]:
np.var(a,dtype=np.float64)

1.25

In [83]:
np.var(a,axis=0,dtype=np.float64) # Goes down the array and calculates variance for the Column

array([1., 1.])

In [84]:
np.var(a,axis=1,dtype=np.float64) # Goes across the array and calculates variance for the Row

array([0.25, 0.25])

### np.unravel_index(indices, shape) - Return index position of a value within N-Dimensional Array

### https://numpy.org/doc/stable/reference/generated/numpy.argmax.html?highlight=argmax#numpy.argmax

In [13]:
a = np.random.randint(1,1000,(3,4))
a, a.max()

(array([[325, 969, 220, 242],
        [576, 658, 784, 235],
        [162, 570, 512, 407]]),
 969)

In [14]:
np.unravel_index(np.argmax(a), a.shape) # Returns index position of max value in array a

(0, 1)

## 13. File Input / Output

### np.save() - Saving a single array as binary format
### np.savez() - Saving multiple arrays to a ZIP file

In [96]:
# numpy.save(file, arr, allow_pickle=True, fix_imports=True)

arr = np.arange(10)

np.save("myarray.npy", arr)

In [97]:
# numpy.savez(file, *args, **kwds)

a = np.arange(10)
b = np.arange(20).reshape(2,10)

np.savez("array_archive.npz", a=a, b=b)

### np.load() - Reading a file containing single or multiple arrays

In [100]:
# numpy.load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')

arr = np.load("myarray.npy")
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [99]:
arch = np.load("array_archive.npz")

print(arch["a"]);print(arch["b"])

[0 1 2 3 4 5 6 7 8 9]
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]


### np.savetxt() - Save arrays to a text file

In [101]:
# numpy.save(file, arr, allow_pickle=True, fix_imports=True)

x = y = z = np.arange(0,5,1)

# Save single array using defaults
np.savetxt("test1.txt", x) # x is an array

In [103]:
# Save single array ensuring output is formatted as 0 decimal place

np.savetxt("test2.txt", x, fmt="%.0f")

In [104]:
# Save multiple arrays, using comma "," as separator

np.savetxt("text3.txt", (x,y,z), fmt="%.0f", delimiter=",")

### The End