# Cap 03 - The Numpy Library

In [None]:
# Imports
import numpy as np

In [None]:
# ndarray -> n-dimensional array
a = np.array([1,2,3])
a

array([1, 2, 3])

In [4]:
a.dtype

dtype('int64')

In [None]:
# Dimensão do array
print(a.ndim)
# Size of array with 3 values
print(a.size)
# The format of the array, showing that it has only three values ​​in "row" and no values ​​in "column"
print(a.shape)

1
3
(3,)


In [None]:
# Creating a matrix
b = np.array([[1.3, 2.4],[0.3, 4.1]])
print(b.dtype)
print(b.ndim)
print(b.size)
print(b.shape)

float64
2
4
(2, 2)


In [None]:
# Size in bytes
print(b.itemsize) 
# Location in the memory
print(b.data)

8
<memory at 0x0000028DACB4BE00>


In [37]:
# The array() function, in addition to lists, can accept tuples and sequences of tuples.
c = np.array(((1,2,3),(4,5,6)))
# It can also accept sequences of tuples and interconnected lists.
d = np.array([(1,2,3),[4,5,6],(7,8,9)])

print(c,'\n')
print(d)

[[1 2 3]
 [4 5 6]] 

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [None]:
# remembering we can use string type too.
# all supported types are in Python Data Analytics With Pandas, Numpy, and Matplotlib.docx.
e = np.array([['a','b'],['c','d']])
e

array([['a', 'b'],
       ['c', 'd']], dtype='<U1')

In [2]:
# For set a type to your list or tuples
f = np.array( [ [1,2,3],[4,5,6] ], dtype=complex )
f

array([[1.+0.j, 2.+0.j, 3.+0.j],
       [4.+0.j, 5.+0.j, 6.+0.j]])

In [None]:
# The zeros() function, for example, creates a full array of zeros with dimensions
# defined by the shape argument. For example, to create a two-dimensional array 3x3,
# you can use:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [None]:
# While the ones() function:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [36]:
# arange() function generates NumPy arrays
# with numerical sequences that respond to particular rules depending on the passed
# arguments:
print(np.arange(0,10))
print(np.arange(4,10), '\n')
print(np.arange(0,11))
print(np.arange(4,11))

[0 1 2 3 4 5 6 7 8 9]
[4 5 6 7 8 9] 

[ 0  1  2  3  4  5  6  7  8  9 10]
[ 4  5  6  7  8  9 10]


In [None]:
# It is also possible to generate a sequence of values with precise intervals between them
# adding the third argument:
np.arange(0,11,2)
# In addition, this third argument can also be a float.

array([ 0,  2,  4,  6,  8, 10])

In [16]:
# reshape() function divides a linear array in different parts in the manner
# specified by the shape argument:
np.arange(0,12).reshape(3,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [3]:
# Remembering that to cut the array, the values ​​must be divisible by the size of the array
# or you will get a error: "cannot reshape array of size 11 into shape (3,4)"
np.arange(0,12).reshape(3,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [19]:
# linspace() function the third argument defines the
# number of elements into which we want the interval to be split.
np.linspace(0,11,5, dtype=int)

array([ 0,  2,  5,  8, 11])

In [35]:
# random functon:
print(np.random.random(3), '\n')
print(np.random.random((3,3)))

[0.86627533 0.34843748 0.65692947] 

[[0.72990689 0.87374301 0.90667513]
 [0.9140526  0.31783006 0.40313131]
 [0.60395049 0.52680533 0.22082555]]


### Basic Operations

In [4]:
# arithmetic operators
a = np.arange(1,5)
print(a, '\n')
print('Add: ', a + 4)
print('Sub: ', a - 4)
print('Multi: ', a * 4)
print('Div: ', a / 4)

[1 2 3 4] 

Add:  [5 6 7 8]
Sub:  [-3 -2 -1  0]
Multi:  [ 4  8 12 16]
Div:  [0.25 0.5  0.75 1.  ]


In [5]:
# These operators can also be used between two arrays. In NumPy, these operations
# are element-wise, that is, the operators are applied only between corresponding
# elements. These are objects that occupy the same position, so that the end result will be
# a new array containing the results in the same location of the operands.
b = np.arange(5, 9)
print( b )
print('Add: ', a + b)
print('Sub: ', a - b)
print('Multi: ', a * b)
print('Div: ', a / b)


[5 6 7 8]
Add:  [ 6  8 10 12]
Sub:  [-4 -4 -4 -4]
Multi:  [ 5 12 21 32]
Div:  [0.2        0.33333333 0.42857143 0.5       ]


In [45]:
# these operators are also available for functions, provided that the value
# returned is a NumPy array
print(a * np.sin(b), '\n')
print(a * np.sqrt(b))

[-0.95892427 -0.558831    1.9709598   3.95743299] 

[ 2.23606798  4.89897949  7.93725393 11.3137085 ]


In [26]:
# Moving on to the multidimensional case, even here the arithmetic operators
# continue to operate element-wise
A = np.arange(0,9).reshape(3,3)
B = np.ones((3,3), dtype=int)
A+B

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [27]:
# Increment and Decrement Operators
A += 2
print(A, '\n')
B -= 3
print(B, '\n')
C = np.arange(0,9).reshape(3,3)
C *= 2
print(C)

[[ 2  3  4]
 [ 5  6  7]
 [ 8  9 10]] 

[[-2 -2 -2]
 [-2 -2 -2]
 [-2 -2 -2]] 

[[ 0  2  4]
 [ 6  8 10]
 [12 14 16]]


In [29]:
# Universal Functions (ufunc)
# A universal function, generally called ufunc, is a function operating on an array in an
# element-by-element
a = np.arange(1,5)
print('square root:', np.sqrt(a))
print('logarithm:', np.log(a))
print('sin:', np.sin(a))


square root: [1.         1.41421356 1.73205081 2.        ]
logarithm: [0.         0.69314718 1.09861229 1.38629436]
sin: [ 0.84147098  0.90929743  0.14112001 -0.7568025 ]


In [32]:
# Aggregate Functions
# Aggregate functions perform an operation on a set of values, an array for example, and
# produce a single result.
a = np.array([3.3, 4.5, 1.2, 5.7, 0.3])
print(a, '\n')
print('Sum:', a.sum())
print('Min:', a.min())
print('Max:', a.max())
print('Mean:', a.mean())
print('Std:', a.std())


[3.3 4.5 1.2 5.7 0.3] 

Sum: 15.0
Min: 0.3
Max: 5.7
Mean: 3.0
Std: 2.0079840636817816


In [2]:
# Indexing, Slicing, and Iterating
a = np.arange(10, 20)
print(a[1:5])
print(a[1:5:2])

[11 12 13 14]
[11 13]


In [3]:
print(a[1:5:2])
print(a[::2])
print(a[:5:2])
print(a[:5:])

[11 13]
[10 12 14 16 18]
[10 12 14]
[10 11 12 13 14]


In [7]:
# In the case of a two-dimensional array:
A = np.arange(10,19).reshape((3,3))
print(A, '\n')
print('Just the first row: ', A[0,:])
print('Just the first Column: ', A[:,0])

[[10 11 12]
 [13 14 15]
 [16 17 18]] 

Just the first row:  [10 11 12]
Just the first Column:  [10 13 16]


In [8]:
# extracting a smaller matrix
A[0:2, 0:2]

array([[10, 11],
       [13, 14]])

In [None]:
# There are two ways to go through the matrix, one of them is printing line by line 
# completely and the other is using the flat method to print element by element.
# 1º
for row in A:
    print(row)
print()    
# 2º
for item in A.flat:
    print(item)

[10 11 12]
[13 14 15]
[16 17 18]

10
11
12
13
14
15
16
17
18


In [16]:
# Traversing the array in a more elegant way with a native numpy function:
# Calculating the average values first by column and then by row
print(A, '\n')
print(np.apply_along_axis(np.mean, axis=0, arr=A))

print(np.apply_along_axis(np.mean, axis=1, arr=A))

[[10 11 12]
 [13 14 15]
 [16 17 18]] 

[13. 14. 15.]
[11. 14. 17.]


In [17]:
def foo(x):
 return x/2

In [19]:
print(np.apply_along_axis(foo, axis=1, arr=A))
print()
print(np.apply_along_axis(foo, axis=0, arr=A))

[[5.  5.5 6. ]
 [6.5 7.  7.5]
 [8.  8.5 9. ]]

[[5.  5.5 6. ]
 [6.5 7.  7.5]
 [8.  8.5 9. ]]


### Conditions and Boolean Arrays

In [3]:
A = np.random.random((4,4))
A

array([[0.01495975, 0.36799583, 0.53390223, 0.70885788],
       [0.09358262, 0.89420981, 0.25256544, 0.29001822],
       [0.35496044, 0.51599453, 0.21995106, 0.1277313 ],
       [0.75477294, 0.52256646, 0.96868286, 0.0206336 ]])

In [5]:
A < 0.5

array([[ True,  True, False, False],
       [ True, False,  True,  True],
       [ True, False,  True,  True],
       [False, False, False,  True]])

In [6]:
# inserting the previous condition directly inside the square brackets,
# you will extract all elements smaller than 0.5, so as to obtain a new array
A [A < 0.5]

array([0.01495975, 0.36799583, 0.09358262, 0.25256544, 0.29001822,
       0.35496044, 0.21995106, 0.1277313 , 0.0206336 ])

### Shape Manipulation

In [3]:
a = np.random.random(12)
print(a, '\n')
A = a.reshape(3, 4)
print(A)

[0.81596912 0.88890246 0.11396472 0.13969635 0.92803776 0.68228102
 0.19776702 0.18125685 0.18545605 0.34224896 0.63031597 0.51572866] 

[[0.81596912 0.88890246 0.11396472 0.13969635]
 [0.92803776 0.68228102 0.19776702 0.18125685]
 [0.18545605 0.34224896 0.63031597 0.51572866]]


In [None]:
# You can convert a two-dimensional array into a one-dimensional array, by using the ravel() function
a = a.ravel()
a

array([0.81596912, 0.88890246, 0.11396472, 0.13969635, 0.92803776,
       0.68228102, 0.19776702, 0.18125685, 0.18545605, 0.34224896,
       0.63031597, 0.51572866])

In [6]:
# Or even here acting directly on the shape attribute of the array itself.
a.shape = (12)
a

array([0.81596912, 0.88890246, 0.11396472, 0.13969635, 0.92803776,
       0.68228102, 0.19776702, 0.18125685, 0.18545605, 0.34224896,
       0.63031597, 0.51572866])

### Array Manipulation

In [17]:
# Joining Arrays
# stacking, you can perform vertical stacking with the vstack() function
# and the hstack() performs horizontal

A = np.zeros((3,3))
B = np.ones((3,3))
print(np.vstack((A,B)),'\n')
print(np.hstack((A,B)))

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]] 

[[0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 1. 1. 1.]]


In [None]:
# Two other functions performing stacking between multiple arrays are column_stack() and row_stack().
# Which are stacked as columns or rows in order to form a new two-dimensional array

a = np.array([0, 1, 2])
b = np.array([3, 4, 5])
c = np.array([6, 7, 8])
print(np.column_stack((a, b, c)), '\n')
print(np.row_stack((a, b, c)))

[[0 3 6]
 [1 4 7]
 [2 5 8]] 

[[0 1 2]
 [3 4 5]
 [6 7 8]]


  print(np.row_stack((a, b, c)))


### Splitting Arrays

In [None]:
# You have a set of functions that work both horizontally with the hsplit()
# Function and vertically with the vsplit() function.

A = np.arange(16).reshape(4,4)
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [21]:
# If you want to split the array horizontally, meaning the width of the array is
# Divided into two parts, the 4x4 matrix A will be split into two 2x4 matrices.

[B,C] = np.hsplit(A, 2)
print(B,'\n')
print(C)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]] 

[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


In [None]:
# the height of the array is divided into two parts, the 4x4 matrix A will be split into two 4x2 matrices

[B,C] = np.vsplit(A, 2)
print(B,'\n')
print(C)

[[0 1 2 3]
 [4 5 6 7]] 

[[ 8  9 10 11]
 [12 13 14 15]]


#### Split

In [None]:

# If you use the option axis = 1, then the indexes will be columns; if instead the option is axis = 0, 
# then they will be row indexes.
# For example, if you want to divide the matrix into three parts:

[A1,A2,A3] = np.split(A,[1,3], axis=1)
print(A1)
print(A2)
print(A3)


[[ 0]
 [ 4]
 [ 8]
 [12]]
[[ 1  2]
 [ 5  6]
 [ 9 10]
 [13 14]]
[[ 3]
 [ 7]
 [11]
 [15]]


#### Dividing by row

In [None]:
[A1,A2,A3] = np.split(A,[1,3], axis=0)
print(A1)
print(A2)
print(A3)

# This feature also includes the functionalities of the vsplit() and hsplit() functions.

[[0 1 2 3]]
[[ 4  5  6  7]
 [ 8  9 10 11]]
[[12 13 14 15]]


### Copies or Views of Objects

In [28]:
# Numpy basically creates a view of your array, or BASICALLY a pointer to where your array is.

a = np.array([1,2,3,4])
b = a
print(b,'\n')

a[2] = 0
print(b)


[1 2 3 4] 

[1 2 0 4]


In [None]:
# Whether you pass the array as above or just pass the sliced ​​value of this array, 
# it is just a view of the original and not a copy.

c = a[0:2]
print(c)

a[0] = 0
print(c)

[1 2]
[0 2]


In [32]:
# Now if you really want a copy of the array a, just use the copy() function.

a = np.array([1,2,3,4])
c = a.copy()
print(a)
print(c, '\n')

a[0] = 0
print(a)
print(c)


[1 2 3 4]
[1 2 3 4] 

[0 2 3 4]
[1 2 3 4]


#### Vectorization

In [None]:
# Vectorization, along with the broadcasting, is the basis of the internal implementation
# of NumPy. Vectorization is the absence of an explicit loop during the developing of the
# code.

# For example, NumPy allows you to express the multiplication
# of two arrays Or even two matrices shown:

print (a * b, '\n')
B = A
print (A * B)

# If it weren't for the ease of numpy's vectorization, we would have to do these calculations 
# in a more "extensive" way using a for loop.


[ 0  4  0 16] 

[[  0   1   4   9]
 [ 16  25  36  49]
 [ 64  81 100 121]
 [144 169 196 225]]


#### Broadcasting


In [None]:
#  It is basically a way for numpy to perform (in the background) operations
#  between arrays with different dimensions, where it, for example, autocompletes 
#  the array with the smallest dimension to match the other or others and thus be
#  able to perform the desired operation, such as addition or multiplication.

#### Structured Arrays

In [None]:
# This type of array contains structs or records instead of individual items.
# For example, if you want to specify a struct consisting of an integer, a character
# string of length 6 and a Boolean value, you will specify the three types of data in the
# dtype option with the right order using the corresponding specifiers.

# bytes b1
# int i1, i2, i4, i8
# unsigned ints u1, u2, u4, u8
# floats f2, f4, f8
# complex c8, c16
# fixed length strings a<n>

#Below, the s type is a new a (fixed length strings), a was depreciated in numpy 2.0
structured = np.array([(1, 'First', 0.5, 1+2j),(2, 'Second', 1.3,
2-2j), (3, 'Third', 0.8, 1+3j)],dtype=('i2, S6, f4, c8'))

structured

array([(1, b'First', 0.5, 1.+2.j), (2, b'Second', 1.3, 2.-2.j),
       (3, b'Third', 0.8, 1.+3.j)],
      dtype=[('f0', '<i2'), ('f1', 'S6'), ('f2', '<f4'), ('f3', '<c8')])

In [10]:
# You can also use the data type explicitly specifying int8, uint8, float16, complex64,
# and so forth.

structured = np.array([(1, 'First', 0.5, 1+2j),(2, 'Second', 1.3,2-2j),
(3, 'Third', 0.8, 1+3j)],dtype=('int16, S6, float32, complex64'))

structured

array([(1, b'First', 0.5, 1.+2.j), (2, b'Second', 1.3, 2.-2.j),
       (3, b'Third', 0.8, 1.+3.j)],
      dtype=[('f0', '<i2'), ('f1', 'S6'), ('f2', '<f4'), ('f3', '<c8')])

In [8]:
print(structured[1])

(2, b'Second', 1.3, 2.-2.j)


In [None]:
# You can call all elements of the same type by passing f (which stands for field) + position:
print(structured['f0'])
print(structured['f1'])
print(structured['f2'])
print(structured['f3'])

[1 2 3]
[b'First' b'Second' b'Third']
[0.5 1.3 0.8]
[1.+2.j 2.-2.j 1.+3.j]


In [25]:
# We can also pass aliases to field types, as if it were a dictionary:

structured = np.array([(1,'First',0.5,1+2j), (2,'Second',1.3,2-2j), (3,'Third',0.8,1+3j)],
dtype=[('id','i2'),('position','S6'),('value','f4'),('complex','c8')])

structured

array([(1, b'First', 0.5, 1.+2.j), (2, b'Second', 1.3, 2.-2.j),
       (3, b'Third', 0.8, 1.+3.j)],
      dtype=[('id', '<i2'), ('position', 'S6'), ('value', '<f4'), ('complex', '<c8')])

In [27]:
# And of course, call them by their alias:
structured['position']

array([b'First', b'Second', b'Third'], dtype='|S6')

### Reading and Writing Array Data on Files

In [28]:
# NumPy provides a pair of functions called save() and load() that enable you to save
# and then later retrieve data stored in binary format.

data=([[ 0.86466285, 0.76943895, 0.22678279],
[ 0.12452825, 0.54751384, 0.06499123],
[ 0.06216566, 0.85045125, 0.92093862],
[ 0.58401239, 0.93455057, 0.28972379]])

In [29]:
# name of the ouput file, data you want to save
np.save('saved_data',data)

In [30]:
# When you need to recover the data stored in a .npy file, you use the load() function

loaded_data = np.load('saved_data.npy')
loaded_data

array([[0.86466285, 0.76943895, 0.22678279],
       [0.12452825, 0.54751384, 0.06499123],
       [0.06216566, 0.85045125, 0.92093862],
       [0.58401239, 0.93455057, 0.28972379]])

#### Reading Files with Tabular Data

In [None]:
data = np.genfromtxt('ch3_data.csv', delimiter=',', names=True)
data