In [3]:
#Numpy: provides support for large multidimensional arrays and matrices 
#Uses algorithms written in C
#References: https://numpy.org/doc/stable/reference/arrays.ndarray.html#array-methods

In [2]:
import numpy as np

In [3]:
#Example 1: Curve grades

CURVE_CENTER = 80  # default mean that curved grades are shifted towards

grades = np.array([72, 35, 64, 88, 51, 90, 74, 12])


def curve(grades, center=None, max_grade=100):
    """Shift grades so their mean lands on ``center`` without lowering any grade.

    Parameters
    ----------
    grades : array-like
        Original grades. Lists and tuples are accepted as well as ndarrays.
    center : number, optional
        Target mean. When omitted, the module-level ``CURVE_CENTER`` is read
        at call time.
    max_grade : number, optional
        Upper bound applied to every curved grade (default 100).

    Returns
    -------
    numpy.ndarray
        Curved grades; each one is at least the original grade and at most
        ``max_grade``. An empty input yields an empty float array.
    """
    grades = np.asarray(grades)
    if grades.size == 0:
        # mean() of an empty array is NaN and emits a RuntimeWarning; there is
        # nothing to curve, so return early.
        return grades.astype(float)
    if center is None:
        center = CURVE_CENTER
    change = center - grades.mean()
    new_grades = grades + change
    # Lower bound is the original grade (element-wise), upper bound is a scalar.
    return np.clip(new_grades, grades, max_grade)


# These notes are `#` comments rather than a bare triple-quoted string: a string
# expression at the end of the cell would be displayed instead of curve(grades).
#
# np.clip(array, min, max) => limits the values of an array to a given range.
# Any value below min is replaced by min.
# Any value above max is replaced by max.
#
# The np.clip() call in curve() also provides another example of broadcasting.
# For the second argument to clip(), you pass grades, ensuring that each newly
# curved grade doesn't go lower than the original grade. But for the third argument,
# you pass a single value: 100. NumPy takes that value and broadcasts it against
# every element in new_grades, ensuring that none of the newly curved grades exceeds
# a perfect score.

curve(grades)

#Vectorization is the process of performing the same operation in the same way for each element in an array.

#Broadcasting is the process of extending two arrays of different shapes and figuring out how to perform a vectorized calculation between them.


array([ 91.25,  54.25,  83.25, 100.  ,  70.25, 100.  ,  93.25,  31.25])

In [10]:
#Example 2: Shape

# Twelve readings arranged as 2 blocks x 2 rows x 3 columns.
temperatures = np.reshape(
    np.array([
        29.3, 42.1, 18.8, 16.1, 38.0, 12.5,
        12.6, 49.9, 38.6, 31.3, 9.2, 22.2,
    ]),
    (2, 2, 3),
)

temperatures.shape  # (2, 2, 3)

# Exchange axis 1 and axis 2; axis 0 stays where it is.
a = temperatures.swapaxes(1, 2)

a.shape  # (2, 3, 2)

# axes are zero-indexed and identify which dimension is which

(2, 3, 2)

In [12]:
#Example 3: max()

table = np.array(
    [
        [5, 3, 7, 1],
        [2, 6, 7, 9],
        [1, 1, 1, 1],
        [4, 3, 2, 0],
    ]
)

np.max(table)  # 9 -> largest value in the whole array
np.max(table, axis=0)  # array([5, 6, 7, 9]) -> largest value of each column (vertical)
np.max(table, axis=1)  # array([7, 9, 1, 4]) -> largest value of each row (horizontal)

array([5, 6, 7, 9])

In [14]:
# Arrays can be broadcast against each other if, on each axis, their sizes match
# or one of the arrays has a size of 1 on that axis.

#Example 4: broadcast
'''
Array A has the shape (4, 1, 8), and array B has the shape (1, 6, 8). 
Based on the rules above, you can operate on these arrays together:

In axis 0, A has a 4 and B has a 1, so B can be broadcast along that axis.
In axis 1, A has a 1 and B has a 6, so A can be broadcast along that axis.
In axis 2, the two arrays have matching sizes, so they can operate successfully.
'''

# A varies along axes 0 and 2; B varies along axes 1 and 2.
A = np.reshape(np.arange(32), (4, 1, 8))
B = np.reshape(np.arange(48), (1, 6, 8))
# The size-1 axes are stretched to match, so the sum has the larger size on every axis.
C = np.add(A, B)
C.shape  # (4, 6, 8)

(4, 6, 8)

In [15]:
# Data Science Operations: Filter, Order, Aggregate

#NumPy arrays use commas between axes, so you can index multiple axes 
#in one set of square brackets.

#Example 1: Magic square

square = np.array(
    [
        [16, 3, 2, 13],
        [5, 10, 11, 8],
        [9, 6, 7, 12],
        [4, 15, 14, 1],
    ]
)

# Every column (axis=0) and every row (axis=1) must add up to 34.
assert (square.sum(axis=0) == 34).all()
assert (square.sum(axis=1) == 34).all()

# Each of the four 2x2 corner quadrants must add up to 34 as well.
assert all(
    square[rows, cols].sum() == 34
    for cols in (slice(None, 2), slice(2, None))
    for rows in (slice(None, 2), slice(2, None))
)

In [19]:
#Masking and Filtering

'''
A mask is an array that has the exact same shape as your data, but instead of your values, 
it holds Boolean values: either True or False. You can use this mask array to index into 
your data array in nonlinear and complex ways. 
It will return all of the elements where the Boolean array has a True value.
'''

# np.linspace(start, stop, num=50) => generates an array of evenly spaced numbers over a given interval;
# the step size is derived from those parameters.
numbers = np.reshape(np.linspace(5, 50, num=24, dtype=int), (4, -1))
# The -1 tells reshape to work out the size of that dimension automatically.

mask = np.equal(numbers % 4, 0)

a = numbers[mask]  # masking with a prebuilt Boolean array
b = numbers[(numbers % 4) == 0]  # building the mask inline

array([ 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48])

In [24]:
#Example 2: Normal distribution

from numpy.random import default_rng

SEED = 42  # fixed seed so Restart & Run All reproduces exactly the same sample

rng = default_rng(SEED)

values = rng.standard_normal(10000)

std = values.std()

# Keep only the samples strictly between -2*std and +2*std.
filtered = values[(values > -2 * std) & (values < 2 * std)]

filtered.size
values.size

# Fraction of samples kept; expected to be close to 0.95 for normally distributed data.
filtered.size / values.size

# NumPy designates & and | as the vectorized, element-wise operators to combine Booleans.
# Each comparison must be parenthesized because & and | bind more tightly than > and <.
# If you try `A and B` instead, NumPy raises a ValueError saying the truth value of an
# array with more than one element is ambiguous, because `and` operates on the truth
# value of the whole array, not element by element.

0.9544

In [31]:
#Transposing, Sorting, and Concatenating

a = np.array([[1, 2], [3, 4], [5, 6]])

a.transpose()  # rows become columns: shape (3, 2) -> (2, 3)
a.T  # attribute shorthand for the same transpose
np.sort(a, axis=-1)  # the default: sort within each row (last axis)
np.sort(a, axis=None)  # flatten the array, then sort
np.sort(a, axis=0)  # sort within each column

array([[1, 2],
       [3, 4],
       [5, 6]])

In [34]:
c = np.array([[4, 8], [6, 1]])

b = np.array([[3, 5], [7, 2]])

np.hstack([c, b])  # side by side: c's columns first, then b's
np.hstack([b, c])  # side by side: b's columns first, then c's
np.concatenate([b, c], axis=0)  # the default axis: c's rows stacked under b's
np.concatenate([b, c], axis=None)  # flatten both arrays, then join end to end

array([3, 5, 7, 2, 4, 8, 6, 1])

In [None]:
#Aggregating functions:
#   - .sum()
#   - .mean()
#   - .max()
#   - .std()