In [1]:
import pandas as pd, numpy as np

In [3]:
# Define a matrix with 2 rows and 3 columns (one inner list per row)
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [5]:
arr.shape

(2, 3)

Una de las principales ventajas de usar numpy es la vectorización de funciones aplicadas a un `np.array`; i.e., poder aplicar una función a cada elemento del arreglo sin necesidad de usar un ciclo `for`, lo cual lo hace más eficiente.

In [6]:
%%timeit -n 5
# Baseline: square every integer with a pure-Python list comprehension, then sum
sum([i**2 for i in range(1000000)])

294 ms ± 3.14 ms per loop (mean ± std. dev. of 7 runs, 5 loops each)


In [7]:
%%timeit -n 5
# Vectorized version: square the whole np.arange array at once, then sum.
# NOTE(review): assumes the default integer dtype is 64-bit; with a 32-bit
# default the squares would overflow — confirm on the target platform.
(np.arange(1000000)**2).sum()

2.58 ms ± 536 µs per loop (mean ± std. dev. of 7 runs, 5 loops each)


In [8]:
arr[0]

array([1, 2, 3])

In [9]:
arr[1]

array([4, 5, 6])

In [10]:
# Rebind arr to a 4x4 matrix (one inner list per row)
arr = np.array([
    [1, 2, 3, 4],
    [4, 5, 6, 5],
    [7, 8, 9, 0],
    [3, 4, 5, 6],
])
arr

array([[1, 2, 3, 4],
       [4, 5, 6, 5],
       [7, 8, 9, 0],
       [3, 4, 5, 6]])

In [11]:
arr.shape

(4, 4)

## Operaciones principales con Numpy arrays

In [12]:
# 2x3 integer array used by the element-wise operation examples
a1 = np.array([[2, 4, 6], [3, 5, 1]])

### Broadcasting operations

In [13]:
# Adding a scalar adds it to every element of the array.
# The result is only displayed; it is not assigned back to a1.
a1 +1

array([[3, 5, 7],
       [4, 6, 2]])

In [15]:
# Augmented assignment can also be used to increment a variable.
# Unlike `a1 + 1`, `+=` stores the result back into a1, so re-running this
# cell increments the values again.
a1 +=1
a1

array([[3, 5, 7],
       [4, 6, 2]])

In [16]:
# Raising the array to a scalar power raises each element to that power
a1**2

array([[ 9, 25, 49],
       [16, 36,  4]])

In [17]:
# Entry-by-entry multiplication (each element times the matching element)
a1*a1

array([[ 9, 25, 49],
       [16, 36,  4]])

In [18]:
# Entry-by-entry logical comparison; the result is a boolean array
a1 <= 3

array([[ True, False, False],
       [False, False,  True]])

## np.arange, np.linspace

Podemos crear rangos de números [a,b) usando la función `arange` (análogo a `range` en Python)

In [19]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [20]:
# Same start, stop, step arguments as Python's range
np.arange(2,74,4)

array([ 2,  6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, 66,
       70])

In [21]:
# Unlike range, np.arange accepts fractional steps
np.arange(1,11,0.5) # the upper limit is NOT included

array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
        6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5, 10. , 10.5])

Usamos la función `linspace` cuando deseamos un arreglo de $n$ elementos equiespaciados entre $a$ y $b$ (ambos extremos incluidos), con $a < b$.

In [30]:
a, b = 2, 10
np.linspace(a,b,20) # start, stop (included), number of points: 20 points, i.e. 19 equal intervals

array([ 2.        ,  2.42105263,  2.84210526,  3.26315789,  3.68421053,
        4.10526316,  4.52631579,  4.94736842,  5.36842105,  5.78947368,
        6.21052632,  6.63157895,  7.05263158,  7.47368421,  7.89473684,
        8.31578947,  8.73684211,  9.15789474,  9.57894737, 10.        ])

## Índices

In [33]:
# The integers 0..29 arranged as 2 rows by 15 columns
arr = np.arange(30).reshape(2,15)
arr

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

In [34]:
# Rearrange the same 30 values as 10 rows by 3 columns
arr = arr.reshape(10,3)
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23],
       [24, 25, 26],
       [27, 28, 29]])

In [35]:
# Select the first row
arr[0]

array([0, 1, 2])

In [36]:
arr[0,:] # first row, all columns

array([0, 1, 2])

In [37]:
# Select the first column
arr[:,0]

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27])

In [38]:
arr[:,1] # all rows, second column (index 1)

array([ 1,  4,  7, 10, 13, 16, 19, 22, 25, 28])

In [39]:
arr[:,1:] # all rows, every column from index 1 onward

array([[ 1,  2],
       [ 4,  5],
       [ 7,  8],
       [10, 11],
       [13, 14],
       [16, 17],
       [19, 20],
       [22, 23],
       [25, 26],
       [28, 29]])

In [40]:
arr[:,[0,2]] # extract the first and third columns

array([[ 0,  2],
       [ 3,  5],
       [ 6,  8],
       [ 9, 11],
       [12, 14],
       [15, 17],
       [18, 20],
       [21, 23],
       [24, 26],
       [27, 29]])

In [42]:
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23],
       [24, 25, 26],
       [27, 28, 29]])

In [41]:
# A (row, column) pair of indices selects a single element
fila, columna = 0,2
arr[fila, columna]

2

In [43]:
# Lists of row and column indices are paired position by position,
# so this picks the elements at (-2, 1) and (-1, -1)
filas, cols = [-2,-1], [1,-1]
arr[filas, cols]

array([25, 29])

In [44]:
arr[[0,-1]] # first and last rows; equivalent to arr[[0,-1],:]

array([[ 0,  1,  2],
       [27, 28, 29]])

In [45]:
# A single value can be assigned to specific rows (here the first and last);
# the assignment modifies arr itself.
arr[[0,-1]] = 0
arr

array([[ 0,  0,  0],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23],
       [24, 25, 26],
       [ 0,  0,  0]])

In [46]:
# Several values can be assigned at once, as long as the right-hand side has
# the same shape as the selection (2 rows by 3 columns here).
# NOTE(review): no random seed is set before this call, so the assigned values
# differ on every run.
arr[[0,-1]] = np.random.randint(-100,-1, size=(2,3))

In [47]:
arr

array([[ -8, -93, -80],
       [  3,   4,   5],
       [  6,   7,   8],
       [  9,  10,  11],
       [ 12,  13,  14],
       [ 15,  16,  17],
       [ 18,  19,  20],
       [ 21,  22,  23],
       [ 24,  25,  26],
       [-27, -39, -95]])

In [49]:
arr[5][::-1] # take the sixth row (index 5) and show it reversed

array([17, 16, 15])

In [50]:
arr[::-1] # reverse the order of the rows

array([[-27, -39, -95],
       [ 24,  25,  26],
       [ 21,  22,  23],
       [ 18,  19,  20],
       [ 15,  16,  17],
       [ 12,  13,  14],
       [  9,  10,  11],
       [  6,   7,   8],
       [  3,   4,   5],
       [ -8, -93, -80]])

## BROADCASTING: es la manera en que numpy opera con arrays de dimensiones diferentes. Para dos arrays A y B, dos dimensiones son compatibles cuando:
1. son iguales, o
2. una de ellas es igual a 1

In [51]:
# A is a 5x5 grid holding 0..24; B is a single row (shape 1x5) holding 0..4
A = np.arange(25).reshape(5,5)
B = np.arange(5).reshape(1,5)

# Print both operands, separated by a blank line
print(A)
print('\n', B)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]

 [[0 1 2 3 4]]


In [52]:
# B has shape (1, 5), so it is broadcast over the 5 rows of A:
# column j of A is multiplied by B[0, j]
A*B

array([[ 0,  1,  4,  9, 16],
       [ 0,  6, 14, 24, 36],
       [ 0, 11, 24, 39, 56],
       [ 0, 16, 34, 54, 76],
       [ 0, 21, 44, 69, 96]])

In [53]:
# Same broadcasting with addition: B[0, j] is added to every entry of column j of A
A+B

array([[ 0,  2,  4,  6,  8],
       [ 5,  7,  9, 11, 13],
       [10, 12, 14, 16, 18],
       [15, 17, 19, 21, 23],
       [20, 22, 24, 26, 28]])

In [54]:
# np.zeros --> array filled with zeros
np.zeros(shape = 10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [55]:
# By default the function produces float values
np.zeros(shape=(5,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [56]:
# The dtype argument makes the values integers instead
np.zeros(shape=(5,5), dtype=int)

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [57]:
# np.ones --> array filled with ones
np.ones(shape=4)

array([1., 1., 1., 1.])

In [58]:
np.ones(shape=(5,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [60]:
arr1 = np.arange(11)

In [61]:
arr1

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [62]:
arr1.sum()

55

In [63]:
arr1.min()

0

In [66]:
arr1.max()

10

In [67]:
arr1.mean()

5.0

In [68]:
arr1.cumsum()

array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45, 55])

In [69]:
# Seed NumPy's global random generator first, so the "random" values below
# come out the same on every run.
from numpy.random import randint, seed

seed(42)
a2 = randint(low=5, high=10, size=(5, 10))  # integers from 5 to 9 (10 is excluded)
a2

array([[8, 9, 7, 9, 9, 6, 7, 7, 7, 9],
       [8, 7, 9, 6, 8, 6, 8, 9, 5, 8],
       [6, 9, 8, 5, 5, 7, 7, 6, 8, 8],
       [7, 8, 8, 5, 7, 9, 7, 9, 5, 6],
       [8, 5, 8, 6, 6, 5, 6, 9, 6, 8]])

In [70]:
# Mean along axis 0: the rows are collapsed, giving one mean per COLUMN
# (a2 is 5x10, so the result has 10 values)
a2.mean(axis = 0)

array([7.4, 7.6, 8. , 6.2, 7. , 6.6, 7. , 8. , 6.2, 7.8])

In [72]:
a2.mean(axis = 1) # mean along axis 1: columns are collapsed, giving one mean per ROW (5 values)

array([7.8, 7.4, 6.9, 7.1, 6.7])

In [73]:
# np.unique() returns the distinct values of the array, sorted
np.unique(a2)

array([5, 6, 7, 8, 9])

In [74]:
# With return_index=True the result is a tuple: the unique values and, for
# each one, the position of its first occurrence in the flattened array
np.unique(a2, return_index= True )

(array([5, 6, 7, 8, 9]), array([18,  5,  2,  0,  1]))

In [75]:
# unique() can also return how many times each value appears
np.unique(a2, return_counts=True)

(array([5, 6, 7, 8, 9]), array([ 7, 10, 10, 13, 10]))

In [82]:
# np.where() with only a condition returns a tuple of index arrays marking
# the positions where the condition holds
a3 = np.array([-1,0,1,-2,1,3,-4])
np.where(a3 > 0)

(array([2, 4, 5]),)

In [83]:
# Keep the indices and use them to pull out the matching elements
indicesw = np.where(a3 > 0)
a3[indicesw]

array([1, 1, 3])

In [84]:
# Rebind A to a 2x3 array for the matrix-product examples
A = np.array([[1, 2, 3], [9, 3, 2]])
A

array([[1, 2, 3],
       [9, 3, 2]])

In [85]:
# The same data can also be created as an np.matrix.
# NOTE(review): NumPy's documentation no longer recommends np.matrix, even for
# linear algebra; regular 2-D arrays combined with the @ operator are preferred.
B = np.matrix([
    [1,2,3],
    [9,3,2]
])

In [86]:
B

matrix([[1, 2, 3],
        [9, 3, 2]])

In [87]:
# Transpose the matrix B (rows become columns)
B.T

matrix([[1, 9],
        [2, 3],
        [3, 2]])

In [88]:
# Matrix multiplication uses the @ operator. This particular product is
# expected to fail: A is 2x3 and B is 2x3, so the inner dimensions (3 and 2)
# do not match. The ValueError is caught and printed so the demonstration
# stays visible while the notebook can still run from top to bottom.
try:
    A @ B
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 3)

In [89]:
# Transposing B gives shapes (2, 3) @ (3, 2), so the product is defined
A@B.T

matrix([[14, 21],
        [21, 94]])

In [90]:
# Another way to write it:
np.dot(A, B.T) # dot product; for these 2-D operands it gives the same result as A @ B.T

matrix([[14, 21],
        [21, 94]])

## numpy.linalg

In [91]:
from numpy import linalg

In [92]:
# 2x2 matrix built by reshaping a flat list of four values
D = np.array([2,1,2,4]).reshape(2,2)

In [93]:
D

array([[2, 1],
       [2, 4]])

In [94]:
linalg.inv(D) # inverse of the matrix D

array([[ 0.66666667, -0.16666667],
       [-0.33333333,  0.33333333]])

In [95]:
D@linalg.inv(D) # a matrix times its inverse gives the identity matrix

array([[1., 0.],
       [0., 1.]])

In [98]:
np.dot(D,linalg.inv(D)) # same check written with np.dot instead of @

array([[1., 0.],
       [0., 1.]])

In [99]:
np.eye(5) # 5x5 identity matrix

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [96]:
# eig() returns a tuple: the eigenvalues and the eigenvectors.
# The eigenvectors are the COLUMNS of the second array; column i belongs to
# eigenvalue i.
linalg.eig(D)

(array([1.26794919, 4.73205081]),
 array([[-0.80689822, -0.34372377],
        [ 0.59069049, -0.9390708 ]]))

Si para una matriz $A$ existen un vector $v \neq 0$ y un escalar $k$ tales que $Av = kv$, entonces $v$ es un eigenvector de $A$ asociado al eigenvalor $k$.