In [None]:
import pandas as pd
import numpy as np

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every bare expression in a cell, not only the last one.
# The cells below rely on this to show several results per cell.
InteractiveShell.ast_node_interactivity = "all"

A NumPy array is very similar to a Python list.

NumPy arrays are much faster than Python lists because NumPy's core is implemented in C.

NumPy stands for Numerical Python and is the backbone of many Python-based machine learning and data science tools.

NumPy uses vectorization and broadcasting to operate on whole arrays at once, avoiding explicit Python loops.

## Data Types and Attributes

In [None]:
# 0-dimensional array: a single scalar wrapped in an ndarray
a0 = np.array(0)
type(a0)   # numpy.ndarray
a0.shape   # () - no axes
a0.size    # 1 - total number of elements
a0.dtype   # integer dtype inferred from the Python int
a0.ndim    # 0 - number of axes

In [None]:
# 1-dimensional array (vector) built from a flat list
a1 = np.array([1, 2, 3, 4, 5])
type(a1)   # numpy.ndarray
a1.shape   # (5,)
a1.size    # 5
a1.dtype   # integer dtype
a1.ndim    # 1

In [None]:
# 2-dimensional array (matrix). The single float literal `2.` makes the whole
# array floating point, because an array holds one dtype for all elements.
a2 = np.array([[1, 2., 3],
               [4, 5, 6]])
type(a2)   # numpy.ndarray
a2.size    # 6
a2.dtype   # floating-point dtype
a2.shape   # (2, 3) - 2 rows, 3 columns
a2.ndim    # 2

In [None]:
# 3-dimensional array: the extra pair of brackets adds a leading axis of length 1
a3 = np.array([[[1, 2, 3],
                [4, 5, 6]]])
a3
type(a3)   # numpy.ndarray
a3.dtype   # integer dtype
a3.ndim    # 3
a3.size    # 6
a3.shape   # (1, 2, 3)

In [None]:
# squeeze() returns an array with the length-1 axes removed -> shape (2, 3).
# It does not modify a3, so a3.shape below is still (1, 2, 3).
a3.squeeze()
a3.shape

In [None]:
# reshape() also returns a new array object with the requested shape; a3 keeps its shape
a3.reshape((2, 3))

In [None]:
# 3-dimensional array made of two 3x3 matrices stacked along the first axis
a4 = np.array([[[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]],
               [[-1, -2, -3],
                [-4, -5, -6],
                [-7, -8, -9]]])
a4
type(a4)   # numpy.ndarray
a4.dtype   # integer dtype
a4.ndim    # 3
a4.size    # 18
a4.shape   # (2, 3, 3)

In [None]:
# Create a Series from a 1-D numpy array
pd.Series(a1)
# pd.Series(a2) --> Error: a Series only accepts 1-dimensional data

In [None]:
# Create a DataFrame from a numpy array (1-D -> a single column, 2-D -> rows x columns)
pd.DataFrame(a1)
pd.DataFrame(a2)
# pd.DataFrame(a3) --> Error: a DataFrame only accepts data with at most 2 dimensions

In [None]:
# The same three values, once as a Python list and once as a NumPy array.
L = [1, 2, 3]
A = np.array(L)

# Both containers can be iterated element by element in the same way.
for e in L:
    print(e)
print('---')
for e in A:
    print(e)

In [None]:
# list.append mutates the list in place (re-running this cell appends another 4)
L.append(4)
L

In [None]:
# A.append(4) --> Error: ndarray has no append method
# np.append returns a NEW array with 4 added at the end; A itself is unchanged
np.append(A, 4)

Generally speaking, the size of a list can change, but the size of an array is fixed.

Adding a new element to an array actually creates a new array.

In [None]:
# For lists, + concatenates and returns a new list; L itself is unchanged
L + [5]

In [None]:
L
# For lists, * repeats the list; it does not multiply the elements
2 * L

In [None]:
A + np.array([5])
# 5 is added to every element of the array. This is called broadcasting: NumPy
# stretches the length-1 array to match A, so the addition applies to all elements.

In [None]:
# A plain scalar is broadcast the same way: 5 is added to every element
A + 5

In [None]:
# Shapes must be broadcast-compatible. A has shape (3,), so arrays of length 4 or 2 fail:
# A + np.array([4, 5, 6, 7]) --> Error
# A + np.array([2, 3]) --> Error

# Same shape: the arrays are added element by element
A + np.array([4, 5, 6])

In [None]:
A
# For arrays, * multiplies every element (compare with list repetition)
2 * A

In [None]:
L
# The operand order does not matter: the list is still repeated, not scaled
L * 2

## Creating numpy arrays

In [None]:
# Array of the given shape filled with 1.0 (floating point by default)
ones = np.ones(shape=(2, 3, 4))
ones

In [None]:
# Array of the given shape filled with 0.0; the shape can also be passed positionally
zeros = np.zeros((2, 3, 4))
zeros

In [None]:
# linspace(start, stop, num): `num` evenly spaced values, both endpoints included
np.linspace(0, 3, 10)   # 10 values from 0 to 3
np.linspace(0, 10, 3)   # 3 values: 0, 5, 10

In [None]:
# np.empty allocates the array without initializing it: the displayed values are
# whatever happened to be in memory and are not meaningful
np.empty((2, 3))

In [None]:
# arange(start, stop, step): stop is exclusive -> 0, 2, 4
np.arange(0, 6, 2)

In [None]:
# Seeding makes the "random" numbers reproducible from run to run
np.random.seed(42)
# Integers from 0 (inclusive) to 12 (exclusive) in an array of shape (2, 3, 1)
np.random.randint(0, 12, size=(2, 3, 1))

In [None]:
np.random.seed(42)
# Same range as above, this time as a 3x3 matrix
np.random.randint(0, 12, size=(3, 3))

In [None]:
np.random.seed(42)
# Floats in [0, 1); the shape is passed as a single tuple
np.random.random((3, 1, 4))

In [None]:
np.random.seed(42)
# Same as np.random.random, but each dimension is a separate argument;
# with the same seed it produces the same values as the previous cell
np.random.rand(3, 1, 4)

## Viewing arrays and matrices

In [None]:
np.random.seed(42)
# 4-D array of shape (2, 3, 4, 5): 120 integers drawn from 0-9
a = np.random.randint(0, 10, size=(2, 3, 4, 5))
a

In [None]:
# Get the first 4 numbers of the innermost arrays: keep every index on the first
# three axes and slice only the last one -> shape (2, 3, 4, 4).
# `a[..., :4]` is an equivalent, shorter spelling.
a[:,:,:,:4]

## Manipulating and comparing arrays

In [None]:
# Shape (3,) + shape (1, 3) broadcasts to shape (1, 3); the result is float
# because np.ones produces floats
np.array([1, 2, 3]) + np.ones((1, 3))

# np.add is the function form of the + operator and gives the same result
np.add(
    np.array([1, 2, 3]),
    np.ones((1, 3))
)

In [None]:
# Shape (2, 3) + shape (3,): the 1-D array is added to each row
np.array([[1, 2, 3], [4, 5, 6]]) + np.array([1, 2, 3])

# Function form of the same addition
np.add(
    np.array([[1, 2, 3], [4, 5, 6]]),
    np.array([1, 2, 3])
)

In [None]:
# Element-wise multiplication: each row is multiplied by [1, 2, 3]
np.array([[1, 2, 3], [4, 5, 6]]) * np.array([1, 2, 3])

# np.multiply is the function form of the * operator
np.multiply(
    np.array([[1, 2, 3], [4, 5, 6]]),
    np.array([1, 2, 3])
)

In [None]:
# Element-wise true division: the result is always floating point
np.array([[1, 2, 3], [4, 5, 6]]) / np.array([1, 2, 3])

# np.divide is the function form of the / operator
np.divide(
    np.array([[1, 2, 3], [4, 5, 6]]),
    np.array([1, 2, 3])
)

In [None]:
# Element-wise floor division: each quotient is rounded down to a whole number
np.array([[1, 2, 3], [4, 5, 6]]) // np.array([1, 2, 3])

# np.floor_divide is the function form of the // operator
np.floor_divide(
    np.array([[1, 2, 3], [4, 5, 6]]),
    np.array([1, 2, 3])
)

In [None]:
# Element-wise remainder of the division
np.array([[1, 2, 3], [4, 5, 6]]) % np.array([1, 2, 3])

# np.mod is the function form of the % operator
np.mod(
    np.array([[1, 2, 3], [4, 5, 6]]),
    np.array([1, 2, 3])
)

In [None]:
# Element-wise power: every element is squared
np.array([[1, 2, 3], [4, 5, 6]]) ** 2

# np.square gives the same result as ** 2
np.square(np.array([[1, 2, 3], [4, 5, 6]]))

In [None]:
# Element-wise exponential: e raised to each element
np.exp(np.array([[1, 2, 3], [4, 5, 6]]))

In [None]:
# Element-wise natural logarithm (base e), the inverse of np.exp
np.log(np.array([[1, 2, 3], [4, 5, 6]]))

### Aggregation
Performing the same operation on a number of things

**Use Python's functions, e.g. `sum()`, on Python data types and NumPy's functions, e.g. `np.sum()`, on NumPy arrays.**

In [None]:
# 10,000 random floats, used only to compare the speed of the two sum functions
massive_array = np.random.random(10000)
# Python's built-in sum iterates over the array one element at a time
%timeit sum(massive_array)
# NumPy's sum runs over the whole array in compiled code
%timeit np.sum(massive_array)

In [None]:
# Seed first so this cell shows the same numbers every time it is run,
# like the other random cells in this notebook.
np.random.seed(42)
# 2x4 matrix of integers drawn from 0-9
a = np.random.randint(0, 10, size=(2, 4))
a

# Each aggregation below reduces over ALL elements because no axis is given.
np.sum(a)
np.max(a), np.min(a), np.mean(a), np.median(a)
# Standard deviation and variance (the variance is the square of the standard deviation)
np.std(a), np.var(a)

### Reshaping and Transposing

In [None]:
a2
a2.shape
# reshape must keep the number of elements: the 2x3 array has 6, but a (2, 8) array needs 16
# a2.reshape(2, 8) --> Error
a2.reshape(3, 2)      # 3 rows, 2 columns
a2.reshape(3, 2, 1)   # same layout with an extra trailing axis of length 1
a2.reshape(6, 1)      # a single column
a2.reshape(1, 6)      # a single row

In [None]:
a2
# .T swaps the axes: rows become columns, so shape (2, 3) becomes (3, 2)
a2.T

### Dot Product

In [None]:
# Fix the seed so both matrices are the same on every run.
np.random.seed(0)
# Two 5x3 matrices of integers drawn from 0 (inclusive) to 10 (exclusive).
mat_1 = np.random.randint(low=0, high=10, size=(5, 3))
mat_2 = np.random.randint(low=0, high=10, size=(5, 3))

In [None]:
# Element-wise multiplication: both matrices must have the same (or broadcastable)
# shape; here (5, 3) * (5, 3) -> (5, 3)
mat_1 * mat_2

In [None]:
# Dot Product: the inner dimensions must match, so one matrix is transposed.
# (5, 3) . (3, 5) -> (5, 5); np.dot and the @ operator give the same result here
np.dot(mat_1, mat_2.T)
mat_1 @ mat_2.T

# (3, 5) . (5, 3) -> (3, 3)
np.dot(mat_1.T, mat_2)
mat_1.T @ mat_2

In [None]:
# Seeded so the comparison is reproducible. Distinct names are used so the
# a1 / a2 arrays defined near the top of the notebook are not overwritten
# (the Series, DataFrame and reshape cells depend on them when re-run).
np.random.seed(0)
cmp_left = np.random.randint(0, 10, (2, 3))
cmp_right = np.random.randint(0, 10, (2, 3))

# Comparison operators work element by element and return boolean arrays
# with the same shape as the inputs.
cmp_left > cmp_right
cmp_left == cmp_right

### Sorting arrays

In [None]:
np.random.seed(0)
# 3x5 matrix of integers drawn from 0-9
a = np.random.randint(0, 10, (3, 5))
a
np.sort(a)              # sorts along the last axis (each row separately); returns a sorted copy
np.sort(a, axis=None)   # flattens the array first, then sorts all 15 values

In [None]:
a
np.sort(a)
# argsort returns, for each row, the indices that would put that row in sorted order
np.argsort(a)

In [None]:
# Without an axis, the returned index refers to the flattened array
np.argmin(a), np.argmax(a)
np.argmin(a, axis=1)   # position of the minimum within each row
np.argmin(a, axis=0)   # position of the minimum within each column

In [None]:
# An array holds a single dtype. Mixing ints, floats, booleans and a string
# makes NumPy convert every value to a string.
a = np.array([[1, 'abcd1234', 1.6], [2, 3.9, True]])

a.dtype   # a Unicode string dtype
# Every element has the same string dtype, whatever its original Python type was
a[0, 0].dtype, a[0, 1].dtype, a[1,2].dtype
# The values themselves are now strings, e.g. 1 -> '1' and True -> 'True'
(a[0, 0], a[0, 1], a[1,2])

In [None]:
testarray1D = np.array([1, 2, 3, 4])

testarray1D

# Slices: the last element, index 2 onward, indices 1 to 3, the first three elements
testarray1D[-1:], testarray1D[2:], testarray1D[1:4], testarray1D[:3]

# Indexing with a list of positions, then with a boolean mask;
# both select the elements at positions 0 and 2
testarray1D[[0, 2]]
testarray1D[[True, False, True, False]]

In [None]:
testarray2D = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
testarray2D

testarray2D.shape   # (2, 4)
testarray2D[0]      # first row
testarray2D[1]      # second row
# testarray2D[2] # Error, uncomment and see
testarray2D[-1]     # last row
testarray2D[-2]     # second-to-last row (here: the first row)
# testarray2D[-3] # Error, uncomment and see
testarray2D[:,:]    # every row and every column
testarray2D[0,0:2]  # first two columns of the first row
testarray2D[:, 0:2] # first two columns of every row

# access with boolean: a mask with the same shape as the array selects the
# elements where the mask is True and returns them as a 1-D array
k = np.array([[True, False, True, False], 
              [True, False, True, False]])
testarray2D[k]

In [None]:
# Two 2x3 matrices
mat1 = np.array([[1, 2, 3],
                 [4, 5, 6]])
mat2 = np.array([[7, 8, 9],
                 [10, 11, 12]])

# The .dot method and the @ operator compute the same matrix product:
# (2, 3) . (3, 2) -> (2, 2)
mat1.dot(mat2.T)
mat1 @ mat2.T

# mat1[[1]] + mat2[[0]]

See video about numpy shift: <a href='https://www.youtube.com/watch?v=jYXvVq2eecc'>video on youtube</a>

In [None]:
a = np.array([1, 3, 4, 7, 9])
np.diff(a)        # differences between neighbours: one element fewer than the input
np.diff(a, n=2)   # n=2 takes the difference of the differences, and so on
np.diff(a, n=3)
np.diff(a, n=4)   # each pass removes one element: 5 inputs -> 1 value left
np.diff(a, n=5)   # nothing left to difference: the result is an empty array
np.diff(a, n=6)   # still an empty array, no error is raised

In [None]:
b = np.array([[1, 2, 3],
          [4, 5, 6]])
b
np.diff(b)           # default is the last axis: differences within each row
np.diff(b, axis=1)   # the same, with the axis spelled out
np.diff(b, axis=0)   # differences between the rows, column by column

In [None]:
# Ten evenly spaced floats: 1.0, 2.0, ..., 10.0
arr = np.linspace(start=1, stop=10, num=10)
print(arr)

# Basic slicing returns a view onto the same data, not a copy.
arr_slice = arr[0:5]
print(arr_slice)

# Overwrite every element of the view ...
arr_slice[...] = 99
print(arr_slice)

# ... and the first five elements of the original array have changed too.
print(arr)