# Vectorized Operations and Numpy

## What are Numpy and Numpy Arrays?
- **Numpy:** the core library for high-performance numerical computing with Python
- **Numpy arrays:** multi-dimensional data structures in Numpy (e.g. 1-D vector, 2-D matrix, 3-D data object, etc.)

The user guide can be found [here](https://numpy.org/doc/stable/user/index.html).

In [None]:
# import numpy by following the convention
import numpy as np

## Creating Arrays

In [None]:
# build an array by hand from a Python list
# 1-D (a vector)
a = np.array([0, 1, 2, 3])
a

In [None]:
# if we print the array, only its elements are shown (without the "array(...)" wrapper):
print(a)

In [None]:
# 2-D (a matrix): each inner list becomes one row
b = np.array([
    [1, 2, 3],
    [5, 6, 7],
])
b

In [None]:
# check the number of dimensions (axes) of each array with .ndim
# Question: what will be the type of x?
x = a.ndim, b.ndim
print(x)
print(type(x))

In [None]:
# check the shape of each array, i.e. its length along every dimension
# Question: what will be the type of a.shape and b.shape respectively?
a.shape, b.shape

In [None]:
# functions for creating arrays
# np.arange(start, end, step): `end` itself is never included
a = np.arange(1, 9, 2)
print(a)
# with a single argument, np.arange counts from 0 up to (but excluding) it
a = np.arange(10)
print(a)

In [None]:
# np.linspace(start, end, num-points): evenly spaced points, both ends included
a = np.linspace(0, 1, num=6)
a

In [None]:
# a 3-by-2 matrix filled with ones
a = np.ones((3, 2))
a

In [None]:
# a 2-by-3 matrix filled with zeros
a = np.zeros((2, 3))
a

In [None]:
a = np.eye(3) # a 3-by-3 identity matrix
a

In [None]:
# a diagonal matrix: the given values go on the main diagonal, zeros elsewhere
a = np.diag([1, 2, 3, 4])
a

In [None]:
# generating random numbers
# set the seed so that the draws below are reproducible
# (np.random.seed drives NumPy's legacy global generator; the NumPy docs
#  recommend np.random.default_rng(seed) for new code)
np.random.seed(1234)
# generate a vector of length 4, in which elements are iid draws from UNIF(0,1)
a = np.random.rand(4)
print(a)
# generate a vector of length 4, in which elements are iid draws from the standard normal
a = np.random.randn(4)
print(a)

## Basic Data Types:

In [None]:
# request the element type explicitly: the integers are stored as floats
a = np.array([1, 2, 3], dtype=float)
a.dtype

In [None]:
# without an explicit dtype, NumPy infers it from the data (booleans here)
a = np.array([True, False, True])
a.dtype

## Indexing and Slicing
#### Slicing vectors

In [None]:
# indices begin at 0; negative indices count from the end (-1 is the last element)
a = np.arange(10)
print(a)
a[0], a[1], a[-1]

In [None]:
# slicing
a[2:5:2] # [start:end:step], where end is exclusive

In [None]:
# leaving out start, end and step selects every element
a[::]

In [None]:
# a step of -1 walks through the array backwards, i.e. reverses it
a[::-1]

#### Slicing matrices

In [None]:
# matrices
# np.diag places the given values (here 0, 1, 2) on the diagonal of a square matrix
a = np.diag(np.arange(3))
a

In [None]:
# pick out single elements of a matrix with a[row, column]
a[1,1], a[1,2]

In [None]:
# NumPy arrays are mutable, so we can assign new values to their elements
a[1,1] = 10
a

In [None]:
# the second column of a (every row, column index 1)
a[:,1]

In [None]:
# the first row of a (row index 0, every column)
a[0,:]

## Copies and Views

In [None]:
# a slicing operation creates a view on the original array, not a copy:
# the view shares its data with the array it was sliced from
# Question: What will be the first element of a?
a = np.arange(10)
print(a)
b = a[::2]
print(b)
b[0] = 100
print(a)

In [None]:
# force a copy: .copy() gives b its own data, so changing b leaves a untouched
a = np.arange(10)
print(a)
b = a[::2].copy()
print(b)
b[0] = 100
a

## Numerical Operations on Arrays

#### Elementwise operations

In [None]:
# with scalars: the scalar is combined with every element of the array
a = np.array([1, 2, 3, 4])
# print both results -- a notebook cell only displays the value of its last
# expression, so a bare `a + 1` placed above `2**a` would never be shown
print(a + 1)
print(2**a)

In [None]:
# arithmetic between arrays is applied elementwise by default,
# for vectors and matrices alike
b = np.ones(4)
print(a - b)
print(a*b)

In [None]:
# `*` between two matrices multiplies them elementwise (this is not the matrix product)
c = np.ones((3,3))
c*c

In [None]:
# matrix multiplication (the matrix product); `c @ c` is an equivalent spelling
c.dot(c)

#### Comparisons

In [None]:
# this is how lists are compared: `==` on two lists yields a single True/False
list(a) == list(b)

In [None]:
# Numpy arrays: `==` compares elementwise and returns an array of booleans
a == b

In [None]:
# ordering comparisons such as `>` are elementwise as well
a > b

In [None]:
# array-wise comparison: a single boolean, True only if the shapes and all elements match
np.array_equal(a,b)

In [None]:
# transcendental functions are applied to each element of the array as well
print(np.sin(a))
print(np.log(a))
print(np.exp(a))

In [None]:
# shape mismatches (this will cause an error):
# a has 4 elements but b only 2, so `a + b` raises a ValueError
b = np.array([1,2])
a + b

## Basic Reductions

In [None]:
# computing sums: .sum() adds up all elements of the array
a = np.array([1,2,3,4])
a.sum()

In [None]:
# on a 2-D array, .sum() without an axis still adds up every element
a = np.array([[1,2],[3,4]])
a.sum()

In [None]:
# passing axis= sums along that axis only
print(a.sum(axis=0)) # column sums: one total per column
print(a.sum(axis=1)) # row sums: one total per row

In [None]:
# other reductions
a = np.array([1, 2, 3, 4])
# print every result -- left as bare expressions, only the last one (a.std())
# would be displayed by the notebook and the others silently discarded
print("min:", a.min())        # smallest element
print("max:", a.max())        # largest element
print("argmin:", a.argmin())  # index of the smallest element
print("argmax:", a.argmax())  # index of the largest element
print("mean:", a.mean())
print("std:", a.std())        # standard deviation (population form, ddof=0)

## Shape Manipulation

In [None]:
# flattening: ravel() lays the rows out one after another in a 1-D array
a = np.array([[1, 2], [3, 4]])
b = a.ravel()
b

In [None]:
# reshaping: give the same elements a new shape (here 2 rows by 2 columns)
# call the method directly on the array
c = b.reshape((2,2))
c

In [None]:
# or we can use the function form, passing the array as the first argument
np.reshape(b, (2,2))

## Sorting Data

In [None]:
a = np.array([[6,3,1],[9,1,4]])
b = np.sort(a,axis=1) # sort each row
b

In [None]:
c = np.sort(a,axis=0) # sort each column (values are reordered within a column)
c

In [None]:
# finding minima and maxima: the arg* methods return the position
# (index) of the extreme value, not the value itself
a = np.array([4, 22, 3, 9])
print(a.argmax())
print(a.argmin())

## Efficiency

In [None]:
# baseline: square 1000 integers one at a time in a Python list comprehension
# (%timeit is an IPython magic that runs the statement repeatedly and reports its timing)
a = range(1000)
%timeit [i**2 for i in a]

In [None]:
# Question: Will this be faster?
# the same squaring, written as a single vectorized NumPy expression
b = np.arange(1000)
%timeit b**2

In [None]:
# second comparison on 10000 integers: add 1 to each in a Python list comprehension
a = range(10000)
%timeit [i+1 for i in a]

In [None]:
# the same addition as one vectorized NumPy expression
c = np.arange(10000)
%timeit c+1