# Introduction to Numpy

In [1]:
import sys
import numpy as np

In [2]:
# A one-dimensional array of integers, built from a plain Python list.
a = np.array([1, 2, 3, 4])

In [None]:
# A one-dimensional array of floating-point values.
b = np.array([0.0, 0.5, 1.0, 1.5, 2.0])

In [3]:
# Indexing and slicing work the same way as with Python lists
a[0]

1

In [None]:
a[1:3]

In [None]:
# One difference (and advantage) over Python lists is that we can index with a list of positions;
# the result is a new NumPy array holding the elements at those positions

b[[0,2,-1]]

In [None]:
# Use dtype to determine the data type of the array

a.dtype

In [None]:
b.dtype

In [None]:
# We can also specify the data type of the array when creating it

np.array([1,2,3,4], dtype = float)

In [None]:
np.array([1,2,3,4], dtype = np.int8)

## Dimensions and shapes

In [None]:
# NumPy can also create multi-dimensional arrays:
# a list of equally long rows becomes a 2-D array (here 2 rows x 3 columns).

c = np.array([[1, 2, 3],
              [4, 5, 6]])

In [None]:
# We can use the shape attribute to determine the shape of the array

c.shape

In [None]:
# use ndim to determine the number of dimensions

c.ndim

In [None]:
# use size to determine the number of elements in the array

c.size

In [None]:
# Let's create a 3-dimensional array: two blocks, each holding 2 rows of 3 values.

d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])

In [None]:
d

In [None]:
d.shape

In [None]:
d.ndim

In [None]:
d.size

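- Be careful when creating arrays from nested lists whose rows have different lengths
- Such irregular (ragged) input does not give a regular multi-dimensional array: older NumPy versions build a 1-D array of Python objects, and NumPy 1.24+ raises a `ValueError`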

In [None]:
# Indexing and slicing multi-dimensional arrays.
# Each line of the literal is one row (its index is noted on the right);
# the columns are numbered 0, 1, 2 from left to right.

A = np.array([
    [1, 2, 3],  # row 0
    [4, 5, 6],  # row 1
    [7, 8, 9],  # row 2
])

A

In [None]:
A[1]

In [None]:
A[1][0]

In [None]:
# Another way to index multi-dimensional arrays
# is to pass one index per dimension, separated by commas,
# inside a single pair of brackets.
# For example, to get the element 6 we use row index 1 and column index 2.
# Hence, A[1,2] i.e. row 1, column 2
# General form: A[d1,d2,d3,...,dn]

A[1,2]

In [None]:
# slicing multi-dimensional arrays

A[:, :2]

In [None]:
A[:2, :2]

In [None]:
A[:2, 1:]

In [None]:
# assigning values to multi-dimensional arrays

A[1] = np.array([10,10,10])

A

## Built ins

In [None]:
# Small 1-D integer array used to demonstrate the aggregation methods.
a = np.array([1, 2, 3, 4])

In [None]:
a.sum()

In [None]:
a.mean()

In [None]:
a.std()

In [None]:
a.var()

In [None]:
# The same aggregations work on multi-dimensional arrays.

A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

In [None]:
A.sum()

In [None]:
A.mean()

In [None]:
# Or we can specify the axis along which we want to perform the operation

A.sum(axis = 0)

In [None]:
A.sum(axis = 1)

## Broadcasting and vectorized operations

In [None]:
# The arange function creates a range of numbers

a = np.arange(4)

In [None]:
a

In [None]:
# We can perform vectorized operations on numpy arrays

a + 10

# It will create a new array where 10 is added to every element of the array

In [None]:
a * 10

In [None]:
a

In [None]:
# we can make these changes permanent by assigning the result to the array

a += 10

In [None]:
a

# in this case the array a is modified in place

In [None]:
# Arrays of the same shape can be added together element by element.

b = np.array([10, 10, 10, 10])
a = np.arange(4)

In [None]:
a + b

In [None]:
# What happens if we add two arrays with different shapes?
# NumPy tries to broadcast them to a common shape, which only works when the
# shapes are compatible (in each dimension the sizes are equal or one of them is 1).

c = np.array([10, 10, 10])

# Left commented out on purpose: a (4 elements, from the previous cell) and the
# 3-element c cannot be broadcast together, so this addition raises a ValueError.
# a + c

## Boolean arrays
- We can use boolean arrays to filter out elements from an array
- These are also called masks

In [None]:
a = np.arange(4)
a

In [None]:
# boolean values can be used to define which elements to keep and which to discard
a[[True, False, True, False]]

In [None]:
# we can also use comparison operators to create boolean arrays

a >= 2

# this will return a boolean array

In [None]:
# we can use this boolean array to filter out elements from the array

a[a >= 2]

In [None]:
# we can also use logical operators to combine multiple conditions

a[(a > 2) | (a < 1)]

# this will return elements that are greater than 2 or less than 1

In [None]:
# Boolean masks also work on multi-dimensional arrays.
# Build a 3x3 array of random integers drawn from 0 (inclusive) to 100 (exclusive).

A = np.random.randint(0, 100, size=(3, 3))

A

In [None]:
A > A.mean()

In [None]:
A[A > A.mean()]

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

## Linear Algebra

In [None]:
# 3x3 matrix used as the left operand in the products below.
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

In [None]:
# 3x2 matrix: its 3 rows match the 3 columns of A, so A @ B is defined.
B = np.array([[6, 5],
              [4, 3],
              [2, 1]])

In [None]:
A.dot(B)

In [None]:
A @ B

In [None]:
B.T

In [None]:
A

In [None]:
B.T @ A

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

## Size of objects in Memory

### Int, floats

In [None]:
# An integer in Python takes more than 24 bytes
sys.getsizeof(1)

In [None]:
# Very large integers take even more space
sys.getsizeof(10**100)

In [None]:
# Numpy size is much smaller
np.dtype(int).itemsize

In [None]:
# With an 8-bit dtype each element takes a single byte
np.dtype(np.int8).itemsize

In [None]:
np.dtype(float).itemsize

### Lists are even larger

In [None]:
# A one-element list
sys.getsizeof([1])

In [None]:
# An array of one element in numpy
np.array([1]).nbytes

### And performance is also important

In [None]:
# Plain Python list holding the integers 0..99999, the list-side input of the timing comparison.
l = [*range(100000)]

In [None]:
# Request 64-bit integers explicitly: the squares summed in the next cell add up
# to about 3.3e14, which does not fit in the 32-bit default integer used on some
# platforms (e.g. Windows with NumPy < 2.0) and would silently overflow there.
a = np.arange(100000, dtype=np.int64)

In [None]:
%time np.sum(a ** 2)

In [None]:
%time sum([x ** 2 for x in l])

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

## Useful Numpy functions

### `random`

In [None]:
np.random.random(size=2)

In [None]:
np.random.normal(size=2)

In [None]:
np.random.rand(2, 4)

---
### `arange`

In [None]:
np.arange(10)

In [None]:
np.arange(5, 10)

In [None]:
np.arange(0, 1, .1)

---
### `reshape`

In [161]:
# reshape returns a new array in the specified shape

np.arange(10).reshape(2, 5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [162]:
np.arange(10).reshape(5, 2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

---
### `linspace`

In [163]:
# linspace returns evenly spaced numbers over a specified interval

np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [164]:
np.linspace(0, 1, 20)

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [166]:
# endpoint=False leaves the stop value (1) out of the result

np.linspace(0, 1, 20, endpoint=False)

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

---
### `zeros`, `ones`, `empty`

In [167]:
# zeros returns an array of zeros
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [None]:
np.zeros((3, 3))

In [None]:
# Use the builtin int as dtype: the np.int alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, where it raises AttributeError.
np.zeros((3, 3), dtype=int)

In [4]:
# ones returns an array of ones
np.ones(5)

array([1., 1., 1., 1., 1.])

In [None]:
np.ones((3, 3))

In [5]:
# empty returns an array without initializing its values to any particular value
np.empty(5)

array([1.05518373e-311, 1.05518373e-311, 1.05518373e-311, 1.05518373e-311,
       1.05518373e-311])

In [6]:
np.empty((2, 2))

array([[0.00000000e+000, 1.05507325e-311],
       [1.05507325e-311, 1.05507325e-311]])

---
### `identity` and `eye`

In [7]:
# identity returns the identity matrix of the specified size
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [8]:
# eye returns a 2D array with ones on the diagonal and zeros elsewhere
np.eye(3, 3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [9]:
np.eye(8, 4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [10]:
# we can specify the position of the diagonal using the k parameter
np.eye(8, 4, k=1)

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [11]:
np.eye(8, 4, k=-3)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [12]:
"Hello World"[6]

'W'

![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)