<a href="https://colab.research.google.com/github/tommy-swimmer/mlearn/blob/master/Numpy_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Arrays vs Lists


In [None]:
import numpy as np

In [None]:
L = [1,2,3] # create a list

In [None]:
A = np.array([1,2,3]) # create an array containing the same items

In [None]:
for e in L:  # iterate over the list and print each element
  print(e)

1
2
3


In [None]:
# Iterating over a numpy array works just like iterating over a list.
for e in A:
  print(e)

1
2
3


In [None]:
L.append(4)

In [None]:
L

[1, 2, 3, 4]

In [None]:
A.append(4)
# Raises AttributeError: unlike a list, a numpy array has no append method.
# Generally speaking, the size of a list can change, but the size of an array is fixed.

AttributeError: ignored

In [None]:
# + means concatenate when using lists.
L + [5]

[1, 2, 3, 4, 5]

In [None]:
# Broadcasting operation: the one-element array is stretched to match A,
# adding 4 to each element in A.
A + np.array([4])

array([5, 6, 7])

In [None]:
# Vector addition
A + np.array([4,5,6])

array([5, 7, 9])

In [None]:
# Scalar multiplication
2 * A

array([2, 4, 6])

In [None]:
# For a list, * repeats the list instead of multiplying each element.
2 * L

[1, 2, 3, 4, 1, 2, 3, 4]

In [None]:
L + L  # concatenating L with itself gives the same result as 2 * L

[1, 2, 3, 4, 1, 2, 3, 4]

In [None]:
# With lists, + concatenates rather than adds, so use a for loop to add 3
# to each element of L, collecting the results in a new list L2.
L2 = []

for e in L:
  L2.append(e + 3)

In [None]:
L2 # Confirm 3 has been added to each element in L

[4, 5, 6, 7]

In [None]:
# Another way to do this: a list comprehension builds the same list in one line.
L2 = [e + 3 for e in L]
L2

[4, 5, 6, 7]

In [None]:
# Square each element of L in a loop, appending the results to a new list, L2.
L2 = []
for e in L:
  L2.append(e**2)

In [None]:
L2

[1, 4, 9, 16]

In [None]:
# This is all way easier in numpy though: ** squares every element at once.
A**2

array([1, 4, 9])

In [None]:
# Element-wise square root.
np.sqrt(A)

array([1.        , 1.41421356, 1.73205081])

In [None]:
# Element-wise natural logarithm.
np.log(A)

array([0.        , 0.69314718, 1.09861229])

In [None]:
# Element-wise exponential, e**x.
np.exp(A)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [None]:
# Hyperbolic tangent
np.tanh(A)

array([0.76159416, 0.96402758, 0.99505475])

There are many more functions to explore!

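A list looks like an array, but it is a general-purpose container: it can grow and shrink, and `+` and `*` concatenate and repeat it rather than doing arithmetic on its elements.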

**A numpy array is strictly for doing math.**

# The Dot Product

In [None]:
a = np.array([1,2])
b = np.array([3,4])

In [None]:
# Dot product by hand: multiply matching elements of a and b and add up the products.
dot = 0
for e, f in zip(a, b):
  dot += e * f
dot

11

In [None]:
# The same dot product, this time looping over the index positions.
dot = 0
for i in range(len(a)):
  dot += a[i] * b[i]
dot

11

In [None]:
# The regular multiply operator works element-wise on arrays.
a * b

array([3, 8])

In [None]:
# Dot product is just the sum of these elements.
np.sum(a * b)

11

In [None]:
(a * b).sum()  # the same sum, using the array's own sum() method

11

In [None]:
np.dot(a, b) # dot function

11

In [None]:
a.dot(b) # another dot function syntax

11

In [None]:
a @ b  # the @ operator gives the same dot product

11

In [None]:
# Finding the magnitude of a vector: the square root of its dot product with itself.
amag = np.sqrt((a * a).sum())

In [None]:
amag

2.23606797749979

In [None]:
# Magnitude function
np.linalg.norm(a) # you get the same answer

2.23606797749979

In [None]:
# Calculate the cosine of the angle between a and b:
# cos(angle) = a . b / (|a| * |b|)
cosangle = a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))

cosangle # This is the cosine of the angle

0.9838699100999074

In [None]:
# The inverse cosine recovers the angle itself.
angle = np.arccos(cosangle)
angle # in radians, NOT DEGREES

0.17985349979247847

# Speed Test 

**Using numpy is drastically faster than using lists for computational purposes**

# Matrices

Just an introduction to the essentials. As a general rule, don't use `numpy.matrix`; represent matrices as regular 2-D numpy arrays instead.

In [None]:
# Make a simple matrix, list of lists.
L = [[1,2],[3,4]]
L

[[1, 2], [3, 4]]

In [None]:
L[0]  # Rows come first, then columns: L[0] is the first row.

[1, 2]

In [None]:
# Element at row 0, column 1.
L[0][1]

2

In [None]:
# Building the same matrix in numpy.
A = np.array([[1,2],[3,4]])
A

array([[1, 2],
       [3, 4]])

In [None]:
# The same list-style double indexing works on a numpy array.
A[0][1]

2

In [None]:
A[0,1]  # A single [row, column] index selects the same element. This is kind of like MATLAB!

In [None]:
# Select a column of this matrix.
A[:,0] # Column at index 0.

array([1, 3])

In [None]:
# Transpose of matrix A
A.T 

array([[1, 3],
       [2, 4]])

In [None]:
# Exponentiates every element
np.exp(A)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [None]:
# Numpy is intelligent enough to know what you want to do with this
# even though it's a list. You don't have to convert it into a numpy
# array first, it'll do it for you.
np.exp(L)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [None]:
# Create a new matrix 2x3

B = np.array([[1,2,3],[4,5,6]])
B

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# Matrix multiplication performed by dot function: (2x2) times (2x3) gives a 2x3 result.
A.dot(B)

array([[ 9, 12, 15],
       [19, 26, 33]])

In [None]:
# The determinant. The exact value is 1*4 - 2*3 = -2; the result is computed
# in floating point, so it can be off by a tiny rounding error.
np.linalg.det(A)

-2.0000000000000004

In [None]:
# The inverse.
np.linalg.inv(A)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [None]:
# Should get the identity matrix back.

np.linalg.inv(A).dot(A)

# This is very close to the identity, but not exact because of floating-point rounding error.

array([[1.00000000e+00, 0.00000000e+00],
       [1.11022302e-16, 1.00000000e+00]])

In [None]:
# The matrix trace
np.trace(A)

5

In [None]:
# Diag function
np.diag(A) # vector containing the diagonal elements.

array([1, 4])

In [None]:
np.diag([1,4]) # gives you a matrix from a vector.

array([[1, 0],
       [0, 4]])

In [None]:
# Eigenvalues and eigenvectors of A
np.linalg.eig(A) # eigen decomposition.

(array([-0.37228132,  5.37228132]), array([[-0.82456484, -0.41597356],
        [ 0.56576746, -0.90937671]]))

In [None]:
# Lam holds the eigenvalues; the columns of V are the corresponding eigenvectors.
Lam, V = np.linalg.eig(A)

In [None]:
# Check that A v = lambda v for the first eigenvalue/eigenvector pair.
# We would expect to see True and True, but == demands exact equality,
# which floating-point results may not satisfy.
V[:,0] * Lam[0] == A @ V[:,0]

array([ True, False])

In [None]:
# Checking the values given.
V[:,0] * Lam[0], A @ V[:,0]
# The printed values are the same, but they differ slightly beyond the displayed digits.

(array([ 0.30697009, -0.21062466]), array([ 0.30697009, -0.21062466]))

In [None]:
# Use the numpy allclose function, which compares within a tolerance rather than exactly.
np.allclose(V[:,0] * Lam[0], A @ V[:,0])

True

In [None]:
# Another way to do this: check every eigenpair at once, since A V = V diag(Lam).
np.allclose(V @ np.diag(Lam), A @ V)

True

Look up the `numpy.linalg.eigh` function if needed; it is the eigendecomposition routine for symmetric (or complex Hermitian) matrices. **Might be useful for signal analysis**

# Solving Linear Systems

The admission fee at a small fair is 1.50 for children and 4.00 for adults. On a certain day, 2200 people enter the fair, and $5050 is collected. How many children and how many adults attended?

Let x_1 be the number of children and x_2 the number of adults: <br />
x_1 + x_2 = 2200 <br />
1.5x_1 + 4x_2 = 5050

In [None]:
# Creating the coefficient matrix A and the right-hand side b of the system A x = b.
A = np.array([[1, 1], [1.5, 4]])
b = np.array([2200, 5050])

In [None]:
# Solving the linear system; the result is the vector [x_1, x_2].
np.linalg.solve(A, b)

array([1500.,  700.])

Answer is: 1500 children and 700 adults.

# Generating Data

In [None]:
# Generating array of zeros
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [None]:
# Generating an array of ones
np.ones((2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [None]:
10 * np.ones((2, 3))

array([[10., 10., 10.],
       [10., 10., 10.]])

In [None]:
# The identity matrix.
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

**This is important!**

In [None]:
# How to generate arrays with random numbers.
np.random.random() # single random number

0.08606795551665847

In [None]:
# 2x3 array with random numbers. Draws from a uniform distribution
# on the half-open interval [0, 1).
# Note that the shape is passed as a single tuple.
np.random.random((2, 3))

array([[0.18153128, 0.03471917, 0.24354411],
       [0.79812231, 0.3155928 , 0.34648205]])

In [None]:
# Random numbers with a Gaussian distribution.
# Draws from the standard normal distribution (mean 0, variance 1).
# Unlike np.random.random, the dimensions are passed as separate arguments, not a tuple.
np.random.randn(2, 3)

array([[ 0.99466386, -3.27417003,  0.36151012],
       [-0.61380937,  0.89895897, -0.25954124]])

In [None]:
# 10000 samples from the standard normal distribution.
R = np.random.randn(10000)

In [None]:
# Calculate the mean.
R.mean()

0.006194717049569887

In [None]:
np.mean(R)

0.006194717049569887

In [None]:
# Calculate the variance.
R.var()

1.019335326989961

In [None]:
# Calculate the standard deviation
R.std()

1.009621378037312

In [None]:
# 10000 rows (samples) by 3 columns (variables).
R = np.random.randn(10000, 3)

In [None]:
R.mean(axis = 0) # Calculates the mean of each column.

array([ 0.00127393, -0.0049547 ,  0.00045197])

In [None]:
R.mean(axis=1).shape  # mean of each row, so one value per row

(10000,)

In [None]:
# Covariance
# By default np.cov treats each ROW as a variable, so with 10000 rows
# it gives us a very large array.

np.cov(R)

array([[ 1.50892885,  0.65316554,  0.66242393, ...,  0.08081074,
         1.11488028,  0.25615888],
       [ 0.65316554,  0.28322116,  0.28999948, ...,  0.07007909,
         0.48106771,  0.12451382],
       [ 0.66242393,  0.28999948,  0.3125868 , ...,  0.27012279,
         0.47922576,  0.20358264],
       ...,
       [ 0.08081074,  0.07007909,  0.27012279, ...,  2.53219092,
        -0.05028284,  0.99544822],
       [ 1.11488028,  0.48106771,  0.47922576, ..., -0.05028284,
         0.82852115,  0.14654817],
       [ 0.25615888,  0.12451382,  0.20358264, ...,  0.99544822,
         0.14654817,  0.42475396]])

In [None]:
# Check shape
np.cov(R).shape

(10000, 10000)

In [None]:
# Transpose R first so that each of the 3 variables is a row, giving a 3x3 covariance matrix.
np.cov(R.T)

array([[ 9.98514537e-01, -1.64587370e-02,  1.41068628e-03],
       [-1.64587370e-02,  9.80463217e-01, -8.66197710e-04],
       [ 1.41068628e-03, -8.66197710e-04,  9.93715812e-01]])

In [None]:
# Using rowvar=False tells np.cov that the variables are in the columns, so no transpose is needed.
np.cov(R, rowvar=False)

array([[ 9.98514537e-01, -1.64587370e-02,  1.41068628e-03],
       [-1.64587370e-02,  9.80463217e-01, -8.66197710e-04],
       [ 1.41068628e-03, -8.66197710e-04,  9.93715812e-01]])

In [None]:
# Generating random integers from 0 (inclusive) up to 10 (exclusive) in a 3x3 array.
np.random.randint(0, 10, size=(3,3))

array([[0, 0, 9],
       [3, 2, 2],
       [1, 6, 7]])

In [None]:
# Choice function: with an integer argument it samples from np.arange(10),
# i.e. the integers 0 through 9.
np.random.choice(10, size=(3, 3))

array([[2, 5, 6],
       [3, 1, 6],
       [2, 2, 1]])