<a href="https://colab.research.google.com/github/maggieliuzzi/numpy_matplotlib_pandas_scipy/blob/master/Numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np

**Numpy Arrays**

In [53]:
np.random.randn(10)  # returns a numpy.ndarray holding 10 samples from the standard normal distribution

array([-0.77368352,  0.70659765,  0.31812008,  0.01628257,  1.77377952,
        0.16904545,  0.01168565, -1.47649196, -0.12047315, -0.28133481])

In [0]:
# The same three values as a plain Python list (L) and as a NumPy array (A),
# so the cells that follow can contrast how the two types behave.
L = [1, 2, 3]
A = np.array(L)  # np.array copies the data, so A does not alias L

**Vector Operations**

In [2]:
L + [4]  # on Python lists, + concatenates: the result is a new, longer list

[1, 2, 3, 4]

In [7]:
A + [4]  # on arrays, + adds element-wise; the single 4 is broadcast to every element  # same as: A + np.array(4) OR A + np.array([4])

array([5, 6, 7])

In [10]:
A + [4,5,6]  # element-wise addition of equal-length vectors  # same as: A + np.array([4,5,6])

array([5, 7, 9])

In [0]:
2 * A  # element-wise scaling (evaluated but not displayed)  # whereas 2 * L repeats the list: [1, 2, 3, 1, 2, 3]
2.5 * A  # only this last expression is displayed; the integer array is promoted to float

array([2.5, 5. , 7.5])

In [19]:
# A notebook cell displays only its LAST expression. The output recorded for
# this cell is the element-wise square, so that expression goes last.
np.sqrt(A)  # element-wise square root (evaluated but not displayed)
A ** 2  # element-wise square; this is the value the cell displays

array([1, 4, 9])

**Functions** (element-wise)

In [21]:
# A notebook cell displays only its LAST expression. The output recorded for
# this cell is the natural logarithm, so that expression goes last.
np.exp(A)  # element-wise e**x (evaluated but not displayed)
np.log(A)  # element-wise natural logarithm; this is the value the cell displays

array([0.        , 0.69314718, 1.09861229])

In [23]:
np.tanh(A)  # element-wise hyperbolic tangent

array([0.76159416, 0.96402758, 0.99505475])

In [25]:
np.sin(A)  # element-wise sine, input in radians (evaluated but not displayed)
np.cos(A)  # element-wise cosine, input in radians; this last expression is the displayed output

array([ 0.54030231, -0.41614684, -0.9899925 ])

**Dot Products**

In [45]:
a = np.array([1, 2])
b = np.array([3, 4])

# Equivalent spellings of the dot product sum_i a[i] * b[i]
# (also: np.sum(a * b) or (a * b).sum()). None of these three is displayed.
np.dot(a, b)     # top-level function
a.dot(b)         # instance method, so a must be a NumPy array
a @ np.array(b)  # matrix-multiplication operator on two NumPy arrays

# Angle between the vectors, from the identity
#     a^T b = ||a|| * ||b|| * cos(theta)
mag_a = np.linalg.norm(a)  # ||a||, i.e. np.sqrt((a ** 2).sum())
mag_b = np.linalg.norm(b)  # ||b||
cosangle = np.dot(a, b) / (mag_a * mag_b)  # cos(theta) = a^T b / (||a|| ||b||)
angle = np.arccos(cosangle)  # arc cosine recovers theta, in radians
angle

0.17985349979247847

**Matrices**

In [48]:
# 2x2 matrix built from a nested list: each inner list is one row.
M = np.array([
    [1, 2],
    [3, 4],
])
M

array([[1, 2],
       [3, 4]])

In [90]:
# Diagonal matrix from a vector: a 1-D input of length n yields an n x n matrix
# with that vector on the main diagonal and zeros elsewhere.
# Uses the 2-element vector a = [1, 2] from the dot-product cell: the 2x2 output
# recorded for this cell corresponds to a length-2 input, whereas A holds three
# elements when the notebook is run top to bottom.
np.diag(a)

array([[1, 0],
       [0, 2]])

In [58]:
M[1]  # row 1 (indexing is zero-based, so this is the second row)

array([3, 4])

In [59]:
M[0,1]  # element in row 0, column 1  # M[0][1] gives the same value

2

In [60]:
M[:,0]  # column 0 (evaluated but not displayed)
M[:,1]  # column 1; this last expression is the displayed output
# a bare ":" selects everything along that dimension

array([2, 4])

In [81]:
np.diag(M)  # given a 2-D input, np.diag extracts the main diagonal as a 1-D array

array([1, 4])

**Functions** (element-wise)

In [62]:
np.exp(M)  # element-wise exponential (not the matrix exponential)  # the argument may also be a plain nested list

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

**Matrix Operations**

In [61]:
M.T  # transpose: rows become columns

array([[1, 3],
       [2, 4]])

In [86]:
# 2x3 matrix (2 rows, 3 columns); for a non-square matrix np.diag(N) is [1, 5].
N = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
N

array([[1, 2, 3],
       [4, 5, 6]])

In [70]:
M.dot(N)  # matrix product: (2x2) times (2x3) gives a 2x3 result

array([[ 9, 12, 15],
       [19, 26, 33]])

In [79]:
np.linalg.det(M)  # determinant; computed in floating point, so expect a tiny rounding error (the exact value is -2)

-2.0000000000000004

In [110]:
inv_M = np.linalg.inv(M)  # matrix inverse  # slower and less accurate than alternatives (e.g. np.linalg.solve) for solving linear systems
inv_M

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [78]:
inv_M.dot(M)  # mathematically the identity matrix; floating-point rounding can leave tiny non-zero off-diagonal entries

array([[1.00000000e+00, 0.00000000e+00],
       [1.11022302e-16, 1.00000000e+00]])

In [80]:
np.trace(M)  # trace: the sum of the main-diagonal elements (1 + 4)

5

In [0]:
lam, V = np.linalg.eig(M)  # lam: eigenvalues, V: eigenvectors  # eigenvectors are stored as column vectors inside V, so V[:, i] pairs with lam[i]

In [109]:
V[:,0] * lam[0] == M @ V[:,0]  # eigenvalue * eigenvector == M @ eigenvector  # exact == is unreliable here due to numerical precision errors
V[:,0] * lam[0], M @ V[:,0]  # (array([ 0.30697009, -0.21062466]), array([ 0.30697009, -0.21062466]))
np.allclose(V[:,0] * lam[0], M @ V[:,0])  # tolerance-based comparison is the appropriate check for floats
np.allclose(V @ np.diag(lam), M @ V)  # checking all eigenvectors simultaneously; this last expression is the displayed output
# if the matrix is symmetric (e.g. a covariance matrix) or complex Hermitian, use np.linalg.eigh
# (Hermitian transpose: the complex analog of the matrix transpose; it transposes and takes the complex conjugate of each element)

True

**Solving Linear Systems**

In [116]:
# Solve the linear system A x = b.
# NOTE: this rebinds A and b, which held other values in earlier cells.
A = np.array([
    [1, 1],
    [1.5, 4],
])
b = np.array([2200, 5050])

# Prefer np.linalg.solve over forming the inverse explicitly;
# don't use x = np.linalg.inv(A).dot(b) to compute A^-1 b.
x = np.linalg.solve(A, b)
x

array([1500.,  700.])

**Generating Data**

In [4]:
# 1-D array of 50 evenly spaced points from 0 to 20, both endpoints included
# (e.g. for an x axis)
np.linspace(0, 20, num=50)

array([ 0.        ,  0.40816327,  0.81632653,  1.2244898 ,  1.63265306,
        2.04081633,  2.44897959,  2.85714286,  3.26530612,  3.67346939,
        4.08163265,  4.48979592,  4.89795918,  5.30612245,  5.71428571,
        6.12244898,  6.53061224,  6.93877551,  7.34693878,  7.75510204,
        8.16326531,  8.57142857,  8.97959184,  9.3877551 ,  9.79591837,
       10.20408163, 10.6122449 , 11.02040816, 11.42857143, 11.83673469,
       12.24489796, 12.65306122, 13.06122449, 13.46938776, 13.87755102,
       14.28571429, 14.69387755, 15.10204082, 15.51020408, 15.91836735,
       16.32653061, 16.73469388, 17.14285714, 17.55102041, 17.95918367,
       18.36734694, 18.7755102 , 19.18367347, 19.59183673, 20.        ])

In [121]:
# 2x3 array filled with 0.0; the shape is passed as a single tuple
np.zeros(shape=(2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [122]:
# 2x3 array filled with 1.0; the shape is passed as a single tuple
np.ones(shape=(2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [123]:
# Scaling an all-ones array yields a 2x3 array filled with a constant (here 10.0)
np.ones(shape=(2, 3)) * 10

array([[10., 10., 10.],
       [10., 10., 10.]])

In [124]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere
np.eye(N=3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [148]:
np.random.random()  # a single float drawn from the uniform distribution on [0, 1), so it is never negative

0.6968309256853044

In [153]:
np.random.randn()  # one sample from the standard normal/Gaussian distribution (mean: 0, variance: 1); evaluated but not displayed
np.random.randn(2,3)  # 2x3 matrix of standard-normal samples; values are unbounded, NOT limited to the range -1 to 1

array([[ 0.00436367,  1.52521762, -0.2501105 ],
       [-0.03900539, -1.81912731,  1.25784415]])

In [149]:
# 2x3 matrix of samples drawn from the uniform distribution on [0, 1),
# so every entry is non-negative
np.random.random(size=(2, 3))

array([[0.01289143, 0.60652645, 0.27478846],
       [0.6681979 , 0.9767852 , 0.9352879 ]])

In [155]:
r = np.random.randn(10000)  # 10000 standard-normal samples; large enough for the sample statistics to sit close to the true values
r

array([ 1.35097834,  2.45191723,  0.44876413, ..., -0.46690469,
       -0.00431452, -0.31205972])

**Statistics**

In [158]:
r.mean()  # sample mean: close to 0, as expected for standard-normal data  # the instance method is equivalent to the top-level function np.mean(r)


-0.0072003573945527335

In [160]:
r.var()  # sample variance: close to 1, as expected for standard-normal data

1.0133484411347566

In [163]:
r.std()  # standard deviation: the square root of the variance, i.e. np.sqrt(r.var())

1.0066520953809

In [165]:
M = np.random.randn(10000, 3)  # 10000 rows x 3 columns of standard-normal samples  # NOTE: rebinds M, replacing the 2x2 matrix used in earlier cells
M

array([[ 0.41803257,  0.5080218 , -0.23891995],
       [ 0.59351521, -1.16365795,  1.03650054],
       [ 0.49109263,  0.90540846, -0.46352583],
       ...,
       [-0.12455966,  0.19443439,  0.74630738],
       [-1.15966418,  0.03576764, -0.27105537],
       [ 0.77792551, -0.96063799, -0.87088753]])

In [167]:
# M.mean() with no axis argument would give the mean of the entire matrix (uncommon operation)
M.mean(axis=0)  # axis=0 averages down the rows: one mean per column, so the output is a length-3 array

array([0.00951076, 0.00953755, 0.00023548])

In [0]:
row_means = M.mean(axis=1)  # axis=1 averages across the columns: one mean per row, so row_means.shape is (10000,)

In [171]:
# Covariance (variance, for vectors).
# np.cov defaults to rowvar=True: every ROW is treated as a variable and every
# COLUMN as one observation -- unlike other libraries in the ML stack (e.g.
# scikit-learn, TensorFlow or PyTorch), which take rows as sample observations.
# With M of shape (10000, 3) the result therefore has shape (10000, 10000),
# i.e. roughly 800 MB of float64 values.
np.cov(M, rowvar=True)

array([[ 0.16626777, -0.3487562 ,  0.28115619, ..., -0.15927154,
        -0.04116981,  0.14328359],
       [-0.3487562 ,  1.35409848, -0.69379651, ...,  0.17324359,
        -0.39699988,  0.42033706],
       [ 0.28115619, -0.69379651,  0.49282252, ..., -0.24244322,
         0.01117075,  0.12180175],
       ...,
       [-0.15927154,  0.17324359, -0.24244322, ...,  0.19412173,
         0.1643114 , -0.32349292],
       [-0.04116981, -0.39699988,  0.01117075, ...,  0.1643114 ,
         0.38547053, -0.59517024],
       [ 0.14328359,  0.42033706,  0.12180175, ..., -0.32349292,
        -0.59517024,  0.95820711]])

In [175]:
# Treat each COLUMN of M as a variable by transposing first; this is
# equivalent to np.cov(M, rowvar=False). The result has shape (3, 3) and is
# close to the identity matrix, as expected for independent standard-normal columns.
np.cov(M.T)

array([[ 1.00297159, -0.00482338, -0.00894683],
       [-0.00482338,  1.00958087, -0.00628021],
       [-0.00894683, -0.00628021,  1.00935383]])

In [178]:
# 3x3 matrix of random integers in the half-open range [0, 10):
# `low` is inclusive, `high` (10) is never drawn
np.random.randint(low=0, high=10, size=(3, 3))

array([[2, 0, 4],
       [5, 9, 9],
       [0, 4, 3]])

In [180]:
# Randomly selects items to fill a 3x3 matrix. An int argument n means
# "choose from np.arange(n)"; a 1-D array of candidates is accepted instead.
# Also takes `replace` (default: True) and `p`, the per-item probabilities
# (default: uniform).
np.random.choice(10, size=(3, 3))

array([[2, 7, 5],
       [6, 3, 6],
       [4, 2, 3]])