# Numpy run through

In [0]:
# Industry standard to import as np
import numpy as np

## Basics

In [0]:
# making a million uniformly, random generated set of numbers between zero and one
randomNumbers = np.random.uniform(0, 1, size=1000000)

In [0]:
# taking the mean of that set
randomNumbers.mean()


# this is done so quickly because it is compiled in C in the backend

0.5006128350065049

In [0]:
# numpy has a function just for creating arrays of zeros
a = np.zeros(10)
a

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [0]:
# checking it's type to see what's what
type(a)

numpy.ndarray

In [0]:
# checking the type of what's stored inside of the arrray
type(a[0])

numpy.float64

In [0]:
# but maybe we don't want floats? we want integers
# syntax is dtype (data type)
b = np.zeros(10, dtype=int)
type(b[0])

numpy.int64

In [0]:
# we can even check the shape of our array
b.shape
print(b)

[0 0 0 0 0 0 0 0 0 0]


In [0]:
# and hey, want to give that array some backbone
# and reshape it? then use a tuple, my dude
b.shape = (10, 1)
print(b)

[[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]]


In [0]:
# fill an array full of zeros and then shape it
# Two rows with 5 columns
c = np.zeros(10)
c.shape = (2, 5)
c


array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [0]:
# bunch of ones
array_of_ones = np.ones(10)
print(array_of_ones)
print()

# bunch of empty values to be added later
# BUT REMEMBER AN ARRAY IS ALL THE SAME DATA TYPE
# whatever numbers you see in 
empty_array = np.empty(3)
print(empty_array)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

[0.39215686 0.70980392 0.80392157]


In [0]:
# setting up a grid of evenly spaced numbers
spaced_array = np.linspace(1, 10, 4) # 4 digits, between 1.0 and 10.0
spaced_array

array([ 1.,  4.,  7., 10.])

In [0]:
# you can create numpy arrays from python lists or tuples with np.array()
list = [[1, 2, 3, 4], [10, 20, 30, 40], [50, 60, 70, 80]]
numpy_list = np.array(list)
numpy_list

array([[ 1,  2,  3,  4],
       [10, 20, 30, 40],
       [50, 60, 70, 80]])

In [0]:
# similar concept, but np.asarry() but doesn't make a copy of the array in np
array = np.zeros(10)
print(array)
array is np.asarray(array)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


True

In [0]:
array is np.array(array)

False

### Indexing arrays

In [0]:
# you can index arrays just like a list
# it goes [outermost section index, next outmost, next outmost, until section you want index]
big_array =np.array([[1,2,3],[2,3,4],[4,5,6],[6,7,8]])


In [0]:
# first index of the first index of the full array
big_array[0,0]

1

In [0]:
# second, second
big_array[1, 1]

3

In [0]:
#last item in the last section of array
big_array[-1, -1]

8

In [0]:
nd_array =np.array([[[1,2,3],[2,3,4],[4,5,6],[6,7,8]],
                   [[9,8,7],[6,5,4],[3,2,1],[8,7,6]]])



In [0]:
# first array,  first array, first item
nd_array[0,0,0]

1

In [0]:
# last array, last section, last item in that section
nd_array[-1,-1,-1]

6

In [0]:
nd_array[0, 0]

array([1, 2, 3])

In [0]:
# you can extract columns are rows like so
# it'll technically run without the colon, but good practice for readability
nd_array[0, :]

array([[1, 2, 3],
       [2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

In [0]:
nd_array[0, 0, :]

array([1, 2, 3])

### Numpy Array Methods

In [0]:
k = np.array((1, 5, 2, 4, 3, 5))

In [0]:
# sorting the array 
k.sort()
k

array([1, 2, 3, 4, 5, 5])

In [0]:
# sum the array
k.sum()

20

In [0]:
# mean
k.mean()

3.3333333333333335

In [0]:
# max value in array
k.max()

5

In [0]:
# need to know index of the biggest number? (might be good for mean vs median)
k.argmax()

4

In [0]:
# printing the cumulative product of the elements 
# (so index 0 + index 1, show sum @ index 1, 
# then that sum plus index 2 and show that sum, and so forth)
print("Original, sorted array for reference :\n",k, "\nCumulative Summing of array: ")
k.cumsum()

Original, sorted array for reference :
 [1 2 3 4 5 5] 
Cumulative Summing of array: 


array([ 1,  3,  6, 10, 15, 20])

In [0]:
# variance
k.var()

2.2222222222222223

In [0]:
#standard deviation
k.std()

1.4907119849998598

In [0]:
# change the shape
k.shape = (2, 3)
k

array([[1, 2, 3],
       [4, 5, 5]])

In [0]:
# transpose an array
k.T

array([[1, 4],
       [2, 5],
       [3, 5]])

*This is important for thinking about matrix multiplication*

#### Interesting tid bits about numpy methods

In [0]:
# interesting, if you have an ascending array (numbers progressively going up)
# you can return the the index of the first number bigger than your search
# with method searchsorted() (meaning if you perform 
# arrayVariableName.searchsorted(x) it'll return the first element 
# in the array that is >= x)
arr = np.array((1, 2, 3, 4, 5))

In [0]:
arr.searchsorted(2.9)
# returns index 2, which is number 3

2

In [0]:
arr.searchsorted(4.2)
# returns array 4 which is number 5

4

In [0]:
# searchsorted() does NOT work on arrays in descending order
descending_arr = np.array((5,4,3,2,1))

In [0]:
descending_arr.searchsorted(3.5)

5

In [0]:
descending_arr.searchsorted(3)

0

## Generating random data
[Numpy Docs](https://docs.scipy.org/doc/numpy/reference/routines.random.html?highlight=numpy%20random#module-numpy.random)

[Numpy docs at SciPy](https://docs.scipy.org/doc/numpy/reference/routines.random.html)

In [0]:
random_values_in_a_shape = np.random.rand(1, 10)
random_values_in_a_shape

array([[0.69556079, 0.57819189, 0.01856553, 0.45801811, 0.03054476,
        0.73861746, 0.42823007, 0.24856549, 0.1156036 , 0.22843729]])

In [0]:
sample_from_standard_normal_distribution = np.random.randn(1, 10)
sample_from_standard_normal_distribution

array([[-0.8463874 ,  0.08172225, -0.44262901, -0.11213948,  0.09171028,
        -0.34423985,  1.78312416, -1.32236769,  0.7043338 ,  0.76415305]])

#### randint()


```
randint(low[, high, size, dtype])	
```
Returns random integer(s) from low (inclusive) to high (exclusive).
Data type is np.int by default


In [0]:
one_random_int_between_1_and_9 = np.random.randint(1, 10)
one_random_int_between_1_and_9

1

In [0]:
ten_random_int_array_between_1_and_9 = np.random.randint(1, 10, 10)
ten_random_int_array_between_1_and_9


array([8, 2, 3, 8, 6, 3, 7, 9, 1, 4])

In [0]:
# syntax is start (inclusive), high/stop (exclusive), size=(default is 1, 1)
random_2_x_4_array_from_1_to_10 = np.random.randint(1, high=11, size=(2, 4))
random_2_x_4_array_from_1_to_10

array([[ 9,  6,  5, 10],
       [ 9,  3,  2,  1]])

#### random.random_sample(), random.random(), random.ranf(),  or random.sample()


```
random_sample([size])
```
or

```
random([size])
```
or

```
ranf([size])
```
or

```
sample([size])
```

**All** return a random sample of floats between 0 and 1 (exclusive) in a nicely formatted array


In [0]:
# takes a list as the size

random_float_sample_between_zero_and_just_under_one = np.random.random_sample(10)
random_float_sample_between_zero_and_just_under_one

array([0.92058339, 0.21013644, 0.85995138, 0.99034373, 0.49669269,
       0.59616834, 0.31523762, 0.52436179, 0.05034597, 0.18143326])

In [0]:
random_float_sample_of_two_arrays = np.random.random_sample([2, 20])
random_float_sample_of_two_arrays

array([[0.38299904, 0.36644004, 0.54304106, 0.73379193, 0.05793993,
        0.62102052, 0.18089466, 0.8382367 , 0.10737901, 0.45240991,
        0.68537936, 0.16880801, 0.67660043, 0.05975467, 0.76340987,
        0.87679131, 0.52286963, 0.44784238, 0.95897922, 0.33545974],
       [0.61081149, 0.11419361, 0.43048066, 0.55863775, 0.57762937,
        0.93966148, 0.66238151, 0.99867229, 0.81313705, 0.90615923,
        0.83105368, 0.83909728, 0.07410365, 0.94264963, 0.53341541,
        0.44045538, 0.3611984 , 0.74123504, 0.86907127, 0.67691111]])

In [0]:
showing_random_random = np.random.random([1, 7])
showing_random_random

array([[0.67555573, 0.19699413, 0.9147253 , 0.30000202, 0.10106367,
        0.16002321, 0.1770636 ]])

### choice()



```
 choice(a, size=None, replace=True, p=None)
```
* a - 1-d array-like or integer (start is inclusive, stop exclusive)
* size - integer or tuple of integers, default is 1
* replace - True by default, meaning that items can be repeated, False means every returned item in the array must be unique
* p - 1-D array-like probabilities associated with each number in "a", default is uniform
  - number of float numbers have to match the size of "a" AND probabilities have to add up to 1 (aka 100%)



In [0]:

set_of_10_random_integers = np.random.choice(5, 10)
set_of_10_random_integers

array([2, 1, 2, 4, 3, 3, 4, 0, 3, 2])

In [0]:
custom_probabilities = np.random.choice(5, 10, p=[0.3, 0.1, 0.1, 0.5, 0])
custom_probabilities

array([2, 2, 3, 2, 0, 3, 3, 0, 3, 3])

This code is taken from Numpy docs about how you can also use strings

- this goes over all of the items in the list aa_milne_arr
and prints 5 items, with each item in the list having a different probability of being selected

- notice how it also returns the datatype, which is very nice

In [0]:
aa_milne_arr = ['pooh', 'rabbit', 'piglet', 'Christopher']
np.random.choice(aa_milne_arr, 5, p=[0.5, 0.1, 0.1, 0.3])


array(['piglet', 'pooh', 'rabbit', 'pooh', 'Christopher'], dtype='<U11')

### Multivariate normal, multiormal, aka Gaussian distribution

```
multivariate_normal(mean, cov[, size, check_valid, tol])
```

   *  mean : 1-D array_like, of length N
      Mean of the N-dimensional distribution.
   *  cov : 2-D array_like, of shape (N, N)
      Covariance matrix of the distribution. It must be symmetric and
      positive-semidefinite for proper sampling.
  *   size : int or tuple of ints, optional
      Given a shape of, for example, ``(m,n,k)``, ``m*n*k`` samples are
      generated, and packed in an `m`-by-`n`-by-`k` arrangement.  Because
      each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``.
      If no shape is specified, a single (`N`-D) sample is returned.
  *   check_valid : { 'warn', 'raise', 'ignore' }, optional
      Behavior when the covariance matrix is not positive semidefinite.
   *  tol : float, optional
      Tolerance when checking the singular values in covariance matrix.
   

[Gaussian distribution](https://en.wikipedia.org/wiki/Normal_distribution)


run this if you want more specifics:


```
help(np.random.multivariate_normal)
```




In [0]:
help(np.random.multivariate_normal)

Help on built-in function multivariate_normal:

multivariate_normal(...) method of mtrand.RandomState instance
    multivariate_normal(mean, cov[, size, check_valid, tol])
    
    Draw random samples from a multivariate normal distribution.
    
    The multivariate normal, multinormal or Gaussian distribution is a
    generalization of the one-dimensional normal distribution to higher
    dimensions.  Such a distribution is specified by its mean and
    covariance matrix.  These parameters are analogous to the mean
    (average or "center") and variance (standard deviation, or "width,"
    squared) of the one-dimensional normal distribution.
    
    Parameters
    ----------
    mean : 1-D array_like, of length N
        Mean of the N-dimensional distribution.
    cov : 2-D array_like, of shape (N, N)
        Covariance matrix of the distribution. It must be symmetric and
        positive-semidefinite for proper sampling.
    size : int or tuple of ints, optional
        Given a sha

In [0]:
x, y = np.random.multivariate_normal([2, 3], [[0.3, 0], [0,  0.5]], 10).T


In [0]:
x

array([2.1419047 , 2.45065224, 2.12161889, 1.73367696, 3.74314429,
       2.24001386, 1.93005535, 1.80448549, 1.96514239, 1.98595816])

In [0]:
y

array([3.56253664, 3.21213273, 2.88869718, 1.59884805, 2.89022774,
       3.58136277, 3.19127819, 1.43251027, 2.6962604 , 3.7854386 ])

## Distributions & Permutations

[Distributions & Permutations Docs](https://docs.scipy.org/doc/numpy/reference/routines.random.html#permutations)

## Operations on Arrays

In [0]:
# ten digits, between 1 and 3
array1 = np.linspace(1, 3, 10)

# ten digits between 1 and 5
array2 = np.linspace(1, 5, 10)


print("Array1 : ",array1)
print("Array2 :", array2)


Array1 :  [1.         1.22222222 1.44444444 1.66666667 1.88888889 2.11111111
 2.33333333 2.55555556 2.77777778 3.        ]
Array2 : [1.         1.44444444 1.88888889 2.33333333 2.77777778 3.22222222
 3.66666667 4.11111111 4.55555556 5.        ]


In [0]:
# adding the two arrays together
addition = array1 + array2
addition

array([2.        , 2.66666667, 3.33333333, 4.        , 4.66666667,
       5.33333333, 6.        , 6.66666667, 7.33333333, 8.        ])

In [0]:
# check out linear algebra for more on this
multiplication = array1 * array2

multiplication

array([ 1.        ,  1.7654321 ,  2.72839506,  3.88888889,  5.24691358,
        6.80246914,  8.55555556, 10.50617284, 12.65432099, 15.        ])

In [0]:
# you want to add a scalar to the array
# "scales" each digit by the addition of the scalar
scalar = array1 + 10
scalar

array([11.        , 11.22222222, 11.44444444, 11.66666667, 11.88888889,
       12.11111111, 12.33333333, 12.55555556, 12.77777778, 13.        ])

In [0]:
# you want to add a scalar to the array
# "scales" each digit by the multiplication of the scalar
scalar_multi = array1 * 10
scalar_multi

array([10.        , 12.22222222, 14.44444444, 16.66666667, 18.88888889,
       21.11111111, 23.33333333, 25.55555556, 27.77777778, 30.        ])

In [0]:
# now doing it with nd arrays
A = np.ones((2,4))
B = np.ones((2,4))

In [0]:
# this is NOT MATRIX MULTIPLICATION, this is an element-wise product
A * B

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

## Matrix multiplication
> *This will be handy for neural networks*

In [0]:
# THIS IS MATRIX MULITPLICATION
# the columns in the first matrix have to match the rows in the second matrix
# syntax is np.array(rows x columns) for 2d matrix
# in other words:
    # If A is an n × m matrix and B is an m × p matrix, 
    # their matrix product AB is an n × p matrix,
C = np.ones((2, 5))
D = np.ones((5, 4))
C @ D

array([[5., 5., 5., 5.],
       [5., 5., 5., 5.]])

In [0]:
# you can also use @ to take the inner product of two flat arrays
flat1 = np.array((1, 3))
flat2 = np.array((10, 20))
flat1 @ flat2
# this returns 70 because it's (1x10 + 3x20)

70

In [0]:
# you can also do this with a tuple of the same size array
flat1 @ (0, 2)

6

## Mutability and Copying
> Arrays act as lists in python, so they are mutable

In [0]:
another_array = np.array([0, 1])
another_array

array([0, 1])

In [0]:
# changing array elements through index
another_array[1] = 2
another_array

array([0, 2])

In [0]:
# Can you add a new digit in the array by calling its index? Nope
another_array[2] = 4
another_array

IndexError: ignored

In [0]:
# BUT!
# Change the same array with some random digits?
  # which is wild because if you wanted to add, from indexing, a new digit the program
  # crashes
another_array = np.random.randn(3)
another_array

array([-0.62632114,  0.00660194, -0.01200653])