# NumPy

## ALWAYS do: import numpy as np
This is a convention that everyone follows. If you do not do this, other people will have a hard time reading your code.

In [2]:
import numpy as np
np.__version__  # show which NumPy version produced the outputs in this notebook

'1.14.2'

## Numpy arrays
- arrays can only contain data of the same data type (similar to atomic vectors or matrices in R)

## Array creation
- direct creation with `np.array()`
- Create a list with square brackets, and put that inside `np.array()`

In [16]:
np.array([1,2,3])

array([1, 2, 3])

In [18]:
a = np.array([1, 2, 3])
print(a) # printing an array appears different from the array([]) in ipython

[1 2 3]


In [12]:
# mixing ints with a float: the 3.0 coerces every element to float
b = np.array([1, 2, 3.0])
print(b)

[1. 2. 3.]


In [23]:
# mixing numbers with a string: every element is coerced to a string
c = np.array([1, 2, "3"])
print(c)

['1' '2' '3']


If you provide a list of lists, you can create a multi-dimensional array. (Like a matrix)

In [7]:
# a nested list (one inner list per row) becomes a 2 x 3 array
d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(d)

[[1 2 3]
 [4 5 6]]


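But if the inner lists have different lengths, NumPy cannot build a rectangular array. The best you can get is a one-dimensional array of Python lists, which is not as useful. Older versions of NumPy (like the 1.14 used here) create it silently; NumPy 1.24 and later raise a `ValueError` unless you pass `dtype=object`.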

In [9]:
# Rows of unequal length cannot form a rectangular array. NumPy >= 1.24 raises
# ValueError for such "ragged" input unless dtype=object is requested explicitly;
# older versions built this 1-D array of Python lists implicitly.
e = np.array([ [1,2,3],[4,5] ], dtype=object)
print(e)

[list([1, 2, 3]) list([4, 5])]


## Other ways to make arrays

In [13]:
np.zeros(5) # makes a vector of 0s. similar to rep(0, 5)

array([0., 0., 0., 0., 0.])

In [15]:
np.zeros([2,4])  # makes an array 2x4

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [19]:
np.zeros([2,3,4]) # 3 dimensional array 2 x 3 x 4...
# notice the order of creation: 2 'sheets' of 3 rows by 4 columns

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [17]:
np.ones(5)  # similar, but inserts ones

array([1., 1., 1., 1., 1.])

In [21]:
np.full((2, 3), fill_value=1.2)  # like zeros/ones, but you choose the single value that gets repeated

array([[1.2, 1.2, 1.2],
       [1.2, 1.2, 1.2]])

## Making arrays of random numbers
NumPy's legacy `np.random` functions (used below) generate pseudo-random numbers with the Mersenne Twister (MT19937) algorithm.
- All random generator functions begin with `np.random.`

In [51]:
np.random.seed(1)  # seed the generator for reproducibility

In [52]:
np.random.random(5)  # random.random for random values on the interval [0,1)

array([4.17022005e-01, 7.20324493e-01, 1.14374817e-04, 3.02332573e-01,
       1.46755891e-01])

In [53]:
np.random.randn(5)
# random.randn for random values from the standard normal (mean 0, sd 1)
# here: 5 values returned as a one-dimensional array

array([-1.10593508, -1.65451545, -2.3634686 ,  1.13534535, -1.01701414])

In [54]:
np.random.normal(10, 3, [2, 4])
# random.normal for random values from a normal with mean 10 and sd 3
# arranged in a 2 x 4 matrix

array([[11.91208544,  7.42028018, 15.31782289,  6.66891084],
       [10.5436428 , 11.6930346 ,  8.30046931, 12.18992679]])

In [55]:
np.random.randint(0, 10, 20)  # select random integers from 0 inclusive to 10 exclusive
# and return 20 values

array([1, 8, 8, 3, 9, 8, 7, 3, 6, 5, 1, 9, 3, 4, 8, 1, 4, 0, 3, 9])

More random generation at: <https://docs.scipy.org/doc/numpy-1.14.0/reference/routines.random.html>

## Array sequences
make sequences with

- `np.arange(start, stop, step)`  
-  makes an **a**rray **range** from start (inclusive) to stop (exclusive), by step

In [56]:
np.arange(0,100, 5)

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80,
       85, 90, 95])

- `np.linspace(start, stop, num)`
- makes an array of **lin**early **space**d values beginning with start, ending with stop, with a length of num

In [59]:
np.linspace(0, 100, 11)

array([  0.,  10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])

In [63]:
np.linspace(0, 100, 10)

array([  0.        ,  11.11111111,  22.22222222,  33.33333333,
        44.44444444,  55.55555556,  66.66666667,  77.77777778,
        88.88888889, 100.        ])

In [65]:
np.linspace(0, 100, num=10, endpoint=False)  # endpoint=False is optional and excludes the stop value

array([ 0., 10., 20., 30., 40., 50., 60., 70., 80., 90.])

# Array Attributes
- `array.ndim` for dimensions
- `array.shape` for the size of each dimension
- `array.dtype` for the data type 

In [67]:
# a 3 x 4 array of ones whose attributes are inspected below
x = np.ones((3, 4))
print(x)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [68]:
x.ndim

2

In [69]:
x.shape

(3, 4)

In [70]:
x.dtype

dtype('float64')

In [149]:
# the integers 0..11; start defaults to 0 and step to 1
y = np.arange(12)
print(y)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [151]:
y.shape # a one dimensional array

(12,)

## Reshaping Arrays
- `np.reshape(array, [new shape])` is the reshape function
- `array.T` is the transpose attribute (no parentheses needed); it returns the transposed array and leaves the original array unaffected


In [98]:
# twelve consecutive integers (0..11) to reshape below
j = np.arange(12)
print(j)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [99]:
# reshape the 12 values into 3 rows of 4; note that it fills row-wise unlike R
k = np.reshape(j, (3, 4))
print(k)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [100]:
print(k.T)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [105]:
print(k) # calling k.T does not modify the original k array

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [106]:
# can combine the above methods and steps into one:
l = np.arange(0,12,1).reshape([3,4]).T
# create a-range >> reshape >> transpose
print(l)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [159]:
# a one-dimensional array of the integers 0..11
y = np.arange(12)
print(y)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [160]:
y.shape

(12,)

In [154]:
print(y.T) # the transpose of a one dimensional array doesn't suddenly give it a second dimension

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [161]:
y.T.shape

(12,)

In [156]:
# one row, twelve columns: the same values, now explicitly two-dimensional
z = np.reshape(y, (1, 12))
print(z)

[[ 0  1  2  3  4  5  6  7  8  9 10 11]]


In [163]:
z.shape

(1, 12)

In [157]:
print(z.T)

[[ 0]
 [ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]]


In [164]:
z.T.shape

(12, 1)

# Subsetting and Slicing Arrays
- very similar to subsetting and slicing lists

In [117]:
# the integers 0..11, used for the slicing examples below
y = np.arange(12)
print(y)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [118]:
y[4]

4

In [144]:
y.shape

(12,)

In [119]:
y[4:6]

array([4, 5])

You can also add a step with a second colon. The array gets subset with `array[start:stop:step]`

In [120]:
y[1:8:3]

array([1, 4, 7])

Subsetting and slicing higher dimensional arrays is similar, and uses a comma to separate subsetting instructions for each dimension.

In [121]:
# arrange the 12 values as 3 rows by 4 columns for two-dimensional subsetting
z = np.reshape(y, (3, 4))
print(z)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [134]:
print(z[1,2]) # returns what is at row index 1, col index 2

6


In [135]:
type(z[1,2]) # a single element is a NumPy integer scalar, no longer an array (int32 here; the default integer width depends on platform and NumPy version)

numpy.int32

In [137]:
z[0:2, 0:2]

array([[0, 1],
       [4, 5]])

In [138]:
type(z[0:2, 0:2])  # the type remains a numpy array

numpy.ndarray

In [139]:
print(z[2, :]) # returns row at index 2

[ 8  9 10 11]


In [142]:
z[2, :].shape  # the shape is one dimensional

(4,)

In [143]:
print(z[:,2]) # returns column at index 2

[ 2  6 10]


In [145]:
z[:,2].shape # shape is one dimensional

(3,)