# NumPy

NumPy is short for Numerical Python.  It is standard to use the alias ``np`` when importing.
~~~
import numpy as np
~~~

In [47]:
import numpy as np
from numpy.random import randn

The main object that we will use is the `ndarray`.  The ndarray is a generic multidimensional container for homogeneous data (all the elements must be the same data type).  We will mostly be using 1D and 2D arrays, but they can potentially have many more dimensions.  You can tell the number of dimensions of a printed array by counting the opening square brackets at the start of its output (one for 1D, two for 2D, and so on).

## Creating NumPy Arrays

### From Lists
~~~
np.array(my_list)
~~~

In [48]:
list_one = [1, 2, 3]
list_two = [[1, 1, 1], [2, 2, 2], [3, 3, 3]]

In [49]:
np.array(list_one)

array([1, 2, 3])

In [51]:
two = np.array(list_two)

In [52]:
two.ndim

2

### From built-in functions
~~~
np.arange(start, stop, step) #exclusive of stop
np.zeros(n) OR np.zeros([n,m])
np.ones(n) OR np.ones([n,m])
np.full(n, fill) OR np.full([n,m], fill)
np.linspace(start, stop, n) #n evenly spaced values, inclusive of stop
np.eye(n) 
~~~

In [5]:
np.full([3,4], 20)

array([[20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20]])

In [6]:
np.arange(1,21).reshape(4,5)

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20]])

In [7]:
np.linspace(0,1,10)

array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])

In [53]:
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [8]:
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

### From random functions
~~~
np.random.rand(n) OR np.random.rand(n,m)
np.random.randint(low, high, n) OR np.random.randint(low, high, [n,m]) #exclusive of high
np.random.randn(n) OR np.random.randn(n,m)
~~~

In [62]:
b = np.random.rand(5,2)

In [63]:
b

array([[0.83843401, 0.06464173],
       [0.71705562, 0.58584449],
       [0.44020022, 0.94202098],
       [0.11855606, 0.17280969],
       [0.45600763, 0.70747501]])

AttributeError: 'numpy.ndarray' object has no attribute 'iloc'

In [64]:
b.max()

0.942020982327526

In [65]:
b.argmax()

5

In [58]:
a = np.random.randint(0,10,5)

In [59]:
a

array([0, 4, 2, 8, 3])

In [60]:
a.max()

8

In [61]:
a.argmax()

3

In [11]:
randn(10)

array([-0.62835546, -0.32599951,  0.49229583,  1.43567796, -0.92345402,
       -0.16596574,  1.19309763,  0.15378232, -2.48674932,  0.05841425])

## Array attributes and methods

### Some common attributes
~~~
arr = np.random.rand(10)
arr.ndim   #number of dimensions
arr.shape  #tuple of integers of length ndim
arr.size   #total number of elements
arr.dtype
arr.T # the transpose
~~~

In [54]:
arr = np.random.rand(10,5)
arr

array([[0.59444858, 0.86123586, 0.16567258, 0.67530354, 0.59397409],
       [0.59161784, 0.76100015, 0.41245382, 0.87183717, 0.7775405 ],
       [0.36403135, 0.11866452, 0.75954601, 0.67748114, 0.71616198],
       [0.91701315, 0.1205372 , 0.14405668, 0.97473384, 0.37787789],
       [0.65404856, 0.52848887, 0.44782598, 0.76827563, 0.04979173],
       [0.35768507, 0.41163571, 0.10403565, 0.68200652, 0.73571329],
       [0.15725297, 0.35053016, 0.50272418, 0.18859659, 0.32913854],
       [0.32646072, 0.97898428, 0.93226831, 0.23824124, 0.63742977],
       [0.04752786, 0.11835148, 0.15526745, 0.35438908, 0.79335794],
       [0.42253939, 0.99990051, 0.58286483, 0.99385283, 0.32406872]])

In [56]:
arr.dtype

dtype('float64')

In [14]:
arr.shape

(10, 5)

In [15]:
arr.size

50

In [16]:
arr.dtype

dtype('float64')

In [17]:
arr

array([[0.16437522, 0.66721055, 0.79848721, 0.46038474, 0.42991773],
       [0.57405016, 0.63585318, 0.58237994, 0.33509575, 0.90410329],
       [0.1043764 , 0.95153177, 0.96160581, 0.81729035, 0.31922106],
       [0.44694489, 0.28242473, 0.36772317, 0.41431332, 0.75721761],
       [0.87225897, 0.63077061, 0.95384028, 0.0925588 , 0.54279661],
       [0.69116845, 0.14545485, 0.59139375, 0.10043208, 0.38287881],
       [0.23643857, 0.70013483, 0.65772819, 0.46089169, 0.17796269],
       [0.76709222, 0.41106164, 0.02568539, 0.99039227, 0.19582752],
       [0.18509337, 0.23992547, 0.87970429, 0.487342  , 0.30467638],
       [0.6464485 , 0.02541506, 0.19761221, 0.22408126, 0.78051692]])

In [18]:
arr.T

array([[0.16437522, 0.57405016, 0.1043764 , 0.44694489, 0.87225897,
        0.69116845, 0.23643857, 0.76709222, 0.18509337, 0.6464485 ],
       [0.66721055, 0.63585318, 0.95153177, 0.28242473, 0.63077061,
        0.14545485, 0.70013483, 0.41106164, 0.23992547, 0.02541506],
       [0.79848721, 0.58237994, 0.96160581, 0.36772317, 0.95384028,
        0.59139375, 0.65772819, 0.02568539, 0.87970429, 0.19761221],
       [0.46038474, 0.33509575, 0.81729035, 0.41431332, 0.0925588 ,
        0.10043208, 0.46089169, 0.99039227, 0.487342  , 0.22408126],
       [0.42991773, 0.90410329, 0.31922106, 0.75721761, 0.54279661,
        0.38287881, 0.17796269, 0.19582752, 0.30467638, 0.78051692]])

In [19]:
arr.T.shape

(5, 10)

### Some common methods

~~~
arr.reshape(rows,cols)
arr.max()
arr.argmax()
arr.min()
arr.argmin()
arr.sum()
arr.cumsum()
~~~

In [20]:
arr

array([[0.16437522, 0.66721055, 0.79848721, 0.46038474, 0.42991773],
       [0.57405016, 0.63585318, 0.58237994, 0.33509575, 0.90410329],
       [0.1043764 , 0.95153177, 0.96160581, 0.81729035, 0.31922106],
       [0.44694489, 0.28242473, 0.36772317, 0.41431332, 0.75721761],
       [0.87225897, 0.63077061, 0.95384028, 0.0925588 , 0.54279661],
       [0.69116845, 0.14545485, 0.59139375, 0.10043208, 0.38287881],
       [0.23643857, 0.70013483, 0.65772819, 0.46089169, 0.17796269],
       [0.76709222, 0.41106164, 0.02568539, 0.99039227, 0.19582752],
       [0.18509337, 0.23992547, 0.87970429, 0.487342  , 0.30467638],
       [0.6464485 , 0.02541506, 0.19761221, 0.22408126, 0.78051692]])

In [21]:
arr.max()

0.9903922659222898

In [22]:
arr.argmax()

38

In [23]:
arr.sum()

24.572090540189443

In [24]:
np.sum(arr)

24.572090540189443

In [25]:
arr.shape

(10, 5)

In [26]:
arr.reshape(25,2)

array([[0.16437522, 0.66721055],
       [0.79848721, 0.46038474],
       [0.42991773, 0.57405016],
       [0.63585318, 0.58237994],
       [0.33509575, 0.90410329],
       [0.1043764 , 0.95153177],
       [0.96160581, 0.81729035],
       [0.31922106, 0.44694489],
       [0.28242473, 0.36772317],
       [0.41431332, 0.75721761],
       [0.87225897, 0.63077061],
       [0.95384028, 0.0925588 ],
       [0.54279661, 0.69116845],
       [0.14545485, 0.59139375],
       [0.10043208, 0.38287881],
       [0.23643857, 0.70013483],
       [0.65772819, 0.46089169],
       [0.17796269, 0.76709222],
       [0.41106164, 0.02568539],
       [0.99039227, 0.19582752],
       [0.18509337, 0.23992547],
       [0.87970429, 0.487342  ],
       [0.30467638, 0.6464485 ],
       [0.02541506, 0.19761221],
       [0.22408126, 0.78051692]])

## Indexing and Slicing



In [69]:
arr = np.arange(10,20)
arr2d = np.random.randint(1,11,[5,5])

### 1-d arrays
~~~
arr[i]
arr[i:j]
arr[:j]
arr[i:]
arr[i:j:step]
~~~

In [28]:
arr

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [82]:
arr[0:6:2]

array([10, 12, 14])

In [30]:
arr[1:6]

array([11, 12, 13, 14, 15])

### 2-d arrays
~~~
arr2d[row]
arr2d[row][col] OR arr2d[row, col]
arr2d[:i, j:]
arr2d[[a,b,c,d]] # will get rows with index a, b, c and d
~~~


In [31]:
arr2d

array([[ 7,  8,  5,  7, 10],
       [ 3,  7,  3,  6,  3],
       [ 1,  9,  4,  8, 10],
       [ 6,  9,  2,  4,  3],
       [10,  9,  7,  8,  6]])

In [33]:
arr2d[[0,2,4]]

array([[ 7,  8,  5,  7, 10],
       [ 1,  9,  4,  8, 10],
       [10,  9,  7,  8,  6]])

#### Important Note:
Array slices are *views* of the original array.  Changing any values in the slice will also change values in the original array.  

In [70]:
arr

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [71]:
arr_slice = arr[0:5]

In [72]:
arr_slice

array([10, 11, 12, 13, 14])

In [73]:
arr_slice[0] = 1234567

In [74]:
arr_slice

array([1234567,      11,      12,      13,      14])

In [75]:
arr

array([1234567,      11,      12,      13,      14,      15,      16,
            17,      18,      19])

If a copy of a slice is needed, use .copy()

In [76]:
arr = np.arange(11)

In [77]:
arr_slice = arr[0:5].copy()

In [78]:
arr_slice

array([0, 1, 2, 3, 4])

In [79]:
arr_slice[0] = 1234567

In [80]:
arr_slice

array([1234567,       1,       2,       3,       4])

In [81]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

### Boolean Indexing
(Boolean indexing creates a copy)

In [83]:
arr = np.array([1,1,1,3,3,3,4,5,4])

In [84]:
arr2d = np.random.randn(9,5)

In [85]:
arr2d

array([[ 2.73667983e-01, -2.05314892e-01,  8.28804723e-01,
         9.36495061e-01, -1.28283150e+00],
       [ 5.89122589e-02,  1.14784168e+00,  9.00700642e-02,
        -8.99803719e-01, -1.44356364e-01],
       [ 4.51805668e-02,  1.07012909e+00, -4.97310291e-01,
        -1.16748273e+00,  3.61262121e-01],
       [-9.40249593e-01, -2.15541658e+00,  1.36971050e+00,
         4.13224574e-01,  1.62478198e+00],
       [ 3.82733184e-03,  2.75619409e-01,  5.17378713e-01,
         1.53353490e-01, -5.18183688e-01],
       [ 2.59420970e-02, -1.16074596e+00,  1.61735654e+00,
         4.88588390e-01,  1.03591335e+00],
       [-4.31601761e-01, -2.53857966e-01, -9.07106440e-01,
        -1.38058677e+00,  8.72579182e-01],
       [ 1.36743438e+00,  1.17558300e+00, -1.48132821e+00,
         2.55935579e-01, -1.96210496e-04],
       [-1.66761801e-01, -4.49665248e-01,  9.31688570e-01,
         1.42458574e+00,  2.18974769e+00]])

In [86]:
arr[arr>3]

array([4, 5, 4])

In [87]:
arr_part = arr[arr>3]

In [88]:
arr_part

array([4, 5, 4])

In [89]:
arr_part[0] = 121212

In [90]:
arr_part

array([121212,      5,      4])

In [91]:
arr

array([1, 1, 1, 3, 3, 3, 4, 5, 4])

In [None]:
arr2d[arr2d>0]

In [None]:
arr2d[arr>3]

In [None]:
arr2d[arr2d < 0] = 0

In [None]:
arr2d

In order to combine multiple boolean conditions, use ``&`` and ``|`` (instead of ``and`` and ``or``).  Also use parentheses to separate conditions.

In [None]:
arr2d[(arr == 1) | (arr > 4)]  # element-wise OR: keeps the rows of arr2d where arr is 1 or greater than 4

In [None]:
# Deliberate failure: Python's `or` needs a single True/False for each operand,
# but a multi-element array has no unambiguous truth value, so NumPy raises
# ValueError. Catch and display it so "Run All" continues past this demonstration.
try:
    arr2d[(arr == 1) or (arr > 4)]
except ValueError as err:
    print(f"ValueError: {err}")

### Fancy Indexing
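Fancy indexing refers to indexing using integer arrays.  **The result is only one-dimensional when one 1-D integer array is supplied per axis** (the arrays are then paired element by element); indexing just the rows of a 2-D array with a single integer array, as in the examples below, returns a 2-D array of the selected rows.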

In [40]:
arr = np.empty((8,4))

In [41]:
# Build the 8x4 demo array whose entry in row i, column j is i + (j+1)/10,
# so the integer part names the row and the decimal part names the column.
# Broadcasting an (8, 1) column of row numbers against a length-4 row of column
# offsets replaces the element-by-element Python loops and does not rely on a
# previously allocated array.
row_ids = np.arange(8).reshape(-1, 1)
col_offsets = np.arange(1, 5) / 10
arr = row_ids + col_offsets
arr

array([[0.1, 0.2, 0.3, 0.4],
       [1.1, 1.2, 1.3, 1.4],
       [2.1, 2.2, 2.3, 2.4],
       [3.1, 3.2, 3.3, 3.4],
       [4.1, 4.2, 4.3, 4.4],
       [5.1, 5.2, 5.3, 5.4],
       [6.1, 6.2, 6.3, 6.4],
       [7.1, 7.2, 7.3, 7.4]])

In [42]:
arr[[0,2,4,6]]

array([[0.1, 0.2, 0.3, 0.4],
       [2.1, 2.2, 2.3, 2.4],
       [4.1, 4.2, 4.3, 4.4],
       [6.1, 6.2, 6.3, 6.4]])

In [43]:
arr[:2,3:]

array([[0.4],
       [1.4]])

In [44]:
arr[[2,0,7]]

array([[2.1, 2.2, 2.3, 2.4],
       [0.1, 0.2, 0.3, 0.4],
       [7.1, 7.2, 7.3, 7.4]])

Be careful when using multiple array indexes.  It might not do what you expect.

In [45]:
# Deliberate failure: with two index lists NumPy pairs them element by element,
# so 3 row indexes cannot be matched with 4 column indexes and IndexError is
# raised. Catch and display it so the notebook still runs from top to bottom.
try:
    arr[[2,0,7], [1,2,3,0]]
except IndexError as err:
    print(f"IndexError: {err}")

IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (3,) (4,) 

In [46]:
arr[[2,0,7]][:,[1,2,3]]

array([[2.2, 2.3, 2.4],
       [0.2, 0.3, 0.4],
       [7.2, 7.3, 7.4]])

## Universal Functions
A universal function, sometimes called a *ufunc*, is a function that performs element-wise operations on data in ndarrays.  There are *unary* ufuncs, which take a single ndarray as input, and *binary* ufuncs, which take two ndarrays as input and return a single array.  

* Examples of unary ufuncs:  ``sqrt``, ``square``, ``exp``, ``log``, ``isnan``, ``cos`` (and other trig functions)
* Examples of binary ufuncs: ``add``, ``power``, ``mod``

(See Tables 4-3 and 4-4 on pgs 107-108 in *Python for Data Analysis*)


In [92]:
arr1 = np.linspace(0,1,11)

In [93]:
arr1

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [None]:
arr2 = np.random.randint(10,100,11)

In [None]:
arr2

In [94]:
np.square(arr1)

array([0.  , 0.01, 0.04, 0.09, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81, 1.  ])

In [None]:
np.exp(arr1)

In [None]:
np.power(arr2,arr1)

In [95]:
arr

array([1, 1, 1, 3, 3, 3, 4, 5, 4])

In [96]:
arr2d

array([[ 2.73667983e-01, -2.05314892e-01,  8.28804723e-01,
         9.36495061e-01, -1.28283150e+00],
       [ 5.89122589e-02,  1.14784168e+00,  9.00700642e-02,
        -8.99803719e-01, -1.44356364e-01],
       [ 4.51805668e-02,  1.07012909e+00, -4.97310291e-01,
        -1.16748273e+00,  3.61262121e-01],
       [-9.40249593e-01, -2.15541658e+00,  1.36971050e+00,
         4.13224574e-01,  1.62478198e+00],
       [ 3.82733184e-03,  2.75619409e-01,  5.17378713e-01,
         1.53353490e-01, -5.18183688e-01],
       [ 2.59420970e-02, -1.16074596e+00,  1.61735654e+00,
         4.88588390e-01,  1.03591335e+00],
       [-4.31601761e-01, -2.53857966e-01, -9.07106440e-01,
        -1.38058677e+00,  8.72579182e-01],
       [ 1.36743438e+00,  1.17558300e+00, -1.48132821e+00,
         2.55935579e-01, -1.96210496e-04],
       [-1.66761801e-01, -4.49665248e-01,  9.31688570e-01,
         1.42458574e+00,  2.18974769e+00]])

In [99]:
arr2d.sum(axis=0)

array([ 0.23635146, -0.55582746,  2.46926417,  0.22430962,  4.13871656])