## Storage and Manipulation of Numerical Arrays using NumPy
<img src="http://www.numpy.org/_static/numpy_logo.png" width="150" align="left"/>

In [None]:
import numpy as np # np is a commonly used shorthand

### (and simple visualization with Matplotlib)

In [None]:
import matplotlib.pyplot as plt # standard Matplotlib import using the alias plt

#### In Python,  `list` is the built-in flexible container

In [None]:
# A Python list can mix item types: here an int, a float, a str and a bool.
my_list = [
    1,
    1.0,
    'a string',
    True,
]

Here is a list containing the types of the items in my_list

In [None]:
# Apply type() to every item of my_list and collect the results in a list.
my_list_types = list(map(type, my_list))
print(my_list_types)
# Materialize the integers produced by range(4) as a list.
list_from_range = list(range(4))
print(list_from_range)

#### Here is how to retrieve some properties of a list

In [None]:
# Build the list 0..10, then report its length, its container type and the
# type of its first item under a tab-aligned header.
list_of_int = [*range(11)]
print('size\t', 'type of container\t\t', 'type of items')
print(
    len(list_of_int), '\t',
    type(list_of_int), '\t\t',
    type(list_of_int[0]),
)

#### If the `list` contains items of the same type, it is more efficient to use a fixed-type array: a NumPy array can be created from a list
`NumPy` arrays are collections of items of the same type which can be efficiently saved/loaded, stored, and manipulated.

In [None]:
# Same report as for the list, but for the NumPy array built from it.
print( 'size\t', 'type of container\t\t', 'type of items')
array_of_int = np.array(list_of_int)
# The last column must describe the items of the array (a NumPy integer type),
# not the items of the list the array was created from.
print( array_of_int.size, '\t', type(array_of_int), '\t', type(array_of_int[0]) )

#### The type of the items in a NumPy array can be converted using `astype`

In [None]:
# Show the integer array, then convert it with astype and show each result.
print(f'Array of integers:\n {array_of_int}')

# integer -> string
array_of_str = array_of_int.astype(str)
print(f'Array of strings of integers:\n {array_of_str}')

# integer -> float
array_of_float = array_of_int.astype(float)
print(f'Array of floating-point numbers:\n {array_of_float}')

# float -> string
array_of_str = array_of_float.astype(str)
print(f'Array of strings of numbers with one trailing zero:\n {array_of_str}')

#### Number formatting
To convert numerical arrays into arrays of formatted strings, we can use [`format`](https://pyformat.info/) (useful to customize ticks or legend labels in plots)

In [None]:
# Fixed-point notation with four digits after the decimal point.
array_of_str = np.array([format(value, '1.4f') for value in array_of_float])
print('Array of strings of numbers with four trailing zero:\n', array_of_str)
# Exponent notation with no digits after the decimal point.
array_of_str = np.array([format(value, '1.0e') for value in array_of_float])
print('Array of strings of numbers in exponent notation:\n', array_of_str)

#### Multidimensional arrays can be created from lists of ranges

In [None]:
# Each row is a range of four consecutive integers; row k starts at k.
rows = [range(start, start + 4) for start in range(5)]
array = np.array(rows)
print(array)

#### Create arrays using `empty`, `zeros`, `ones`

In [None]:
# uninitialized array of shape 2x2, dtype is float64 by default
array = np.empty(shape=(2,2))
print('empty:\n',array)
# float array of shape 2x3 filled with zeros
array = np.zeros(shape=(2,3),dtype=float)
print('zeros:\n',array)
# integer array of shape 3x10 filled with ones
array = np.ones(shape=(3,10),dtype=int)
print('ones:\n',array)

#### Create arrays using  `arange`, `linspace`

In [None]:
# linear sequence from 0 to 10, stepping by 0.3
array = np.arange(start=0,stop=10,step=.3)
print('arange:\n',array)
# 5 values evenly spaced between 0 and 10
array = np.linspace(start=0,stop=10,num=5)
print('linspace:\n',array)

#### Task:
1. create a linear sequence of integers from 0 to 7 stepping by 1 with `arange` and `linspace`
2. create a linear sequence of floating-point numbers from 0 to 7 stepping by 0.25 with `arange` and `linspace`

In [None]:
# 1. integers 0..7 in steps of 1
print('using arange:\n', np.arange(8))  # integer type inferred from the argument
print('using linspace:\n', np.linspace(0, 7, num=8, dtype=int))
# 2. floats 0..7 in steps of 0.25 (a stop of 7.1 makes arange include 7.0)
print('using arange:\n', np.arange(0, 7.1, 0.25))
print('using linspace:\n', np.linspace(0, 7, num=29, dtype=float))

#### Array attributes: `size`, `shape`, `ndim`, `dtype`

In [None]:
# 2x3x4 array of ones stored as single-precision complex numbers
array = np.ones((2, 3, 4), dtype=np.complex64)
print(array[0, 1, 0])        # one item, addressed by one index per axis
print('size:', array.size)   # total number of items
print('shape:', array.shape) # length of each axis
print('ndim:', array.ndim)   # number of axes
print('dtype:', array.dtype) # type of the items

#### Reshaping of Arrays

In [None]:
# Shape and size before reshaping ...
print('shape: \t\t size:')
print(array.shape, '\t', array.size)
# ... and after rearranging the same items into 6 rows and 4 columns.
array = array.reshape(6, 4)
print('shape: \t\t size:')
print(array.shape, '\t\t', array.size)

A 1D array can be turned into a 2D array by indexing it with `np.newaxis`

In [None]:
# 1D array with 5 items
x1 = np.linspace(0, 4, num=5)
print('1D array - shape:', x1.shape, 'ndim:', x1.ndim)

# np.newaxis in the first position: 1 row x 5 columns (row vector)
x2_row = x1[np.newaxis, :]
print('2D array (row vector) - shape:', x2_row.shape, 'ndim:', x2_row.ndim)

# np.newaxis in the second position: 5 rows x 1 column (column vector)
x2_col = x1[:, np.newaxis]
print('2D array (column vector) - shape:', x2_col.shape, 'ndim:', x2_col.ndim)

#### Unidimensional Array Indexing

In [None]:
# Integers 0..10; negative indices count from the end of the array.
x1 = np.linspace(0, 10, num=11, dtype=int)
print(x1, '\n')
for label, index in (
    ('first value:', 0),
    ('second value:', 1),
    ('last value:', -1),
    ('second last value:', -2),
):
    print(label, x1[index])

#### Unidimensional Array Slicing 

In [None]:
# A slice has three fields, start:stop:step; slice(start, stop, step) is the
# object form of the same notation, with None standing for an omitted field.
for title, window in (
    ('Elements after index 5:\n', slice(5, None)),           # 1st field: start
    ('First five values:\n', slice(None, 5)),                # 2nd field: stop
    ('Last five values:\n', slice(-5, None)),
    ('Every Other values:\n', slice(None, None, 2)),         # 3rd field: step
    ('Reversed:\n', slice(None, None, -1)),                  # negative step reverses
    ('Reversed Every Other:\n', slice(None, None, -2)),
):
    print(title, x1[window])

#### Task
Generate the array [9 7 5 3] from `np.linspace(start=0,stop=10,num=11)` and convert it to string with 2 trailing zeros.

In [None]:
# Walk backwards from the second-last item in steps of 2, stopping before
# index 2: this selects 9, 7, 5, 3.
array = np.linspace(0, 10, num=11, dtype=int)[-2:2:-2]
# Format each value with two digits after the decimal point.
array = np.array([format(value, '1.2f') for value in array])
print(array)

#### Multidimensional Array Indexing

In [None]:
# One row per item among the first four of x1; each row holds the four
# consecutive integers starting at that item.
x2 = np.array([range(first, first + 4) for first in x1[:4]])
print(x2)
print('size:', x2.size, ', shape:', x2.shape)
print('ndim:', x2.ndim, ', dtype:', x2.dtype)
# 2D indexing takes one index per axis: [row, column]
print('(0,0) value:', x2[0, 0])
print('(2,3) value:', x2[-2, -1])  # negative indices count from the end

#### Multidimensional Array Slicing

In [None]:
# A bare ':' selects every index along that axis.
print('First Row:\n',x2[0,:])
# Indices start at 0, so the second column is index 1 (index 2 is the third).
print('Second Column:\n',x2[:,1])
print('Every Other Row:\n',x2[::2,:])

#### Multidimensional Array Slicing

In [None]:
# A step of -1 reverses the order along the axis it is applied to:
# along the first axis (order of the rows) ...
print('Reversed Row:\n', x2[::-1, :])
# ... or along the second axis (order of the columns).
print('Reversed Column:\n', x2[:, ::-1])

#### Task
What does `x2[1]` correspond to?

In [None]:
# A single index applies to the first axis, so x2[1] and x2[1,:] select the same row.
print('Second Row:', x2[1], '=', x2[1, :])

#### Comparison Operators and Masking

In [None]:
x = np.arange(0, 11)
print(x)
# The comparison is evaluated item by item and yields a boolean array ...
print(np.greater(x, 5))
# ... which, used as an index, keeps only the items where it is True.
print(x[np.greater(x, 5)])

#### Task: Modifying Values – one by one or by fancy indexing
Generate a 3x3 zero matrix and turn it into a 3x3 identity matrix

In [None]:
# Three independent ways of turning a 3x3 zero matrix into the identity matrix.
# Each one starts again from zeros, so the methods do not act on each other's
# result. An integer dtype is used so that np.add.at performs a real addition.

# 1) one by one
array = np.zeros(shape=(3,3),dtype=int)
array[0,0] = 1
array[1,1] = 1
array[2,2] = 1

# 2) fancy indexing: a list of row indices paired with a list of column indices
array = np.zeros(shape=(3,3),dtype=int)
diagonal = np.arange(3)
array[diagonal,diagonal] = 1 # same as array[[0,1,2],[0,1,2]] = 1

# 3) np.add.at(array, indices, 1): the (rows, columns) pair must be a tuple;
#    a list would be read as a single 2D index array acting on the rows only
array = np.zeros(shape=(3,3),dtype=int)
np.add.at( array, (diagonal,diagonal), 1 ) # adds 1 at each (i,i)
print(array)

#### Concatenating and Splitting: `hstack`, `vstack`, `split`

In [None]:
x = np.array(range(7))
print( 'Array:\n', x )
# hstack joins the arrays one after the other; vstack stacks them as rows
print( 'Concatenate Horizontally:\n', np.hstack((x,x,x)) )
print( 'Concatenate Vertically:\n', np.vstack((x,x,x)) )
x = np.hstack((x,x,x))
# The list gives the indices at which to cut. Each copy has 7 items, so
# cutting at 7 and 14 recovers the three original pieces.
x1, x2, x3 = np.split(x,[7,14])
print( 'Split 1D array:' )
print( 'part 1:', x1 )
print( 'part 2:', x2 )
print( 'part 3:', x3 )

#### `np.append`: how to initialize an array in a for loop

In [None]:
# Start from an array with zero items ...
a = np.empty(shape=0)
print(a.size)
# ... and grow it: np.append returns a new array, so the result is re-assigned.
for i in range(3):
    a = np.append(arr=a, values=range(3))
print(a, a.size)

#### Splitting: `hsplit`, `vsplit`

In [None]:
# Stack four 1D arrays of length 4 as the rows of a 4x4 array.
x = np.vstack([
    np.zeros(4),
    np.ones(4),
    np.array(range(4)),
    np.array(range(4, 8)),
])
print('4x4 array:\n', x)
# Cut between columns 1 and 2: the first two columns go left, the rest right.
left, right = np.hsplit(x, [2])
print(
    'Split Horizontally: left, right = np.hsplit(x, [2]) \nleft:\n', left,
    '\nright:\n', right,
)

In [None]:
# 3x5 array of random numbers
array = np.random.rand(3, 5)
print(array)
# Asking for 5 equal sections cuts after every column.
one, two, three, four, five = np.hsplit(array, 5)
# Each piece has shape (3,1); its transpose is printed as a row.
print(one.T, two.T, three.T, four.T, five.T)

#### Task: 
Generate a 3x5 array of random numbers in the range [0,1) using `np.random.rand()` and split the 2D array into 5 column vectors of shape (3,1).

In [None]:
array2d = np.random.rand(3,5)
print(array2d)
# cut after every column: five pieces of shape (3,1)
a,b,c,d,e = np.hsplit(array2d,[1,2,3,4])
# loop over all five pieces (the last one, e, included)
for array1d in [a,b,c,d,e]:
    print(array1d.T) # we print the transpose, i.e. the row vectors

Looping over arrays to operate on each element is very slow<br>
For faster execution, vectorized operations are implemented as Numpy's universal functions (ufuncs), e.g.:

|        operator       | equivalent ufunc |
|:---------------------:|:----------------:|
|           +           |     `np.add`     |
|           -           |   `np.subtract`  |
|           *           |   `np.multiply`  |
|           /           |   `np.divide`  |
|           **          |    `np.power`    |
|           %           |     `np.mod`     |

* trigonometric functions: `np.cos`, `np.sin`, `np.tan`, `np.arcsin`, etc.
* exponential functions: `np.exp`, `np.expm1`
* logarithmic functions: `np.log`, `np.log10`, `np.log1p`
* link to [whole list](https://docs.scipy.org/doc/numpy-1.13.0/reference/ufuncs.html)

#### Task: what results from multiplying the `list` [1 2 3 4] by 3? Is it the same as for the corresponding `NumPy` array?

In [None]:
l = [1, 2, 3, 4]
a = np.array(l)
# list * int and array * int are printed one after the other for comparison
for container in (l, a):
    print(container * 3)

Multiplying a `list` $l$ by an integer $n$ corresponds to generating a new `list` by repeating $l$ $n$ times.<br>
`np.tile()` does the same with `NumPy` arrays.

In [None]:
# Repeat the whole array three times, one copy after the other.
print(np.tile(a, reps=3))

### Useful Numpy functions

|  function  |     description    |
|:----------:|:------------------:|
|   np.sum   | sum over all items |
|   np.prod  | multiply all items |
|  np.cumsum |   cumulative sum   |
| np.cumprod | cumulative product |
|   np.mean  |        mean        |
|   np.std   | standard deviation |
|   np.min   |    minimum value   |
|   np.max   |    maximum value   |
|np.gradient |     gradient       |
|  np.trapz  |     integral       |

In [None]:
# Every reduction exists both as a NumPy function and as an array method;
# each line prints the result of both spellings.
x1 = np.arange(11)
print('sum:', np.sum(x1), 'or', x1.sum())
print('mean:', np.mean(x1), 'or', x1.mean())
print('standard deviation:', np.std(x1), 'or', x1.std())
print('max:', np.max(x1), 'or', x1.max())
print('min:', np.min(x1), 'or', x1.min())

#### Task: Find the mean value of the cosine function between 0 and $\pi/3$

In [None]:
# Sample the cosine at 100000 evenly spaced points between 0 and pi/3 and
# average the samples.
print(np.mean(np.cos(np.linspace(0, np.pi / 3., num=100000))))

####  Task: show that $\sum_{n=1}^{10^7} \frac{1}{n^2} \approx \frac{\pi^2}{6}$ 

In [None]:
# n = 1 ... 10**7 as floats. The stop value of arange is exclusive, so it has
# to be 10**7 + 1 for the last term of the sum to be included.
x = np.arange(1, 10**7 + 1, dtype=float)
np.power(x, 2, out=x) # square in place: the result is written back into x
print( np.divide(1, x).sum(), np.pi**2/6)

In [None]:
# Terms n = 1 ... 10**7 (arange excludes its stop value, hence the + 1).
x = np.arange(1, 10**7 + 1, dtype=float)
# out=x stores n**2 in the array that already holds n
np.power(x, 2, out=x)
# sum of 1/n**2 next to the limit pi**2/6
print( np.divide(1, x).sum(), np.pi**2/6)

When dealing with large arrays, it is convenient to use the `out` argument to specify the array in which the result of the calculation is stored, rather than creating a temporary array and then re-assigning it to `x`, as in the following cell:

In [None]:
# Without out=, np.power allocates a new array for the squares; the name x is
# then re-bound to that new array.
x = np.power(np.arange(1, 1e7, 1.), 2)

#### Task: What happens when we add a 1D array to a 2D array? Find the shapes of x1+x2_row and x1+x2_col:
`x1 = np.linspace(0,4,5)` # 5 items (row vector)<br>
`x2_row = x1[np.newaxis,:]` # 1 row x 5 columns (row vector)<br>
`x2_col = x1[:,np.newaxis]` # 5 rows x 1 column (column vector)

In [None]:
x1 = np.linspace(0, 4, 5)     # shape (5,)
x2_row = x1[np.newaxis, :]    # shape (1,5)
x2_col = x1[:, np.newaxis]    # shape (5,1)
# Print the shape of each sum.
print('adding two row vectors - shape:', np.add(x1, x2_row).shape)
print('adding a column vector to a row vector - shape:', np.add(x1, x2_col).shape)

### Broadcasting: 

In [None]:
# Print the full result of adding the (5,) array to the (5,1) array.
print(
    'we added together arrays of shape (5,) and (5,1) and obtained:\n',
    np.add(x1, x2_col),
)

In [None]:
x1 = np.linspace(0, 4, num=5)
print('x1 row vector:\n', x1)
# 5 rows x 1 column (column vector)
x2_col = x1[:, np.newaxis]
print('x2_col column vector:\n', x2_col)
# Five copies of x1 stacked as rows, and five copies of x2_col side by side,
# both give a 5x5 array.
print('x1 stretched vertically:\n', np.vstack([x1] * 5))
print('x2_col stretched horizontally:\n', np.hstack([x2_col] * 5))

#### Task
Compute the [outer product](https://en.wikipedia.org/wiki/Outer_product) between the row vector [1 2 3 4] and the column vector [7 8 9 10]

In [None]:
row = np.arange(1,5,1) # 1D array [1 2 3 4]
column = np.arange(7,11,1) # 1D array [7 8 9 10]
print(row.shape, column.shape, column[:,np.newaxis].shape)
# broadcasting (4,) with (4,1) gives a 4x4 array whose (i,j) entry is column[i]*row[j]
row * column[:,np.newaxis] # or np.outer(column,row)

#### Simple Plot with `Matplotlib`
`Matplotlib` is a library for data visualization based on `NumPy` arrays. In the beginning of this notebook, we imported the `plt` interface which has a syntax similar to MATLAB.<br>
The `plt` interface deals with two objects: `Figure` (the whole image or canvas) and `Axes` (the graph or subplot). The current figure and axes can be accessed using `plt.gcf()` and `plt.gca()`, respectively.<br> 
In a notebook, graphics can be embedded as interactive or static plots using the `%matplotlib` magic command:
```python
%matplotlib notebook # interactive mode
%matplotlib inline   # static mode
```
Here we visualize two 1D arrays in static mode using `plt.plot(x-array, y-array)`. 

In [None]:
%matplotlib inline
x = np.arange(0,4*np.pi,.1)
plt.plot(x,np.cos(x),color='r',linewidth=3,label='cosine')
plt.plot(x,np.sin(x),color='b',linewidth=3,label='sine')
plt.xlabel('$x$ / rad')
plt.ylabel('$f(x)$')
plt.legend(frameon=False) # shows the legend
plt.show() # interact with the operating system and graphics backend

#### Visualize 2D arrays as images with `Matplotlib`'s `imshow()`  
* we generate an empty $z$-array of shape (2001,2001)
* we generate the $x$-vector with values from -1000 to 1000 with spacing 1 (1D `NumPy` array)
* the $y$-vector is the column vector of x
* we assign $x^2 + y^2$ to the z array (broadcasting)
* all items with values $\leq 1000^2$ are set to 1 (masking)
* all other values are set to 0 (masking)
* we plot the resulting 2D array using `imshow()`

#### Visualize 2D arrays as images with `Matplotlib`'s `imshow()`  

In [None]:
radius = 1000
side = radius*2 + 1                 # number of grid points per axis (2001)
z = np.empty((side, side))          # uninitialized 2001x2001 array
x = np.linspace(-radius, radius, side, dtype=int)  # -1000 ... 1000 with spacing 1
y = x[:, np.newaxis]                # y is the column vector of x
# Broadcasting a row against a column fills the whole grid with x**2 + y**2,
# written directly into z.
np.add(x**2, y**2, out=z)
# Masking: points with x**2 + y**2 <= radius**2 become 1, all others 0.
inside = z <= radius**2
z[inside] = 1
z[~inside] = 0
plt.imshow(z, extent=[-radius, radius, -radius, radius])  # show the array as an image
plt.colorbar(label='z')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

#### Colors can be easily changed by specifying the [`cmap` parameter](https://matplotlib.org/users/colormaps.html)

In [None]:
# Same image drawn with the reversed binary colormap.
plt.imshow(z, extent=[-radius, radius, -radius, radius], cmap=plt.cm.binary_r)
plt.colorbar(label='z')
plt.xlabel('x')
plt.ylabel('y')
# A circle can also be drawn directly: an unfilled, dashed red circle of the
# same radius centred on the origin ...
circle = plt.Circle((0, 0), radius, color='r', lw=4, ls='--', fill=False)
# ... added to the current axes object.
ax = plt.gca()
ax.add_artist(circle)
plt.show()

### Task: compute and plot the function $z=xy\exp[-(x^2+y^2)]$ on a square grid 100x100 with side lengths ranging from -2 to 2

In [None]:
%matplotlib inline
x = np.linspace(-2, 2, 100)
z = np.add( x**2, x[:,np.newaxis]**2) 
np.exp( -z, out=z )
np.multiply( x*x[:,np.newaxis], z, out=z )
plt.imshow(z, extent=[-2,2,2,-2])
plt.colorbar(label='z'); plt.xlabel('x'); plt.ylabel('y'); plt.show()

`plt.imshow()` interprets the 2D array as an image. Therefore, by default the origin is in the *upper* left corner.<br>
When we plot a 2D array, we have to specify that we want the origin in the *lower* left corner (`origin='lower'`).

In [None]:
#%matplotlib inline
plt.imshow(z, extent=[-2,2,-2,2], origin='lower')
plt.colorbar(label='z'); plt.xlabel('x'); plt.ylabel('y'); plt.show()

Alternative: build 2D array from 1D array using `np.meshgrid()`.

In [None]:
def f(x,y):
    """Return x*y*exp(-(x**2 + y**2)) for the arguments x and y."""
    r_squared = x**2 + y**2
    return x*y*np.exp( -r_squared )
# The same 100 points from -2 to 2 along each axis
x = np.linspace(-2, 2, num=100)
y = np.linspace(-2, 2, num=100)
# meshgrid expands the two vectors into 2D coordinate arrays
X, Y = np.meshgrid(x, y)
Z = f(X, Y)  # evaluate f on the whole grid at once
plt.imshow(Z, origin='lower', extent=[-2, 2, -2, 2])
plt.colorbar(label='z')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

Alternative: visualize data with a filled contour plot.

In [None]:
# filled contour plot of Z over the X, Y grid using the reversed Blues colormap
plt.contourf(X,Y,Z,100,cmap=plt.cm.Blues_r) # 100 is passed as the number of contour levels
plt.colorbar(label='z'); plt.xlabel('x'); plt.ylabel('y'); plt.show()

#### Loading/Saving Numpy arrays from text files

In [None]:
# Preview the data file with shell commands: the first three lines, then lines 4730 to 4733
!head -n 3 data/sthlm_2d.csv # maximum temperatures in Stockholm from 1859 to 2016 – http://bolin.su.se/
!sed -n 4730,4733p data/sthlm_2d.csv

In [None]:
# Load the text file, skipping the one-line header.
sthlm_2d = np.loadtxt('data/sthlm_2d.csv', skiprows=1)
# Keep only the rows whose second column is not NaN.
has_value = ~np.isnan(sthlm_2d[:, 1])
sthlm_2d = sthlm_2d[has_value]
# first column -> year, second column -> tmax
year, tmax = sthlm_2d[:, 0], sthlm_2d[:, 1]

#### Task: show how NaNs are removed by `sthlm_2d[ ~np.isnan( sthlm_2d[:,1] ) ]`
Generate an array `x=np.arange(0,6,1.)`, assign `np.nan` to an item, and print `np.isnan(x)` as well as `~np.isnan(x)`

In [None]:
x = np.arange(0, 6, 1.)
x[2] = np.nan  # replace one item by NaN
# np.isnan flags the NaN item with True; ~ inverts the boolean array
is_nan = np.isnan(x)
print(is_nan, ~is_nan)

#### Histogram using `np.histogram()` and `plt.plot()`

In [None]:
# bin edges spaced by 1 from the minimum of tmax up to at least its maximum
tmax_bins = np.arange(tmax.min(), tmax.max()+1, 1)
for y_b in range(1876,2016,40): # start years spaced by 40 years
    y_e = y_b + 20 # each histogram covers a time span of 20 years
    i = (year>=y_b) & (year<y_e) # boolean array to be used as a mask
    # plain counts per bin; the former `normed` keyword no longer exists in
    # NumPy, and counts are what np.histogram returns by default
    histo, xedges = np.histogram(tmax[i], bins=tmax_bins)
    # xedges contains the bin edges including the rightmost one =>
    # we drop it and shift by half a bin width to obtain the bin centres
    X = xedges[:-1]+(xedges[1]-xedges[0])/2.
    plt.plot( X, histo, lw=2, label=str(y_b)+'–'+str(y_e))
plt.legend(loc='upper left',frameon=False)
plt.xlabel(r'T$_{max}$ ($^\circ$C)'); plt.ylabel('Counts'); plt.show()

#### Histogram using `plt.hist()`

In [None]:
# Same 20-year windows as above, drawn directly with plt.hist.
for y_b in range(1876,2016,40):
    y_e = y_b + 20
    i = (year>=y_b) & (year<y_e) # boolean mask selecting the window
    # density=True normalizes each histogram; it replaces the `normed`
    # keyword, which no longer exists in Matplotlib
    plt.hist(tmax[i], tmax_bins, lw=2, histtype='step',label=str(y_b)+'–'+str(y_e),density=True)
plt.legend(loc='upper left',frameon=False)
# the histograms are normalized, so the y axis shows a density, not counts
plt.xlabel(r'T$_{max}$ ($^\circ$C)'); plt.ylabel('Probability density')
plt.show()

#### 2D Histogram using `np.histogram2d()` and `plt.imshow()`

In [None]:
year_bins = np.arange(1876,2017,10) # we bin the years in intervals of 10
# histo2d has one row per tmax bin and one column per year bin
histo2d, xedges, yedges = np.histogram2d(tmax,year,bins=[tmax_bins,year_bins])
# bin centres along each axis
X = xedges[:-1]+(xedges[1]-xedges[0])/2.
Y = yedges[:-1]+(yedges[1]-yedges[0])/2.
# extent gives the outer borders of the image, so it is built from the
# outermost bin edges (the bin centres would shift the image by half a bin);
# columns (years) run along the horizontal axis, rows (tmax) along the vertical
plt.imshow(histo2d,extent=[yedges[0],yedges[-1],xedges[0],xedges[-1]],
           cmap=plt.cm.coolwarm,origin='lower',aspect=1.5)
plt.colorbar(label='Counts',shrink=.82); plt.ylabel(r'T$_{max}$ ($^\circ$C)'); plt.xlabel('Years'); plt.show()

#### Task: plot the same 2D Histogram using `plt.hist2d()`

In [None]:
# plt.hist2d bins and draws in one call: years on the x axis, tmax on the y axis
plt.hist2d(year,tmax,bins=[year_bins,tmax_bins],cmap=plt.cm.coolwarm)
plt.colorbar(label='Counts') 
# raw string, so that the backslash of \circ is not read as a Python escape
plt.ylabel(r'T$_{max}$ ($^\circ$C)') 
plt.xlabel('Years')
plt.show()