# Numpy and Data Visualization: A Short Tutorial
-  Numpy is an add-on package for scientific computation.
-  The basic object in Numpy is *ndarray*, an array of objects of the same type.
-  Operations on ndarrays are very efficient.

In [None]:
# import the numpy module
import numpy as np

## One-dimensional Arrays

### Constructors
-  ```np.linspace``` builds an equally spaced array of floats.
-  ```np.arange``` returns an array of evenly spaced values over a half-open interval (integers when the arguments are integers).
-  ```np.array``` converts a container object into an array.
-  ```np.zeros``` and ```np.ones``` create an array filled with zeros and ones, respectively; ```np.empty``` allocates an array without initializing it, so its contents are arbitrary.
-  Look-alike constructors ```np.zeros_like```, ```np.ones_like``` and ```np.empty_like``` set up arrays of the same shape and type as a given array.

In [None]:
# np.linspace: 11 equally spaced samples on [0, 1]; the stop value 1 is included
x = np.linspace(start=0, stop=1, num=11, endpoint=True)
x

In [None]:
# np.linspace: 11 equally spaced samples on [0, 1); the stop value 1 is excluded
x = np.linspace(start=0, stop=1, num=11, endpoint=False)
x

In [None]:
# np.arange: values from 0 up to (but not including) 10 in steps of 2, stored as int32
y = np.arange(0, 10, step=2, dtype=np.int32)
y

In [None]:
# np.array converts a container object (here a Python list) to an ndarray
z = np.array([1, 2, 3])
z

In [None]:
# np.zeros: a 1-D array of five zeros (the default dtype is float64)
a1 = np.zeros(5)
a1

In [None]:
# np.ones: a 1-D array of ten ones (the default dtype is float64)
a2 = np.ones(10)
a2

In [None]:
# np.zeros_like: zeros with the same shape and dtype as the template array a3
a3 = np.arange(1, 6)
a4 = np.zeros_like(a3)
a4

In [None]:
# np.ones_like: ones with the same shape and dtype as the template array a5
a5 = np.arange(1, 10, 2)
a6 = np.ones_like(a5)
a6

### Arithmetical Operations
-  Arithmetic operations between arrays of the same size are performed component-wise.
-  Arithmetic operations between an array and a scalar are performed component-wise.

In [None]:
# a and b have the same length, so a + b adds them element by element
a, b = np.arange(5), np.arange(2, 7)
print(a, b, sep="\n")
a + b

In [None]:
# broadcasting: the scalar 2 multiplies every element of c
c = np.arange(5)
print(c)
2 * c

In [None]:
# a slice is a view of the original array: writing through arr2 also changes arr1
arr1 = np.arange(5)
arr2 = arr1[2:]
arr2[0] = 10
print(arr2, arr1, sep="\n")

### Universal Functions
-  A *ufunc* is a universal function which when applied to an array produces an array of the same size, by operating component-wise.
-  *Vectorization* is to implement operations through ufuncs instead of loops.
-  Vectorization makes repeated calculations on array elements much more efficient.

In [None]:
# np.exp is a ufunc: it computes e**a[i] for every element of a in one call
a = np.arange(5)
b = np.exp(a)
b

### Logical Operations on Arrays

In [None]:
# a Boolean array: y[i] is True exactly where x[i] is negative
x = np.linspace(-2, 2, 9)
y = x < 0
y

In [None]:
# masking with a precomputed Boolean array (y is the mask x < 0 from the previous cell)
# work on a copy so that x itself is left unchanged
z = x.copy()
# flip the sign of the entries selected by the mask
z[y] = -z[y]
z

In [None]:
# masking with the condition written inline: negate the negative entries of z
z = x.copy()
z[z < 0] = -z[z < 0]
z

## Two-dimensional Arrays
-  Three attributes: ```ndim```, ```shape```, and ```dtype```.

In [None]:
# number of axes, shape, length of the first axis and element type of a 2-D array
v = np.array([[0, 1, 2], [3, 4, 5]])
print(v.ndim, v.shape, v.shape[0], v.dtype, sep="\n")

### Indexing
-  Fancy indexing: passing an array of indices to access multiple array elements at once. 

In [None]:
# basic indexing: a single element, a whole row, a whole column
v = np.array([[0, 1, 2], [3, 4, 5]])
print(v[1, 2], v[0], v[:, 1], sep="\n")

In [None]:
# fancy indexing - one-dimensional
# the result takes the shape of the index array (2 x 2 here), not of x
x = np.arange(1, 7)
ind = np.array([[3, 2], [1, 4]])
x[ind]

In [None]:
# fancy indexing - two-dimensional
# row[k] and col[k] are paired, so the result holds one element per pair
row = np.array([0, 1, 1])
col = np.array([2, 1, 2])
v[row, col]   # v[0,2], v[1,1], v[1,2]

### Slicing
-  Slicing an ndarray creates a view, not a copy. 
-  Calling ```.copy()``` on a slice creates an independent copy of its data.

In [None]:
# y is a view on x[1:3], so an assignment through y is visible in x
x = np.arange(4)
y = x[1:3]
print(y)
y[0] = -1
print(x[1:3])

In [None]:
# y is an independent copy of x[1:3], so assigning to y leaves x untouched
x = np.arange(4)
print(x[1:3])
y = np.copy(x[1:3])
y[0] = -1
print(x[1:3])

### Constructors
-  For the three functions ```np.zeros```, ```np.ones``` and ```np.empty```, the first argument is replaced with a tuple to define the shape of the array.
-  The look-alike constructors (```np.zeros_like```, ```np.ones_like```, ```np.empty_like```) also work with two-dimensional arrays.
-  ```np.reshape``` recasts the array into another shape.

In [None]:
# np.zeros with a shape tuple: a 3 x 4 array of zeros
x = np.zeros((3, 4))
x

In [None]:
# np.zeros_like: zeros with the same 2 x 3 shape and dtype as q
q = np.arange(6).reshape(2, 3)
q0 = np.zeros_like(q)
q0

In [None]:
# np.reshape: present the 12 elements of y as a 4 x 3 array
y = np.arange(12)
z = np.reshape(y, (4, 3))
print(y, z, sep="\n")

### Broadcasting
-  Broadcasting can be applied when the shapes of array x and array y differ. 
-  Rule 1: If the arrays do not have the same number of dimensions, then a "1" will be repeatedly prepended to the shapes of the smaller arrays until all arrays have the same number of axes.
-  Rule 2: The arrays with a size of 1 along a particular dimension or axis act as if they had the size of the array with the largest size along that dimension.

In [None]:
# operands for the broadcasting examples: a has shape (3, 4), b has shape (4,)
a = np.arange(12).reshape((3, 4))
b = np.array([10, 11, 12, 13])

In [None]:
# b has shape (4,): it is treated as (1, 4) and stretched along axis = 0,
# so b is added to every row of a
a + b

In [None]:
c = np.array([20, 30, 40])
# could not be broadcast: the trailing dimensions of (3, 4) and (3,) are
# 4 and 3, which differ and neither is 1
try:
    a + c
except ValueError as err:
    # show NumPy's own message rather than a hand-copied version of it
    print("ValueError:", err)

In [None]:
# c[:, np.newaxis] has shape (3, 1): it is stretched along axis = 1,
# so c[i] is added to every entry of row i of a
a + c[:, np.newaxis]

In [None]:
# d is a column of shape (3, 1)
d = np.array([[5], [6], [7]])
# d is expanded along axis = 1, so d[i] is added to every entry of row i of a
a + d

In [None]:
# broadcasting two arrays: a (3, 1) column plus a (1, 4) row gives a (3, 4) table
e = np.arange(3)[:, np.newaxis]
f = np.arange(4)[np.newaxis, :]
e + f

### Aggregation Ufuncs

In [None]:
# np.max and np.min
# printed in order: the array, its overall maximum, the column maxima (axis=0)
# and the row maxima (axis=1)
x = np.arange(6).reshape(3, 2)
print(x, np.max(x), np.max(x, axis=0), np.max(x, axis=1), sep="\n")

In [None]:
# np.argmax and np.argmin
# with axis = 0 the result holds, for each column, the row index of its maximum
np.argmax(x, axis = 0)   # positions of the column maxima

In [None]:
# np.sum and np.prod
# axis = 0 reduces down each column, axis = 1 reduces across each row
print(np.sum(x, axis = 0))   # column sums
print(np.prod(x, axis = 1))   # row products

In [None]:
# np.average and np.var
print(np.average(x, axis = 0))   # column averages
print(np.var(x, axis = 1))   # row variances

### Boolean Arrays

In [None]:
# masking - selecting with a Boolean array flattens the result to a 1-D array
x = np.arange(12).reshape(3, 4)
y = x[x < 6]
y

In [None]:
# counting entries: each True counts as 1 when a Boolean array is summed
print(np.sum(x < 6))   # number of values less than 6
print(np.sum(x < 6, axis = 0))   # number of values less than 6 in each column

## Numpy Text Input and Output

In [None]:
# output: tabulate cos, sin and tan on [0, 2*pi] and write them to text files
num_points = 21   # deliberately not called `len`, which would shadow the built-in len()
x = np.linspace(0, 2 * np.pi, num_points)
c = np.cos(x)
s = np.sin(x)
t = np.tan(x)
# collect the four series as the rows of a 4 x num_points table
arr = np.empty((4, num_points), dtype=float)
arr[0, :] = x
arr[1, :] = c
arr[2, :] = s
arr[3, :] = t
np.savetxt('x.txt', x, delimiter=' ', fmt='%6.3f')   # a 1-d array
np.savetxt('xcst.txt', (x, c, s, t), fmt='%6.3f')   # a tuple
np.savetxt('arr.csv', arr, delimiter=',', fmt='%6.3f')   # a 2-d array

In [None]:
# input: read back the three files written by the output cell
xv = np.loadtxt('x.txt')
print(xv.shape)
# the loaded 2-D array is unpacked row by row; note that xv is rebound here
xv, cv, sv, tv = np.loadtxt('xcst.txt')
print(xv.shape)
# the csv file needs the same delimiter that was used when writing it
arrv = np.loadtxt('arr.csv', delimiter = ',')
print(arrv.shape)

## Linear Algebra
-  transpose
-  ```np.identity```
-  ```np.dot```
-  ```np.vstack```

In [None]:
# np.identity: the 3 x 3 identity matrix
I = np.identity(3)
I

In [None]:
# matrix multiplication: a length-3 vector times a 3 x 4 matrix gives a length-4 vector
w = np.array([1, 2, 3])
x = np.arange(12).reshape(3, 4)
y = np.dot(w, x)
y

In [None]:
# transpose: w is a 3 x 1 column, so w.T is 1 x 3 and w.T times x is a 1 x 4 matrix
w = np.array([1, 2, 3]).reshape(3, 1)
x = np.arange(12).reshape((3, 4))
y = np.dot(w.T, x)
y

## Random Numbers

In [None]:
# np.random.rand: a 3 x 4 array of uniform samples from [0, 1)
# fixing the seed makes the generated numbers reproducible
np.random.seed(12345)
rx = np.random.rand(3, 4)
rx

In [None]:
# np.random.randn: standard normal samples, scaled and shifted below
ry = 2.5 * np.random.randn(3, 4) + 6   # mean of 6 and stdev of 2.5
ry

## Vectorization

In [None]:
def compute_reciprocal(values):
    """Return the element-wise reciprocal of a 1-D array using an explicit loop.

    The Python-level loop is deliberate: this function is the slow baseline
    that is timed against the vectorized expression ``1.0 / values``.

    Parameters
    ----------
    values : numpy.ndarray
        One-dimensional array of numbers.

    Returns
    -------
    numpy.ndarray
        Array of the same shape holding ``1.0 / values[i]``. Floating-point
        input keeps its dtype; integer or Boolean input yields float64.
    """
    if values.dtype.kind in "biu":
        # An integer output buffer would truncate every reciprocal on assignment.
        output = np.empty_like(values, dtype=np.float64)
    else:
        output = np.empty_like(values)
    for i in range(values.size):
        output[i] = 1.0 / values[i]
    return output

values = np.random.randn(1000000)
%timeit -n 10 compute_reciprocal(values)
%timeit -n 10 1.0 / values 

## Data Visualization
-  Matplotlib is a multiplatform data visualization library built on Numpy arrays.
-  In Jupyter notebooks, put all the plotting commands for a figure in a single notebook cell.
-  Steps to plot:
   *  Create a new figure with ```fig = plt.figure()```.
   *  Add one or more subplots using ```ax = fig.add_subplot()```.
   *  Call ```ax.plot()``` to draw the curve in each subplot. 
   *  Format the appearance of the axes and lines.

In [None]:
# initialization
import matplotlib.pyplot as plt
%matplotlib inline

### Figures and Subplots

In [None]:
# a simple line plot of sin(x) on [0, 10]
x = np.linspace(0, 10, num=100)
fig = plt.figure(figsize=(10, 5))   # width and height in inches
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, np.sin(x))

In [None]:
# multiple plots: three of the four slots of a 2 x 2 grid are filled
fig = plt.figure(figsize=(5, 5))
ax1, ax2, ax3 = (fig.add_subplot(2, 2, position) for position in (1, 2, 3))
x = np.linspace(0, 10, num=100)
ax1.plot(x, np.sin(x))
ax2.plot(x, np.cos(x))
ax3.plot(x, np.exp(x))

### Colors, Markers and Line Styles
-  Colors: 'bgrcmykw'
-  Linestyles: '-', '--', '-.', ':' 
-  Markers:  '.o*+xv^<>sph' 

In [None]:
# a line plot with markers, drawn from seeded (reproducible) normal noise
np.random.seed(12345)
x = np.linspace(0, 10, num=100)
y = np.random.randn(100)
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, y, 'go--')   # format string: g = green, o = circular markers, -- = dashed line

In [None]:
# a scatter plot: noisy samples around the line y = 2x, markers only
np.random.seed(12345)
x = np.linspace(0, 5, num=10)
y = 2 * x + np.random.randn(10)
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, y, 'ro')   # format string: r = red, o = circular markers, no connecting line

In [None]:
# a histogram of 1000 standard normal samples
data = np.random.randn(1000)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.hist(
    data,
    bins=10,
    density=True,   # normalize the bars so that the total area is 1
    histtype='stepfilled',
    color='steelblue',
    edgecolor='none',
)

### Axes, Title, Labels, and Legends
-  Axis limits: ```ax.set_xlim()```, ```ax.set_ylim()```, or ```ax.axis()```
-  Title: ```ax.set_title()```
-  Axis labels: ```ax.set_xlabel()```, ```ax.set_ylabel()```
-  Legends: ```ax.legend()```

In [None]:
# a figure with multiple line plots, a title, axis labels and a legend
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
x = np.linspace(0, 10, 100)
# the label of each curve is the text shown for it in the legend
ax.plot(x, np.sin(x), label='sin(x)')
ax.plot(x, np.cos(x), label='cos(x)')
ax.set_title('Functions of sin(x) and cos(x)')
ax.set_xlabel('x value')
ax.set_ylabel('function')
ax.set_xlim([0, 10])
ax.set_ylim([-1, 1])
ax.legend()