In [None]:
# to bring the print function from Python 3 into Python 2.6+
from __future__ import print_function

### Demonstration of Python Loop

In [None]:
# Start from the plain Python list [0, 1, 2, 3, 4].
a = list(range(5))
print(a)
# Add 5 to every entry in place, one element per loop iteration.
for idx, value in enumerate(a):
    a[idx] = value + 5
print(a)

### Python Tuples

In [None]:
# Tuples are written as comma-separated values; the parentheses are optional.
tup1 = ('physics', 'chemistry', 1997, 2000)
tup2 = (1, 2, 3, 4, 5)
tup3 = "a", "b", "c", "d"
# An empty tuple is a pair of parentheses with nothing inside.
tup1 = ()
# A one-element tuple needs a trailing comma after the value.
tup1 = (50,)
# Values are read by index or by slice.
print("tup1[0]: ", tup1[0])
print("tup2[1:5]: ", tup2[1:5])
# Tuples cannot be changed in place; concatenation builds a new tuple.
tup3 = tup1 + tup2
print(tup3)
# `del` removes the name tup3 altogether.
print("Deleting tup3 : ")
del tup3

# print(tup3)  # this would raise an error because tup3 no longer exists

# Numpy Overview
- NumPy (Numeric Python) is the fundamental package for scientific computing in Python.
- It is a Python library that provides a multidimensional array object and various derived objects (such as masked arrays and matrices).
- It also provides an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier transforms, basic linear algebra, basic statistical operations, random simulation and much more.
- In short, the NumPy package provides basic routines for manipulating large arrays and matrices of numeric data.

# Simple array math using np.array.
* Note that NumPy array indices start at 0 and end at N-1 (C-style).

In [None]:
import numpy as np
# Arithmetic operators applied to two arrays of the same shape work
# element by element.
a = np.array([1,2,3])
b = np.array([4,5,6])
print(a+b) # element-wise sum: [5 7 9]
print(a*b) # element-wise product (not a dot product): [ 4 10 18]
print(a ** b) # element-wise power: [  1  32 729]

* Setting Array Element Values

In [None]:
# `a` is the integer array created in the previous cell.
# A bare `a[0]` that is not the last line of a cell displays nothing,
# so the element is printed explicitly.
print(a[0])
a[0] = 11
print(a)
a.fill(0)  # set all values in the array to 0, in place
a[:] = 1  # a[:] writes into the existing array; `a = 1` would rebind the name to a plain int
print(a)
print(a.dtype)  # note that a still has its original integer dtype
a[0] = 10.6  # the array is integer-typed, so the decimal part is dropped - be careful!
print(a)
a.fill(-3.7)  # fill() casts the value in the same way
print(a)

* #### Numpy Array Properties

In [None]:
a = np.array([0,1,2,3]) # create a from a list (replaced by the next assignment)
# create evenly spaced values within [start, stop)
a = np.arange(1,5)
print(a)
print(type(a))
# data type shared by every element of the array
print(a.dtype)
# Length of one array element in bytes
print(a.itemsize)
# shape returns a tuple listing the length of the array
# along each dimension.
print(a.shape) # or np.shape(a)
print(a.size) # or np.size(a), return the total number of elements
# return the number of bytes used by the data portion of the array
# (number of elements times the item size)
print(a.nbytes)
# return the number of dimensions of the array
print(a.ndim)

* #### Numpy Array Creation Functions

In [None]:
# arange(n): the integers 0 .. n-1
a = np.arange(4)
print(a)
# arange(start, stop, step) also accepts floats; values lie in [start, stop)
a = np.arange(0, 2*np.pi, np.pi/4)
print(a)
# NOTE: with a non-integer step the number of elements is subject to
# floating-point rounding, so the last value may land on stop itself.
# np.linspace is the safer choice when the number of points matters.
a = np.arange(1.5,2.1,0.3)
print(a)
# ones(shape): array of the given shape filled with 1 (floating point by default)
a = np.ones((2,3))
print(a)
print(a.dtype)
# zeros(n): 1-D array of n zeros (floating point by default)
a = np.zeros(3)
print(a)
print(a.dtype)

In [None]:
# identity(n): n x n array with ones on the main diagonal, zeros elsewhere
a = np.identity(4)
print(a)
# eye(n) gives the same square matrix here
a = np.eye(4)
print(a)
print(a.dtype)
# the element type can be chosen with dtype
a = np.eye(4,dtype=int)
print(a)
print(a.dtype)
# empty(n) allocates the array without initializing it, so the printed
# values are arbitrary
a = np.empty(2)
print(a)
# two ways to overwrite every element in place
a.fill(5.0)
print(a)
a[:] = 4.0
print(a)

In [None]:
# linspace(start, stop, num): num evenly spaced values, stop included
a = np.linspace(0,1,5)
print(a)
# logspace(start, stop, num): num values evenly spaced on a log scale,
# from 10**start to 10**stop
a = np.logspace(0,1,5)
print(a)

* #### Array from/to ASCII files
Using loadtxt

`data.txt`<br>
  `Index`<br>
`Brain Weight`<br>
`Body Weight`<br>
`#here is the training set`<br>
`1 3.385 44.500 abjhk`<br>
`2 0.480 33.38 bc_00asdk`<br>
`...`<br>
`#here is the cross validation set`<br>
`6 27.660 115.000 rk`<br>
`7 14.830 98.200 fff`<br>
`...`<br>
`9 4.190 58.000 kij`<br>

In [None]:
# Read columns 0, 1 and 2 of data.txt as floats. The first 16 lines are
# skipped and lines starting with "#" are treated as comments.
# NOTE(review): skiprows=16 has to match the header length of the real
# data.txt - confirm against the file.
# usecols is passed as a tuple: a set has no defined order and is not a
# documented input type for this parameter.
a = np.loadtxt('data.txt', skiprows=16, usecols=(0, 1, 2), dtype=float, comments="#")
print(a)

Using genfromtxt

In [None]:
# np.genfromtxt can guess the actual type of your columns by using dtype=None
# skip_header=16 drops the first 16 lines of the file before parsing.
# NOTE(review): how text columns are decoded with dtype=None differs between
# NumPy versions; an explicit `encoding` argument may be needed - confirm
# against the installed version.
a = np.genfromtxt('data.txt',skip_header=16,dtype=None)
print(a)

* #### Reshaping Arrays

In [None]:
a = np.arange(6)
print(a)
print(a.shape)
# reshape array to 2x3; reshape() is used because NumPy's documentation
# discourages assigning to a.shape directly
a = a.reshape(2, 3)
print(a)
a = a.reshape(3, 2)  # reshape array to 3x2
print(a)
# reshape cannot change the number of elements in the array:
# a.reshape(2,5)
# ValueError: cannot reshape array of size 6 into shape (2,5)
a = a.reshape(2, -1)  # -1 lets numpy determine that dimension from the size
print(a)

* #### Flattening Multi-dimensional Arrays

In [None]:
# Note the difference between
# a.flatten() and a.flat
# (a is the 2-D array left by the previous cell)
print(a)
# a.flatten() converts a multidimensional array into
# a 1-D array. The new array is a copy of the original data.
b = a.flatten()
print(b)
b[0] = 7
print(b)
print(a) # a is unchanged because b is a copy
# a.flat is an attribute that returns an iterator object that accesses the data in the multi-
# dimensional array data as a 1-D array. It references the original memory.
print(a.flat) # prints the iterator object itself, not the data
print(a.flat[:]) # indexing the iterator with [:] gives all elements as a 1-D array
b = a.flat
b[0] = 7 # writes through to the first element of a
print(a)

* #### (Un)raveling Multi-dimensional Arrays

In [None]:
# a is the 2-D array left by the previous cell
print(a)
# ravel() is the same as flatten but returns a reference of the array if possible
b = a.ravel()
print(b)
b[0] = 13
print(b)
print(a) # expected to show 13 as well when b is a reference to a's data
at = a.transpose()
print(at)
# for the transposed array ravel() is expected to fall back to a copy,
# because the elements are no longer laid out in the requested order
b = at.ravel()
print(b)
b[0]=19
print(b)
print(a) # a is expected to be unchanged if b is a copy

# Basic Usage of Matplotlib

- Matplotlib is probably the single most used Python package for 2D graphics. (http://matplotlib.org/)
- It provides both a very quick way to visualize data from Python and publication-quality figures in many formats.
- Provides Matlab/Mathematica-like functionality.

<img src="figure/logo_matplotlib.png" style="float: left;" width="250">

In [None]:
# Plot the sine and cosine arrays using the default settings
import numpy as np
import matplotlib.pyplot as plt
# 50 evenly spaced sample points on [-pi, pi], both ends included
X = np.linspace(-np.pi, np.pi, 50,endpoint=True)
C,S = np.cos(X), np.sin(X)
# A single plot() call accepts several x, y pairs; the two commented
# calls below would draw the same two curves one at a time.
# plt.plot(X,C)
# plt.plot(X,S)
plt.plot(X,C,X,S)
plt.show()

In [None]:
# plot multiple groups with different line styles
# (X, C and S come from the previous cell)
# format strings: 'bo' = blue circles without a line, 'r-^' = red line with
# triangle markers, 'g-s' = green line with square markers
plt.plot(X,C,'bo',X,S,'r-^',X,np.sin(2*X),'g-s')
plt.show()

In [None]:
# Scatter plot of the sine samples (X and S come from an earlier cell)
plt.scatter(X,S)
plt.show()

In [None]:
# 200 random points; no seed is set, so the picture differs on every run
x = np.random.rand(200)
y = np.random.rand(200)
size = np.random.rand(200)*30
color = np.random.rand(200)
# the third and fourth positional arguments of scatter() are the marker
# sizes and the marker color values
plt.scatter(x, y, size, color)
plt.colorbar() # show the scale used to map the color values
plt.show()

In [None]:
# Multiple Figures
X = np.linspace(-np.pi, np.pi, 50,endpoint=True)
C,S = np.cos(X), np.sin(X)
# create a figure
plt.figure()
# with a single argument, plot() uses the sample index as the x value
plt.plot(S)
# create a new figure
plt.figure()
plt.plot(C)
plt.show()

In [None]:
# Multiple Plots Using subplot
# divide the plotting area in 2 rows and 1 column(s)
# subplot(rows, columns, active_plot)
# (S and C are the sine/cosine arrays from the previous cell)
plt.subplot(2, 1, 1)
plt.plot(S, 'r-^')
# switch to the second subplot of the same figure
plt.subplot(2, 1, 2)
plt.plot(C, 'b-o')
plt.show()

In [None]:
# Adding Legend to Plot
# Legend labels with plot
# Add labels in plot command
# (S and C come from an earlier cell)
plt.plot(S, 'r-^', label='sin')
plt.plot(C, 'b-o', label='cos')
# numpoints=1: draw a single marker per legend entry
plt.legend(numpoints=1)
plt.show()

In [None]:
# Label with plt.legend
# Add labels via list in legend.
# (S and C come from an earlier cell)
plt.plot(S, 'r-^', C, 'b-o')
# the labels are given in the same order as the curves were plotted
plt.legend(['sin','cos'])
# Add x and y labels
# NOTE(review): S and C are plotted against the sample index here, not
# against X - confirm that 'radians' is the intended x label.
plt.xlabel('radians')
# Keywords set text properties.
plt.ylabel('amplitude', fontsize='large')
# Add title and show grid
plt.title('Sin(x) vs Cos(x)')
plt.grid()
plt.show()

# Four Tools in Numpy
## Removing loops using NumPy
- Ufunc (Universal Function)
- Aggregation
- Broadcasting
- Slicing, masking and fancy indexing

### Numpy’s universal function (or ufunc for short) is a function that operates on ndarrays in an element-by-element fashion
- Ufunc is a “vectorized” wrapper for a function that takes a fixed number of scalar inputs and produces a fixed number of scalar outputs.
- Vectorization (simplified): is the process of rewriting a loop so that instead of processing a single element of an array N times, it processes (say) 4 elements of the array simultaneously N/4 times. 
- Many of the built-in functions are implemented in compiled C code.
- They can be much faster than the code on the Python level

### Ufunc: Many ufuncs available
- Arithmetic Operators: `+ - * / // % **`
- Bitwise Operators: `& | ~ ^ >> <<`
- Comparison Operators: `< > <= >= == !=`
- Trig Family: `np.sin, np.cos, np.tan ...`
- Exponential Family: `np.exp, np.log, np.log10 ...`
- Special Functions: `scipy.special.*`
- . . . and many, many more.

In [None]:
# Floating-point array 0.0 .. 4.0
x = np.arange(5, dtype=float)
print(x)
c = np.pi
# Scale x by c in place with the multiply ufunc (what `x *= c` does).
np.multiply(x, c, out=x)
# np.sin is applied to every element without a Python loop.
y = np.sin(x)
print(y)

### Aggregation Functions
- Aggregations are functions which summarize the values in an array (e.g. min, max, sum, mean, etc.)
- Numpy aggregations are much faster than Python built-in functions

- All have the same call style:

    `np.min() np.max() np.sum() np.prod()`<br>
`np.argsort()`<br>
`np.mean() np.std() np.var() np.any()`<br>
`np.all() np.median() np.percentile()`<br>
`np.argmin() np.argmax() . . .`<br>
`np.nanmin() np.nanmax() np.nansum(). . .`<br>

In [None]:
# Numpy Aggregation - Array Calculation
# a is the 2x3 array [[0 1 2], [3 4 5]]
a=np.arange(6).reshape(2,-1)
print(a)
# by default a.sum() adds up all values
print(a.sum())
# same result, functional form
print(np.sum(a))
# note this is not numpy's sum! Python's built-in sum() iterates over the
# rows of a and adds them together, giving one value per column
print(sum(a))
# not numpy's sum either! The built-in sum() has no axis argument:
# sum(a,axis=0)
# Traceback (most recent call last):
# File "<stdin>", line 1, in <module>
# TypeError: sum() takes no keyword
# arguments
# sum along different axis
print(np.sum(a,axis=0)) # collapse the rows: one sum per column
print(np.sum(a,axis=1)) # collapse the columns: one sum per row
print(np.sum(a,axis=-1)) # -1 is the last axis, the same as axis=1 for a 2-D array
# product along different axis
print(np.prod(a,axis=0))
print(a.prod(axis=1))

In [None]:
# Numpy Aggregation - Statistical Methods
# show at most 4 decimal places when printing arrays
np.set_printoptions(precision=4)
# generate 2x3 random float array (no seed is set, so values differ per run)
a=np.random.random(6).reshape(2,3)
print(a)
# mean of each column
print(a.mean(axis=0))
# mean of all elements, method form and functional form
print(a.mean())
print(np.mean(a))
# A pasted output value (0.61730865425015347) used to end this cell; it was
# removed because it was displayed as if it were this run's result.


In [None]:
# (a is the array from the previous cell)
# average can use weights; with axis=1 there is one weight per column
# and one weighted average per row
print(np.average(a,weights=[1,2,3],axis=1))
# standard deviation of each column
print(a.std(axis=0))
# variance of each row
print(np.var(a, axis=1))

In [None]:
# min/max operation over all elements
# (a is the array from an earlier cell)
print(a.min())
print(np.max(a))
# find index of the minimum in each column
print(a.argmin(axis=0))
# index of the maximum in each row
print(np.argmax(a,axis=1))
# without an axis the index refers to the flattened array
print(np.argmin(a))
print(a.argmax())

### Array Broadcasting

- Broadcasting is a set of rules by which ufuncs operate on arrays of different sizes and/or dimensions.
- Broadcasting allows NumPy arrays of different dimensionality to be
combined in the same expression.
- Arrays with fewer dimensions are broadcast to match the larger arrays, without copying data.

<img src="figure/np_broadcast.png" style="float: left;" width="500">

In [None]:
# the scalar 5 is broadcast to every element: [5 6 7]
a = np.arange(3) + 5
print(a)

In [None]:
# the 1-D array [0 1 2] is added to each row of the 3x3 array of ones,
# so every row becomes [1. 2. 3.]
a = np.ones((3,3)) + np.arange(3)
print(a)

In [None]:
# a (3,1) column plus a (3,) row: both are broadcast to 3x3, so the
# element in row r, column c is r + c
a = np.arange(3).reshape(3,1) + np.arange(3)
print(a)

### Slicing, Masking and Fancy Indexing

- `arr[lower:upper:step]`
- Extracts a portion of a sequence by specifying a lower and upper bound. The lower-bound element is included, but the upper-bound element is not included. Mathematically: `[lower, upper)`. The step value specifies the stride between elements

In [None]:
# indices:            0   1   2   3   4
# negative indices:  -5  -4  -3  -2  -1
a = np.array([10,11,12,13,14])
# The following slicing results are the same: each selects the elements
# at indices 1 and 2, i.e. [11 12]
print(a[1:3])
print(a[1:-2])
print(a[-4:3])

In [None]:
# Omitting Indices: omitted boundaries are assumed to be the beginning
# or end of the list, compare the following results
# (a is the 5-element array from the previous cell)
print(a[:3]) # the first three elements
print(a[-2:]) # the last two elements
print(a[1:]) # from the 2nd element (index 1) to the last
print(a[:-1]) # from the 1st element to the second to last
print(a[:]) # entire array
print(a[::2]) # from 1st, every other element (even indices)
print(a[1::2]) # from 2nd, every other element (odd indices)

In [None]:
# Multidimensional Arrays
# A few 2D operations similar to the 1D operations shown above
# the second argument makes the elements floats
a = np.array([[ 0, 1, 2, 3],[10,11,12,13]], float)
print(a)
print(a.shape,a.size) # (rows, columns) and total number of elements
print(a.ndim) # number of dimensions
print(a[1,3]) # reference a 2D array element as [row, column]
a[1,3] = -1 # set value of an array element
print(a[1]) # address second row using a single index

In [None]:
# the integers 1 .. 25, laid out row by row
a = np.arange(1,26)
a = a.reshape(5,5) # generate the 2D array
print(a)

In [None]:
# first row, columns 3 and 4
print(a[0,3:5])

In [None]:
# first row, column 3 only; the slice keeps the result a 1-element array
print(a[0,3:4])

In [None]:
# rows from 4 on and columns from 4 on: a 1x1 block (the bottom-right element)
print(a[4:,4:])

In [None]:
# rows from 3 on and columns from 3 on: the bottom-right 2x2 block
print(a[3:,3:])

In [None]:
# every row, column 2: the third column as a 1-D array
print(a[:,2])

In [None]:
# every second row starting at row 2, and every second column starting at column 0
print(a[2::2,::2])

### Slices Are References
- Slices are references to memory in the original array
- Changing values in a slice also changes the original array !

In [None]:
a = np.arange(5)
print(a)
# b refers to elements 2 and 3 of a; no data is copied
b = a[2:4]
print(b)
# writing through the slice changes a[2] as well
b[0]=7
print(a)

### Masking

In [None]:
# the integers 0 .. 9, used by the masking cells below
a = np.arange(10)
print(a)

In [None]:
# creation of mask using ufunc
# the comparison gives one boolean per element of a: True where the
# value is more than 2 away from 5
mask=np.abs(a-5)>2
print(mask)
# indexing with the boolean array keeps only the elements where mask is True
print(a[mask])

In [None]:
# Select the elements of a that are more than 2 away from 5. The condition
# is recomputed here instead of reusing `mask` from the previous cell:
# `mask` is rebound below, so `a[mask]` would fail if this cell were re-run.
b = a[np.abs(a-5) > 2]
# manual creation of mask, one boolean per element of b
mask = np.array([0,1,0,1,0], dtype=bool)
print(b[mask])

### 2D Masking
<img src="figure/masking_2d.png" style="float: left;" width="250">

In [None]:
# 5x5 array holding 10 .. 34, laid out row by row
a=np.arange(25).reshape(5,5)+10
print(a)

In [None]:
# True at positions 1, 2 and 4
mask=np.array([0,1,1,0,1],dtype=bool)
print(a[mask]) # on rows, same as a[mask,:]: keeps rows 1, 2 and 4

In [None]:
# mask comes from the previous cell
print(a[:,mask]) # on columns: keeps the columns where mask is True

### Fancy Indexing - 1D
- NumPy offers more indexing facilities than regular Python sequences.
- In addition to indexing by integers and slices, arrays can be indexed by arrays of integers and arrays of Booleans (as seen before).

In [None]:
# squares of 0 .. 7
a=np.arange(8)**2
print(a)
# indexing by position: the result has one element per index, and an
# index may be repeated
i=np.array([1,3,5,1])
print(a[i])

In [None]:
# squares of 0 .. 5 arranged as a 2x3 array
b=(np.arange(6)**2).reshape(2,-1)
print(b)
# i holds row indices, j the matching column indices
i=[0,1,0]
j=[0,2,1]
print(b[i,j]) # indexing 2D array by position: b[0,0], b[1,2], b[0,1]

### Fancy Indexing - 2D
<img src="figure/fancy_indexing_2d.png" style="float: left;" width="200">

In [None]:
# squares of 0 .. 11 arranged as a 3x4 array
b=(np.arange(12)**2).reshape(3,-1)
print(b)
# row indices and the matching column indices
i=[0,2,1]
j=[0,2,3]
# indexing 2D array: picks b[0,0], b[2,2] and b[1,3]
print(b[i,j])

In [None]:
# note the shape of the resulting array
# (b is the 3x4 array from the previous cell)
i=[[0,2],[2,1]]
j=[[0,3],[3,1]]
# When an array of indices is used,
# the result has the same shape as the indices;
# here a 2x2 array [[b[0,0], b[2,3]], [b[2,3], b[1,1]]]
print(b[i,j])

## Example of using masking, and plotting

In [None]:
# 30 evenly spaced sample points on [-pi, pi] and their sine values
x = np.linspace(-np.pi, np.pi, 30,endpoint=True)
y = np.sin(x)

In [None]:
# full curve (x and y come from the previous cell)
plt.plot(x,y,'b-s')
# plot the y>0 part
mask = y>0
plt.plot(x[mask],y[mask],'r-o')
# plot the y<0 part with x > -pi/2, i.e. the samples between -pi/2 and 0
# NOTE(review): the original comment said "between -pi/2 and pi/2"; if that
# is the intent the mask should be (x>-np.pi/2) & (x<np.pi/2) - confirm.
mask1 = (y<0) & (x>-np.pi/2)
plt.plot(x[mask1],y[mask1],'g-^')
plt.show()