### Part 1, NumPy

### Command mode vs. Edit mode
Command mode - binds the keyboard to notebook-level actions. Indicated by a grey cell border with a blue left margin.

Edit mode - active when you’re typing in a cell. Indicated by a green cell border.

In [None]:
# h (in command mode) - view all keyboard shortcuts 
# Shift+Enter, Ctrl+Enter - run cell
# Shift+Tab - tooltip
# insert cell: A - above, B - below
# delete cell - DD
# Tab - auto-completion
# Ctrl+/ - (un)comment

In [1]:
import numpy as np # Import and use NumPy

In [14]:
# Help
# ? - explore documentation, ?? - explore source code
# a built-in help() function
# help(len) or len?

In [15]:
# http://www.numpy.org/
# http://www.numpy.org/devdocs/user/quickstart.html

np?

NumPy provides a powerful N-dimensional array object whose elements all share a single data type (unlike a Python list, which can hold elements of different types).

### Creation and attributes

In [None]:
# np.array(), np.arange()

In [None]:
 # One-dimensional array

Object `nd.array` not found.


In [2]:
# Two-dimensional array

In [8]:
# Attributes

# ndim (the number of dimensions)
# shape (the size of each dimension)
# size (the total size of the array)
# dtype (data type of the array)

In [3]:
# x1

In [4]:
# print("x1 ndim: ", x1.ndim)
# print("x1 shape:", x1.shape)
# print("x1 size: ", x1.size)
# print("x1 dtype: ", x1.dtype)

In [14]:
# What are the ndim, shape, size, and dtype of x2?

### Indexing of arrays: Getting and setting the value of individual array elements

In [None]:
 # One-dimensional array

In [10]:
# x1

In [7]:
# x1[0]

In [8]:
# x1[-1]

In [None]:
 # Two-dimensional array

In [9]:
# x2

In [10]:
# x2[0,0]

In [16]:
# x2[0,0] = 
# x2

In [11]:
# x2[0,-1]

### Getting and setting smaller subarrays within a larger array
### Fancy indexing and Slicing

In [None]:
# Fancy indexing

In [None]:
 # One-dimensional array

In [12]:
# x1

In [13]:
# x1[[1,3,4]]

In [None]:
 # Two-dimensional array

In [14]:
# x2

In [2]:
# x2[0,[1,2]]

In [None]:
# Slicing of arrays

In [None]:
# x[start:stop:step]

In [None]:
# If unspecified, they default to the values
# start=0, stop=size of dimension, step=1

In [None]:
# One-dimensional subarrays

In [27]:
# x = np.arange(10)
# x

In [28]:
# first five elements

In [29]:
# elements starting from index 2

In [30]:
# elements from index 2 to index 4

In [31]:
# every other element, starting at index 1

In [32]:
# all elements reversed

In [26]:
# Two-dimensional subarrays

In [33]:
# x2

In [34]:
# all rows, two first columns

In [35]:
# two first rows, every other column

In [None]:
# Accessing array rows and columns (combining indexing and slicing)

In [36]:
# first column of x2

In [37]:
# first row of x2

In [None]:
# In the case of row access, the empty slice can be omitted for a more compact syntax:

In [19]:
# x2[0] equivalent to x2[0, :]


In [None]:
# Subarrays as no-copy views

In [38]:
# x2

In [39]:
# sub_x2 = x2[:,1:]
# sub_x2

In [None]:
# sub_x2[0, 0] = 111
# print(sub_x2)
# x2

In [None]:
# Creating copies of arrays with copy() method

In [40]:
# x2

In [41]:
# sub_x2 = x2[:,1:].copy()
# print(sub_x2)
# x2

In [None]:
# Reshaping of Arrays
# .reshape()

In [42]:
# np.arange(9).reshape(3,3)

In [20]:
# row vector via reshape

In [None]:
# x = np.array([1, 2, 3])
# print(x)
# print("x ndim: ", x.ndim)
# print("x shape:", x.shape)

In [None]:
# y = x.reshape(1, 3)
# print(y)
# print("y ndim: ", y.ndim)
# print("y shape:", y.shape)

In [43]:
# column vector via reshape

In [None]:
# z =x.reshape(3, 1)
# print(z)
# print("z ndim: ", z.ndim)
# print("z shape:", z.shape)

In [None]:
# row vectors via newaxis: [np.newaxis, :]
# np.array([1,2,3])[np.newaxis, :]

In [None]:
# column vectors via newaxis: [:, np.newaxis]
# np.array([1,2,3])[:, np.newaxis]

### Computation on NumPy Arrays: Universal Functions

In [None]:
#  +   np.add          (e.g., 1 + 1 = 2)
#  -   np.subtract     (e.g., 3 - 2 = 1)
#  -   np.negative     (e.g., -2)
#  *   np.multiply     (e.g., 2 * 3 = 6)
#  /   np.divide       (e.g., 3 / 2 = 1.5)
#  //  np.floor_divide (e.g., 3 // 2 = 1)
#  **  np.power        (e.g., 2 ** 3 = 8)
#  %   np.mod          (e.g., 9 % 4 = 1)
#      np.abs          (e.g., np.abs([-1, 0, 1]) = ([1, 0, 1]))

In [44]:
# np.array([1,1,1]) + np.array([2,2,2])

In [45]:
# np.add(([1,1,1]),([2,2,2]))

### Aggregation functions

In [46]:
# x = np.arange(9)
# x

In [47]:
# print(x.min(), x.max(), x.sum(), x.mean())
# print(np.median(x), np.percentile(x, 25), np.percentile(x, 75))
# print(np.var(x), np.std(x))

Here n = 9 (the number of elements in x), and ranks are counted from 1 in the sorted array.

Median rank (element number): (1 + n) // 2 = 5

Rank of the lower quartile (25th percentile): (median rank + 1) // 2 = 3

Rank of the upper quartile (75th percentile): (n + 1) - rank of the lower quartile = 7

In [None]:
# Multi dimensional aggregates
# axis = 0 - aggregate down the rows (one result per column), axis = 1 - aggregate across the columns (one result per row)

In [48]:
# Min in each column of x2
# x2.min(axis = 0)

In [1]:
# Example: What is the Average Height of US Presidents?

In [49]:
# import pandas as pd
# data = pd.read_csv('data/president_heights.csv')
# data.head()

In [50]:
# heights = np.array(data['height(cm)'])
# print(heights)

In [51]:
# print("Mean height:       ", )
# print("Standard deviation:", )
# print("Minimum height:    ", )
# print("Maximum height:    ", )
# print("25th percentile:   ", )
# print("Median:            ", )
# print("75th percentile:   ", )

In [52]:
# %matplotlib inline
# import matplotlib.pyplot as plt

In [53]:
# plt.hist(heights);
# plt.title('Height Distribution of US Presidents');
# plt.xlabel('height (cm)');
# plt.ylabel('number');

### Comparisons, Masks, and Boolean Logic

In [None]:
# Comparison operators
#  ==   np.equal
#  !=   np.not_equal
#  <    np.less
#  <=   np.less_equal
#  >    np.greater
#  >=   np.greater_equal

In [85]:
# np.random.seed(0)  # seed for reproducibility
# x = np.random.randint(10, size=(3, 4))
# x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [54]:
# Comparisons
# y = x<6
# y

In [55]:
# Masks
# x[x<6]

In [None]:
# number of True entries in a Boolean array: np.sum()

In [56]:
# np.sum(x < 6)

In [None]:
# Example: Counting Rainy Days

In [57]:
# rainfall = pd.read_csv('data/Seattle2014.csv')
# rainfall.head(3)

In [58]:
# inches = rainfall['PRCP'].values / 254.0
# inches.shape

In [59]:
# Histogram of inches
# ???????

In [None]:
# inches[:10]

In [None]:
# Number of days without rain

In [60]:
# np.sum(????)

In [61]:
# len(inches[???])

In [48]:
# Boolean operators
#  &   np.bitwise_and
#  |   np.bitwise_or

In [None]:
# How many days with rainfall between 0.5 and 1.0 inches?

In [62]:
# np.sum((????) & (?????))

In [None]:
# Handling missing data

In [None]:
# NaN: Missing numerical data

In [63]:
# x = np.array([1, np.nan, 3, 4]) 
# x

In [64]:
# x.sum(), x.min(), x.max()

In [65]:
# np.nansum(x), np.nanmin(x), np.nanmax(x)