###### Katherine Kairis, kak275@pitt.edu, 9/7/2017

In [1]:
import numpy as np

## List vs. Fixed-type array in Python

In [2]:
flexible = [1, 1.0, 'one', 2, 2.0, 'two', 3, 3.0, 'three']
for i in flexible:
    print(type(i))

<class 'int'>
<class 'float'>
<class 'str'>
<class 'int'>
<class 'float'>
<class 'str'>
<class 'int'>
<class 'float'>
<class 'str'>


In [3]:
import array
L = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
fixed = array.array('i', L)  #'i' = type encoding for integer

for i in fixed:
    print(type(i))

<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>


## Arrays in NumPy (including multi-dimensional arrays)
Part of the NumPy package is an array object. As with Python's array.array, all of the elements in a NumPy array must be of the same data type. NumPy improves upon Python's array by adding efficient operations that can be performed on NumPy arrays, and by supporting multi-dimensional arrays.

### Creating NumPy arrays

#### Converting lists to NumPy arrays

In [4]:
#Convert a Python list to a NumPy array
np.array([1, 2, 3 ,4, 5])

array([1, 2, 3, 4, 5])

In [5]:
#All of the elements in the array must be of the same data type.
#If a list contains elements that are of different data types, the NumPy array will convert all of the elements
#to the higher data type, if possible. (In a list of integers and floating points, all of the elements in the
#array will be converted to floating points).
np.array([1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5])

array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ])

In [6]:
#Can use the dtype keyword to specify data type
np.array([1, 2, 3, 4, 5, 6, 7], dtype='float32')

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.], dtype=float32)

#### Other ways to initialize arrays
1. np.zeros(array_dimensions): create an array of the specified size whose elements are all zeros
2. np.ones(array_dimensions): create an array of the specified size whose elements are all ones
3. np.full(array_dimensions, element): create an array filled with a specified element
4. np.arange(start, end): create an array containing every value between start (inclusive) and end (exclusive)
5. np.arange(start, end, n): create an array containing every nth value between start (inclusive) and end (exclusive)
    * n represents an interval
6. np.linspace(start, end, n): create an array with n equally-spaced values between start (inclusive) and end (inclusive by default; pass endpoint=False to exclude it)
    * n represents the number of elements in the array

In [7]:
#Create a 5-element array of all zeros using np.zeros()
print('Array 1:')
print(np.zeros(5, dtype = int))

#Create a 2x2 (4-element) array of all zeros using np.zeros()
print('\nArray 2:')
print(np.zeros((2,2)))

Array 1:
[0 0 0 0 0]

Array 2:
[[ 0.  0.]
 [ 0.  0.]]


In [8]:
#Create a 5-element array of all ones using np.ones()
print('Array 1:')
print(np.ones(5, dtype = int))

#Create a 3x5 array of all ones using np.ones()
print('\nArray 2:')
print(np.ones((3,5)))

Array 1:
[1 1 1 1 1]

Array 2:
[[ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]]


In [9]:
#Create a 10-element array, where all 10 elements are 7.89
print('Array 1:')
print(np.full(10, 7.89))

#Create a 2x3 array, where all elements are 4
print('\nArray 2:')
print(np.full((2, 3), 4))

Array 1:
[ 7.89  7.89  7.89  7.89  7.89  7.89  7.89  7.89  7.89  7.89]

Array 2:
[[4 4 4]
 [4 4 4]]


In [10]:
#Create an array whose elements are every number from 0(inclusive) to 10(exclusive)
print('Array 1:')
print(np.arange(0, 10))

#Create an array whose elements are every 5th number from 50(inclusive) to 100(exclusive)
print('\nArray 2:')
print(np.arange(50, 100, 5))

#Create an array whose elements are every 3rd number from 0(inclusive) to 20(exclusive)
print('\nArray 3:')
print(np.arange(0, 20, 3))


Array 1:
[0 1 2 3 4 5 6 7 8 9]

Array 2:
[50 55 60 65 70 75 80 85 90 95]

Array 3:
[ 0  3  6  9 12 15 18]


In [11]:
#Create an array filled with a linear sequence
#Note: np.linspace includes the end value by default, as the output below shows (100 is the last element)
#Create an array with three equally spaced elements between 0(inclusive) and 100(inclusive)
print('Array 1:')
print(np.linspace(0, 100, 3))

#Create an array with four equally spaced elements between 0(inclusive) and 100(inclusive)
print('\nArray 2:')
print(np.linspace(0, 100, 4))

#Create an array with five equally spaced elements between 0(inclusive) and 100(inclusive)
print('\nArray 3:')
print(np.linspace(0, 100, 5))

Array 1:
[   0.   50.  100.]

Array 2:
[   0.           33.33333333   66.66666667  100.        ]

Array 3:
[   0.   25.   50.   75.  100.]


In [12]:
#np.arange() vs. np.linspace()
#arange() specifies the interval/distance between elements. linspace() specifies the number of elements in the array
print('np.arange:')
print(np.arange(0, 1, 5))

print('\nnp.linspace:')
print(np.linspace(0, 1, 5))

np.arange:
[0]

np.linspace:
[ 0.    0.25  0.5   0.75  1.  ]


In [13]:
#Identity matrix
print(np.eye(5))

[[ 1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]]


In [14]:
#Uninitialized array of 5 elements. Values will be whatever happens to be in the location
print('Array 1:')
print(np.empty(5))

print('\nArray 2:')
print(np.empty(3))

Array 1:
[ 1.  1.  1.  1.  1.]

Array 2:
[  33.33333333   66.66666667  100.        ]


#### Create arrays with random values

1. np.random.random(number_of_elements): create an array of the given size with elements between 0 and 1
2. np.random.normal(mean, standard_deviation, number_of_elements): create an array of the specified length whose elements are drawn from a normal distribution with the specified mean and standard deviation.
3. np.random.randint(start, end, number_of_elements): create an array of the specified length whose elements are between start(inclusive) and end(exclusive).

In [15]:
#Array of 5 elements with random values between 0 and 1
print('Array 1:')
print(np.random.random(5))

#2x2 array of elements with random values between 0 and 1 (pass the shape as a tuple)
print('\nArray 2:')
print(np.random.random((2,2)))

Array 1:
[ 0.51739421  0.30512831  0.36010917  0.40369799  0.08618933]

Array 2:
[[ 0.9286874   0.95651563]
 [ 0.63401794  0.04382688]]


In [16]:
#Array of 5 elements with a mean of 0 and a standard deviation of 1
print('Array 1:')
print(np.random.normal(0, 1, 5))

#Array of 5 elements with a mean of 7 and a standard deviation of 3
print('\nArray 2:')
print(np.random.normal(7, 3, 5))

Array 1:
[ 1.83771072 -0.76416061 -1.48999843  0.09898317 -0.01263051]

Array 2:
[ 3.97260345  5.78654941  1.11424053  5.72058774  8.37004834]


In [17]:
#An array of 5 elements of random integers between 0 (inclusive) and 10 (exclusive)
np.random.randint(0, 10, 5)

array([3, 9, 8, 4, 3])

### Properties of Arrays
1. ndim (the number of dimensions)
2. shape (the size of each dimension)
3. size (the total size of the array)

Other attributes include dtype (the data type of the array), itemsize (the size, in bytes, of each array element), and nbytes (the total size, in bytes, of the array).

In [18]:
x1 = np.random.randint(10, size = 6)
x2 = np.random.randint(10, size = (3, 4))
x3 = np.random.randint(10, size = (3, 4, 5))
print(x1)
print('x1 dimensions:', '\n\tnumber of dimensions =', x1.ndim, '\n\tshape =', x1.shape, '\n\tsize =', x1.size, '\n')
print(x2)
print('x2 dimensions:', '\n\tnumber of dimensions =', x2.ndim, '\n\tshape =', x2.shape, '\n\tsize =', x2.size, '\n')
print(x3)
print('x3 dimensions:', '\n\tnumber of dimensions =', x3.ndim, '\n\tshape =', x3.shape, '\n\tsize =', x3.size)

[2 4 5 5 8 0]
x1 dimensions: 
	number of dimensions = 1 
	shape = (6,) 
	size = 6 

[[7 2 5 5]
 [7 4 9 6]
 [9 3 9 0]]
x2 dimensions: 
	number of dimensions = 2 
	shape = (3, 4) 
	size = 12 

[[[1 4 0 8 6]
  [8 3 2 5 4]
  [9 3 3 8 9]
  [8 8 0 8 9]]

 [[1 6 1 1 3]
  [9 7 5 2 2]
  [4 5 8 7 9]
  [3 7 1 9 1]]

 [[2 6 4 6 0]
  [3 7 0 1 9]
  [4 8 7 2 9]
  [2 6 9 0 8]]]
x3 dimensions: 
	number of dimensions = 3 
	shape = (3, 4, 5) 
	size = 60


### Array indexing: Accessing and modifying elements

#### One-dimensional arrays

In [19]:
#One-dimensional array of 6 random integers between 0 (inclusive) and 10 (exclusive)
x1 = np.random.randint(10, size = 6)

#Get first element
print('x1[0] =', x1[0])
#Get 3rd element
print('x1[2] =', x1[2])
#Get last (in this case, the 6th) element; negative indices count from the end
print('x1[-1] =', x1[-1])
#Get penultimate (in this case, the 5th) element
print('x1[-2] =', x1[-2])

#Change the first element to 100 and the last element to 500
#Assignment by index alters the array in place
print('\nBefore modifications')
print(x1)

x1[0] = 100
x1[-1] = 500

print('\nAfter modifications')
print(x1)


x1[0] = 2
x1[2] = 1
x1[-1] = 2
x1[-2] = 0

Before modifications
[2 5 1 5 0 2]

After modifications
[100   5   1   5   0 500]


#### Multi-dimensional arrays

In [20]:
x2 = np.random.randint(10, size = (3, 4))
x3 = np.random.randint(10, size = (3, 4, 5))

#Use comma-separated tuple of indices
print('x2[0,1] =', x2[0,1])
print('x3[2, 0, 3] =', x3[2, 0, 3])

x2[2, 1] = 50
x3[2, 3, 4] = 500

print('\nx2:')
print(x2)
print('\nx3:')
print(x3)

x2[0,1] = 5
x3[2, 0, 3] = 6

x2:
[[ 0  5  3  5]
 [ 3  7  3  4]
 [ 6 50  3  3]]

x3:
[[[  4   3   1   7   3]
  [  9   3   6   3   0]
  [  6   2   7   0   5]
  [  6   3   3   0   5]]

 [[  1   9   9   1   7]
  [  5   7   1   7   2]
  [  1   5   3   2   8]
  [  7   1   6   4   3]]

 [[  7   3   5   6   0]
  [  1   3   0   5   9]
  [  9   0   3   9   0]
  [  9   0   7   1 500]]]


#### Inserting incompatible data type

In [21]:
#If insert float into array of integers, the float will be truncated
x1[2] = 1.987
print(x1)

[100   5   1   5   0 500]


#### Fancy indexing

In [22]:
x = np.array([32, 46, 71, 29, 11, 65, 91, 36, 52, 84])
indices = [0, 8, 3, 7]
#Returns an array containing the elements found in x at the listed indices
x[indices]

array([32, 52, 29, 36])

In [23]:
#Fancy indexing with multi-dimensional arrays
x = np.array([[11, 10, 9, 8],
              [7, 6, 5, 4],
              [3, 2, 1, 0]])
rows = np.array([0, 1, 2])
columns = np.array([3, 2, 1])
#Returns an array with the following elements: x[0][3], x[1][2], x[2][1]
x[rows, columns]

array([8, 5, 2])

#### Combined indexing

In [24]:
x = np.array([[11, 10, 9, 8],
              [7, 6, 5, 4],
              [3, 2, 1, 0]])
#Returns an array with the following elements: x[1][3], x[1][0], x[1][1]
print('Array 1')
print(x[1, [3, 0, 1]])

#Returns an array with the following elements: x[1][3], x[1][0], x[1][1], x[2][3], x[2][0], x[2][1]
print('\nArray 2')
print(x[1:, [3, 0, 1]])

Array 1
[4 7 6]

Array 2
[[4 7 6]
 [0 3 2]]


### Array Slicing and Subarrays
* array_name[start:stop:step]
* start is inclusive and stop is exclusive

#### One-dimensional arrays

In [25]:
#Array of the integers 0 through 24
x = np.arange(25)
#First ten elements
print('Array 1')
print(x[:10])

#All elements from index 10 (inclusive) to the end
print('\nArray 2')
print(x[10:])

#All elements between index 10(inclusive) and 20(exclusive)
print('\nArray 3')
print(x[10:20])

Array 1
[0 1 2 3 4 5 6 7 8 9]

Array 2
[10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]

Array 3
[10 11 12 13 14 15 16 17 18 19]


In [26]:
#Every other element between 0(inclusive) and 10 (exclusive)
print('Array 1')
print(x[0:10:2])

#Every 5th element in the array starting at index 0
print('\nArray 2')
print(x[0::5])

#Every 5th element in the array starting at index 10
print('\nArray 3')
print(x[10::5])

#Every 5th element in the array ending at 15 (exclusive)
print('\nArray 4')
print(x[:15:5])

Array 1
[0 2 4 6 8]

Array 2
[ 0  5 10 15 20]

Array 3
[10 15 20]

Array 4
[ 0  5 10]


In [27]:
#Using a negative step is a useful way to reverse the array.
#With a negative step the slice runs backward, so the defaults for start and stop are swapped

#Every element, in reverse
print('Array 1')
print(x[::-1])

#Every other element, in reverse
print('\nArray 2')
print(x[::-2])

#Every third element, going backward from index 15 (inclusive) to index 5 (exclusive)
print('\nArray 3')
print(x[15:5:-3])

Array 1
[24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0]

Array 2
[24 22 20 18 16 14 12 10  8  6  4  2  0]

Array 3
[15 12  9  6]


#### Multi-dimensional subarrays

In [28]:
#x2 is the 3x4 array from the multi-dimensional indexing example above
#Get the first 2 rows, and the first 3 columns
print('Array 1')
print(x2[:2, :3])

#Get all rows, and every other column
print('\nArray 2')
print(x2[:, ::2])

#Reverse the order of the rows (the contents of each row are unchanged)
print('\nArray 3')
print(x2[::-1, :])

#Reverse the order of the columns (the rows stay in place)
print('\nArray 4')
print(x2[:, ::-1])

#Reverse both rows and columns
print('\nArray 5')
print(x2[::-1, ::-1])

Array 1
[[0 5 3]
 [3 7 3]]

Array 2
[[0 3]
 [3 3]
 [6 3]]

Array 3
[[ 6 50  3  3]
 [ 3  7  3  4]
 [ 0  5  3  5]]

Array 4
[[ 5  3  5  0]
 [ 4  3  7  3]
 [ 3  3 50  6]]

Array 5
[[ 3  3 50  6]
 [ 4  3  7  3]
 [ 5  3  5  0]]


In [29]:
#Get first row of x2
print('Array 1')
print(x2[0, :])

#Get first column of x2
print('\nArray 2')
print(x2[:, 0])

#These are equivalent
print('\nArray 3')
print(x2[0, :] == x2[0])

Array 1
[0 5 3 5]

Array 2
[0 3 6]

Array 3
[ True  True  True  True]


Some important notes on subarrays:
* Subarrays are no-copy views: array slices return views rather than copies of the array data. Slices of Python lists, however, are copies.
* To explicitly copy the data in an array or subarray, use the copy() method.

In [30]:
original = np.array([[1, 2, 3], [4, 5, 6]])
sub_array = original[1]

print('Before modification')
print('Original')
print(original)
print('\nSubarray')
print(sub_array)


#Change both sub_array and original
sub_array[0] = 100
sub_array[2] = 471

print('\n\nAfter modification')
print('Original')
print(original)
print('\nSubarray')
print(sub_array)

Before modification
Original
[[1 2 3]
 [4 5 6]]

Subarray
[4 5 6]


After modification
Original
[[  1   2   3]
 [100   5 471]]

Subarray
[100   5 471]


In [31]:
#Start from a fresh 2x3 array
original = np.array([[1, 2, 3], [4, 5, 6]])
#Slice first, then copy: only the second row is duplicated, and sub_array owns its own data
#(original.copy()[1] would copy the whole array and leave sub_array as a view into that temporary copy)
sub_array = original[1].copy()
#These instructions only change sub_array. original is not modified
sub_array[0] = 100
sub_array[2] = 471

print('original:')
print(original)

print('\nsubarray:')
print(sub_array)

original:
[[1 2 3]
 [4 5 6]]

subarray:
[100   5 471]


### Reshaping arrays
array_name.reshape(new_dimensions)

In [32]:
#For this to work, the size of the initial array must equal the size of the reshaped array
#If possible, the reshape method will use a no-copy view of the initial array

print('Before reshape:')
x = np.arange(1, 10)
print(x)

print('\nAfter reshape:')
print(x.reshape((3,3)))


Before reshape:
[1 2 3 4 5 6 7 8 9]

After reshape:
[[1 2 3]
 [4 5 6]
 [7 8 9]]


### Concatenation of arrays

#### Arrays with the same dimensions (both one-dimensional and multi-dimensional)

In [33]:
#Three one-dimensional arrays of equal length
x, y, z = (np.array(values) for values in ([1, 2, 3], [4, 5, 6], [7, 8, 9]))

#np.concatenate joins the given sequence of arrays end to end
print('Concatenate x and z')
x_then_z = np.concatenate((x, z))
print(x_then_z)

#More than two arrays can be joined in a single call
print('\nConcatenate x, y, and z')
all_three = np.concatenate((x, y, z))
print(all_three)

Concatenate x and z
[1 2 3 7 8 9]

Concatenate x, y, and z
[1 2 3 4 5 6 7 8 9]


#### Arrays with different dimensions
Use np.vstack([array1, array2]) and np.hstack([array1, array2])


In [34]:
x = np.array([1, 2, 3])
a = ([1, 2, 3], [4, 5, 6])
b = ([7, 8, 9], [10, 11, 12])

print('Concatenate x and a with vstack')
print(np.vstack([x, a]))

print('\nConcatenate c and a with hstack')
c = ([13],
     [13])
print(np.hstack([c, a]))

Concatenate x and a with vstack
[[1 2 3]
 [1 2 3]
 [4 5 6]]

Concatenate c and a with hstack
[[13  1  2  3]
 [13  4  5  6]]


### Splitting arrays
Use np.split(array, [split_indices]), np.vsplit(array, [row_to_split_on]), and np.hsplit(array, [column_to_split_on])

In [35]:
#Splitting of arrays
#x is a plain Python list here; np.split accepts it and returns NumPy arrays
x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
#Split points 3 and 5 give three pieces: x[:3], x[3:5], and x[5:]
#NOTE: this rebinds the names x1, x2, x3 used in earlier cells, and the pieces are not printed below
x1, x2, x3 = np.split(x, [3,5])

#4x4 grid holding the integers 0 through 15
grid = np.array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]])
#vsplit cuts between rows: rows before index 2 go to upper, the rest to lower
upper, lower = np.vsplit(grid, [2])
#hsplit cuts between columns: columns before index 2 go to left, the rest to right
left, right = np.hsplit(grid, [2])

print("x:")
print(x)

print("\nupper:")
print(upper)

print("\nlower:")
print(lower)

print("\nleft:")
print(left)

print("\nright:")
print(right)

x:
[1, 2, 3, 4, 5, 6, 7, 8, 9]

upper:
[[0 1 2 3]
 [4 5 6 7]]

lower:
[[ 8  9 10 11]
 [12 13 14 15]]

left:
[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]

right:
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


### Computation on NumPy Arrays: Universal Functions
Vectorized operations in NumPy are implemented via ufuncs, whose main purpose is to quickly execute repeated operations on values in NumPy arrays.
Unary ufuncs operate on a single input and binary ufuncs operate on two inputs.


#### ufuncs operations

In [36]:
#Arithmetic operators applied to an array act on every element
x = np.arange(6)
print("x = ", x)
print("x + 5 = ", x + 5)
print("x - 5 = ", x - 5)
print("x * 2= ", x * 2)
#The label uses ^ as math notation for a power; in Python the power operator is ** (^ is bitwise XOR)
print("x ^ 2= ", x ** 2)
print("x / 2= ", x / 2)
print("x % 2= ", x % 2)
print("x // 2= ", x // 2) #Floor division
print("-x = ", -x)
#Operations can be combined; the usual order of operations applies
print("(3 + x) * 2 = ", (3 + x) * 2)

#Absolute value
x = np.array([-2, -1, 0, 1, 2])
print('\nx:')
print(x)

print('\nabsolute(x)')
print(np.absolute(x))
#Python's built-in abs() gives the same result as np.absolute() on a NumPy array (all True below)
abs(x) == np.absolute(x)

x =  [0 1 2 3 4 5]
x + 5 =  [ 5  6  7  8  9 10]
x - 5 =  [-5 -4 -3 -2 -1  0]
x * 2=  [ 0  2  4  6  8 10]
x ^ 2=  [ 0  1  4  9 16 25]
x / 2=  [ 0.   0.5  1.   1.5  2.   2.5]
x % 2=  [0 1 0 1 0 1]
x // 2=  [0 0 1 1 2 2]
-x =  [ 0 -1 -2 -3 -4 -5]
(3 + x) * 2 =  [ 6  8 10 12 14 16]

x:
[-2 -1  0  1  2]

absolute(x)
[2 1 0 1 2]


array([ True,  True,  True,  True,  True], dtype=bool)

### Aggregations
Many of NumPy's aggregation operations exist in Python, but NumPy's versions operate more quickly. Here are some of NumPy's aggregation operations:
* np.sum: Compute sum of elements
* np.prod: Compute product of elements
* np.mean: Compute mean of elements
* np.std: Compute standard deviation
* np.var: Compute variance
* np.min: Find minimum value
* np.max: Find maximum value
* np.argmin: Find index of minimum value
* np.argmax: Find index of maximum value
* np.median: Compute median of elements
* np.percentile: Compute rank-based statistics of elements
* np.any: Evaluate whether any elements are true
* np.all: Evaluate whether all elements are true


In [37]:
#Sum the same 100 random floats with Python's built-in sum and with NumPy's sum
x = np.random.random(100)
sum(x) #Python's built-in sum
np.sum(x) #NumPy's sum
#Exact equality between two floating-point totals is fragile: the recorded result of this comparison is False
#even though both calls add the same values. The totals presumably differ only by rounding error, since
#floating-point addition is order-sensitive. Use np.isclose(sum(x), np.sum(x)) for a tolerant comparison.
sum(x) == np.sum(x)
#NumPy's version is faster/more efficient because it executes the operation in compiled code.

False

In [38]:
x = np.random.random(100)
print('x:')
print(x)

min(x) #python
max(x) #python

print("min =", np.min(x)) #Numpy
print("max =", np.max(x)) #Numpy

print("sum of all elements in x =", np.sum(x))

#Multidimensional aggregates
x = np.random.random((3, 4))

print('\n\nx:')
print(x)

print("sum of all elements in x =", np.sum(x))

print('Min value in each column:', x.min(axis=0))  #Aggregation functions take an additional argument specifying the axis along which the aggregate is computed. For example, we can find the minimum value within each column by specifying axis=0:
print('Max value in each row:', x.max(axis=1))  # Similarly, we can find the maximum value within each row with axis = 1

x:
[ 0.10769312  0.47297913  0.91780973  0.30340703  0.11455742  0.03366912
  0.41791628  0.31679846  0.00784425  0.506246    0.40170968  0.14355973
  0.63160214  0.73535404  0.58197633  0.12592535  0.18619324  0.8745621
  0.32696309  0.40190652  0.63949337  0.59191951  0.46263438  0.63507759
  0.7910898   0.82105706  0.26740266  0.96938674  0.91367639  0.68402971
  0.95700746  0.16752873  0.80216652  0.94124408  0.61203649  0.67099891
  0.91179422  0.88264909  0.68811569  0.00892827  0.28105512  0.91744365
  0.81487394  0.68847184  0.12062075  0.45655316  0.45347565  0.48064829
  0.26422278  0.666787    0.47010632  0.0235297   0.363719    0.25126741
  0.00560837  0.46226548  0.41063071  0.80461561  0.37305435  0.47273558
  0.32212812  0.01298071  0.9704594   0.87176585  0.85768702  0.81861874
  0.07382863  0.67587424  0.27086568  0.26874875  0.70213556  0.18581834
  0.52204232  0.83680928  0.25808522  0.28516225  0.47790418  0.62441214
  0.05803896  0.94171516  0.5497316   0.15294081 

Axis keyword: specifies the dimension of the array that will be collapsed, rather than the dimension that will be returned. So specifying axis=0 means that the first axis will be collapsed: for two-dimensional arrays, this means that values within each column will be aggregated.

### Broadcasting
For arrays of the same size, binary operations are performed on an element-by-element basis. Broadcasting allows these operations to be performed on arrays of different size.

In [39]:
a = np.array([0, 1, 2])
b = np.array([3, 4, 5])
print('a:')
print(a)
print('b:')
print(b)
print('\na+b = ', a+b)

#Adding 1d array to 2d array
c = M = np.ones((3, 3))
c + a

a = np.arange(3)
b = np.arange(3)[:, np.newaxis]
c = a + b
print(a)
print(b)
print(c)

a:
[0 1 2]
b:
[3 4 5]

a+b =  [3 5 7]
[0 1 2]
[[0]
 [1]
 [2]]
[[0 1 2]
 [1 2 3]
 [2 3 4]]


#### Rules of broadcasting
1. If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.
2. If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
3. If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

### Comparisons and boolean arrays

In [40]:
x = np.array([1, 2, 3, 4, 5])
#Each comparison returns a boolean array. An element is True if the corresponding element in x
#satisfies the comparison. Otherwise, the element is False.
print('All values in x less than 3:', x < 3)
print('All values in x greater than 3:', x > 3)
#The labels for <= and >= were previously swapped; each label now matches its operator
print('All values in x less than or equal to 3:', x <= 3)
print('All values in x greater than or equal to 3:', x >= 3)
print('All values in x equal to 3:', x == 3)
print('All values in x not equal to 3:', x != 3)
print('All values in x such at x = x^2:', x == x ** 2)
print('All values in x such at 2x = x^2:',(2 * x) == x ** 2)

#To count the number of True entries in a Boolean array, np.count_nonzero is useful:
np.count_nonzero(x < 3)

#Another way to get at this information is to use np.sum; in this case, False is interpreted as 0,
#and True is interpreted as 1:
np.sum(x < 3)

#The benefit of np.sum is that, like other NumPy aggregation functions, the summation can also be
#done along rows or columns of a multi-dimensional array by passing the axis argument.

#Check whether any or all of the elements satisfy a condition
np.any(x == 3)
np.all(x == 3)
np.all(x >= 0)


#Boolean Arrays as masks. Will return only the elements in x that are less than 3.
print('All elements in x less than 3:', x[x<3])

All values in x less than 3: [ True  True False False False]
All values in x greater than 3: [False False False  True  True]
All values in x greater than or equal to 3: [ True  True  True False False]
All values in x less than or equal to 3: [False False  True  True  True]
All values in x equal to 3: [False False  True False False]
All values in x not equal to 3: [ True  True False  True  True]
All values in x such at x = x^2: [ True False False False False]
All values in x such at 2x = x^2: [False  True False False False]
All elements in x less than 3: [1 2]


### Sorting arrays
1. np.sort(array)
    * Returns the sorted array. By default, np.sort() uses a quicksort algorithm. Python has a version of this method, but it is much less efficient.
2. np.argsort(array)
    * Returns the indices of the sorted elements, instead of the sorted array
3. np.partition(array, n)
    * Returns a new array with the smallest n values to the left of the partition, and the remaining values to the right. Elements on both sides of the partition are ordered arbitrarily.

In [41]:
x = np.array([4, 3, 1, 5, 2])
np.sort(x)


#To sort the array in-place, use the sort method of arrays:
x.sort()
print(x)


[1 2 3 4 5]


In [42]:
#np.argsort with one-dimensional array
x = np.array([4, 3, 1, 5, 2])
i = np.argsort(x)
print('Sort one-dimensional array: ')
print(i)


#Sort among rows and columns
x = np.array([[8, 5, 4, 6], [2, 7, 1, 2], [9, 3, 4, 6], [4, 8, 6, 4]])
print('\nSort among rows and columns: ')
print(x)

#Sort each column of x
np.sort(x, axis=0)
#Sort each row of x
np.sort(x, axis=1)

Sort one-dimensional array: 
[2 4 1 0 3]

Sort among rows and columns: 
[[8 5 4 6]
 [2 7 1 2]
 [9 3 4 6]
 [4 8 6 4]]


array([[4, 5, 6, 8],
       [1, 2, 2, 7],
       [3, 4, 6, 9],
       [4, 4, 6, 8]])

In [43]:
x = np.array([61, 13, 39, 44, 26, 7, 31, 88, 6])
np.partition(x, 4) #first 4 values are the 4 smallest

array([13,  6,  7, 26, 31, 39, 44, 88, 61])

### Structured arrays and record arrays
These types of arrays provide efficient storage for heterogeneous data.

#### Data types
* 'b' -- byte
    * np.dtype('b')
* 'i' -- signed integer
    * np.dtype('i4') == np.int32
* 'u' -- unsigned integer
    * np.dtype('u1') == np.uint8
* 'f' -- floating point
    * np.dtype('f8') == np.float64
* 'c'-- complex floating point
    * np.dtype('c16') == np.complex128
* 'S', 'a' -- string
    * np.dtype('S5')
* 'U' -- unicode string
    * np.dtype('U') == np.str_
* 'V' -- raw data(void)
    * np.dtype('V') == np.void

#### Creating structured arrays

In [44]:
#Create a 4-element array of "people" from lists
names = ['Anne', 'Bob', 'Cathy', 'Dave']
ages = [24, 37, 44, 31]
heights = [64.5, 72, 62, 70.5]

#Set up the arrays with new data types(name, age, and height) and their formats
#Data type of name = U10 (Unicode string with a max length of 10)
#Data type of age = i4 (4-byte/32-bit integer)
#Data type of height = f8 (8-byte/64-bit float)
x = np.zeros(4, dtype={'names':('name', 'age', 'height'),
                       'formats':('U10', 'i4', 'f8')})

#Put values from the lists into the people array
x['name'] = names
x['age'] = ages
x['height'] = heights
print(x)

[('Anne', 24,  64.5) ('Bob', 37,  72. ) ('Cathy', 44,  62. )
 ('Dave', 31,  70.5)]


In [45]:
#Get all of the names
print('Get all of the names:', x['name'])

#Get the first row (the first "person" in the array)
print('Get the first person in the array:', x[0])

#Get the name from the last row (the name of the last 'person' in the array)
print('Get the name of the last person in the array:', x[-1]['name'])

#Get all of the "people" under the age of 35
print('Get all of the people under the age of 35:', x[x['age'] < 35])

#Get only the names of the "people" under the age of 35
print('Get only the names of the people under the age of 35:', x[x['age'] < 35]['name'])

Get all of the names: ['Anne' 'Bob' 'Cathy' 'Dave']
Get the first person in the array: ('Anne', 24,  64.5)
Get the name of the last person in the array: Dave
Get all of the people under the age of 35: [('Anne', 24,  64.5) ('Dave', 31,  70.5)]
Get only the names of the people under the age of 35: ['Anne' 'Dave']


#### Additional ways to create structured arrays

In [46]:
#A dtype can be given as a list of tuples (x,y), where x is the name of the field and y is its data type/format
#This describes the same "person" record as above: name, age, and height
#(the third field was previously misnamed 'weight')
person_dtype = np.dtype([('name', 'U10'), ('age', '<i4'), ('height', '<f8')])

#Can also use Python's data types/formats.
#Note: int and np.float32 are not the same sizes as 'i4' and 'f8', so this dtype has the same field
#names as person_dtype but different formats (see the displayed result)
np.dtype({'names':('name', 'age', 'height'),
          'formats':((np.str_, 10), int, np.float32)})

dtype([('name', '<U10'), ('age', '<i8'), ('height', '<f4')])

#### Creating record arrays
Record arrays are similar to structured arrays, but they allow fields to be accessed as attributes rather than as dictionary keys.

In [47]:
#Rebuild the "people" structured array from three parallel lists
names = ['Anne', 'Bob', 'Cathy', 'Dave']
ages = [24, 37, 44, 31]
heights = [64.5, 72, 62, 70.5]

#Same layout as before: 10-character Unicode name, 32-bit integer age, 64-bit float height
structured = np.zeros(len(names), dtype=[('name', 'U10'), ('age', 'i4'), ('height', 'f8')])
structured['name'], structured['age'], structured['height'] = names, ages, heights

#Viewing the structured array as np.recarray exposes each field as an attribute
record = structured.view(np.recarray)

#Ages via dictionary-style field access on the structured array
structured['age']
#Ages via attribute access on the record array
record.age

array([24, 37, 44, 31], dtype=int32)