###### Katherine Kairis, kak275@pitt.edu, 9/7/2017

In [None]:
import numpy as np

## List vs. Fixed-type array in Python

In [None]:
# A plain Python list may mix values of different types freely.
flexible = [1, 1.0, 'one', 2, 2.0, 'two', 3, 3.0, 'three']
for element in flexible:
    print(type(element))

In [None]:
import array

# Source values as an ordinary list.
L = list(range(1, 11))
# 'i' is the array-module type code for a signed integer, so every
# element of `fixed` is stored with that one type.
fixed = array.array('i', L)

for value in fixed:
    print(type(value))

## Arrays in NumPy (describe multi-dimensional arrays here, too)
Part of the NumPy package is an array object. Like the Python array, all of the elements in a NumPy array must be of the same data type. NumPy improves upon Python's array by adding efficient operations that can be performed on NumPy arrays.

### Creating NumPy arrays

#### Converting lists to NumPy arrays

In [None]:
# Build a NumPy array from an ordinary Python list.
np.array([1, 2, 3, 4, 5])

In [None]:
# A NumPy array holds a single data type. When the input list mixes types,
# NumPy upcasts every element to the more general type where possible
# (for example, a mix of integers and floats yields an array of floats).
np.array([1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5])

In [None]:
# The dtype keyword sets the element type explicitly.
np.array([1, 2, 3, 4, 5, 6, 7], dtype='float32')

#### Other ways to initialize arrays
1. np.zeros(array_dimensions): create an array of the specified size whose elements are all zeros
2. np.ones(array_dimensions): create an array of the specified size whose elements are all ones
3. np.full(): create an array filled with a specified element
4. np.arange(start, end): create an array containing every value between start (inclusive) and end (exclusive)
5. np.arange(start, end, n): create an array containing every nth value between start (inclusive) and end (exclusive)
    * n represents an interval
6. np.linspace(start, end, n): create an array with n equally-spaced values between start (inclusive) and end (inclusive by default)
    * n represents the number of elements in the array

In [None]:
# One-dimensional array of five integer zeros.
np.zeros(5, dtype=int)

# Two-dimensional 2x2 array (four elements) of zeros.
np.zeros((2, 2))

In [None]:
# One-dimensional array of five integer ones.
np.ones(5, dtype=int)

# Two-dimensional 3x5 array of ones.
np.ones((3, 5))

In [None]:
# Ten elements, each equal to 7.89.
np.full(10, 7.89)
# 2x3 array in which every element is 4.
np.full((2, 3), 4)

In [None]:
# Every integer from 0 (inclusive) up to 10 (exclusive).
np.arange(0, 10)
# Every 5th number from 50 (inclusive) up to 100 (exclusive).
np.arange(50, 100, 5)
# Every 3rd number from 0 (inclusive) up to 20 (exclusive).
np.arange(0, 20, 3)

In [None]:
# np.linspace fills an array with a linear sequence. By default BOTH
# endpoints are included (pass endpoint=False to leave out the stop value).
# Three equally spaced values from 0 to 100 inclusive: 0, 50, 100
np.linspace(0, 100, 3)
# Four equally spaced values from 0 to 100 inclusive
np.linspace(0, 100, 4)
# Five equally spaced values from 0 to 100 inclusive: 0, 25, 50, 75, 100
np.linspace(0, 100, 5)

In [None]:
# np.arange() vs. np.linspace(): the third argument means different things.
# arange() reads 5 as the step between elements, so only 0 fits in [0, 1);
# linspace() reads 5 as the number of elements to produce.
np.arange(0, 1, 5)
np.linspace(0, 1, 5)

In [None]:
# 5x5 identity matrix: ones on the main diagonal, zeros elsewhere.
np.eye(5)

In [None]:
# Uninitialized arrays of 5 and 3 elements. The contents are arbitrary:
# whatever already happens to be in that memory location.
np.empty(5)
np.empty(3)

#### Create arrays with random values

1. np.random.random(number_of_elements): create an array of the given size with random elements between 0 (inclusive) and 1 (exclusive)
2. np.random.normal(mean, standard_deviation, number_of_elements): create an array of the specified length whose elements are drawn from a normal distribution with the specified mean and standard deviation.
3. np.random.randint(start, end, number_of_elements): create an array of the specified length whose elements are between start(inclusive) and end(exclusive).

In [None]:
# Five random values, each between 0 and 1.
np.random.random(5)

# 2x2 array of random values between 0 and 1.
np.random.random((2, 2))

In [None]:
# Five samples drawn from a normal distribution with mean 0 and
# standard deviation 1.
np.random.normal(0, 1, 5)

# Five samples drawn from a normal distribution with mean 7 and
# standard deviation 3.
np.random.normal(7, 3, 5)

In [None]:
# Five random integers, each from 0 (inclusive) up to 10 (exclusive).
np.random.randint(0, 10, 5)

### Properties of Arrays
1. ndim (the number of dimensions)
2. shape (the size of each dimension)
3. size (the total size of the array)

Other attributes include dtype (the data type of the array), itemsize (the size, in bytes, of each array element), and nbytes (the total size, in bytes, of the array).

In [None]:
# One-, two- and three-dimensional arrays of random integers below 10.
x1 = np.random.randint(10, size=6)
x2 = np.random.randint(10, size=(3, 4))
x3 = np.random.randint(10, size=(3, 4, 5))

# For each array, show its contents followed by ndim, shape and size.
print(x1)
print('x1 dimensions:', '\n\tnumber of dimensions =', x1.ndim,
      '\n\tshape =', x1.shape, '\n\tsize =', x1.size, '\n')

print(x2)
print('x2 dimensions:', '\n\tnumber of dimensions =', x2.ndim,
      '\n\tshape =', x2.shape, '\n\tsize =', x2.size, '\n')

print(x3)
print('x3 dimensions:', '\n\tnumber of dimensions =', x3.ndim,
      '\n\tshape =', x3.shape, '\n\tsize =', x3.size)

### Array indexing: Accessing and modifying elements

#### One-dimensional arrays

In [None]:
x1 = np.random.randint(10, size=6)

# First element
x1[0]
# Third element
x1[2]
# Last element (here the 6th, because x1 has six elements)
x1[-1]
# Penultimate element (here the 5th)
x1[-2]

# Assigning through an index modifies the array in place:
# set the first element to 100 and the last element to 500.
x1[0] = 100
x1[-1] = 500

print(x1)

#### Multi-dimensional arrays

In [None]:
x2 = np.random.randint(10, size=(3, 4))
x3 = np.random.randint(10, size=(3, 4, 5))

# Multi-dimensional elements are addressed with one index per axis,
# separated by commas.
x2[0, 1]
x3[2, 0, 3]

# The same syntax on the left-hand side overwrites a single element.
x2[2, 1] = 50
x3[2, 3, 4] = 500

print(x2)
print(x3)

#### Inserting incompatible data type

In [None]:
# Storing a float in an array of integers drops the fractional part:
# 1.987 is truncated and stored as 1.
x1[2] = 1.987
print(x1)

### Array Slicing and Subarrays
* array_name[start:stop:step]
* start is inclusive and stop is exclusive

#### One-dimensional arrays

In [None]:
x = np.arange(25)
# First ten elements (indices 0 through 9)
x[:10]
# All elements from index 10 (inclusive) to the end
x[10:]
# Elements from index 10 (inclusive) up to index 20 (exclusive)
x[10:20]

In [None]:
# Every other element from index 0 (inclusive) to index 10 (exclusive)
x[0:10:2]
# Every 5th element, starting at index 0
x[0::5]
# Every 5th element, starting at index 10
x[10::5]
# Every 5th element, stopping before index 15 (exclusive)
x[:15:5]

In [None]:
# A negative step walks the array backwards, which makes it a handy way to
# reverse an array. The roles of start and stop are swapped accordingly.

# All elements, last to first
x[::-1]
# Every other element, last to first
x[::-2]
# Every third element going backwards from index 15 (inclusive)
# down to index 5 (exclusive)
x[15:5:-3]

#### Multi-dimensional subarrays

In [None]:
# First two rows, first three columns
x2[:2, :3]
# All rows, every other column
x2[:, ::2]
# Reverse the order of the rows (each column is read bottom-to-top)
x2[::-1, :]
# Reverse the order of the columns (each row is read right-to-left)
x2[:, ::-1]
# Reverse along both axes at once
x2[::-1, ::-1]

In [None]:
# First row of x2
x2[0, :]
# First column of x2
x2[:, 0]
# A single index selects a whole row, so both spellings give the same data;
# the comparison is element-wise and yields a boolean array.
x2[0, :] == x2[0]

Some important notes on subarrays:
* Subarrays are no-copy views: array slices return views rather than copies of the array data. Slices of Python lists, however, are copies.
* To explicitly copy the data in an array or subarray, use the copy() method.

In [None]:
original = np.array([[1, 2, 3], [4, 5, 6]])
# Indexing a row yields a view onto original's data, not a copy.
sub_array = original[1]
# Writes through the view therefore show up in original as well.
sub_array[0], sub_array[2] = 100, 471

In [None]:
original = np.array([[1, 2, 3], [4, 5, 6]])
# Copy only the row that is needed (rather than copying the whole array
# and then slicing it), so sub_array owns its data independently.
sub_array = original[1].copy()
# These writes touch only sub_array; original is not modified.
sub_array[0] = 100
sub_array[2] = 471

### Reshaping arrays

In [None]:
# The size of the initial array must equal the size of the reshaped array.
# reshape() does not change x in place: it returns a new array object
# (a no-copy view of the same data when possible), so keep the result.
x = np.arange(1, 10)
print(x)
reshaped = x.reshape((3, 3))
print(reshaped)

### Concatenation of arrays

#### Arrays with the same dimensions (both one-dimensional and multi-dimensional)

In [None]:
# One-dimensional arrays are joined end to end.
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
z = np.array([7, 8, 9])
np.concatenate([x, z])
np.concatenate([x, y, z])

# Two-dimensional inputs are joined along the first axis (rows) by default;
# pass axis=1 to join them side by side along the columns instead.
a = ([1, 2, 3], [4, 5, 6])
b = ([7, 8, 9], [10, 11, 12])
np.concatenate([a, b])
np.concatenate([a, b], axis=1)


#### Arrays with different dimensions
Use np.vstack([array1, array2]) and np.hstack([array1, array2])


In [None]:
x = np.array([1, 2, 3])
a = ([1, 2, 3], [4, 5, 6])
b = ([7, 8, 9], [10, 11, 12])

# Stack the 1-D array x on top of the 2x3 data as an extra row.
np.vstack([x, a])

# Place a 2x1 column to the left of the 2x3 data.
c = ([13], [13])
np.hstack([c, a])

### Splitting arrays
Use np.split(array, [slice_index]), np.vsplit(array, [row_to_split_on]), and np.hsplit(array, [column_to_split_on])

In [None]:
# np.split cuts at the listed indices: [3, 5] yields the pieces
# x[:3], x[3:5] and x[5:].
x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
x1, x2, x3 = np.split(x, [3, 5])

# vsplit cuts between rows; hsplit cuts between columns.
grid = np.array([[0, 1, 2, 3],
                 [4, 5, 6, 7],
                 [8, 9, 10, 11],
                 [12, 13, 14, 15]])
upper, lower = np.vsplit(grid, [2])
left, right = np.hsplit(grid, [2])

### Computation on NumPy Arrays: Universal Functions
Vectorized operations in NumPy are implemented via ufuncs, whose main purpose is to quickly execute repeated operations on values in NumPy arrays.
Unary ufuncs operate on a single input and binary ufuncs operate on two inputs.


#### ufuncs operations

In [None]:
# Arithmetic operators on arrays are applied element by element.
x = np.arange(6)
print("x = ", x)
print("x + 5 = ", x + 5)
print("x - 5 = ", x - 5)
print("x * 2= ", x * 2)
# The label uses ** because ^ is bitwise XOR in Python, not exponentiation.
print("x ** 2= ", x ** 2)
print("x / 2= ", x / 2)
print("x % 2= ", x % 2)
print("x // 2= ", x // 2)  # Floor division
print("-x = ", -x)
print("(3 + x) * 2 = ", (3 + x) * 2)

# Python's built-in abs() and np.absolute() give the same element-wise result.
x = np.array([-2, -1, 0, 1, 2])
abs(x)
np.absolute(x)
abs(x) == np.absolute(x)

### Aggregations
Many of NumPy's aggregation operations exist in Python, but NumPy's versions operate more quickly. Here are some of NumPy's aggregation operations:
* np.sum: Compute sum of elements
* np.prod: Compute product of elements
* np.mean: Compute mean of elements
* np.std: Compute standard deviation
* np.var: Compute variance
* np.min: Find minimum value
* np.max: Find maximum value
* np.argmin: Find index of minimum value
* np.argmax: Find index of maximum value
* np.median: Compute median of elements
* np.percentile: Compute rank-based statistics of elements
* np.any: Evaluate whether any elements are true
* np.all: Evaluate whether all elements are true


In [None]:
x = np.random.random(100)
sum(x)     # Python's built-in sum
np.sum(x)  # NumPy's sum
# Both compute the same total, but they may add the floats in a different
# order, so the two results can differ in the last few bits. Compare with a
# tolerance instead of ==, which could report False for equal sums.
np.isclose(sum(x), np.sum(x))
# NumPy's version executes in compiled code, so it is computed much more
# quickly than the built-in on large arrays.

In [None]:
x = np.random.random(100)
min(x)  # Python built-in
max(x)  # Python built-in

np.min(x)  # NumPy
np.max(x)  # NumPy

np.sum(x)

# Multidimensional aggregates
x = np.random.random((3, 4))
x.sum()

# Aggregation methods accept an axis argument naming the axis along which
# the aggregate is computed.
# axis=0: minimum value within each column
x.min(axis=0)
# axis=1: maximum value within each row
x.max(axis=1)

Axis keyword: specifies the dimension of the array that will be collapsed, rather than the dimension that will be returned. So specifying axis=0 means that the first axis will be collapsed: for two-dimensional arrays, this means that values within each column will be aggregated.

### Broadcasting
For arrays of the same size, binary operations are performed on an element-by-element basis. Broadcasting allows these operations to be performed on arrays of different size.

In [None]:
# Same-shaped arrays: element-by-element addition.
a = np.array([0, 1, 2])
b = np.array([3, 4, 5])
a + b

# A 1-D array added to a 2-D array: a is added to every row of c.
c = M = np.ones((3, 3))
c + a

# A row of 3 values plus a column of 3 values broadcasts to a 3x3 result.
a = np.arange(3)
b = np.arange(3)[:, np.newaxis]
c = a + b
print(a)
print(b)
print(c)

#### Rules of broadcasting
1. If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.
2. If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
3. If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

### Comparisons and boolean arrays

In [None]:
x = np.array([1, 2, 3, 4, 5])
# Comparison operators work element by element and return a boolean array:
# each entry is True where the condition holds for the matching entry of x
# and False otherwise.
x < 3
x > 3
x <= 3
x >= 3
x == 3
x != 3
x == x ** 2
(2 * x) == x ** 2

# np.count_nonzero counts the True entries of a boolean array.
np.count_nonzero(x < 3)

# np.sum gives the same count, because False is interpreted as 0 and
# True as 1.
np.sum(x < 3)

# Like other NumPy aggregation functions, np.sum can also work along rows or
# columns. Here: the number of entries below 3 in each row of a 2x3 array.
np.sum(np.arange(6).reshape(2, 3) < 3, axis=1)

# Whether any / all entries satisfy a condition
np.any(x == 3)
np.all(x == 3)
np.all(x >= 0)

# A boolean array used as a mask selects only the entries where it is True
# (here, the elements of x that are less than 3).
x[x < 3]

### Sorting arrays
1. np.sort(array)
    * Returns the sorted array. By default, np.sort() uses a quicksort algorithm. Python has a version of this method, but it is much less efficient.
2. np.argsort(array)
    * Returns the indices that would sort the array, instead of the sorted array itself
3. np.partition(array, n)
    * Returns a new array with the smallest n values to the left of the partition, and the remaining values to the right. Elements on both sides of the partition are ordered arbitrarily.

In [None]:
x = np.array([4, 3, 1, 5, 2])
# np.sort returns a sorted copy and leaves x untouched.
np.sort(x)

# The sort method of the array sorts x itself, in place.
x.sort()
print(x)


In [None]:
# np.argsort on a one-dimensional array: the indices that put x in
# ascending order.
x = np.array([4, 3, 1, 5, 2])
i = np.argsort(x)
print(i)

# Sorting along the rows or columns of a two-dimensional array
x = np.array([[8, 5, 4, 6],
              [2, 7, 1, 2],
              [9, 3, 4, 6],
              [4, 8, 6, 4]])
print(x)

# axis=0 sorts the values within each column
np.sort(x, axis=0)
# axis=1 sorts the values within each row
np.sort(x, axis=1)

In [None]:
x = np.array([61, 13, 39, 44, 26, 7, 31, 88, 6])
# After partitioning at index 4, the four smallest values occupy the first
# four positions (in no particular order); the larger values follow.
np.partition(x, 4)