In [None]:
import numpy as np

In [None]:
# A numpy array consists of a pointer to its data, a data type (dtype), a shape, and strides.
# Strides give the number of bytes to step in each dimension to reach the next element.
# The shape is written as the 1-tuple (5,): a bare (5) is just the integer 5 in parentheses.
arr_1d_int = np.zeros((5,), dtype=np.int64)

In [None]:
arr_1d_int.dtype

In [None]:
arr_1d_int.shape

In [None]:
# A 64 bit integer takes up 8 bytes - so the stride is 8.
arr_1d_int.strides

In [None]:
arr_2d_int32 = np.zeros((4, 5), dtype=np.int32)

In [None]:
arr_2d_int32.dtype

In [None]:
arr_2d_int32.shape

In [None]:
# A 32 bit integer takes up 4 bytes.
# In the second dimension, a stride is 4 because the next element in that dimension is only 4 bytes away.
# In the first dimension, a stride is 20 because the entire second dimension (of length 5 * 4 bytes) is traversed
# to reach the next element.
arr_2d_int32.strides

In [None]:
np.int64.mro()

In [None]:
np.uint8.mro()

In [None]:
np.float64.mro()

In [None]:
# np.string_ was an alias of np.bytes_ and was removed in NumPy 2.0; use the canonical name.
np.bytes_.mro()

In [None]:
np.object_.mro()

In [None]:
np.issubdtype(np.int64, np.number)

In [None]:
np.issubdtype(np.int64, np.signedinteger)

In [None]:
np.issubdtype(np.uint16, np.signedinteger)

In [None]:
arr = np.arange(16)

In [None]:
# Reshaping arrays in numpy follows two different orderings - C and Fortran 
# (named after the respective programming languages).  By default reshape() uses C ordering, which is row major.
# This means the row is filled out one at a time.
arr.reshape((4, 4))

In [None]:
# C ordering (row major) can be explicitly specified.
arr.reshape((4, 4), order='C')

In [None]:
# Fortran ordering (column major) fills out columns one at a time.
arr.reshape((4, 4), order='F')

In [None]:
arr = arr.reshape((4, 4))

In [None]:
arr.ravel()

In [None]:
arr.flatten()

In [None]:
arr

In [None]:
arr.ravel('F')

In [None]:
arr

In [None]:
arr1 = np.array([[1], [2]])
arr2 = np.array([[3], [4]])
np.concatenate((arr1, arr2))

In [None]:
np.concatenate((arr1, arr2), axis=0)

In [None]:
np.concatenate((arr1, arr2), axis=1)

In [None]:
# Equivalent to np.concatenate() on axis 0
np.vstack((arr1, arr2))

In [None]:
# Equivalent to np.concatenate() on axis 1
np.hstack((arr1, arr2))

In [None]:
arr = np.arange(10)
np.split(arr, [2, 4, 9])

In [None]:
arr = np.arange(3).repeat(2)
arr

In [None]:
np.array([5]).repeat(5)

In [None]:
np.full([5], 5)

In [None]:
np.tile(arr, (2, 1))

In [None]:
np.tile(arr, (1, 2))

In [None]:
# Broadcasting demo: arr2[:, np.newaxis] turns the 1-D array [1, 2, 3] into a (3, 1) column,
# which is then stretched across the three columns of arr1 on assignment,
# so row i of arr1 is filled with arr2[i].
arr1 = np.zeros((3, 3))
arr2 = np.array([1, 2, 3])
arr1[:] = arr2[:, np.newaxis]
arr1

In [None]:
np.add.reduce(np.arange(10))

In [None]:
# Logical AND chained with reduce() is equivalent to all()
arr = np.array([1, 1])
np.logical_and.reduce(arr == 1)

In [None]:
arr = np.arange(16).reshape((4, 4))
np.add.accumulate(arr, axis=0)

In [None]:
np.add.accumulate(arr, axis=1)

In [None]:
np.divide.outer(np.array([3, 6, 9]), np.array([1, 2, 3]))

In [None]:
np.multiply.outer(np.array([3, 6, 9]), np.array([1, 2, 3]))

In [None]:
# reduceat() reduces each slice between consecutive indices: [0+1, 2+3, 4] -> [1, 5, 4]
np.add.reduceat(np.arange(5), [0, 2, 4])

In [None]:
# Custom ufuncs can be created with frompyfunc().  Note that these functions take a performance hit
# compared to their numpy counterparts.  There is a way to speed up custom functions to numpy-like performance
# with the numba library.
def miles_to_kilometers(miles):
    """Convert a distance in miles to kilometers, using 1 mile ~= 1.609 km."""
    km_per_mile = 1.609
    return miles * km_per_mile

# Create a custom unary ufunc (takes a single argument) that converts miles to kilometers
mile2km = np.frompyfunc(miles_to_kilometers, 1, 1)

arr = np.array([1, 3, 5])
mile2km(arr)

In [None]:
def comma_separated_strings(x, y):
    """Return both values rendered as text and joined by a comma and a space."""
    return '{}, {}'.format(x, y)

# Create a custom binary ufunc (takes two arguments) that concatenates two strings with a comma
cs_str = np.frompyfunc(comma_separated_strings, 2, 1)

arr1 = np.array(['first', 'last'])
arr2 = np.array(['andy', 'jarombek'])

cs_str(arr1, arr2)

In [None]:
# The arrays returned from a custom frompyfunc() function always have the dtype object
cs_str(arr1, arr2).dtype

In [None]:
mile2km(arr).dtype

In [None]:
# The type can be more specific with the help of the vectorize() function.
mile2km = np.vectorize(miles_to_kilometers, otypes=[np.float64])
mile2km(arr)

In [None]:
mile2km(arr).dtype

In [None]:
# np.unicode was a deprecated alias (NumPy 1.20) that newer releases no longer provide;
# np.str_ is the supported way to request a unicode string output dtype.
cs_str = np.vectorize(comma_separated_strings, otypes=[np.str_])
cs_str(arr1, arr2)

In [None]:
cs_str(arr1, arr2).dtype

In [None]:
# Unfortunately, these custom ufuncs take a major performance hit
%timeit mile2km(arr)

In [None]:
%timeit arr * 1.609

In [None]:
# More complex data types are possible in numpy arrays
metric_dtype = [('miles', np.int32), ('kilometers', np.float64)]
mi_km_arr = np.array([(1, 1.609), (2, 3.218)], dtype=metric_dtype)
mi_km_arr

In [None]:
mi_km_arr[0]['miles']

In [None]:
mi_km_arr[0]['kilometers']

In [None]:
mi_km_arr['kilometers']

In [None]:
# Sort a numpy array in place, similar to list.sort() on Python lists
arr = np.array([2, 3, 1])
arr.sort()
arr

In [None]:
# Sort a numpy array, returning a new array instance
np.sort(np.array([2, 3, 1]))

In [None]:
arr = np.array([[9, 6, 3], [8, 5, 2], [7, 4, 1]])
np.sort(arr)

In [None]:
np.sort(arr, axis=0)

In [None]:
np.sort(arr, axis=1)

In [None]:
arr = np.array([6, 8, 3, 5, 1])
indexer = arr.argsort()
indexer

In [None]:
arr[indexer]

In [None]:
# You can also use different sorting algorithms (defaults to quick sort)
indexer = arr.argsort(kind='heapsort')
indexer

In [None]:
# The result is the same
arr[indexer]

In [None]:
arr = np.random.randn(1000)
%timeit arr[arr.argsort(kind='quicksort')]

In [None]:
%timeit arr[arr.argsort(kind='mergesort')]

In [None]:
%timeit arr[arr.argsort(kind='heapsort')]

In [None]:
# Perform a binary search on a sorted array.
arr = np.array([1, 2, 4, 8, 16, 32, 64])
arr.searchsorted(16)

In [None]:
arr.searchsorted([2, 8, 32])

In [None]:
# Create a memory-mapped 2x2 float64 array backed by the file 'sample_mmap'.
# mode='w+' creates the file (or overwrites an existing one) for reading and writing;
# the path is relative, so the file is placed in the current working directory.
mmap = np.memmap('sample_mmap', dtype='float64', mode='w+', shape=(2, 2))
mmap

In [None]:
mmap[0] = 1
mmap

In [None]:
mmap.flush()
del mmap

In [None]:
try:
    mmap
except NameError:
    print("mmap does not exist")

In [None]:
# Revive the memory map
mmap = np.memmap('sample_mmap', dtype='float64', shape=(2, 2))
mmap

In [None]:
arr = np.random.randn(10)
arr.flags

In [None]:
arr = np.arange(4).reshape((2, 2))
arr.flags

In [None]:
arr = np.ones((2, 2), order='F')
arr.flags

In [None]:
%timeit np.ones((100, 100), order='C').sum(1)

In [None]:
%timeit np.ones((100, 100), order='F').sum(1)