In [2]:
import numpy as np

In [7]:
# Scalar operations: the scalar operand is applied to every element of the array
a = np.array([1, 2, 3, 4])
a_plus = np.add(a, 1)       # scalar addition
a_prod = np.multiply(2, a)  # scalar multiplication
a_exp = np.power(a, 2)      # scalar exponentiation

for result in (a_plus, a_prod, a_exp):
    print(result)

[2 3 4 5]
[2 4 6 8]
[ 1  4  9 16]


In [21]:
# All arithmetic operates element-wise

# Scalar addition (again): every element of a length-4 array of ones becomes 2.0
b = 1 + np.ones(4)
print("B = " + str(b))

# Vector addition/subtraction (a is the array defined in the previous cell)
a_minus_b = np.subtract(a, b)
print("A - B = " + str(a_minus_b))

# Vector multiplication (element by element, not a dot product)
a_prod_b = np.multiply(a, b)
print("A * B = " + str(a_prod_b))

# Combining multiple operations in a single statement
j = np.arange(5)
combined = np.power(2, j + 1) - j
print(combined)

B = [ 2.  2.  2.  2.]
A - B = [-1.  0.  1.  2.]
A * B = [ 2.  4.  6.  8.]
[ 2  3  6 13 28]


In [22]:
# Pure Python vs NumPy performance characteristics

# Time a vectorised "+ 1" over a 10,000-element array; the following cell times
# the list-comprehension equivalent. Note that this rebinds the name `a`.
a = np.arange(10000)
%timeit a + 1

The slowest run took 9.71 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 5.68 µs per loop


In [25]:
# Pure-Python counterpart: add 1 to each of 10,000 integers with a list comprehension
l = range(10000)
%timeit [i + 1 for i in l]

1000 loops, best of 3: 684 µs per loop


In [26]:
# NumPy is much faster

In [28]:
# Matrix multiplication

# ! The * operator is NOT matrix multiplication; it multiplies the arrays element by element
c = np.ones((3, 3))
print("C = \n" + str(c))
c_array_squared = np.multiply(c, c)
print("C * C = \n" + str(c_array_squared))

# The dot product IS matrix multiplication
c_matrix_squared = np.dot(c, c)
print("C dot C = \n" + str(c_matrix_squared))

C = 
[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]
C * C = 
[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]
C dot C = 
[[ 3.  3.  3.]
 [ 3.  3.  3.]
 [ 3.  3.  3.]]


In [75]:
# Exercises

# Add even elements with odd elements and time them against vanilla Python
x = np.arange(20)
even = x[np.arange(0,20,2)]  # integer-array indexing: positions 0, 2, ..., 18
odd = x[np.arange(1,20,2)]   # positions 1, 3, ..., 19
print("Even = " + str(even))
print("Odd = " + str(odd))
print("Even + Odd (element-wise) = " + str(even + odd))
# NumPy timing; the timed statement also rebuilds x and both index arrays on every run
%timeit x = np.arange(20); x[np.arange(0,20,2)] + x[np.arange(1,20,2)]
# Pure-Python equivalent: select evens and odds with filter(), then add them pairwise
def is_even(n): return n % 2 == 0
def is_odd(n): return not is_even(n)
y = range(20)
even_y = filter(is_even, y)
odd_y = filter(is_odd, y)
print("Even + Odd = " + str([a[0] + a[1] for a in zip(even_y, odd_y)]))
# Pure-Python timing; the timed statement likewise rebuilds the range and both filters
%timeit y = range(20); [a[0] + a[1] for a in zip(filter(is_even, y), filter(is_odd, y))]

Even = [ 0  2  4  6  8 10 12 14 16 18]
Odd = [ 1  3  5  7  9 11 13 15 17 19]
Even + Odd (element-wise) = [ 1  5  9 13 17 21 25 29 33 37]
The slowest run took 11.50 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.67 µs per loop
Even + Odd = [1, 5, 9, 13, 17, 21, 25, 29, 33, 37]
100000 loops, best of 3: 12.2 µs per loop


In [77]:
# Generate powers of 2 up to the 4th power (2**0 .. 2**4), once with NumPy and
# once with a list comprehension, timing each version.
print("NumPy Version")
print(2**np.arange(0,5))
%timeit 2**np.arange(0,5)
print("Vanilla Python Version")
print([2**x for x in range(0,5)])
%timeit [2**x for x in range(0,5)]

NumPy Version
[ 1  2  4  8 16]
The slowest run took 15.11 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.91 µs per loop
Vanilla Python Version
[1, 2, 4, 8, 16]
1000000 loops, best of 3: 1.92 µs per loop


In [78]:
# I'm not sure what the last exercise is asking me for...

In [79]:
# Other operations

In [81]:
# Comparisons are evaluated element by element and produce a boolean array
a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])
equal_mask = np.equal(a, b)
greater_mask = np.greater(a, b)
print("a == b? " + str(equal_mask))
print("a > b? " + str(greater_mask))

a == b? [False  True False  True]
a > b? [False False  True False]


In [83]:
# Whole-array comparison: np.array_equal gives a single boolean for a pair of arrays
a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])
c = np.array([1, 2, 3, 4])
a_matches_b = np.array_equal(a, b)
a_matches_c = np.array_equal(a, c)
print("Does array 'a' == array 'b'? " + str(a_matches_b))
print("Does array 'a' == array 'c'? " + str(a_matches_c))

Does array 'a' == array 'b'? False
Does array 'a' == array 'c'? True


In [100]:
# Logical operations on boolean arrays (element-wise)
a = np.array([True, True, False, False])
b = np.array([True, False, True, False])
print(np.logical_or(a, b))
print(np.logical_and(a, b))
out = np.zeros(4, dtype=bool)
negated = np.logical_not(a, out=out)  # the result is written into `out` and also returned
print(np.array_equal(negated, out))
print(out)
print(np.logical_xor(a, b))

[ True  True  True False]
[ True False False False]
True
[False False  True  True]
[False  True  True False]


In [102]:
# Transcendental functions (applied element-wise)
a = np.arange(5)
sin_a = np.sin(a)
# log(0) is -inf, and NumPy reports it with a divide-by-zero RuntimeWarning.
# The -inf is the expected result here, so silence that warning for this call only.
with np.errstate(divide='ignore'):
    log_a = np.log(a)
exp_a = np.exp(a)

print(sin_a)
print(log_a)
print(exp_a)

[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025 ]
[       -inf  0.          0.69314718  1.09861229  1.38629436]
[  1.           2.71828183   7.3890561   20.08553692  54.59815003]




In [103]:
# Shape mismatches
# Shapes (5,) and (2,) cannot be broadcast together, so the addition raises ValueError.
# The error is the point of this cell: catch and display it so "Run All" keeps going.
a = np.arange(5)
shape_error = None
try:
    a + np.array([1, 2])
except ValueError as err:
    shape_error = err  # keep a reference; `err` is unbound when the except block ends
    print("ValueError: " + str(err))

ValueError: operands could not be broadcast together with shapes (5,) (2,) 

In [116]:
# Upper triangle above the main diagonal (k=1 excludes the diagonal), its transpose,
# and the lower triangle extended up to the first superdiagonal (np.tril with k=1).
ones_5x5 = np.ones((5, 5))
a = np.triu(ones_5x5, k=1)
print(a)
print(np.transpose(a))  # Transpose
print(np.tril(ones_5x5, k=1))

[[ 0.  1.  1.  1.  1.]
 [ 0.  0.  1.  1.  1.]
 [ 0.  0.  0.  1.  1.]
 [ 0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.]]
[[ 0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.]
 [ 1.  1.  0.  0.  0.]
 [ 1.  1.  1.  0.  0.]
 [ 1.  1.  1.  1.  0.]]
[[ 1.  1.  0.  0.  0.]
 [ 1.  1.  1.  0.  0.]
 [ 1.  1.  1.  1.  0.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]]


In [118]:
# Exercises

# IPython help syntax: a trailing "?" displays the docstring of np.allclose
np.allclose?
# np.allclose is useful, for example, when comparing float arrays, since the floats might not be exactly equal,
# or when comparing lab results which allow for a certain % error.

In [119]:
# IPython help syntax: display the docstring of np.triu
np.triu?

In [120]:
# IPython help syntax: display the docstring of np.tril
np.tril?

In [123]:
# Computing sums: the np.sum function and the ndarray.sum method give the same total
x = np.arange(5)
total_via_function = np.sum(x)
total_via_method = x.sum()
print(x)
print(total_via_function)
print(total_via_method)

[0 1 2 3 4]
10
10


In [131]:
# Sum by rows and by columns, each computed with the axis argument and then by hand
x = np.array([[1, 1], [2, 2]])
print(x)

# axis=0 collapses the rows, leaving one total per column
column_totals = np.sum(x, axis=0)
column_totals_by_hand = (x[:, 0].sum(), x[:, 1].sum())
print("Sum columns = " + str(column_totals))
print("Sum columns = " + str(column_totals_by_hand))

# axis=1 collapses the columns, leaving one total per row
row_totals = np.sum(x, axis=1)
row_totals_by_hand = (x[0, :].sum(), x[1, :].sum())
print("Sum rows = " + str(row_totals))
print("Sum rows = " + str(row_totals_by_hand))

[[1 1]
 [2 2]]
Sum columns = [3 3]
Sum columns = (3, 3)
Sum rows = [2 4]
Sum rows = (2, 4)


In [133]:
# Higher dimensional sums: reducing over the last axis of a 3-D array
x = np.random.rand(2, 2, 2)
print(x)
print(np.sum(x, axis=2)[0, 1])  # reduce the whole array, then pick entry (0, 1)
print(np.sum(x[0, 1]))          # pick the vector at (0, 1), then reduce it: same number

[[[ 0.99679957  0.36676061]
  [ 0.88093997  0.18820736]]

 [[ 0.2475781   0.59004487]
  [ 0.05173794  0.0318825 ]]]
1.06914733676
1.06914733676


In [150]:
# Other Reductions
# Extrema: the smallest/largest values and the positions at which they occur
x = np.array([1, 2, 4, 3])
print(np.min(x))
print(np.max(x))
print("Index of minimum = " + str(np.argmin(x)))
print("Index of maximum = " + str(np.argmax(x)))

1
4
Index of minimum = 0
Index of maximum = 2


In [156]:
# Logical reductions: np.all / np.any collapse a boolean array to a single truth value
print(np.all([True, True, False]))
print(np.any([False, True, False]))
zs = np.zeros((100, 100))
is_zero = zs == 0
print(np.any(~is_zero))
print(np.all(is_zero))
print(np.all(zs == zs))

a = np.array([1, 2, 3, 2])
b = np.array([2, 2, 3, 2])
c = np.array([6, 4, 4, 5])
# True only when a <= b <= c holds at every position
np.logical_and(a <= b, b <= c).all()

False
True
False
True
True


True

In [164]:
# Statistics
x = np.array([1, 2, 3, 1])
y = np.array([[1, 2, 3], [5, 6, 1]])
print(np.mean(x))
print(np.median(x))
print(np.median(y, axis=1))  # last axis: one median per row
print(np.median(y, axis=0))  # first axis: one median per column
np.std(x)  # full population standard deviation

1.75
1.5
[ 2.  5.]
[ 3.  4.  2.]


0.82915619758884995

In [170]:
# Reduction Exercises

# Q: What is there in addition to the 'sum' function?
# A: Product, called 'prod' after a quick lib search to confirm 

# Q: Difference between sum and cumsum?
# A: Sum just returns the final sum, cumsum returns an array containing all the intermediate sums calculated.

In [172]:
# Worked Example: data statistics
# Shell escape: print the raw contents of the data file (tab-separated, one row per year)
!cat data/populations.txt

# year	hare	lynx	carrot
1900	30e3	4e3	48300
1901	47.2e3	6.1e3	48200
1902	70.2e3	9.8e3	41500
1903	77.4e3	35.2e3	38200
1904	36.3e3	59.4e3	40600
1905	20.6e3	41.7e3	39800
1906	18.1e3	19e3	38600
1907	21.4e3	13e3	42300
1908	22e3	8.3e3	44500
1909	25.4e3	9.1e3	42100
1910	27.1e3	7.4e3	46000
1911	40.3e3	8e3	46800
1912	57e3	12.3e3	43800
1913	76.6e3	19.5e3	40900
1914	52.3e3	45.7e3	39400
1915	19.5e3	51.1e3	39000
1916	11.2e3	29.7e3	36700
1917	7.6e3	15.8e3	41800
1918	14.6e3	9.7e3	43300
1919	16.2e3	10.1e3	41300
1920	24.7e3	8.6e3	47300


In [174]:
# Load the table; unpacking its transpose assigns one column to each variable
data = np.loadtxt('data/populations.txt')
year, hares, lynxes, carrots = np.transpose(data)

In [188]:
# An example of storing data to variables by row: integer-array indexing copies rows 0, 1 and 2
one, two, three = data[[0, 1, 2]]

In [196]:
from matplotlib import pyplot as plt
# Axes rectangle given as [left, bottom, width, height] in figure fractions;
# the reduced width leaves room on the right for the legend.
plt.axes([0.2, 0.1, 0.5, 0.8])
# One line per species, each plotted against the year
plt.plot(year,hares,year,lynxes,year,carrots)
# Labels follow the plotting order; loc is an (x, y) position in axes coordinates,
# so x = 1.05 puts the legend just outside the right edge of the axes.
plt.legend(('Hare','Lynx','Carrot'), loc=(1.05,.05))

<matplotlib.legend.Legend at 0x10c011dd8>

In [197]:
# Render the population figure assembled in the previous cell
plt.show()

In [198]:
# The mean populations over time
populations = data[:, 1:]  # every column except the first (the year)
np.mean(populations, axis=0)

array([ 34080.95238095,  20166.66666667,  42400.        ])

In [199]:
# The std deviations of the dataset, one per species column
np.std(populations, axis=0)

array([ 20897.90645809,  16254.59153691,   3322.50622558])

In [200]:
# Which species has the highest population each year?
# (column index of the row-wise maximum: one index per year)
populations.argmax(axis=1)

array([2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2])

In [215]:
# ^ but with labels: translate each winning column index into its species name
species_by_column = {0: 'Hare', 1: 'Lynx', 2: 'Carrot'}
[species_by_column[n] for n in np.argmax(populations, axis=1)]

['Carrot',
 'Carrot',
 'Hare',
 'Hare',
 'Lynx',
 'Lynx',
 'Carrot',
 'Carrot',
 'Carrot',
 'Carrot',
 'Carrot',
 'Carrot',
 'Hare',
 'Hare',
 'Hare',
 'Lynx',
 'Carrot',
 'Carrot',
 'Carrot',
 'Carrot',
 'Carrot']

In [235]:
# Worked Example
# Diffusion using a random walk algorithm

n_stories = 1000 # number of walkers
t_max = 200      # time duration for which each walker is followed

# Randomly 1 or -1 for each step of the walk at each time unit
t = np.arange(t_max) # the time axis
# np.random.random_integers is deprecated. randint draws from the half-open range
# [0, 2), i.e. 0 or 1, and "2 * draw - 1" maps those onto -1 and +1.
steps = 2 * np.random.randint(0, 2, size=(n_stories, t_max)) - 1
# Sanity check: every step is either -1 or +1
np.all(np.abs(steps) == 1)

True

In [239]:
# Build the walks by summing the steps along the timeline
positions = steps.cumsum(axis=1)    # axis 1 = timeline: running position of each walker
sq_distance = np.square(positions)  # squared distance from the starting point
sq_distance

array([[  1,   0,   1, ...,   4,   9,  16],
       [  1,   0,   1, ...,  64,  81,  64],
       [  1,   4,   1, ...,  16,   9,  16],
       ..., 
       [  1,   0,   1, ..., 484, 441, 400],
       [  1,   0,   1, ..., 100,  81, 100],
       [  1,   4,   9, ...,  64,  81,  64]])

In [241]:
# Get the mean in the stories axis: average over all walkers at each time step
mean_sq_distance = sq_distance.mean(axis=0)  # axis 0 = storyline
mean_sq_distance

array([   1.   ,    1.92 ,    2.88 ,    4.048,    5.096,    6.14 ,
          7.12 ,    8.24 ,    9.064,   10.288,   11.176,   12.06 ,
         12.904,   13.944,   15.528,   16.088,   16.96 ,   17.844,
         18.96 ,   20.02 ,   20.16 ,   20.82 ,   22.32 ,   23.716,
         24.552,   25.364,   26.576,   27.716,   28.736,   29.596,
         30.464,   31.392,   31.408,   31.948,   32.592,   33.72 ,
         35.024,   35.976,   37.096,   37.948,   39.16 ,   40.308,
         41.   ,   42.124,   43.432,   43.684,   44.52 ,   46.088,
         48.04 ,   49.224,   50.696,   50.284,   50.808,   51.844,
         53.368,   53.92 ,   55.136,   56.66 ,   56.48 ,   57.32 ,
         58.76 ,   60.352,   61.512,   62.368,   63.976,   64.572,
         65.16 ,   66.08 ,   67.84 ,   69.444,   70.24 ,   71.308,
         72.304,   73.848,   74.592,   74.808,   74.952,   76.284,
         76.752,   78.432,   80.272,   80.58 ,   82.048,   83.048,
         82.832,   83.38 ,   83.6  ,   84.344,   85.72 ,   87.

In [243]:
# plot the results
plt.figure(figsize=(4,3))
# Green dots: root of the simulated mean squared distance; yellow line: the sqrt(t) reference curve
plt.plot(t, np.sqrt(mean_sq_distance), 'g.', t, np.sqrt(t), 'y-')
plt.xlabel(r"$t$")
plt.ylabel(r"$\sqrt{\langle (\delta x)^2 \rangle}$")
plt.show()

In [244]:
# As you can see, this is a square-root curve (not a logarithmic one): the RMS distance grows as the sqrt of the time

In [257]:
# Broadcasting (performing operations on different-sized arrays)
row_values = np.arange(0, 40, 10)
a = np.tile(row_values, (3, 1)).T  # shape (4, 3): row i holds three copies of 10 * i
print(a, end="\n\n")
b = np.array([0, 1, 2])
print(b, end="\n\n")

# a has shape (4, 3) and b has shape (3,): b is added to every row of a
print(a + b)

[[ 0  0  0]
 [10 10 10]
 [20 20 20]
 [30 30 30]]

[0 1 2]

[[ 0  1  2]
 [10 11 12]
 [20 21 22]
 [30 31 32]]


In [259]:
# Another example of broadcasting at play
a = np.ones((4, 5))
print(a, end="\n\n")
# Assign a scalar to a whole row: the single value 2 propagates to all five entries of row 0
a[0, :] = 2
print(a)

[[ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]]

[[ 2.  2.  2.  2.  2.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]]


In [263]:
# A useful trick: turn a 1-D array into a column with np.newaxis, then let broadcasting build the grid
a = np.arange(0, 40, 10) # remember -> arange([min, max), step)
print(a, end="\n\n")
print(a.shape)
a = a[:, np.newaxis] # inserts a length-1 axis: the 1-D array becomes a 2-D column
print(a.shape, end="\n\n")
print(a, end="\n\n")
print(b, end="\n\n")  # b is the length-3 array from the earlier broadcasting cell
# The (4, 1) column is stretched across 3 columns and the (3,) array is repeated down 4 rows -> (4, 3)
print(a + b)

[ 0 10 20 30]

(4,)
(4, 1)

[[ 0]
 [10]
 [20]
 [30]]

[0 1 2]

[[ 0  1  2]
 [10 11 12]
 [20 21 22]
 [30 31 32]]


In [268]:
# Worked Example: Broadcasting

# Construct an array of distances in miles between cities of Route 66: [Chicago, Springfield, Saint Louis, Tulsa,
# Oklahoma City, Amarillo, Santa Fe, Albuquerque, Flagstaff, Los Angeles].

mileposts = np.array([0, 198, 303, 736, 871, 1175, 1475, 1544, 1913, 2448])
mileposts_column = mileposts[:, np.newaxis]  # shape (10, 1)
print(mileposts, end="\n\n")
print(mileposts_column, end="\n\n")
# (10,) against (10, 1) broadcasts to (10, 10): entry [i, j] is the distance between city i and city j
distance_array = np.abs(mileposts - mileposts_column)
distance_array

[   0  198  303  736  871 1175 1475 1544 1913 2448]

[[   0]
 [ 198]
 [ 303]
 [ 736]
 [ 871]
 [1175]
 [1475]
 [1544]
 [1913]
 [2448]]



array([[   0,  198,  303,  736,  871, 1175, 1475, 1544, 1913, 2448],
       [ 198,    0,  105,  538,  673,  977, 1277, 1346, 1715, 2250],
       [ 303,  105,    0,  433,  568,  872, 1172, 1241, 1610, 2145],
       [ 736,  538,  433,    0,  135,  439,  739,  808, 1177, 1712],
       [ 871,  673,  568,  135,    0,  304,  604,  673, 1042, 1577],
       [1175,  977,  872,  439,  304,    0,  300,  369,  738, 1273],
       [1475, 1277, 1172,  739,  604,  300,    0,   69,  438,  973],
       [1544, 1346, 1241,  808,  673,  369,   69,    0,  369,  904],
       [1913, 1715, 1610, 1177, 1042,  738,  438,  369,    0,  535],
       [2448, 2250, 2145, 1712, 1577, 1273,  973,  904,  535,    0]])

In [269]:
# Compute the distance of each point from the origin on a 5x5 grid
x = np.arange(5)                 # shape (5,)
y = np.arange(5)[:, np.newaxis]  # shape (5, 1)
# pythagorean theorem; broadcasting (5,) against (5, 1) yields the full (5, 5) grid
distance = np.sqrt(np.square(x) + np.square(y))
distance

array([[ 0.        ,  1.        ,  2.        ,  3.        ,  4.        ],
       [ 1.        ,  1.41421356,  2.23606798,  3.16227766,  4.12310563],
       [ 2.        ,  2.23606798,  2.82842712,  3.60555128,  4.47213595],
       [ 3.        ,  3.16227766,  3.60555128,  4.24264069,  5.        ],
       [ 4.        ,  4.12310563,  4.47213595,  5.        ,  5.65685425]])

In [271]:
# Draw the distance grid as a pseudocolor plot with a colorbar alongside
plt.pcolor(distance)
plt.colorbar()
plt.show()

In [280]:
# the 'ogrid' function allows you to do the same as the above more succinctly:
# it returns an "open" grid, i.e. a (5, 1) column and a (1, 5) row that broadcast together
x, y = np.ogrid[:5, :5]
for axis_values in (x, y):
    print(axis_values)
print(x.shape, y.shape)
distance = np.sqrt(np.square(x) + np.square(y))
distance

[[0]
 [1]
 [2]
 [3]
 [4]]
[[0 1 2 3 4]]
(5, 1) (1, 5)


array([[ 0.        ,  1.        ,  2.        ,  3.        ,  4.        ],
       [ 1.        ,  1.41421356,  2.23606798,  3.16227766,  4.12310563],
       [ 2.        ,  2.23606798,  2.82842712,  3.60555128,  4.47213595],
       [ 3.        ,  3.16227766,  3.60555128,  4.24264069,  5.        ],
       [ 4.        ,  4.12310563,  4.47213595,  5.        ,  5.65685425]])

In [284]:
# Same pseudocolor plot as before, this time for the distance grid built with ogrid
plt.pcolor(distance)
plt.colorbar()
plt.show()

In [288]:
# You can also use mgrid where broadcasting is not possible or desirable:
# it returns fully expanded index grids (both arrays are 5x5 here)
x, y = np.mgrid[:5, :5]
print(x, end="\n\n")
print(y)

[[0 0 0 0 0]
 [1 1 1 1 1]
 [2 2 2 2 2]
 [3 3 3 3 3]
 [4 4 4 4 4]]

[[0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]]


In [289]:
# Array shape manipulation

# flattening: ravel lays the elements out in one dimension, row by row
a = np.array([[1, 2, 3], [4, 5, 6]])
np.ravel(a)

array([1, 2, 3, 4, 5, 6])

In [290]:
# Transpose: rows become columns
a.transpose()

array([[1, 4],
       [2, 5],
       [3, 6]])

In [291]:
# Flattening the transpose walks the original array column by column
a.transpose().ravel()

array([1, 4, 2, 5, 3, 6])

In [297]:
# Reshaping: flatten a, then restore the (2, 3) shape and confirm nothing changed
print(a.shape)
b = np.ravel(a)
print(b)
b = np.reshape(b, (2, 3))
print(b, end="\n\n")
print(a)
print(np.array_equal(a, b))

(2, 3)
[1 2 3 4 5 6]
[[1 2 3]
 [4 5 6]]

[[1 2 3]
 [4 5 6]]
True


In [298]:
# A dimension given as -1 is inferred from the array's size and the other dimensions (here: 2 rows)
a.reshape(2, -1)

array([[1, 2, 3],
       [4, 5, 6]])

In [303]:
print(b, end="\n\n")
b[0, 0] = 99
print(b[0, 0])

# Beware: reshape, ravel, and other shaping functions may hand back a view rather than a deep copy,
# so the write through b above is visible through a as well
print(a[0, 0])

[[99  2  3]
 [ 4  5  6]]

99
99


In [304]:
# But reshape MAY return a copy in some cases
a = np.zeros((3, 2))
b = np.reshape(a.T, a.size)  # flattening the transpose here yields a copy
b[0] = 9                     # ...so this write does not reach a
a

array([[ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.]])

In [None]:
# For an intuitive understanding of this behavior, look into the memory layout of NumPy arrays.

In [313]:
# Dimension shuffling: reorder the axes of a 3-D array with transpose
a = np.arange(4 * 3 * 2)
x = a.reshape((4, 3, 2))
print(a)
print(x, end="\n\n")
print(x.shape)
b = np.transpose(x, (1, 2, 0))  # new axes 0, 1, 2 are the old axes 1, 2, 0
print(b.shape, end="\n\n")
print(b, end="\n\n")
print(b[2, 1, 0])               # same element as x[0, 2, 1]

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[[[ 0  1]
  [ 2  3]
  [ 4  5]]

 [[ 6  7]
  [ 8  9]
  [10 11]]

 [[12 13]
  [14 15]
  [16 17]]

 [[18 19]
  [20 21]
  [22 23]]]

(4, 3, 2)
(3, 2, 4)

[[[ 0  6 12 18]
  [ 1  7 13 19]]

 [[ 2  8 14 20]
  [ 3  9 15 21]]

 [[ 4 10 16 22]
  [ 5 11 17 23]]]

5


In [316]:
# The above also operates on views, not deep copies:
# b[2, 1, 0] and x[0, 2, 1] address the same element, so writing through b changes x
b[2,1,0] = -1
x[0,2,1]

-1

In [317]:
# Resizing

# ndarray.resize changes the size of the array in place (the original object is mutated);
# as the output shows, the added entries are zeros.
a = np.arange(4)
a.resize(8)
a

array([0, 1, 2, 3, 0, 0, 0, 0])

In [318]:
# However, it must not be referred to somewhere else.
# The ValueError is the point of this cell: catch and display it so "Run All" keeps going.
b = a
try:
    a.resize((4,))
except ValueError as err:
    # resize refuses to change the size of an array that is still referenced elsewhere
    print("ValueError: " + str(err))

ValueError: cannot resize an array that references or is referenced
by another array in this way.  Use the resize function

In [319]:
# Exercises

# 1. Read the docstring for reshape (IPython help syntax: a trailing "?" shows the docstring)
np.ndarray.reshape?

In [320]:
# Same lookup for the function form, np.reshape
np.reshape?

In [326]:
# 2. Use flatten as an alternative to ravel. Specify the differences (particularly with regards to which returns a
#    view vs a copy)
a = np.array([[1, 2, 3], [4, 5, 6]])
print(a, end="\n\n")
f = a.flatten()
r = a.ravel()
print(f, end="\n\n")
print(r, end="\n\n")

# Writing through the flatten() result leaves a untouched: f is a copy
f[0] = 999
print(a, end="\n\n")
print(f, end="\n\n")

# Writing through the ravel() result changes a: r is a view here
r[0] = 999
print(a, end="\n\n")
r

[[1 2 3]
 [4 5 6]]

[1 2 3 4 5 6]

[1 2 3 4 5 6]

[[1 2 3]
 [4 5 6]]

[999   2   3   4   5   6]

[[999   2   3]
 [  4   5   6]]



array([999,   2,   3,   4,   5,   6])

In [327]:
# ^ ravel returns a view whenever possible (and a copy otherwise); flatten always returns a copy.  When memory is critical, use ravel; use flatten when the result must be independent of the original array.

In [335]:
# IPython help syntax: display the docstring of np.transpose
np.transpose?

In [330]:
# A 3-D array of ones with shape (1, 2, 3): one block of 2 rows by 3 columns
np.ones((1,2,3))

array([[[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]])

In [338]:
x = np.array([[[1, 2, 3], [4, 5, 6]]])  # shape (1, 2, 3)
print(x, end="\n\n")

# With no axes given, transpose reverses the axis order: (1, 2, 3) -> (3, 2, 1)
print("Transpose default")
print(np.transpose(x), end="\n\n")

# With an explicit permutation only the first two axes are swapped: (1, 2, 3) -> (2, 1, 3)
print("Transpose to specific dimension")
swapped = np.transpose(x, (1, 0, 2))
print(swapped)
print(swapped.shape)

[[[1 2 3]
  [4 5 6]]]

Transpose default
[[[1]
  [4]]

 [[2]
  [5]]

 [[3]
  [6]]]

Transpose to specific dimension
[[[1 2 3]]

 [[4 5 6]]]
(2, 1, 3)


In [339]:

# Sorting Data


In [342]:
# Sorting along an axis (np.sort returns a sorted copy; a itself is left as it was)
a = np.array([[4, 3, 5], [6, 2, 1]])
b = np.sort(a, axis=1)  # each row sorted independently
print(b, end="\n\n")
b = np.sort(a, axis=0)  # each column sorted independently
print(b)

[[3 4 5]
 [1 2 6]]

[[4 2 1]
 [6 3 5]]


In [343]:
# in-place sort: unlike np.sort, the ndarray.sort method reorders a itself (here, within each row)
a.sort(axis=1)
a

array([[3, 4, 5],
       [1, 2, 6]])

In [345]:
# argsort gives the indices that would put the array in sorted order;
# indexing with them yields the sorted contents.
a = np.array([4, 3, 1, 2])
j = a.argsort()
print(j)
np.take(a, j)

[2 3 1 0]


array([1, 2, 3, 4])

In [346]:
# Finding minima and maxima: positions of the largest and smallest elements
a = np.array([4, 3, 1, 2])
j_max, j_min = a.argmax(), a.argmin()
j_max, j_min

(0, 2)

In [353]:
# Exercises
# np.random.shuffle reorders the array in place (it returns None), so display x afterwards
x = np.arange(12)
np.random.shuffle(x)
x

array([ 9,  7,  2,  6, 10,  0,  4,  5,  1,  3, 11,  8])

In [370]:
# Note: shuffle only permutes along the first axis. shuffle(x) reorders whole rows; shuffle(x.T) then
# reorders whole columns of x (as the output shows, x itself changes). Elements that start in the same row
# therefore always stay together in one row: this is not a full shuffle of all 16 elements.
x = np.arange(16).reshape((4,4))
print(x)
print()
np.random.shuffle(x)
print(x)
np.random.shuffle(x.T)
x

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

[[12 13 14 15]
 [ 8  9 10 11]
 [ 4  5  6  7]
 [ 0  1  2  3]]


array([[15, 14, 13, 12],
       [11, 10,  9,  8],
       [ 7,  6,  5,  4],
       [ 3,  2,  1,  0]])

In [371]:
# To shuffle everything completely randomly, flatten the matrix using ravel to grab a view and shuffle the
# view; because the view shares memory with the matrix, the original matrix is shuffled too.
x = np.arange(16).reshape((4, 4))
print(x)
flat_view = x.ravel()
np.random.shuffle(flat_view)
x

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


array([[ 8, 10,  5,  7],
       [15, 13,  9,  1],
       [ 0, 11,  2, 12],
       [ 6, 14,  4,  3]])

In [372]:
# To re-sort x: sort the flattened view in place, which reorders x itself
flat_view = x.ravel()
flat_view.sort()
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [392]:
# Or to sort first and then create a matrix
# np.random.random_integers is deprecated. randint's upper bound is exclusive,
# so 101 keeps the original inclusive range 0..100.
x = np.random.randint(0, 101, size=64)
x.sort()  # in-place, ascending
y = x.reshape((4, 4, 4))
y

array([[[ 1,  2,  4,  6],
        [ 6,  7,  8, 10],
        [14, 15, 15, 17],
        [18, 18, 26, 27]],

       [[28, 31, 31, 32],
        [33, 35, 36, 42],
        [43, 48, 48, 49],
        [53, 53, 53, 60]],

       [[62, 63, 65, 67],
        [68, 70, 71, 71],
        [72, 74, 75, 77],
        [77, 78, 79, 80]],

       [[81, 82, 84, 85],
        [85, 86, 86, 87],
        [88, 90, 90, 91],
        [95, 97, 98, 98]]])

In [395]:
# Sorting using axis
# Shuffle every element of y in place through its flattened view, then show the scrambled array
np.random.shuffle(y.ravel())
print(y)

# sort by column: axis=1 of the 3-D array runs down the rows of each 4x4 block,
# so every column of every block ends up in ascending order from top to bottom
y.sort(axis=1)
y

[[[53 10 42 49]
  [82 98 85 33]
  [35 71 53 17]
  [ 2 85 14 31]]

 [[98  6 48  7]
  [81 15 18 18]
  [86 70 26 53]
  [65 79 28 90]]

 [[95 77 27 88]
  [86 80  6  1]
  [87 67 62 72]
  [71 60 43 48]]

 [[84 91 97 74]
  [63  8 31 78]
  [77  4 90 15]
  [75 36 68 32]]]


array([[[ 2, 10, 14, 17],
        [35, 71, 42, 31],
        [53, 85, 53, 33],
        [82, 98, 85, 49]],

       [[65,  6, 18,  7],
        [81, 15, 26, 18],
        [86, 70, 28, 53],
        [98, 79, 48, 90]],

       [[71, 60,  6,  1],
        [86, 67, 27, 48],
        [87, 77, 43, 72],
        [95, 80, 62, 88]],

       [[63,  4, 31, 15],
        [75,  8, 68, 32],
        [77, 36, 90, 74],
        [84, 91, 97, 78]]])