In [None]:
%matplotlib inline
import numpy as np

# Broadcasting

## Example 1: Quick and simple broadcast sum

In [None]:
# Same example as last time: a (4, 1) column plus a (3,) vector.
A = np.array([0.0, 10.0, 20.0, 30.0]).reshape((4, 1))
B = np.array([1.0, 2.0, 3.0])

print(A.shape)
print(B.shape)
print(A + B)
# What just happened? The two shapes differ, yet the sum is a (4, 3) array:
# every element of A was added to every element of B.

In [None]:
# But does that make any sense?
# I think so. Check the next examples.

## Example 2: Translate all 2D points so that their center is at the origin

In [None]:
from matplotlib import pyplot as plt
# Build a side x side grid of 2-D points, one point per row of `pts`.
# np.tile and np.repeat need integer repeat counts; the float returned by
# np.sqrt(npoints) is rejected by current NumPy, so the grid side is kept as
# an int and npoints is derived from it.
side = 4
npoints = side * side
axis_values = np.arange(side, dtype=np.float64)
xs = np.tile(axis_values, side)    # 0, 1, 2, 3, 0, 1, 2, 3, ...
ys = np.repeat(axis_values, side)  # 0, 0, 0, 0, 1, 1, 1, 1, ...
pts = np.empty(shape=(npoints, 2), dtype=np.float64)
pts[:, 0] = xs
pts[:, 1] = ys
# Draw the points twice on the same axes: scatter's default markers first,
# then red circles ('ro') on top. pts.T unpacks into the x row and the y row.
plt.scatter(*pts.T)
plt.plot(pts.T[0], pts.T[1], 'ro')

In [None]:
# Show the raw coordinates: one (x, y) pair per row.
print(pts)

In [None]:
# Center the cloud: subtract the per-column mean from every point.
center = pts.mean(axis=0)
pts = pts - center  # Broadcast: a (2,) vector against a (16, 2) array

# How the two shapes are reconciled:
#
#   start                                  pts: (16, 2)   center:     (2,)
#   1) pad the shorter shape with 1's
#      on the left                         pts: (16, 2)   center:  (1, 2)
#   2) stretch every 1 up to the size
#      found in the other operand          pts: (16, 2)   center: (16, 2)
#   3) all shapes are equal, so the subtraction goes ahead element by element

plt.scatter(pts[:, 0], pts[:, 1])
plt.plot(pts[:, 0], pts[:, 1], 'bo')

## Example 3: Select all points whose Y is 1.0 or 2.0, within a tolerance of +/- 0.1

In [None]:
# Inputs for this example.
# Accepted distance between a point's Y and one of the target values.
tolerance = 0.1
# Target Y values.
valid_ys = np.array([2.0, 1.0])
# One point per row; column 1 is the Y coordinate used in the next cells.
pts = np.array(
    [
        [1.577, 100.7, 2.09],
        [3.3, 2.01, 2.03],
        [4.2, 1.995, 5.333],
        [2.2, 10.8, 8.1],
        [2.2, 1.02, 2.0],
        [1.001, 0.9999, 9.9513],
    ]
)

In [None]:
# Take the Y coordinate (column 1) of every point.
y_values = pts[:, 1]
print(y_values)
print(y_values.shape)

# Proposed solution: first get the difference between y_values and valid_ys.
# Problem: both arrays are 1-D but of different lengths, (6,) and (2,), so they
# cannot be broadcast against each other as they are.
print(valid_ys.shape)

In [None]:
# Solution: reshape y_values into a column, shape (6,) -> (6, 1).
# -1 lets NumPy work out the number of rows. Equivalent spellings:
#   y_values.reshape(6, 1)
#   y_values[:, np.newaxis]
y_values = y_values.reshape(-1, 1)
print(y_values)
print(y_values.shape)

In [None]:
# Difference between every point's Y and every valid Y.
difference = y_values - valid_ys  # Broadcast!

# Explaining the broadcast:
# y_values.shape = 6, 1
# valid_ys.shape =    2

# First rule: pad the shorter shape with 1's on the left
# y_values.shape = 6, 1
# valid_ys.shape = 1, 2

# Second rule: stretch each 1 to the size found in the other operand
# y_values.shape = 6, 2
# valid_ys.shape = 6, 2

# Third rule: all shapes are equal. Go on! Result shape = 6, 2
# (row i, column j holds y_values[i] - valid_ys[j]).
print(difference.shape)
print(difference)

In [None]:
tolerance = 0.1
close_to_tolerance = abs(difference) < tolerance  # Broadcast!

# Note: 'tolerance' is a plain Python float here:
print(type(tolerance))
# The '<' comparison treats it as a NumPy array so the operation is possible.
# To make that explicit, wrap it in a one-element NumPy array:
tolerance = np.array([tolerance])

print(type(tolerance))
print(difference.shape)
print(tolerance.shape)

In [None]:
# Explaining the broadcast:
# difference.shape = 6, 2
# tolerance.shape  =    1

# First rule: pad the shorter shape with 1's on the left
# difference.shape = 6, 2
# tolerance.shape  = 1, 1

# Second rule: stretch each 1 to the size found in the other operand
# difference.shape = 6, 2
# tolerance.shape  = 6, 2

# Third rule: all shapes are equal. Go on! Result shape = 6, 2
close_to_tolerance = abs(difference) < tolerance  # Broadcast!

print(close_to_tolerance.shape)
print(close_to_tolerance)

In [None]:
# Finally, flag the rows where at least one of the comparisons is True.

# np.any: test whether any array element along a given axis evaluates to True.
result = np.any(close_to_tolerance, axis=1)

# close_to_tolerance.shape = 6, 2
#                            ^  ^
#                            +--|--- Axis 0
#                               +--- Axis 1
# Reducing along axis 1 leaves one boolean per row.
print(result.shape)
print(result)


In [None]:
# All points, then the rows selected by the mask, then the rejected rows.
# print() emits the blank separator line.
print(pts)
print()
print(pts[result])
print()
print(pts[~result])

# Universal Functions

In [None]:
 def my_func(x):
    if x > 10:
        return 0.0
    return x + 1.5

data = np.array([11, 1,2,30,40])
# This wont work
#my_func(data)

f = np.frompyfunc(my_func, 1, 1)
#my_array = np.array([1,2,3], dtype=np.float64)
#a = f(np.float64(10))
#print type(a)
#a.dtype

f2 = np.vectorize(my_func, ...)
#a = f2(data, data)
#print type(a)
#print a
#print a.dtype



In [None]:
from time import time

# Which one is faster? Four ways of applying my_func to 0 .. 1e7 - 1.

# 1) ufunc built with np.frompyfunc; its output is the reference result.
start = time()
expected = f(np.arange(1e+7))
print(time() - start)

# 2) np.vectorize wrapper.
start = time()
o1 = f2(np.arange(1e+7))
print(time() - start)
assert np.all(expected == o1)

# 3) Plain Python loop (range replaces the Python 2-only xrange).
start = time()
o2 = []
for i in range(int(1e+7)):
    o2.append(my_func(i))
print(time() - start)
assert np.all(expected == o2)

# 4) Pure NumPy: boolean mask plus in-place arithmetic.
start = time()
data = np.arange(1e+7)
mask = data > 10  # taken before the addition, like the `if` in my_func
data += 1.5       # must match the `x + 1.5` in my_func or the assert below fails
data[mask] = 0.0
print(time() - start)
assert np.all(expected == data)

In [None]:
# What else is different? ... (To be cont. ...)