In [1]:
import numpy as np

# Elementwise Operations

**1. Basic Operations**

**with scalars**

In [2]:
a = np.array([1, 2, 3, 4]) #create an array

a + 1

array([2, 3, 4, 5])

In [3]:
a ** 2

array([ 1,  4,  9, 16])

**All arithmetic operates elementwise**

In [4]:
b = np.ones(4) + 1

a - b

array([-1.,  0.,  1.,  2.])

In [5]:
a * b

array([ 2.,  4.,  6.,  8.])

In [6]:
# Vectorized NumPy arithmetic is much faster than the equivalent pure-Python loop
# (compare this timing with the list-comprehension timing in the next cell):

a = np.arange(10000)   # 1-D integer array holding 0..9999

%timeit a + 1

The slowest run took 12.96 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 6.26 µs per loop


In [7]:
lst = range(10000)   # the same 10000 values, but processed one by one in pure Python

%timeit [i+1 for i in lst]

1000 loops, best of 3: 430 µs per loop


In [8]:
# `*` multiplies arrays element by element; it is NOT matrix multiplication

c = np.diag([1, 2, 3, 4])   # 4x4 array with 1, 2, 3, 4 on the main diagonal

c * c

array([[ 1,  0,  0,  0],
       [ 0,  4,  0,  0],
       [ 0,  0,  9,  0],
       [ 0,  0,  0, 16]])

In [9]:
# Matrix multiplication is done with .dot(), not with the `*` operator
c = np.ones((3, 3))

c.dot(c)

array([[ 3.,  3.,  3.],
       [ 3.,  3.,  3.],
       [ 3.,  3.,  3.]])

**Comparisons**

In [10]:
a = np.array([1, 2, 3, 4])
b = np.array([5, 2, 2, 4])
a == b

array([False,  True, False,  True], dtype=bool)

In [11]:
a > b

array([False, False,  True, False], dtype=bool)

In [12]:
# Array-wise comparison: np.array_equal gives one boolean for the two arrays as a whole
a = np.array([1, 2, 3, 4])
b = np.array([5, 2, 2, 4])
c = np.array([1, 2, 3, 4])

np.array_equal(a, b)

False

In [13]:
np.array_equal(a, c)

True

**Logical Operations**

In [14]:
# Two boolean vectors that together cover every (a, b) truth combination
a = np.array([True, True, False, False])
b = np.array([True, False, True, False])

# Elementwise OR of the two masks
np.logical_or(a, b)

array([ True,  True,  True, False], dtype=bool)

In [15]:
np.logical_and(a, b)

array([ True, False, False, False], dtype=bool)

**Transcendental functions:**

In [16]:
a = np.arange(5)

np.sin(a)   

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [17]:
np.log(a)   # a[0] is 0, so the first result is -inf (NumPy also emits a divide-by-zero RuntimeWarning)

  """Entry point for launching an IPython kernel.


array([       -inf,  0.        ,  0.69314718,  1.09861229,  1.38629436])

In [18]:
np.exp(a)   #evaluates e^x for each element in a given input

array([  1.        ,   2.71828183,   7.3890561 ,  20.08553692,  54.59815003])

**Shape Mismatch**

In [19]:
a = np.arange(4)

# A (4,) array and a (2,) array cannot be broadcast together, so NumPy raises
# ValueError. The error is the point of this example: catch it and display it,
# so that "Restart Kernel -> Run All" does not stop at this cell.
try:
    a + np.array([1, 2])
except ValueError as err:
    print("ValueError: %s" % err)

ValueError: operands could not be broadcast together with shapes (4,) (2,) 

**Triangular parts of an array (triu / tril)**

In [None]:
np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], 0) # upper triangle of the array

# np.triu returns a copy of the input with the elements below the k-th diagonal zeroed
# (k=0, as passed here, is the main diagonal).

# Exercise: use np.tril to get the lower triangle of the same array.

# Basic Reductions

**computing sums**

In [None]:
x = np.array([1, 2, 3, 4])
np.sum(x)

In [None]:
# Summing a 2-D array along its rows and along its columns

x = np.array([[1, 1], [2, 2]])
x

In [None]:
x.sum(axis=0)   # sum over the first dimension (one total per column)

In [None]:
x.sum(axis=1)  # sum over the second dimension (one total per row)

**Other reductions**

In [None]:
x = np.array([1, 3, 2])
x.min()

In [None]:
x.max()

In [None]:
x.argmin()# index of minimum element

In [None]:
x.argmax()# index of maximum element

**Logical Operations**

In [None]:
np.all([True, True, False])

In [None]:
np.any([True, False, False])

In [20]:
# Note: np.any / np.all reduce a boolean array, so they can be used for array comparisons
a = np.zeros((50, 50))
np.any(a != 0)

False

In [21]:
np.all(a == a)

True

In [22]:
# Check elementwise that a <= b <= c holds at every position
a = np.array([1, 2, 3, 2])
b = np.array([2, 2, 3, 2])
c = np.array([6, 4, 4, 5])
np.all((a <= b) & (b <= c))

True

**Statistics**

In [23]:
# Sample data for the statistics examples: a 1-D vector and a 2x3 matrix
x = np.array([1, 2, 3, 1])
y = np.array([[1, 2, 3], [5, 6, 1]])
np.mean(x)   # arithmetic mean of all elements of x

1.75

In [24]:
np.median(x)

1.5

In [25]:
np.median(y, axis=-1) # last axis

array([ 2.,  5.])

In [26]:
x.std()          # full population standard dev.

0.82915619758884995

**Example:**

The data in populations.txt describes the populations of hares and lynxes (and carrots) in northern Canada over the 21 years from 1900 to 1920.


In [27]:
# Load the table stored in the text file into a 2-D NumPy array
data = np.loadtxt('populations.txt')

In [28]:
data

array([[  1900.,  30000.,   4000.,  48300.],
       [  1901.,  47200.,   6100.,  48200.],
       [  1902.,  70200.,   9800.,  41500.],
       [  1903.,  77400.,  35200.,  38200.],
       [  1904.,  36300.,  59400.,  40600.],
       [  1905.,  20600.,  41700.,  39800.],
       [  1906.,  18100.,  19000.,  38600.],
       [  1907.,  21400.,  13000.,  42300.],
       [  1908.,  22000.,   8300.,  44500.],
       [  1909.,  25400.,   9100.,  42100.],
       [  1910.,  27100.,   7400.,  46000.],
       [  1911.,  40300.,   8000.,  46800.],
       [  1912.,  57000.,  12300.,  43800.],
       [  1913.,  76600.,  19500.,  40900.],
       [  1914.,  52300.,  45700.,  39400.],
       [  1915.,  19500.,  51100.,  39000.],
       [  1916.,  11200.,  29700.,  36700.],
       [  1917.,   7600.,  15800.,  41800.],
       [  1918.,  14600.,   9700.,  43300.],
       [  1919.,  16200.,  10100.,  41300.],
       [  1920.,  24700.,   8600.,  47300.]])

In [29]:
year, hares, lynxes, carrots = data.T # transpose so that each column unpacks into its own 1-D array

In [31]:
# Keep only the three population columns (drop the year stored in column 0)
populations = data[:, 1:]
populations

array([[ 30000.,   4000.,  48300.],
       [ 47200.,   6100.,  48200.],
       [ 70200.,   9800.,  41500.],
       [ 77400.,  35200.,  38200.],
       [ 36300.,  59400.,  40600.],
       [ 20600.,  41700.,  39800.],
       [ 18100.,  19000.,  38600.],
       [ 21400.,  13000.,  42300.],
       [ 22000.,   8300.,  44500.],
       [ 25400.,   9100.,  42100.],
       [ 27100.,   7400.,  46000.],
       [ 40300.,   8000.,  46800.],
       [ 57000.,  12300.,  43800.],
       [ 76600.,  19500.,  40900.],
       [ 52300.,  45700.,  39400.],
       [ 19500.,  51100.,  39000.],
       [ 11200.,  29700.,  36700.],
       [  7600.,  15800.,  41800.],
       [ 14600.,   9700.,  43300.],
       [ 16200.,  10100.,  41300.],
       [ 24700.,   8600.,  47300.]])

In [32]:
# Population standard deviation of each column (std() defaults to ddof=0;
# pass ddof=1 to get the sample standard deviation instead)
populations.std(axis=0)

array([ 20897.90645809,  16254.59153691,   3322.50622558])

In [33]:
# Which species has the highest population each year?
# (argmax over axis=1 gives, for each row, the column index: 0=hares, 1=lynxes, 2=carrots)

np.argmax(populations, axis=1)

array([2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2])

# Broadcasting

Basic operations on numpy arrays (addition, etc.) are elementwise

This works on arrays of the same size.
Nevertheless, it’s also possible to do operations on arrays of different sizes if NumPy can transform these arrays so that they all have the same size: this conversion is called broadcasting.

The image below gives an example of broadcasting:

![title](broadcasting.png)

In [51]:
# Build a (4, 3) array whose rows are filled with 0, 10, 20 and 30 respectively
a = np.tile(np.arange(0, 40, 10), (3, 1)).transpose()
a

array([[ 0,  0,  0],
       [10, 10, 10],
       [20, 20, 20],
       [30, 30, 30]])

In [52]:
b = np.array([0, 1, 2])

In [53]:
a + b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [55]:
a = np.arange(0, 40, 10)
a.shape

(4,)

In [56]:
a = a[:, np.newaxis]  # adds a new axis -> 2D array
a.shape

(4, 1)

In [57]:
a

array([[ 0],
       [10],
       [20],
       [30]])

In [58]:
a + b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

np.ogrid (an index object used with square brackets, not called like a function) lets us directly create vectors like those of the previous example — a column vector x and a row vector y — with two “significant dimensions”:

In [59]:
x, y = np.ogrid[0:5, 0:5]   # open grid: x is a (5, 1) column, y is a (1, 5) row

In [60]:
x, y

(array([[0],
        [1],
        [2],
        [3],
        [4]]), array([[0, 1, 2, 3, 4]]))

So, np.ogrid is very useful as soon as we have to handle computations on a grid. On the other hand, np.mgrid directly provides matrices full of indices for cases where we can’t (or don’t want to) benefit from broadcasting:

In [61]:
x, y = np.mgrid[0:4, 0:4]   # dense grid: x and y are both full (4, 4) index arrays

In [62]:
x

array([[0, 0, 0, 0],
       [1, 1, 1, 1],
       [2, 2, 2, 2],
       [3, 3, 3, 3]])

In [63]:
y

array([[0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3]])

# Array Shape Manipulation

**Flattening**

In [64]:
a = np.array([[1, 2, 3], [4, 5, 6]])
# ravel() returns a contiguous, flattened 1-D array containing the elements of the input;
# a copy is made only if needed.
a.ravel()

array([1, 2, 3, 4, 5, 6])

In [65]:
a.T #Transpose

array([[1, 4],
       [2, 5],
       [3, 6]])

In [66]:
a.T.ravel()

array([1, 4, 2, 5, 3, 6])

**Reshaping**

The inverse operation to flattening:

In [67]:
a.shape

(2, 3)

In [68]:
b = a.ravel()

In [70]:
b = b.reshape((2, 3))
b

array([[1, 2, 3],
       [4, 5, 6]])

In [71]:
#or we can also do
a.reshape((2, -1))

array([[1, 2, 3],
       [4, 5, 6]])

In [72]:
b[0, 0] = 100
a

array([[100,   2,   3],
       [  4,   5,   6]])

**Note: beware, reshape may also return a copy!**

In [73]:
# Flattening the transposed array with reshape yields a copy here, not a view
a = np.zeros((3, 2))
b = a.transpose().reshape(6)
b[0] = 50   # only the copy is modified
a           # still all zeros

array([[ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.]])

**Adding a Dimension**

Indexing with the np.newaxis object allows us to add an axis to an array

Each use of np.newaxis increases the number of dimensions of the existing array by one. Thus,

1D array will become 2D array

2D array will become 3D array

3D array will become 4D array and so on

In [74]:
z = np.array([1, 2, 3])
z

array([1, 2, 3])

In [75]:
z[:, np.newaxis]

array([[1],
       [2],
       [3]])

**Dimension Shuffling**

In [76]:
# 24 consecutive integers arranged as a 4 x 3 x 2 block
a = np.arange(24).reshape((4, 3, 2))
a.shape

(4, 3, 2)

In [77]:
a

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15],
        [16, 17]],

       [[18, 19],
        [20, 21],
        [22, 23]]])

In [78]:
a[0, 2, 1]

5

In [79]:
b = a.transpose(1, 2, 0)   # reorder the axes: b[i, j, k] == a[k, i, j]
b.shape

(3, 2, 4)

In [80]:
b[2, 1, 0]

5

**Resizing**

In [81]:
a = np.arange(4)
a.resize((8,))   # resizes `a` in place; the new trailing entries are zeros
a

array([0, 1, 2, 3, 0, 0, 0, 0])

However, the array must not be referenced anywhere else when it is resized in place:

In [82]:
b = a   # a second name bound to the same array
# In-place resize is refused while the array is referenced elsewhere and raises
# ValueError. The error is the demonstration, so catch it and display it instead
# of letting it abort "Restart Kernel -> Run All".
try:
    a.resize((4,))
except ValueError as err:
    print("ValueError: %s" % err)

ValueError: cannot resize an array that references or is referenced
by another array in this way.  Use the resize function

**Sorting Data**

In [83]:
#Sorting along an axis:
a = np.array([[5, 4, 6], [2, 3, 2]])
b = np.sort(a, axis=1)
b

array([[4, 5, 6],
       [2, 2, 3]])

In [84]:
# In-place sort: ndarray.sort() reorders each row of `a` itself instead of building a copy
a.sort(axis=1)
a

array([[4, 5, 6],
       [2, 2, 3]])

In [85]:
# argsort gives the indices that would sort `a`; indexing with them (fancy indexing) sorts it
a = np.array([4, 3, 1, 2])
j = a.argsort()
j

array([2, 3, 1, 0])

In [86]:
a[j]

array([1, 2, 3, 4])

In [87]:
# Positions of the largest and the smallest element
a = np.array([4, 3, 1, 2])
j_max, j_min = a.argmax(), a.argmin()
j_max, j_min

(0, 2)