In [1]:
import numpy as np

# Elementwise Operations

**1. Basic Operations**

**with scalars**

In [2]:
a = np.array([1, 2, 3, 4]) #create an array

a + 1

array([2, 3, 4, 5])

In [3]:
a ** 2

array([ 1,  4,  9, 16], dtype=int32)

**All arithmetic operates elementwise**

In [4]:
b = np.ones(4) + 1

a - b

array([-1.,  0.,  1.,  2.])

In [5]:
a * b

array([2., 4., 6., 8.])

In [6]:
# Elementwise product vs. matrix product

c = np.diag([1, 2, 3, 4])

# `*` multiplies element by element, while `.dot` performs a true matrix product.
# NOTE: c is a diagonal matrix, so both results happen to be identical here;
# for a general (non-diagonal) matrix the two outputs differ.
print(c * c)
print("*****************")
print(c.dot(c))

[[ 1  0  0  0]
 [ 0  4  0  0]
 [ 0  0  9  0]
 [ 0  0  0 16]]
*****************
[[ 1  0  0  0]
 [ 0  4  0  0]
 [ 0  0  9  0]
 [ 0  0  0 16]]


**Comparisons**

In [7]:
a = np.array([1, 2, 3, 4])
b = np.array([5, 2, 2, 4])
a == b

array([False,  True, False,  True])

In [8]:
a > b

array([False, False,  True, False])

In [9]:
# Array-wise comparisons: np.array_equal compares two whole arrays and returns a single bool
a = np.array([1, 2, 3, 4])
b = np.array([5, 2, 2, 4])
c = np.array([1, 2, 3, 4])

np.array_equal(a, b)

False

In [10]:
np.array_equal(a, c)

True

**Logical Operations**

In [11]:
a = np.array([1, 1, 0, 0], dtype=bool)
b = np.array([1, 0, 1, 0], dtype=bool)

np.logical_or(a, b)

array([ True,  True,  True, False])

In [12]:
np.logical_and(a, b)

array([ True, False, False, False])

**Transcendental functions:**

In [13]:
a = np.arange(5)

np.sin(a)   

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [14]:
# a[0] is 0, so the first result is -inf; NumPy also emits a divide-by-zero RuntimeWarning
np.log(a)

  """Entry point for launching an IPython kernel.


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436])

In [15]:
np.exp(a)   #evaluates e^x for each element in a given input

array([ 1.        ,  2.71828183,  7.3890561 , 20.08553692, 54.59815003])

**Shape Mismatch**

In [16]:
a = np.arange(4)

# Intentionally raises ValueError: shapes (4,) and (2,) cannot be broadcast together
a + np.array([1, 2])

ValueError: operands could not be broadcast together with shapes (4,) (2,) 

In [17]:
a = np.arange(4)
b = np.array([4,2])

# Raises ValueError as well: `a + b` fails on the (4,) vs (2,) shapes before print is called
print(a + b)

ValueError: operands could not be broadcast together with shapes (4,) (2,) 

# Basic Reductions

**computing sums**

In [18]:
x = np.array([1, 2, 3, 4])
np.sum(x)

10

In [19]:
#sum by rows and by columns

x = np.array([[1, 1], [2, 2]])
x

array([[1, 1],
       [2, 2]])

In [20]:
x.sum()  # no axis given: adds up every element of the array


6

In [21]:
x.sum(axis=0)   # sum along the first dimension (down the rows): one total per column

array([3, 3])

In [22]:
x.sum(axis=1)  # sum along the second dimension (across the columns): one total per row

array([2, 4])

**Other reductions**

In [23]:
y = np.array([1, 3, 2])
y.min()

1

In [24]:
y.max()

3

In [25]:
y.argmin()# index of minimum element

0

In [26]:
y.argmax()# index of maximum element

1

### for matrix

In [27]:
x.min()

1

In [28]:
x.max()

2

In [29]:
# No axis given: the result is an index into the flattened array (first occurrence of the maximum)
x.argmax()

2

In [30]:
x

array([[1, 1],
       [2, 2]])

In [31]:
# Also a flat index: the first of the minimum values sits at position 0
x.argmin()

0

**Logical Operations**

In [32]:
np.all([True, True, False])

False

In [33]:
np.any([True, False, False])

True

In [34]:
# Note: any/all can be combined with elementwise comparisons to test a whole array at once
a = np.zeros((50, 50))
np.any(a != 0)

False

In [35]:
np.all(a == a)

True

In [36]:
# Elementwise check of a <= b <= c, reduced to a single truth value:
# the result is True only if the chain holds at every position.
a = np.array([1, 2, 3, 2])
b = np.array([2, 2, 3, 2])
c = np.array([6, 4, 4, 5])
np.all(np.logical_and(a <= b, b <= c))

True

**Statistics**

In [37]:
x = np.array([1, 2, 3, 1])
y = np.array([[1, 2, 3], [5, 6, 1]])
x.mean()

1.75

In [38]:
np.median(y)

2.5

In [39]:
np.median(x)

1.5

In [40]:
np.median(y, axis=-1) # last axis

array([2., 5.])

In [41]:
x.std()          # full population standard dev.

0.82915619758885

**Example:**

Data in populations.txt describes the populations of hares and lynxes (and carrots) in northern Canada over the 21 years from 1900 to 1920.


In [42]:
#load data into numpy array object
data = np.loadtxt('populations.txt')


In [43]:
data

array([[ 1900., 30000.,  4000., 48300.],
       [ 1901., 47200.,  6100., 48200.],
       [ 1902., 70200.,  9800., 41500.],
       [ 1903., 77400., 35200., 38200.],
       [ 1904., 36300., 59400., 40600.],
       [ 1905., 20600., 41700., 39800.],
       [ 1906., 18100., 19000., 38600.],
       [ 1907., 21400., 13000., 42300.],
       [ 1908., 22000.,  8300., 44500.],
       [ 1909., 25400.,  9100., 42100.],
       [ 1910., 27100.,  7400., 46000.],
       [ 1911., 40300.,  8000., 46800.],
       [ 1912., 57000., 12300., 43800.],
       [ 1913., 76600., 19500., 40900.],
       [ 1914., 52300., 45700., 39400.],
       [ 1915., 19500., 51100., 39000.],
       [ 1916., 11200., 29700., 36700.],
       [ 1917.,  7600., 15800., 41800.],
       [ 1918., 14600.,  9700., 43300.],
       [ 1919., 16200., 10100., 41300.],
       [ 1920., 24700.,  8600., 47300.]])

In [44]:
# Raises AttributeError: attribute names are case-sensitive and the transpose is spelled `.T`
data.t

AttributeError: 'numpy.ndarray' object has no attribute 't'

In [45]:
data.T

array([[ 1900.,  1901.,  1902.,  1903.,  1904.,  1905.,  1906.,  1907.,
         1908.,  1909.,  1910.,  1911.,  1912.,  1913.,  1914.,  1915.,
         1916.,  1917.,  1918.,  1919.,  1920.],
       [30000., 47200., 70200., 77400., 36300., 20600., 18100., 21400.,
        22000., 25400., 27100., 40300., 57000., 76600., 52300., 19500.,
        11200.,  7600., 14600., 16200., 24700.],
       [ 4000.,  6100.,  9800., 35200., 59400., 41700., 19000., 13000.,
         8300.,  9100.,  7400.,  8000., 12300., 19500., 45700., 51100.,
        29700., 15800.,  9700., 10100.,  8600.],
       [48300., 48200., 41500., 38200., 40600., 39800., 38600., 42300.,
        44500., 42100., 46000., 46800., 43800., 40900., 39400., 39000.,
        36700., 41800., 43300., 41300., 47300.]])

In [46]:
# Raises ValueError: unpacking iterates over the rows of `data` (21 of them),
# which cannot be assigned to only 4 names
year, hares, lynxes, carrots = data
print(year)

ValueError: too many values to unpack (expected 4)

In [47]:
# Transpose first so that each of the 4 columns becomes a row, then unpack one row per name
year, hares, lynxes, carrots = data.T
print(year)

[1900. 1901. 1902. 1903. 1904. 1905. 1906. 1907. 1908. 1909. 1910. 1911.
 1912. 1913. 1914. 1915. 1916. 1917. 1918. 1919. 1920.]


In [48]:
# Keep every row but drop column 0 (the year), leaving the hares, lynxes and carrots columns
populations = data[:,1:]  # indexing order is [rows, columns]
populations

array([[30000.,  4000., 48300.],
       [47200.,  6100., 48200.],
       [70200.,  9800., 41500.],
       [77400., 35200., 38200.],
       [36300., 59400., 40600.],
       [20600., 41700., 39800.],
       [18100., 19000., 38600.],
       [21400., 13000., 42300.],
       [22000.,  8300., 44500.],
       [25400.,  9100., 42100.],
       [27100.,  7400., 46000.],
       [40300.,  8000., 46800.],
       [57000., 12300., 43800.],
       [76600., 19500., 40900.],
       [52300., 45700., 39400.],
       [19500., 51100., 39000.],
       [11200., 29700., 36700.],
       [ 7600., 15800., 41800.],
       [14600.,  9700., 43300.],
       [16200., 10100., 41300.],
       [24700.,  8600., 47300.]])

In [49]:
# Standard deviation of each column (axis=0). NumPy's default is ddof=0, i.e. the
# population standard deviation; pass ddof=1 to get the sample standard deviation.
populations.std(axis=0)  

array([20897.90645809, 16254.59153691,  3322.50622558])

In [50]:
# Which species has the highest population each year?
# axis=-1 is the last axis, so argmax runs across the columns of each row
# (0 = hares, 1 = lynxes, 2 = carrots)

np.argmax(populations, axis=-1)

array([2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2],
      dtype=int64)

In [51]:
# Which species has the highest population each year?
# For a 2-D array axis=1 names the same axis as axis=-1, so the result is identical

np.argmax(populations, axis=1)

array([2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2],
      dtype=int64)

# Broadcasting

Basic operations on numpy arrays (addition, etc.) are elementwise

This works on arrays of the same size.
Nevertheless, it's also possible to do operations on arrays of different sizes if NumPy can transform these arrays so that they all have the same size: this conversion is called broadcasting.

The image below gives an example of broadcasting:

![title](broadcasting.png)

In [52]:
# The second argument is the repetition tuple (3, 2): the 4-element row is repeated
# 3 times along axis 0 (rows) and 2 times along axis 1 (columns), giving shape (3, 8)
a = np.tile(np.arange(0,40,10), (3,2))
print(a)

[[ 0 10 20 30  0 10 20 30]
 [ 0 10 20 30  0 10 20 30]
 [ 0 10 20 30  0 10 20 30]]


In [53]:
# Stack the row [0, 10, 20, 30] three times -> shape (3, 4)
a = np.tile(np.arange(0, 40, 10), (3,1))
print(a)

print("*************")
# Transpose to shape (4, 3): every row now holds one repeated value
a=a.T
print(a)

[[ 0 10 20 30]
 [ 0 10 20 30]
 [ 0 10 20 30]]
*************
[[ 0  0  0]
 [10 10 10]
 [20 20 20]
 [30 30 30]]


In [54]:

b = np.array([0, 1, 2])
b

array([0, 1, 2])

In [55]:

a + b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

#### Convert a 1-d array to a 2-d array using np.newaxis:

In [56]:
a = np.arange(0, 40, 10)
a.shape


(4,)

In [57]:
a = a[:,np.newaxis] # adds a new axis -> 2D array
print(a)

[[ 0]
 [10]
 [20]
 [30]]


In [58]:
a.shape

(4, 1)

In [59]:
a

array([[ 0],
       [10],
       [20],
       [30]])

In [60]:
a + b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [61]:
a = np.array([[1,2],[3,4]]) 
b = np.array([[1,2],[3,4]]) 
# Matrix product: each entry combines a row of a with a column of b
np.dot(a,b)

array([[ 7, 10],
       [15, 22]])

In [62]:
# Elementwise product of the same two arrays, for comparison with np.dot
a * b

array([[ 1,  4],
       [ 9, 16]])

# Array Shape Manipulation

**Flattening**

In [85]:
a = np.array([[1, 2, 3], [4, 5, 6]])

#Return a contiguous flattened array. A 1-D array, containing the elements of the input, is returned.
# A copy is made only if needed.

a.ravel() 

array([1, 2, 3, 4, 5, 6])

In [90]:
a.T #Transpose

array([[1, 4],
       [2, 5],
       [3, 6]])

In [91]:
a.T.ravel()

array([1, 4, 2, 5, 3, 6])

**Reshaping**

The inverse operation to flattening:

In [99]:
print(a.shape)
print(a)

(2, 3)
[[1 2 3]
 [4 5 6]]


In [100]:
b_flat = a.ravel()
print(b_flat)

[1 2 3 4 5 6]


In [101]:
# Reshape the flat array back to (2, 3); the name b_flat is kept even though it is now 2-D
b_flat = b_flat.reshape((2, 3))
b_flat

array([[1, 2, 3],
       [4, 5, 6]])

In [103]:
# b_flat shares its data with a here (ravel and reshape returned views),
# so writing through b_flat changes a as well
b_flat[0, 0] = 100
print(b_flat)

print(a)

[[100   2   3]
 [  4   5   6]]
[[100   2   3]
 [  4   5   6]]


**Note — beware: reshape may or may not return a copy!**

In [104]:
a = np.zeros((3, 2))
# Flattening the transpose cannot be expressed as a view, so reshape returns a copy here
b = a.T.reshape(3*2)
# Writing to the copy leaves a untouched
b[0] = 50
a

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

**Adding a Dimension**

#### Indexing with the np.newaxis object allows us to add an axis to an array

Each use of <b>newaxis</b> inserts one new axis of length 1, so the number of dimensions of the existing array grows by one. Thus,

1D array will become 2D array

2D array will become 3D array

3D array will become 4D array and so on

In [105]:
z = np.array([1, 2, 3])
z

array([1, 2, 3])

In [106]:
z[:, np.newaxis]

array([[1],
       [2],
       [3]])

**Dimension Shuffling**

In [107]:
a = np.arange(4*3*2).reshape(4, 3, 2)
a.shape

(4, 3, 2)

In [108]:
a

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15],
        [16, 17]],

       [[18, 19],
        [20, 21],
        [22, 23]]])

In [109]:
a[0, 2, 1]

5

In [110]:
# Raises IndexError: axis 0 has size 4, so its valid indices are 0 to 3
a[4,2,0]

IndexError: index 4 is out of bounds for axis 0 with size 4

In [111]:
a[3,2,0]

22

**Resizing**

In [114]:
# ndarray.resize changes the array in place; the added elements are filled with zeros
a = np.arange(4)
a.resize(8)
a

array([0, 1, 2, 3, 0, 0, 0, 0])

In [115]:
a = np.arange(4)
# np.resize returns a new array instead, filled with repeated copies of a
np.resize(a,(8,))


array([0, 1, 2, 3, 0, 1, 2, 3])

In [184]:
help(np.resize)

Help on function resize in module numpy:

resize(a, new_shape)
    Return a new array with the specified shape.
    
    If the new array is larger than the original array, then the new
    array is filled with repeated copies of `a`.  Note that this behavior
    is different from a.resize(new_shape) which fills with zeros instead
    of repeated copies of `a`.
    
    Parameters
    ----------
    a : array_like
        Array to be resized.
    
    new_shape : int or tuple of int
        Shape of resized array.
    
    Returns
    -------
    reshaped_array : ndarray
        The new array is formed from the data in the old array, repeated
        if necessary to fill out the required number of elements.  The
        data are repeated in the order that they are stored in memory.
    
    See Also
    --------
    ndarray.resize : resize an array in-place.
    
    Notes
    -----
    i.e. it does not apply interpolation/extrapolation.
    It fills the return array with the required 

In [116]:
a=np.array([[0,1],[2,3]])
a

array([[0, 1],
       [2, 3]])

In [117]:
np.resize(a,(2,3))
    

array([[0, 1, 2],
       [3, 0, 1]])

However, to be resized in place to a new size, the array must not be referred to anywhere else:

In [118]:
b = a  # b is a second name for the same array object
# No error here: the number of elements stays at 4, only the shape changes
a.resize((4,)) 

In [119]:
c = b
c

array([0, 1, 2, 3])

In [120]:
# Raises ValueError: growing to 9 elements is refused while other names (a, b) refer to the same array
c.resize((9,))

ValueError: cannot resize an array that references or is referenced
by another array in this way.
Use the np.resize function or refcheck=False

**Sorting Data**

In [121]:
# Sorting along an axis: np.sort returns a sorted copy; axis=1 sorts the values within each row
a = np.array([[5, 4, 6], [2, 3, 2]])
b = np.sort(a, axis=1)
b

array([[4, 5, 6],
       [2, 2, 3]])

In [122]:
# In-place sort: ndarray.sort reorders a itself instead of returning a new array
a.sort(axis=1)
a

array([[4, 5, 6],
       [2, 2, 3]])

In [123]:
c = np.array([[5, 4, 6], [2, 3, 2]])
c.sort(axis=1)
c

array([[4, 5, 6],
       [2, 2, 3]])

In [124]:
c

array([[4, 5, 6],
       [2, 2, 3]])

In [125]:
# Sorting with fancy indexing: argsort returns an array holding the indices that would sort a
a = np.array([4, 3, 1, 2])
j = np.argsort(a)
j

array([2, 3, 1, 0], dtype=int64)

In [126]:
# Using the argsort result as an index array yields the values in sorted order
a[j]

array([1, 2, 3, 4])

In [127]:
d = np.array([[5, 4, 6], [2, 3, 2]])[:,np.newaxis]
d

array([[[5, 4, 6]],

       [[2, 3, 2]]])

In [130]:
# Indices that sort d along its last axis (argsort's default axis, written out explicitly)
e = np.argsort(d, axis=-1)
e

array([[[1, 0, 2]],

       [[0, 2, 1]]], dtype=int64)

In [131]:
# Raises IndexError: d[e] uses the values in e (up to 2) as indices into axis 0, which has size 2.
# np.take_along_axis(d, e, axis=-1) is the way to apply argsort indices to an n-d array.
d[e]

IndexError: index 2 is out of bounds for axis 0 with size 2

# Sorting each row of a 5\*5 array using a 5\*1 column of row indices

In [156]:
# 5x5 array of random floats; no seed is set, so the values change on every run
a1 = np.random.random([5,5])
print(a1)

# Column vector of row indices, shape (5, 1)
i1 = np.arange(len(a1))[:, np.newaxis]
print(i1)



[[0.65783663 0.96477344 0.74001954 0.29936836 0.83818026]
 [0.36760198 0.40895199 0.65908112 0.28597371 0.0242166 ]
 [0.84991665 0.23525737 0.22666423 0.28923759 0.71928029]
 [0.86043747 0.45341391 0.94541556 0.1725718  0.1732904 ]
 [0.99158341 0.77798439 0.50719882 0.38432509 0.06732277]]
[[0]
 [1]
 [2]
 [3]
 [4]]


In [158]:
# For each row, the column indices that put that row in ascending order
j1 = np.argsort(a1, axis=1)
print(j1)

# i1 (5, 1) broadcasts against j1 (5, 5): element [r, k] is a1[r, j1[r, k]], i.e. every row sorted
a_sorted = a1[i1, j1]
print(a_sorted)

[[3 0 2 4 1]
 [4 3 0 1 2]
 [2 1 3 4 0]
 [3 4 1 0 2]
 [4 3 2 1 0]]
[[0.29936836 0.65783663 0.74001954 0.83818026 0.96477344]
 [0.0242166  0.28597371 0.36760198 0.40895199 0.65908112]
 [0.22666423 0.23525737 0.28923759 0.71928029 0.84991665]
 [0.1725718  0.1732904  0.45341391 0.86043747 0.94541556]
 [0.06732277 0.38432509 0.50719882 0.77798439 0.99158341]]


In [159]:
# Cross-check: sorting each row directly gives the same values as a1[i1, j1]
np.sort(a1, axis=1)

array([[0.29936836, 0.65783663, 0.74001954, 0.83818026, 0.96477344],
       [0.0242166 , 0.28597371, 0.36760198, 0.40895199, 0.65908112],
       [0.22666423, 0.23525737, 0.28923759, 0.71928029, 0.84991665],
       [0.1725718 , 0.1732904 , 0.45341391, 0.86043747, 0.94541556],
       [0.06732277, 0.38432509, 0.50719882, 0.77798439, 0.99158341]])

In [160]:
 arr = np.array([[5, 2, 4],
[3, 3, 3],
[6, 1, 2]])

    


### argsort for 2-d

In [132]:
arr = np.array([[3,1,2],[8,9,2]])
arr

array([[3, 1, 2],
       [8, 9, 2]])

In [133]:
# Indices that sort each row (argsort works along the last axis by default)
arg_sort = np.argsort(arr)
arg_sort

array([[1, 2, 0],
       [2, 0, 1]], dtype=int64)

In [134]:
# Apply the per-row argsort indices to arr to obtain every row in sorted order
np.take_along_axis(arr,arg_sort, axis=-1)

array([[1, 2, 3],
       [2, 8, 9]])

In [189]:
help(np.take_along_axis)

Help on function take_along_axis in module numpy:

take_along_axis(arr, indices, axis)
    Take values from the input array by matching 1d index and data slices.
    
    This iterates over matching 1d slices oriented along the specified axis in
    the index and data arrays, and uses the former to look up values in the
    latter. These slices can be different lengths.
    
    Functions returning an index along an axis, like `argsort` and
    `argpartition`, produce suitable indices for this function.
    
    .. versionadded:: 1.15.0
    
    Parameters
    ----------
    arr: ndarray (Ni..., M, Nk...)
        Source array
    indices: ndarray (Ni..., J, Nk...)
        Indices to take along each 1d slice of `arr`. This must match the
        dimension of arr, but dimensions Ni and Nj only need to broadcast
        against `arr`.
    axis: int
        The axis to take 1d slices along. If axis is None, the input array is
        treated as if it had first been flattened to 1d, for con

In [190]:
help(np.argsort)

Help on function argsort in module numpy:

argsort(a, axis=-1, kind='quicksort', order=None)
    Returns the indices that would sort an array.
    
    Perform an indirect sort along the given axis using the algorithm specified
    by the `kind` keyword. It returns an array of indices of the same shape as
    `a` that index data along the given axis in sorted order.
    
    Parameters
    ----------
    a : array_like
        Array to sort.
    axis : int or None, optional
        Axis along which to sort.  The default is -1 (the last axis). If None,
        the flattened array is used.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
        Sorting algorithm.
    order : str or list of str, optional
        When `a` is an array with fields defined, this argument specifies
        which fields to compare first, second, etc.  A single field can
        be specified as a string, and not all fields need be specified,
        but unspecified fields will still be used