# NumPy Tutorial

NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays.
Install it with `pip install numpy`.

Advantages:
- Less memory
- Convenient and fast
- Reliable, tested scientific functions



In [2]:
import numpy as np

## Basic attributes

* NumPy’s main object is the homogeneous multidimensional array.
* NumPy dimensions are called axes.
* NumPy’s array class is called ndarray


In [11]:
a = np.arange(0,15).reshape(3, 5)

In [12]:
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [13]:
a.shape

(3, 5)

In [14]:
a.ndim

2

In [15]:
a.size

15

In [16]:
a.dtype

dtype('int64')

In [191]:
type(a)

numpy.ndarray

In [192]:
a.data

<memory at 0x00000225FFA76558>

In [17]:
a.itemsize

8

In [18]:
np.info(a)

class:  ndarray
shape:  (3, 5)
strides:  (40, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x7ffff99b7990
byteorder:  little
byteswap:  False
type: int64


## Array Creation

##### Array from a regular Python list or tuple 

In [11]:
arr = np.array([2,3,4])
arr

array([2, 3, 4])

In [13]:
arr2 = np.array([(1.5,2,3), (4,5,6)])
arr2

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [15]:
# The type of the array can also be explicitly specified at creation time
arr3 = np.array( [ [1,2], [3,4] ], dtype=complex )
arr3

array([[1.+0.j, 2.+0.j],
       [3.+0.j, 4.+0.j]])

##### Generic array creation with initial placeholder content

In [19]:
np.zeros( (3,4,5) )

array([[[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]])

In [17]:
np.ones( (2,3,4), dtype=np.int16 )   #specifying dtype

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int16)

In [21]:
np.empty( (2,3) )                   #uninitialized array: contents are whatever was already in memory

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

##### Create sequences of numbers

 - `arange` a function analogous to range that returns arrays
 - `linspace` receives as an argument the number of elements that we want, instead of the step

In [26]:
# numbers between 10 and 30 with step-size 3
np.arange( 10, 30, 3 ) 

array([10, 13, 16, 19, 22, 25, 28])

In [20]:
# 7 evenly spaced numbers between 10 and 30 (both endpoints included)
np.linspace( 10, 30, 7 ) 

array([10.        , 13.33333333, 16.66666667, 20.        , 23.33333333,
       26.66666667, 30.        ])

In [53]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [54]:
np.diag(np.array([1, 2, 3, 4]))

array([[1, 0, 0, 0],
       [0, 2, 0, 0],
       [0, 0, 3, 0],
       [0, 0, 0, 4]])

In [56]:
np.random.rand(4,3)

array([[0.73297332, 0.81200927, 0.44362297],
       [0.77123657, 0.84660165, 0.47827026],
       [0.44106355, 0.25517798, 0.02479093],
       [0.29066935, 0.02578189, 0.53827117]])

In [30]:
# can also use linspace to evaluate lots of points.

from numpy import pi
x = np.linspace( 0, 2*pi, 100 )
f = np.sin(x)
print(x)
print(f)

[0.         0.06346652 0.12693304 0.19039955 0.25386607 0.31733259
 0.38079911 0.44426563 0.50773215 0.57119866 0.63466518 0.6981317
 0.76159822 0.82506474 0.88853126 0.95199777 1.01546429 1.07893081
 1.14239733 1.20586385 1.26933037 1.33279688 1.3962634  1.45972992
 1.52319644 1.58666296 1.65012947 1.71359599 1.77706251 1.84052903
 1.90399555 1.96746207 2.03092858 2.0943951  2.15786162 2.22132814
 2.28479466 2.34826118 2.41172769 2.47519421 2.53866073 2.60212725
 2.66559377 2.72906028 2.7925268  2.85599332 2.91945984 2.98292636
 3.04639288 3.10985939 3.17332591 3.23679243 3.30025895 3.36372547
 3.42719199 3.4906585  3.55412502 3.61759154 3.68105806 3.74452458
 3.8079911  3.87145761 3.93492413 3.99839065 4.06185717 4.12532369
 4.1887902  4.25225672 4.31572324 4.37918976 4.44265628 4.5061228
 4.56958931 4.63305583 4.69652235 4.75998887 4.82345539 4.88692191
 4.95038842 5.01385494 5.07732146 5.14078798 5.2042545  5.26772102
 5.33118753 5.39465405 5.45812057 5.52158709 5.58505361 5.648520

## Copying Array

In [199]:
# assignment does not copy: after `b = a`, both names refer to the same array object
a = np.arange(12)
print(a)
b = a
print(b is a)
b.resize(3,4)
print(a)

[ 0  1  2  3  4  5  6  7  8  9 10 11]
True
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


##### View or Shallow Copy

In [205]:
a = np.arange(12)
c = a.view()
print(c is a)
c.resize(3,4)
print(a)
print(c)

# c has its own shape, but it shares a's data, so writing through c also changes a
c[0,2] = -2
print('*'*20)
print(a)
print(c)

False
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
********************
[ 0  1 -2  3  4  5  6  7  8  9 10 11]
[[ 0  1 -2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


##### Deep Copy

In [207]:
a = np.arange(12)
d = a.copy()
d.resize(3,4)
d[0,2] = -2
print(a)
print(d)

[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[ 0  1 -2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


## Indexing and slicing
**One-dimensional** arrays can be indexed, sliced and iterated over, much like lists and other Python sequences.

In [80]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [81]:
# single elements
a[0], a[2], a[-1]

(0, 2, 9)

In [82]:
# from index 2 to end
print(a[2:])

[2 3 4 5 6 7 8 9]


In [83]:
# from index 2 to 4
print(a[2:5])

[2 3 4]


In [86]:
# Slicing as [start:end:step]
print(a[2:9:3])
print(a[::2])

[2 5 8]
[0 2 4 6 8]


In [85]:
# reverse order
print(a[::-1])

[9 8 7 6 5 4 3 2 1 0]


**Multidimensional arrays** can have one index per axis. These indices are given separated by commas

In [94]:
a = np.random.rand(5,5)
a

array([[0.95898421, 0.37583814, 0.73701383, 0.87844457, 0.75816469],
       [0.90070162, 0.94092422, 0.62913649, 0.74660331, 0.6023703 ],
       [0.91566586, 0.01092521, 0.29157433, 0.38079877, 0.81694494],
       [0.62662055, 0.94285831, 0.43887707, 0.39027263, 0.32209331],
       [0.96845128, 0.84854402, 0.1753587 , 0.52327414, 0.39993454]])

In [71]:
# one element
print(a[1, 1],a[1, -1],a[-1,-1])
# taking a whole row (all elements along the second axis)
print(a[1])

1 0 2
[0 1 0]


In [96]:
print(a[0,3:5])

[0.87844457 0.75816469]


In [99]:
print(a[2:,2:])

[[0.29157433 0.38079877 0.81694494]
 [0.43887707 0.39027263 0.32209331]
 [0.1753587  0.52327414 0.39993454]]


## Fancy Indexing

**Using boolean masks**

Masks are boolean arrays used as indices. Indexing with a mask (or with an integer array) returns a copy, not a view.

In [21]:
a = np.random.randint(0, 21, 15)
a

array([ 1,  9,  9,  9, 10, 14, 12, 13,  7, 20,  0,  3, 14, 19,  6])

In [22]:
(a % 3 == 0)

array([False,  True,  True,  True, False, False,  True, False, False,
       False,  True,  True, False, False,  True])

In [25]:
mask = (a % 3 == 0)
mask[0]= True

In [26]:
mask

array([ True,  True,  True,  True, False, False,  True, False, False,
       False,  True,  True, False, False,  True])

In [27]:
extract_from_a = a[mask]   #like a[a%3==0], except that index 0 is also selected because mask[0] was set to True
extract_from_a

array([ 1,  9,  9,  9, 12,  0,  3,  6])

**Indexing with a list**

Indexing can be done with an array of integers, where the same index can be repeated

In [116]:
arr = np.arange(0, 100, 10)
arr

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [117]:
arr[[2, 3, 4,2]]

array([20, 30, 40, 20])

In [118]:
# can also assign values
arr[[9, 7]] = -100
arr

array([   0,   10,   20,   30,   40,   50,   60, -100,   80, -100])

## Changing the shape of an array

In [138]:
a = np.floor(10*np.random.random((3,4)))

In [140]:
print(a.shape)
print(a)

(3, 4)
[[8. 8. 3. 4.]
 [1. 3. 6. 7.]
 [8. 1. 5. 6.]]


In [141]:
a.ravel() 

array([8., 8., 3., 4., 1., 3., 6., 7., 8., 1., 5., 6.])

In [145]:
# an m x n array can be reshaped to any shape whose total number of elements is still m * n
a.reshape(6,2)

array([[8., 8.],
       [3., 4.],
       [1., 3.],
       [6., 7.],
       [8., 1.],
       [5., 6.]])

In [146]:
a.T

array([[8., 1., 8.],
       [8., 3., 1.],
       [3., 6., 5.],
       [4., 7., 6.]])

In [147]:
a.T.shape

(4, 3)

In [148]:
a.resize((2,6))
a

array([[8., 8., 3., 4., 1., 3.],
       [6., 7., 8., 1., 5., 6.]])

**NOTE**: "*ravel*", "*reshape*" and "*.T*" return a new array object and leave the original array's shape unchanged, whereas "*resize*" modifies the array itself.

## Stacking different arrays

Several arrays can be stacked together along different axes

In [208]:
a = np.floor(10*np.random.random((2,2)))
a

array([[2., 7.],
       [4., 0.]])

In [209]:
b = np.floor(10*np.random.random((2,2)))
b

array([[9., 0.],
       [6., 5.]])

In [210]:
#vertical stack
np.vstack((a,b))

array([[2., 7.],
       [4., 0.],
       [9., 0.],
       [6., 5.]])

In [211]:
#horizontal stack
np.hstack((a,b))

array([[2., 7., 9., 0.],
       [4., 0., 6., 5.]])

In [212]:
#for 1D array use "column_stack"
a = np.array([4,2])
b = np.array([3,8])
np.column_stack((a,b))

array([[4, 3],
       [2, 8]])

In [213]:
np.hstack((a,b))

array([4, 2, 3, 8])

In [218]:
np.vstack((a,b))

array([[4, 2],
       [3, 8]])

## Splitting different arrays

Splitting array into smaller ones

In [251]:
a = np.floor(10*np.random.random((2,12)))
a

array([[6., 9., 9., 1., 9., 5., 3., 4., 9., 6., 7., 8.],
       [7., 5., 1., 9., 0., 8., 4., 7., 0., 4., 2., 6.]])

In [254]:
# Horizontal split
from pprint import pprint
pprint(np.hsplit(a,3))   #split into 3
pprint(np.hsplit(a,5))   #raises ValueError: the 12 columns cannot be divided into 5 equal parts

[array([[6., 9., 9., 1.],
       [7., 5., 1., 9.]]),
 array([[9., 5., 3., 4.],
       [0., 8., 4., 7.]]),
 array([[9., 6., 7., 8.],
       [0., 4., 2., 6.]])]


ValueError: array split does not result in an equal division

In [253]:
pprint(np.hsplit(a,(3,4))) #Split at the third and the fourth column

[array([[6., 9., 9.],
       [7., 5., 1.]]),
 array([[1.],
       [9.]]),
 array([[9., 5., 3., 4., 9., 6., 7., 8.],
       [0., 8., 4., 7., 0., 4., 2., 6.]])]


In [228]:
pprint(np.vsplit(a.reshape(6,4),3))   #split into 3

[array([[1., 4., 5., 2.],
       [5., 9., 8., 7.]]),
 array([[2., 4., 0., 7.],
       [7., 9., 2., 0.]]),
 array([[4., 1., 0., 0.],
       [9., 7., 6., 5.]])]


## Basic Arithmetic Operations
### Element-wise Operations

* These work on arrays of the same shape

In [120]:
a = np.array( [20,30,40,50] )

In [121]:
b = np.arange( 4 )

In [122]:
print(a)
print(b)

[20 30 40 50]
[0 1 2 3]


In [123]:
# operation on two arrays
c = a-b

In [124]:
c

array([20, 29, 38, 47])

In [125]:
# multiplying constant
b*2

array([0, 2, 4, 6])

In [126]:
# ** = power operation.
print(2**b)
print(b**2)

[1 2 4 8]
[0 1 4 9]


In [127]:
10*np.sin(a)

array([ 9.12945251, -9.88031624,  7.4511316 , -2.62374854])

In [128]:
# element wise multiplication
a*b

array([  0,  30,  80, 150])

* It’s also possible to do operations on arrays of different sizes if NumPy can transform these arrays so that they all have the same size: this conversion is called **broadcasting.**

In [275]:
a = np.array([[ 0,  0,  0],
       [10, 10, 10],
       [20, 20, 20],
       [30, 30, 30]])
a

array([[ 0,  0,  0],
       [10, 10, 10],
       [20, 20, 20],
       [30, 30, 30]])

In [276]:
b = np.array([0, 1, 2])

In [277]:
a + b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

### Array Operations

In [129]:
# Matrix multiplication
# Define the 1-D operands here: the names a and b were rebound to the
# broadcasting example's arrays just above, which would not give a scalar.
# For two 1-D arrays, both @ and .dot() compute the inner (dot) product.
a = np.array( [20,30,40,50] )
b = np.arange( 4 )
print(a @ b)
print(a.dot(b))

260
260


In [130]:
# Let's try 2-D arrays
A = np.array( [[1,1],[0,1]])
B = np.array( [[2,0],[3,4]])

In [135]:
A.dot(B)

array([[5, 4],
       [3, 4]])

These operations are, of course, much faster than pure Python.

In [136]:
a = np.arange(10000)
%timeit a + 1 

7.93 µs ± 502 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [137]:
list1 = range(10000)
%timeit [i+1 for i in list1]

676 µs ± 10.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Comparisons and Other Operations

In [271]:
# Element-wise comparisons
a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])
print(a == b)
print(a > b)

[False  True False  True]
[False False  True False]


In [272]:
print(np.any(a == b))
print(np.all(a == b))

True
False


In [230]:
# Array-wise comparisons
a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])
c = np.array([1, 2, 3, 4])

print(np.array_equal(a,b))
print(np.array_equal(a,c))

False
True


In [231]:
# Logical operations.
a = np.array([1, 1, 0, 0], dtype=bool)
b = np.array([1, 0, 1, 0], dtype=bool)
print(np.logical_or(a, b))
print(np.logical_and(a, b))

[ True  True  True False]
[ True False False False]


In [244]:
# Transcendental operations.
a = np.arange(1,5)
print(np.sin(a))
print(np.log(a))
print(np.exp(a))

[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]
[0.         0.69314718 1.09861229 1.38629436]
[ 2.71828183  7.3890561  20.08553692 54.59815003]


In [248]:
a= np.arange(3)
print(a)
np.sqrt(a)

[0 1 2]


array([0.        , 1.        , 1.41421356])

In [233]:
# Computing sums
x = np.array([1, 2, 3, 4])
x.sum()

10

In [234]:
# Sum by rows and by columns
x = np.array([[1, 1], [2, 2]])
x

array([[1, 1],
       [2, 2]])

In [235]:
print(x.sum(axis=0)) #Columns
print(x.sum(axis=1)) #rows

[3 3]
[2 4]


In [236]:
# x[:, 0] selects every row of column 0; the previous slice x[1:2, 0:1]
# picked only the single element at row 1, column 0.
print("sum of first column: %d "%x[:, 0].sum())  #sum of first column
# x[1, :] selects row 1 (the second row) across all columns.
print("sum of second row: %d "%x[1, :].sum())  #sum of second row

sum of first column: 3 
sum of second row: 4 


In [237]:
# min , max
x = np.array([1, 3, 2])
print("Min: %d "%x.min())
print("Min Index: %d "%x.argmin())
print("Max: %d "%x.max())
print("Max Index: %d "%x.argmax())

Min: 1 
Min Index: 0 
Max: 3 
Max Index: 1 


In [238]:
# Statistics
x = np.array([1, 2, 3, 1])
y = np.array([[1, 2, 3], [5, 6, 1]])
print(x.mean())
print(np.median(x))
print(x.std())
print(np.median(y, axis=-1))

1.75
1.5
0.82915619758885
[2. 5.]


In [240]:
x.cumsum(axis=0)   

array([1, 3, 6, 7], dtype=int32)

In [241]:
b = np.arange(12).reshape(3,4)
b.sum(axis=0) 

array([12, 15, 18, 21])

In [242]:
b.cumsum(axis=1)

array([[ 0,  1,  3,  6],
       [ 4,  9, 15, 22],
       [ 8, 17, 27, 38]], dtype=int32)