In [1]:
import numpy as np

In [2]:
a=np.array([0,1,2,3])
print(a)
print(np.arange(10))

[0 1 2 3]
[0 1 2 3 4 5 6 7 8 9]


# NumPy arrays provide a memory-efficient container that enables fast numerical operations

In [3]:
L=range(1000)
%timeit [i**2 for i in L]

672 µs ± 28.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [4]:
b=np.arange(1000)
%timeit b**2

3.43 µs ± 859 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


# Creating Numpy Array

In [5]:
# 1D array
a=np.array([0,1,2,3])
a

array([0, 1, 2, 3])

In [6]:
#Dimension and Shape and length
print(a.ndim)
print(a.shape)
print(len(a))

1
(4,)
4


In [7]:
# 2D array
b=np.array([[1,2,3],[4,5,6]])
b

array([[1, 2, 3],
       [4, 5, 6]])

In [8]:
# Dimension and shape and length
print(b.ndim)
print(b.shape)
print(len(b))

2
(2, 3)
2


In [9]:
# 3D array
c=np.array([[[1,2,0],[3,4,1]],[[5,6,2],[7,8,3]]])
c

array([[[1, 2, 0],
        [3, 4, 1]],

       [[5, 6, 2],
        [7, 8, 3]]])

In [10]:
# Dimension and shape and length
print(c.ndim)
print(c.shape)
print(len(c))

3
(2, 2, 3)
2


# Creating Numpy array using functions

In [11]:
#using arange 
a=np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
a=np.arange(1,10,2) #start ,end(exclusive) ,step
a

array([1, 3, 5, 7, 9])

In [13]:
#using linspace
a=np.linspace(0,1,6) #start ,end(inclusive), no of points
a

array([0. , 0.2, 0.4, 0.6, 0.8, 1. ])

In [14]:
# common array
a=np.ones((3,3))
a

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [15]:
a=np.zeros((3,3))
a

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [16]:
# Identity matrix using eye function
a=np.eye(3)
a

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [17]:
a=np.eye(3,2)
a

array([[1., 0.],
       [0., 1.],
       [0., 0.]])

In [18]:
# using diag function
a=np.diag([1,2,3,4])
a

array([[1, 0, 0, 0],
       [0, 2, 0, 0],
       [0, 0, 3, 0],
       [0, 0, 0, 4]])

In [19]:
# given a 2D array, np.diag extracts its main diagonal as a 1D array
b=np.diag(a)
b

array([1, 2, 3, 4])

In [20]:
# using random module
# no seed is set in this notebook, so the sampled values differ on every run
a=np.random.rand(4) # elements of the array sampled from the uniform distribution over [0, 1)
a

array([0.61059196, 0.57026211, 0.68163804, 0.96711877])

In [21]:
a=np.random.randn(4)  # elements of the array sampled from standard normal distribution
a

array([ 0.23769293,  0.25506167,  0.63219961, -0.16492655])

# Basic Data Types:

In [22]:
# the default integer dtype is platform dependent: int32 was reported on the machine
# that produced this output; other platforms may report int64
a=np.arange(10)
a.dtype

dtype('int32')

In [23]:
# specifying the data type when creating a numpy array
a=np.arange(10,dtype="float64") # floats are printed with a trailing dot: 5. means 5.0 and so on
a

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [24]:
# ones, zeros, eye and linspace return float64 arrays by default;
# pass dtype=... (e.g. np.ones((3,3), dtype=int)) to get another type
a=np.ones((3,3))
a.dtype

dtype('float64')

In [25]:
a=np.zeros((3,3))
a.dtype

dtype('float64')

In [26]:
# complex data types
a=np.array([1+2j,2+3j])
a.dtype

dtype('complex128')

In [27]:
# boolean types array
a=np.array([True,False,False,True])
a.dtype

dtype('bool')

In [28]:
# string data types: '<U5' is a Unicode string dtype whose width (5) is the length of the longest element
a=np.array(["Ram","Shyam","Jadu","Madhu"])
a.dtype

dtype('<U5')

# Indexing and Slicing:

In [29]:
# accessing an element in a 1D numpy array (indices start at 0)
a=np.arange(10)
print(a[5])

5


In [30]:
# accessing an element in a multidimensional numpy array: a[row, column]
a=np.diag([1,2,3])
print(a[2,2])

3


In [31]:
a[2,1]=5    # Assigning element
print(a)

[[1 0 0]
 [0 2 0]
 [0 5 3]]


In [32]:
a=np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [33]:
a[1:8:2]  # [start:end(exclusive):step]

array([1, 3, 5, 7])

In [34]:
# we can combine assignment and slicing
a[5:]=10
a

array([ 0,  1,  2,  3,  4, 10, 10, 10, 10, 10])

In [35]:
# reverse 
a[::-1]

array([10, 10, 10, 10, 10,  4,  3,  2,  1,  0])

In [36]:
b=np.arange(5)
a[5:]=b[::-1] # the reversed b is copied element by element into the last five slots of a
a

array([0, 1, 2, 3, 4, 4, 3, 2, 1, 0])

In [37]:
# For memory efficiency numpy shares memory between an array and its sub array without making a copy
a=np.arange(10)
b=a[5:]
np.shares_memory(a,b)

True

In [38]:
# b is a view on a's memory, so changing b also changes a (and vice versa)
b[0]=15
a

array([ 0,  1,  2,  3,  4, 15,  6,  7,  8,  9])

In [39]:
# To avoid this, call copy() on the slice to force an independent copy
a=np.arange(10)
b=a[5:].copy()
np.shares_memory(a,b)

False

In [40]:
b[0]=15
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# Fancy Indexing

In [41]:
# Boolean mask indexing: build a random integer array to filter
a=np.random.randint(0,20,15)   # (low, high (exclusive), number of samples)
a

array([ 6, 19, 19, 12,  0, 13, 14, 13, 19,  6,  2, 15, 10, 18, 19])

In [42]:
mask=(a%2==0)
print(mask)

[ True False False  True  True False  True False False  True  True False
  True  True False]


In [43]:
extracted_from_a=a[mask]
extracted_from_a

array([ 6, 12,  0, 14,  6,  2, 10, 18])

In [44]:
a[mask]=-1
a

array([-1, 19, 19, -1, -1, 13, -1, 13, 19, -1, -1, 15, -1, -1, 19])

In [45]:
# indexing with list
a=np.arange(0,100,10)
a

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [46]:
a[[1,5,2,3,2]] # a list of indices selects several elements at once (repeated indices are allowed)

array([10, 50, 20, 30, 20])

In [47]:
# assignment
a[[7,9]]=-200
a

array([   0,   10,   20,   30,   40,   50,   60, -200,   80, -200])

# Numerical Operations On Numpy

In [48]:
# All arithmetic operates elementwise
a=np.array([1,2,3,4])
a+1

array([2, 3, 4, 5])

In [49]:
a**2

array([ 1,  4,  9, 16], dtype=int32)

In [50]:
b=np.ones(4)+1
b

array([2., 2., 2., 2.])

In [51]:
a==b

array([False,  True, False, False])

In [52]:
a>b

array([False, False,  True,  True])

In [53]:
# Array compare
c=np.array([1,2,3,4])
np.array_equal(a,b)

False

In [54]:
np.array_equal(a,c)

True

In [55]:
a+b

array([3., 4., 5., 6.])

In [56]:
a-b

array([-1.,  0.,  1.,  2.])

In [57]:
a*b

array([2., 4., 6., 8.])

In [58]:
a/b

array([0.5, 1. , 1.5, 2. ])

In [59]:
# Matrix Multiplication
# NOTE: A*A is the ELEMENT-WISE product; A.dot(A) (equivalently A @ A) is the matrix product.
# The two outputs coincide here only because A is diagonal; for a general matrix they differ.
A=np.diag([1,2,3,4])
print(A*A)        # element-wise product
print("**************")
print(A.dot(A))   # matrix product

[[ 1  0  0  0]
 [ 0  4  0  0]
 [ 0  0  9  0]
 [ 0  0  0 16]]
**************
[[ 1  0  0  0]
 [ 0  4  0  0]
 [ 0  0  9  0]
 [ 0  0  0 16]]


In [60]:
# Logical Operation
a=np.array([1,1,0,0],dtype=bool)
b=np.array([0,1,0,1],dtype=bool)
print(np.logical_or(a,b))
print(np.logical_and(a,b))

[ True  True False  True]
[False  True False False]


In [61]:
# Apply mathematical functions (they operate element-wise)
a=np.arange(5)
print(np.sin(a))
print(np.log(a))   # a[0] is 0, so log yields -inf and NumPy emits a divide-by-zero RuntimeWarning
print(np.exp(a))

[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025 ]
[      -inf 0.         0.69314718 1.09861229 1.38629436]
[ 1.          2.71828183  7.3890561  20.08553692 54.59815003]


  after removing the cwd from sys.path.


In [62]:
# Shape Mismatch: arrays of shape (5,) and (2,) cannot be broadcast together
a=np.arange(5)
try:
    a+np.array([2,3])
except ValueError as err:
    # the error is the point of this demo; catch and display it so that
    # "Restart & Run All" is not aborted by this cell
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (5,) (2,) 

In [63]:
# Basic Reduction
a=np.array([1,2,3,4])
a.sum()

10

In [64]:
a=np.array([[1,1],[2,2]])
a

array([[1, 1],
       [2, 2]])

In [65]:
# Column-wise sum (axis=0 collapses the rows)
a.sum(axis=0)

array([3, 3])

In [66]:
# Row-wise sum (axis=1 collapses the columns)
a.sum(axis=1)

array([2, 4])

In [67]:
# minimum & maximum
a=np.array([1,3,2])
print(a.min())
print(a.max())

1
3


In [68]:
# index of min and max element
print(a.argmin())
print(a.argmax())

0
1


In [69]:
# Other logical operation
a=np.array([True,False,False])
print(np.all(a))
print(np.any(a))

False
True


In [70]:
# Above technique can be used for array comparison
a=np.zeros((4,4))
print(np.any(a!=0))
print(np.all(a==a))

False
True


In [71]:
a=np.array([1,2,3,2])
b=np.array([2,2,3,2])
c=np.array([4,5,6,7])
np.all((a<=b)&(b<=c))

True

In [72]:
# Statistics
x=np.array([1,2,3,1])
y=np.array([[1,2,3],[6,5,7]])
print(np.mean(x))
print(np.median(x))
print(np.std(x))
print(np.median(y,axis=-1))   # last axis that means row wise

1.75
1.5
0.82915619758885
[2. 6.]


# Example:

In [73]:
# load the population data
# NOTE(review): the path is relative to the current working directory — confirm the file
# location before running. The four columns are later unpacked as year, hares, lynxes, carrots.
data=np.loadtxt("Downloads/populations.txt")
data

array([[ 1900., 30000.,  4000., 48300.],
       [ 1901., 47200.,  6100., 48200.],
       [ 1902., 70200.,  9800., 41500.],
       [ 1903., 77400., 35200., 38200.],
       [ 1904., 36300., 59400., 40600.],
       [ 1905., 20600., 41700., 39800.],
       [ 1906., 18100., 19000., 38600.],
       [ 1907., 21400., 13000., 42300.],
       [ 1908., 22000.,  8300., 44500.],
       [ 1909., 25400.,  9100., 42100.],
       [ 1910., 27100.,  7400., 46000.],
       [ 1911., 40300.,  8000., 46800.],
       [ 1912., 57000., 12300., 43800.],
       [ 1913., 76600., 19500., 40900.],
       [ 1914., 52300., 45700., 39400.],
       [ 1915., 19500., 51100., 39000.],
       [ 1916., 11200., 29700., 36700.],
       [ 1917.,  7600., 15800., 41800.],
       [ 1918., 14600.,  9700., 43300.],
       [ 1919., 16200., 10100., 41300.],
       [ 1920., 24700.,  8600., 47300.]])

In [74]:
data.T

array([[ 1900.,  1901.,  1902.,  1903.,  1904.,  1905.,  1906.,  1907.,
         1908.,  1909.,  1910.,  1911.,  1912.,  1913.,  1914.,  1915.,
         1916.,  1917.,  1918.,  1919.,  1920.],
       [30000., 47200., 70200., 77400., 36300., 20600., 18100., 21400.,
        22000., 25400., 27100., 40300., 57000., 76600., 52300., 19500.,
        11200.,  7600., 14600., 16200., 24700.],
       [ 4000.,  6100.,  9800., 35200., 59400., 41700., 19000., 13000.,
         8300.,  9100.,  7400.,  8000., 12300., 19500., 45700., 51100.,
        29700., 15800.,  9700., 10100.,  8600.],
       [48300., 48200., 41500., 38200., 40600., 39800., 38600., 42300.,
        44500., 42100., 46000., 46800., 43800., 40900., 39400., 39000.,
        36700., 41800., 43300., 41300., 47300.]])

In [75]:
# each row of data.T is one column of data, so this unpacks the four columns into 1D arrays
year,hares,lynxes,carrots=data.T

In [76]:
print(year)

[1900. 1901. 1902. 1903. 1904. 1905. 1906. 1907. 1908. 1909. 1910. 1911.
 1912. 1913. 1914. 1915. 1916. 1917. 1918. 1919. 1920.]


In [77]:
# drop the first column (year) and keep the three species columns
population=data[:,1:]
population

array([[30000.,  4000., 48300.],
       [47200.,  6100., 48200.],
       [70200.,  9800., 41500.],
       [77400., 35200., 38200.],
       [36300., 59400., 40600.],
       [20600., 41700., 39800.],
       [18100., 19000., 38600.],
       [21400., 13000., 42300.],
       [22000.,  8300., 44500.],
       [25400.,  9100., 42100.],
       [27100.,  7400., 46000.],
       [40300.,  8000., 46800.],
       [57000., 12300., 43800.],
       [76600., 19500., 40900.],
       [52300., 45700., 39400.],
       [19500., 51100., 39000.],
       [11200., 29700., 36700.],
       [ 7600., 15800., 41800.],
       [14600.,  9700., 43300.],
       [16200., 10100., 41300.],
       [24700.,  8600., 47300.]])

In [78]:
np.mean(population,axis=0)

array([34080.95238095, 20166.66666667, 42400.        ])

In [79]:
np.median(population,axis=0)

array([25400., 12300., 41800.])

In [80]:
np.std(population,axis=0)

array([20897.90645809, 16254.59153691,  3322.50622558])

In [81]:
# index of the species with the largest population in each year
# (column order of population: 0=hares, 1=lynxes, 2=carrots)
np.argmax(population,axis=1)

array([2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2],
      dtype=int64)

# Broadcasting:
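 **NOTE: Two arrays can be broadcast together when their shapes, compared from the last dimension backwards, are equal or one of them is 1 in every position. Two 1D arrays therefore broadcast only if they have the same length or one of them has length 1; otherwise NumPy raises a shape-mismatch error.**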

In [82]:
a=np.tile(np.arange(0,40,10),(3,1))
print(a)
print("*****************")
print(a.T)

[[ 0 10 20 30]
 [ 0 10 20 30]
 [ 0 10 20 30]]
*****************
[[ 0  0  0]
 [10 10 10]
 [20 20 20]
 [30 30 30]]


In [83]:
b=np.array([0,1,2])
a.T +b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [84]:
a=np.array([0,10,20,30])
a=a[:,np.newaxis]    # insert a new axis: shape (4,) -> (4,1), i.e. a 2D column vector
print(a)

[[ 0]
 [10]
 [20]
 [30]]


In [85]:
a+b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

# Array Shape Manipulation

In [93]:
# flattening using ravel
a=np.array([[1,2,3],[4,5,6]])
a.ravel()

array([1, 2, 3, 4, 5, 6])

In [87]:
b=a.T
b.ravel()

array([1, 4, 2, 5, 3, 6])

In [94]:
# reshape
b=a.ravel()
b=b.reshape((2,3))

In [95]:
b

array([[1, 2, 3],
       [4, 5, 6]])

In [99]:
# ravel() and reshape() returned views here, so writing to b also changes a
b[0,0]=100
a

array([[100,   2,   3],
       [  4,   5,   6]])

In [100]:
# Sometimes reshape may also produce a copy!
a=np.zeros((3,2))
b=a.T.reshape(3,2)
print(a)
print("************")
print(b)

[[0. 0.]
 [0. 0.]
 [0. 0.]]
************
[[0. 0.]
 [0. 0.]
 [0. 0.]]


In [101]:
# b is a copy here, so changes in b do not affect a (beware!)
b[0,0]=100
a

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

# Adding a Dimension:

In [102]:
x=np.array([1,2,3])
print(x)
print(x.ndim)

[1 2 3]
1


In [103]:
x=x[:,np.newaxis]     # 1D ------->   2D
print(x)
print(x.ndim)

[[1]
 [2]
 [3]]
2


In [104]:
y=x[:,np.newaxis]    # 2D -> 3D: shape (3,1) becomes (3,1,1)
y.ndim

3

# Dimension Shuffling:

In [105]:
a=np.arange(4*3*2).reshape(4,3,2)
a

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15],
        [16, 17]],

       [[18, 19],
        [20, 21],
        [22, 23]]])

In [106]:
a[0,2,1]

5

In [110]:
# Resize
a=np.array([1,2,3,4])
a.resize((8,))
a

array([1, 2, 3, 4, 0, 0, 0, 0])

In [108]:
# b is a second reference to the same array, so the in-place resize is refused
b=a
try:
    a.resize((4,))
except ValueError as err:
    # the error is the point of this demo; catch and display it so that
    # "Restart & Run All" can continue past this cell
    print(f"ValueError: {err}")

ValueError: cannot resize an array that references or is referenced
by another array in this way.
Use the np.resize function or refcheck=False

# Sorting:

In [115]:
a=np.array([[5,4,6],[2,3,2]])
b=np.sort(a,axis=1)    # Row wise     # not doing inplace
b

array([[4, 5, 6],
       [2, 2, 3]])

In [114]:
a

array([[5, 4, 6],
       [2, 3, 2]])

In [116]:
b=np.sort(a,axis=0)    # column wise
b

array([[2, 3, 2],
       [5, 4, 6]])

In [117]:
# argsort returns the indices that would sort the array
a=np.array([3,2,7,4,5])
j=np.argsort(a)
j

array([1, 0, 3, 4, 2], dtype=int64)

In [118]:
a[j]   # indexing a with the argsort indices yields the sorted array

array([2, 3, 4, 5, 7])