In [3]:
import numpy as np
import sys

# NumPy is pure gold. It is fast, easy to learn, feature-rich, and 
# therefore at the core of almost all popular scientific packages in the 
# Python universe (including SciPy and Pandas, two most widely used packages
# for data science and statistical modeling). 
# LISTs are good, you can change values but you cannot apply mathematical 
# functions

In [4]:
distance = [10, 15, 17, 26]
time     = [1,   2,  3,  4]
a=distance + time
a

[10, 15, 17, 26, 1, 2, 3, 4]

In [5]:
speed = distance / time
speed

TypeError: unsupported operand type(s) for /: 'list' and 'list'

In [6]:
np_distance = np.array(distance)
np_time     = np.array(time)

In [7]:
np_time

array([1, 2, 3, 4])

In [8]:
np_time
type(np_time)

numpy.ndarray

In [9]:
np_time.dtype

dtype('int32')

In [10]:
speed = np_distance/np_time
speed

array([10.        ,  7.5       ,  5.66666667,  6.5       ])

In [None]:
# why use numpy
# compare numpy array with standard python LIST type array

In [11]:
for i in range(5):
    print(i)

0
1
2
3
4


In [18]:
# PYTHON LIST
data = range(1000)

print(sys.getsizeof(1)*len(data))

# NUMPY ARRAY
data_np = np.arange(1000)
print(data_np.size * data_np.itemsize)

28000
4000


In [None]:
# The above output shows that the memory allocated by list is 28000 whereas the 
# memory allocated by the numpy array is just 4000. 
# From this, you can conclude that there is a major difference between the two and 
# this makes python numpy array as the preferred choice over list.

In [17]:
### Example - getsizeof
print('Integer memory size : ', sys.getsizeof(5) )
## increase size with incearse in digits
print('Integer memory size : ', sys.getsizeof(566666666) )
print('Integer memory size : ', sys.getsizeof(5666666954) )
print('Float   memory size : ', sys.getsizeof(5.05) )
print('Float   memory size : ', sys.getsizeof(5.058585885) )

print('\nString  memory size : ', sys.getsizeof('5') )
print('String  memory size : ', sys.getsizeof('A') )
print('String  memory size : ', sys.getsizeof('Ab') )
print('String  memory size : ', sys.getsizeof('AAA') )
print('String  memory size : ', sys.getsizeof('AAAA') )
print('String  memory size : ', sys.getsizeof('AAAAA') )

Integer memory size :  28
Integer memory size :  28
Integer memory size :  32
Float   memory size :  24
Float   memory size :  24

String  memory size :  50
String  memory size :  50
String  memory size :  51
String  memory size :  52
String  memory size :  53
String  memory size :  54


In [26]:
a = [1,2,3,4,5]
b= [10,11,12,13,14,15]
x = zip(a,b)
list(x)

[(1, 10), (2, 11), (3, 12), (4, 13), (5, 14)]

In [17]:
import time
import sys
 
SIZE = 1000000
 
L1= range(SIZE)
L2= range(SIZE)
A1= np.arange(SIZE)
A2=np.arange(SIZE)
 
start= time.time()
result=[(x,y) for x,y in zip(L1,L2)]
print((time.time()-start)*1000)
 
start=time.time()
result= A1+A2
print((time.time()-start)*1000)

277.7698040008545
79.7874927520752


In [27]:
SIZE = 10
L1= range(SIZE)
L2= range(SIZE)
[(x,y) for x,y in zip(L1,L2)]

[(0, 0),
 (1, 1),
 (2, 2),
 (3, 3),
 (4, 4),
 (5, 5),
 (6, 6),
 (7, 7),
 (8, 8),
 (9, 9)]

In [19]:
np_arr = np.arange(1000000)
normal_arr = list(range(1000000))


In [20]:
%%time 
# loop using numpy array
for i in range(5):
    np_arr2 = np_arr * 2

Wall time: 19.9 ms


In [35]:
%%time 
# loop using normal python array, list
for i in range(5):
    normal_arr2 = [x * 2 for x in normal_arr]
    
  
a= 10
b= 60
for i in range(100):
    c= a+b

Wall time: 712 ms


In [18]:
print(np_arr2)

[      0       2       4 ... 1999994 1999996 1999998]


In [31]:
# NumPy-based algorithms are generally 10 to 100 times faster (or more) 
# than their pure Python counterparts and use significantly less memory.

In [32]:
# NDIM

In [29]:
a = np.array([(1,2,3), (4,5,6)])

print(a.ndim)     # number of dimensions in the array
print(a.itemsize) # So every element occupies 4 byte in the above numpy array.
print(a.size)
print(a.shape)

2
4
6
(2, 3)


In [30]:
a

array([[1, 2, 3],
       [4, 5, 6]])

In [34]:
# linespace
# Equally-spaced numbers

In [24]:
np.linspace(1, 10, 5)

array([ 1.  ,  3.25,  5.5 ,  7.75, 10.  ])

In [26]:
np.linspace(0, 5, 10)

array([0.        , 0.55555556, 1.11111111, 1.66666667, 2.22222222,
       2.77777778, 3.33333333, 3.88888889, 4.44444444, 5.        ])

In [37]:
# Random
# The random module provides functions to generate random numbers 
# (and also statistical distributions) of any given shape.

# numpy.random.randn generates samples from the normal distribution, 
# numpy.random.rand  generates samples from the unifrom (in range [0,1)).

In [43]:
# Random numbers between [0,1) of shape 2,3
print(np.random.rand(2, 3))

[[0.9079346  0.05760097 0.57337076]
 [0.29097431 0.28598622 0.02840491]]


In [39]:
# Random integers between [0, 10) of shape 2,2
print(np.random.randint(0, 15, size=[3,3]))

[[ 9 11 13]
 [ 8  4  6]
 [ 4 14  6]]


In [40]:
# One random number between [0, 1)
print(np.random.random())

0.5764664065317986


In [41]:
# Random numbers between [0,1) of shape 2,2
print(np.random.random(size=[2, 2]))

[[0.08244909 0.77647577]
 [0.2682347  0.62079719]]


In [57]:
# Pick 10 items from a given list, with equal probability
print(np.random.choice(['deepak', 'raj', 'rakesh', 'owl', 'monkey'], size=3, replace=True))  

['monkey' 'owl' 'deepak']


In [28]:
# If you want to repeat the same set of random numbers every time, you need to set the seed 
# or the random state. The see can be any value. The only requirement is you must set the seed 
# to the same value every time you want to generate the same set of random numbers.

# Once np.random.RandomState is created, all the functions of the np.random module becomes 
# available to the created randomstate object.

In [89]:
# Create the random state
rn = np.random.RandomState(0)

# Create random numbers between [0,1) of shape 2,2
print(rn.rand(3,4))

[[0.5488135  0.71518937 0.60276338 0.54488318]
 [0.4236548  0.64589411 0.43758721 0.891773  ]
 [0.96366276 0.38344152 0.79172504 0.52889492]]


In [91]:
# Set the random seed
np.random.seed(6)

# Create random numbers between [0,1) of shape 2,2
print(np.random.rand(2,2))

[[0.89286015 0.33197981]
 [0.82122912 0.04169663]]


In [92]:
# How to get the unique items and the counts?
# The np.unique method can be used to get the unique items. 
# If you want the repetition counts of each item, set the return_counts parameter to True.

# Create random integers of size 10 between [0,10)
np.random.seed(100)
arr_rand = np.random.randint(0, 10, size=10)
print(arr_rand)

[8 8 3 7 7 0 4 2 5 2]


In [95]:
# Get the unique items and their counts
uniqs, counts = np.unique(arr_rand, return_counts= True)
print("Unique items : ", uniqs)
print("Counts       : ", counts)

Unique items :  [0 2 3 4 5 7 8]
Counts       :  [1 2 1 1 1 2 2]


In [96]:
# Get the unique items and their counts
uniqs = np.unique(arr_rand)
print("Unique items : ", uniqs)
#print("Counts       : ", counts)

Unique items :  [0 2 3 4 5 7 8]


In [97]:
# Normal distribution with mean=0 and variance=1
data = np.random.randn(3, 3)

In [98]:
data

array([[-0.23471518, -1.43973704,  0.4328165 ],
       [ 2.30447259, -0.12263538,  0.01545931],
       [ 0.26601635, -0.11379467,  1.01106614]])

In [99]:
data + data

array([[-0.46943035, -2.87947407,  0.86563301],
       [ 4.60894519, -0.24527076,  0.03091862],
       [ 0.5320327 , -0.22758934,  2.02213229]])

In [100]:
data.shape

(3, 3)

In [101]:
data.dtype

dtype('float64')

In [102]:
data.var()

0.9164691680698515

In [103]:
# creating ndarray
data1 = [6, 7.1, 85, 0, 1]

In [104]:
arr1 = np.array(data1)

In [105]:
arr1

array([ 6. ,  7.1, 85. ,  0. ,  1. ])

In [106]:
# nested array
data2 = [[1, 2.3, 3, 4], [5, 6, 7, 8]]

In [107]:
arr2 = np.array(data2)

In [108]:
arr2

array([[1. , 2.3, 3. , 4. ],
       [5. , 6. , 7. , 8. ]])

In [109]:
arr2.shape

(2, 4)

In [110]:
arr2.ndim

2

In [111]:
arr2.dtype

dtype('float64')

In [33]:
# other methods of creating arrays

In [110]:
# zeros
a = np.zeros(10)
a

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [168]:
b = np.zeros((3, 6))
b

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [169]:
c = np.zeros_like(b)
c

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [112]:
# ones
a = np.ones(10)
a

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [113]:
b = np.ones((3, 6))
b

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [114]:
c = np.ones_like(b)
c

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [116]:
# arange
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [172]:
a.dtype

dtype('int32')

In [117]:
# convert the data type
b = a.astype(np.float64)
b

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14.])

In [74]:
# Arithmetic with NumPy Arrays
# Any arithmetic operations between equal-size arrays applies the 
# operation element-wise:

In [118]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])

In [119]:
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [177]:
arr=arr + 10
arr

array([[21., 22., 23.],
       [24., 25., 26.]])

In [120]:
arr + arr

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

In [179]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [121]:
# Comparisons between arrays of the same size yield boolean arrays:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])

In [122]:
arr

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [125]:
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [124]:
arr2 > 5

array([[False, False, False],
       [ True, False,  True]])

In [184]:
arr2[arr2 > 5]

array([ 7., 12.])

In [126]:
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

In [127]:
# Basic Indexing and Slicing
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [128]:
arr[5]

5

In [190]:
arr[5:8]

array([5, 6, 7])

In [130]:
arr[7]=70

In [131]:
# broadcasting a value to multiple array positions
arr[5:8] = 12

In [132]:
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [133]:
arr_slice[2]=20

In [134]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 20,  8,  9])

In [137]:
arr_slice

array([12345,    12,    20])

In [136]:
# change values in arr_slice, the mutations are reflected in the original 
# array arr
arr_slice[0] = 12345

In [201]:
arr

array([    0,     1,     2,     3,     4, 12345,    12,    20,     8,
           9])

In [138]:
# using : only means all values
arr_slice[1:2] = 9999

In [139]:
arr_slice

array([12345,  9999,    20])

In [140]:
arr

array([    0,     1,     2,     3,     4, 12345,  9999,    20,     8,
           9])

In [141]:
arr

array([    0,     1,     2,     3,     4, 12345,  9999,    20,     8,
           9])

In [None]:
# if you just assign a portion of an array to another array, the new array you just 
# created actually refers to the parent array in memory.
# That means, if you make any changes to the new array, it will reflect in the parent 
# array as well.

# So to avoid disturbing the parent array, you need to make a copy of it using copy(). 
# All numpy arrays come with the copy() method.

In [206]:
# If you want a copy of a slice of an ndarray instead of a view, you
# will need to explicitly copy the array — for example,
# arr[5:8].copy().
arr_slice = arr[5:8].copy()

In [207]:
arr_slice

array([12345,  9999,    20])

In [208]:
arr_slice[:] = 888

In [209]:
arr_slice

array([888, 888, 888])

In [210]:
# the original array is not changed
arr

array([    0,     1,     2,     3,     4, 12345,  9999,    20,     8,
           9])

In [142]:
# Create a 2d array with 3 rows and 4 columns
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]
print(list2)
arr2 = np.array(list2, dtype='float')
arr2

[[1, 2, 3, 4], [3, 4, 5, 6], [5, 6, 7, 8]]


array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [143]:
arr2a = arr2[:2,:2]
arr2a


array([[1., 2.],
       [3., 4.]])

In [145]:
arr2a[:1, :1] = 100  # 100 will reflect in arr2
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,   4.,   5.,   6.],
       [  5.,   6.,   7.,   8.]])

In [216]:
# Copy portion of arr2 to arr2b
arr2b = arr2[:2, :2].copy()
arr2b[:1, :1] = 101  # 101 will not reflect in arr2
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,   4.,   5.,   6.],
       [  5.,   6.,   7.,   8.]])

In [217]:
arr2b

array([[101.,   2.],
       [  3.,   4.]])

In [239]:
# Indexing with slices
arr = np.arange(10)

In [240]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [241]:
arr[1:6]

array([1, 2, 3, 4, 5])

In [219]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [220]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [221]:
arr2d[2]

array([7, 8, 9])

In [222]:
arr2d[0][2]

3

In [223]:
# expression arr2d[:2] as “select the first two rows of arr2d.”
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [224]:
# first 2 rows and all columns from 1st pos
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [167]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [168]:
# 1st row, all columns 0th and 1st
arr2d[1, :2]

array([4, 5])

In [169]:
arr2d[:2, 2]

array([3, 6])

In [170]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [119]:
arr2d[:, 1]

array([2, 5, 8])

In [121]:
arr2d[:2, 1:] = 0

In [146]:
arr2d

NameError: name 'arr2d' is not defined

In [150]:
# Boolean Indexing
names = np.array(['Rajat', 'Maruthi', 'Dinesh', 'Rajat', 'Bhanu', 'Viswa', 'Kamal'])
data = np.random.randn(7, 4)

In [151]:
names.dtype

dtype('<U7')

In [152]:
data

array([[-2.26909897, -0.9565702 ,  0.19481524, -1.42373815],
       [-1.34536714, -0.35877153,  0.60472388,  0.63780573],
       [-0.55948633,  0.05620095, -0.10788254,  0.08897395],
       [-0.41584698,  1.88400146, -1.87701393, -0.86617252],
       [ 1.52518019,  0.38749346, -0.05276209,  0.1124642 ],
       [-0.88674841, -0.83371605,  1.99693119,  0.6792209 ],
       [ 0.95965714, -0.02645698,  1.80954955,  0.59910474]])

In [153]:
# Suppose each name corresponds to a row in the data array and we wanted to 
# select all the rows with corresponding name 'Rajat'.
names == 'Rajat'

array([ True, False, False,  True, False, False, False])

In [235]:
data[names == 'Rajat']

array([[-2.26909897, -0.9565702 ,  0.19481524, -1.42373815],
       [-0.41584698,  1.88400146, -1.87701393, -0.86617252]])

In [40]:
# The boolean array must be of the same length as the array axis it’s 
# indexing.

In [154]:
# what happens if they are of different lengths
names = np.array(['Rajat', 'Maruthi', 'Rajat', 'Bhanu', 'Viswa' ])
data = np.random.randn(7, 4)

In [155]:
data[names == 'Rajat']

IndexError: boolean index did not match indexed array along dimension 0; dimension is 7 but corresponding boolean dimension is 5

In [157]:
names = np.array(['Rajat', 'Maruthi', 'Dinesh', 'Rajat', 'Bhanu', 'Viswa', 'Kamal'])
data = np.random.randn(7, 4)
data

array([[ 0.46356387, -0.91407538,  0.89197757, -0.24572027],
       [ 1.33713372, -1.02185406, -2.13159728,  0.08917259],
       [-0.0704072 , -1.47593493, -0.13225217,  0.39333064],
       [ 2.4250628 , -1.67028184, -0.91582358, -1.75439357],
       [ 1.39720228, -1.59810449, -0.30973661,  0.54801802],
       [ 0.09566247, -1.59203835,  0.69948798,  0.11292401],
       [-0.76978559,  0.3038585 , -1.2977107 , -0.32988693]])

In [158]:
data[names == 'Rajat']

array([[ 0.46356387, -0.91407538,  0.89197757, -0.24572027],
       [ 2.4250628 , -1.67028184, -0.91582358, -1.75439357]])

In [159]:
data[names == 'Rajat', :2]

array([[ 0.46356387, -0.91407538],
       [ 2.4250628 , -1.67028184]])

In [160]:
data[names == 'Rajat', 2]

array([ 0.89197757, -0.91582358])

In [133]:
# To select everything but 'Rajat', you can either use != or negate 
# the condition using ~

In [247]:
names != 'Rajat'

array([False,  True,  True, False,  True,  True,  True])

In [248]:
data[names != 'Rajat']

array([[-0.76557812, -0.14811843,  0.90492003,  0.30568476],
       [-1.26940026,  1.25621318, -0.68644723, -2.19446119],
       [-0.30264257,  0.46026512,  0.21663232, -0.29681501],
       [ 0.79997837, -0.42069976,  1.14731173, -0.4635474 ],
       [-0.00278037, -0.42134602, -1.73084024, -0.08606841]])

In [249]:
data[~(names == 'Rajat')]

array([[-0.76557812, -0.14811843,  0.90492003,  0.30568476],
       [-1.26940026,  1.25621318, -0.68644723, -2.19446119],
       [-0.30264257,  0.46026512,  0.21663232, -0.29681501],
       [ 0.79997837, -0.42069976,  1.14731173, -0.4635474 ],
       [-0.00278037, -0.42134602, -1.73084024, -0.08606841]])

In [251]:
cond = names == 'Rajat'
cond

array([ True, False, False,  True, False, False, False])

In [252]:
data[cond]

array([[-0.96689767,  0.55227176, -1.96426406, -0.62976376],
       [ 0.52966574, -1.48721676, -0.52004546, -0.22390027]])

In [253]:
data[~cond]

array([[-0.76557812, -0.14811843,  0.90492003,  0.30568476],
       [-1.26940026,  1.25621318, -0.68644723, -2.19446119],
       [-0.30264257,  0.46026512,  0.21663232, -0.29681501],
       [ 0.79997837, -0.42069976,  1.14731173, -0.4635474 ],
       [-0.00278037, -0.42134602, -1.73084024, -0.08606841]])

In [140]:
# Selecting two of the three names to combine multiple boolean conditions, 
# use boolean arithmetic operators like & (and) and | (or):

In [163]:
# The Python keywords and and or do not work with boolean arrays.
# Use & (and) and | (or) instead.
cond = (names == 'Rajat') | (names == 'Bhanu')

In [164]:
data[cond]

array([[ 0.46356387, -0.91407538,  0.89197757, -0.24572027],
       [ 2.4250628 , -1.67028184, -0.91582358, -1.75439357],
       [ 1.39720228, -1.59810449, -0.30973661,  0.54801802]])

In [165]:
cond = data > 0

In [166]:
data[cond]

array([0.46356387, 0.89197757, 1.33713372, 0.08917259, 0.39333064,
       2.4250628 , 1.39720228, 0.54801802, 0.09566247, 0.69948798,
       0.11292401, 0.3038585 ])

In [258]:
data[cond] = 0

In [259]:
data

array([[-0.96689767,  0.        , -1.96426406, -0.62976376],
       [-0.76557812, -0.14811843,  0.        ,  0.        ],
       [-1.26940026,  0.        , -0.68644723, -2.19446119],
       [ 0.        , -1.48721676, -0.52004546, -0.22390027],
       [-0.30264257,  0.        ,  0.        , -0.29681501],
       [ 0.        , -0.42069976,  0.        , -0.4635474 ],
       [-0.00278037, -0.42134602, -1.73084024, -0.08606841]])

In [167]:
# change data for Rajat = 7
data[names != 'Rajat'] = 7

In [168]:
name1="ABC"
name2= "XYZ"

if(name1 === name2):
    print("OK")
else:
    print("not ok")

SyntaxError: invalid syntax (<ipython-input-168-5ca1d1ccfe70>, line 4)

In [169]:
data

array([[ 0.46356387, -0.91407538,  0.89197757, -0.24572027],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 2.4250628 , -1.67028184, -0.91582358, -1.75439357],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ]])

In [149]:
# Transposing Arrays and Swapping Axes
# Transposing is a special form of reshaping that similarly returns a view on the underlying
# data without copying anything

In [260]:
a = np.array([(8,9,10),(11,12,13)])
print(a)
print("---------")
a=a.reshape(6,1)
print(a)

[[ 8  9 10]
 [11 12 13]]
---------
[[ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]]


In [261]:
arr = np.arange(15)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [170]:
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [264]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [153]:
# Universal Functions: Fast Element-Wise Array Functions


In [175]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [176]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [172]:
np.exp(arr)

array([[1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
        5.45981500e+01],
       [1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03,
        8.10308393e+03],
       [2.20264658e+04, 5.98741417e+04, 1.62754791e+05, 4.42413392e+05,
        1.20260428e+06]])

In [177]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [174]:
np.floor(arr)

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.,  9.],
       [10., 11., 12., 13., 14.]])

In [158]:
# useful in data anlytics
# is not a number
np.isnan(arr)

array([False, False, False, False, False, False, False, False, False, False], dtype=bool)

In [180]:
# binary functions
x = np.random.randn(8)
y = np.random.randn(8)

In [181]:
x

array([-0.96689767,  0.55227176, -1.96426406, -0.62976376, -0.76557812,
       -0.14811843,  0.90492003,  0.30568476])

In [182]:
y

array([-1.26940026,  1.25621318, -0.68644723, -2.19446119,  0.52966574,
       -1.48721676, -0.52004546, -0.22390027])

In [183]:
np.maximum(x, y)

array([-0.96689767,  1.25621318, -0.68644723, -0.62976376,  0.52966574,
       -0.14811843,  0.90492003,  0.30568476])

In [184]:
np.minimum(x, y)

array([-1.26940026,  0.55227176, -1.96426406, -2.19446119, -0.76557812,
       -1.48721676, -0.52004546, -0.22390027])

In [None]:
# Array-Oriented Programming with Arrays
# NumPy arrays enables you to express many kinds of data processing tasks 
# as concise array expressions that might otherwise require writing loops

In [None]:
# This practice of replacing explicit loops with array expressions 
# is commonly referred to as vectorization.

In [None]:
# vectorized array operations will often be one or two (or more) orders
# of magnitude faster than their pure Python equivalents

In [26]:
# to evaluate the function sqrt(x^2 + y^2) 
# across a regular grid of values.

In [185]:
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points

In [186]:
x = np.arange(10)
y = np.arange(10)

In [187]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [188]:
y

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [273]:
np.sqrt(np.add(x ** 2 , y ** 2))

array([ 0.        ,  1.41421356,  2.82842712,  4.24264069,  5.65685425,
        7.07106781,  8.48528137,  9.89949494, 11.3137085 , 12.72792206])

In [None]:
# random number **********************************************

In [274]:
# mean at 0 and standard deviation of 1
# This will return random floats in the half-open interval [0.0, 1.0) 
# following the “continuous uniform” distribution.
np.random.randn(5)

array([0.83647887, 0.72927917, 0.52606288, 0.67049467, 0.75480813])

In [275]:
np.random.random((2,2))

array([[0.5927053 , 0.21856877],
       [0.48801456, 0.62094638]])

In [276]:
# Generate A Random Number From The Normal Distribution
np.random.normal()

-0.0031941735216437397

In [277]:
# Generate Four Random Numbers From The Normal Distribution
np.random.normal(size=4)

array([-0.83245381, -1.77034816, -0.56640401,  0.98461573])

In [238]:
# Random number from distribution with mean 15, variance 2
np.random.normal(15, 2)

18.294623469239344

In [257]:
# an array of 10 points randomly sampled from a normal distribution
# loc=mean, scale=std deviation
np.random.normal(loc=0.0, scale=1.0, size=10)

array([ 0.09707755,  0.96864499, -0.70205309, -0.32766215, -0.39210815,
       -1.46351495,  0.29612028,  0.26105527,  0.00511346, -0.23458713])

In [174]:
# To return a 8 by 5 array of random numbers from the same distribution: 
np.random.normal(15, 2, size=(3, 4))

array([[ 11.78503353,  15.36926772,  15.51976559,  16.56364574],
       [ 12.52609858,  12.35908677,  16.04388313,  15.59396935],
       [ 15.5009857 ,  15.69289642,  13.63995056,  15.46450739]])

In [176]:
# A 5 by 3 array of random numbers from the standard normal distribution 
# with mean 1 and variance 1:

np.random.normal(size=(2, 4))

array([[ 1.15859558, -0.82068232,  0.96337613,  0.41278093],
       [ 0.82206016,  1.89679298, -0.24538812, -0.75373616]])

In [177]:
# Generate Four Random Integers Between 1 and 100
np.random.randint(low=1, high=100, size=4)

array([22, 30, 38, 51])

In [239]:
# Randomly constructing 1D array
array = np.random.rand(5)
print("1D Array filled with random values : \n", array)

1D Array filled with random values : 
 [0.03400909 0.88722056 0.4971579  0.02616028 0.8918212 ]


In [182]:
# Randomly constructing 1D array following Gaussian Distribution

# 1D Array
array = np.random.normal(0.0, 1.0, 5)
print("1D Array filled with random values "
      "as per gaussian distribution : \n", array)

# 3D array
array = np.random.normal(0.0, 1.0, (2, 3))
print("\n\n2D Array filled with random values "
      "as per gaussian distribution : \n", array)

1D Array filled with random values as per gaussian distribution : 
 [-0.60390437  0.30444912  0.25720749  0.02393181  0.87191399]


3D Array filled with random values as per gaussian distribution : 
 [[ 1.43735633  0.00730637  1.33088133]
 [ 0.98820261  0.23229616  0.17618092]]


In [None]:
# Making random numbers predictable *******************

In [240]:
# Set the state of the random number generator
np.random.seed(42)

In [241]:
# One set of random numbers
first_random_arr = np.random.normal(size=(2, 2))
print(first_random_arr)

# Another set
second_random_arr = np.random.normal(size=(2, 2))
print(second_random_arr)

# Reset the state of the random number generator
np.random.seed(42)

# The same as "first_random_arr" above.
print(np.random.normal(size=(2, 2)))

# The same as "second_random_arr" above.
print(np.random.normal(size=(2, 2)))

[[ 0.49671415 -0.1382643 ]
 [ 0.64768854  1.52302986]]
[[-0.23415337 -0.23413696]
 [ 1.57921282  0.76743473]]
[[ 0.49671415 -0.1382643 ]
 [ 0.64768854  1.52302986]]
[[-0.23415337 -0.23413696]
 [ 1.57921282  0.76743473]]


In [278]:
# Create an object array to hold numbers as well as strings
arr1d_obj = np.array([1, 'a'], dtype='object')
arr1d_obj

array([1, 'a'], dtype=object)

In [279]:
# Convert an array back to a list
arr1d_obj.tolist()

[1, 'a']

In [281]:
# Create a 2d array with 3 rows and 4 columns
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]

arr2 = np.array(list2, dtype='float')
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [285]:
arr2[::-1,]

array([[5., 6., 7., 8.],
       [3., 4., 5., 6.],
       [1., 2., 3., 4.]])

In [286]:
# Reverse the row and column positions
arr2[::-1, ::-1]

array([[8., 7., 6., 5.],
       [6., 5., 4., 3.],
       [4., 3., 2., 1.]])

In [257]:
# represent missing values and infinite?
# Missing values can be represented using np.nan object, while np.inf represents infinite. 
# Let’s place some in arr2d.

# Insert a nan and an inf
arr2[1,1] = np.nan  # not a number
arr2[1,2] = np.inf  # infinite
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., nan, inf,  6.],
       [ 5.,  6.,  7.,  8.]])

In [258]:
# Replace nan and inf with -1. 
missing_bool = np.isnan(arr2) | np.isinf(arr2)
print(missing_bool)

arr2[missing_bool] = -1  
arr2

[[False False False False]
 [False  True  True False]
 [False False False False]]


array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [259]:
# compute mean, min, max on the ndarray
# mean, max and min
print("Mean value is: ", arr2.mean())
print("Max value is: ", arr2.max())
print("Min value is: ", arr2.min())

Mean value is:  3.5833333333333335
Max value is:  8.0
Min value is:  -1.0


In [287]:
# compute the minimum values row wise or column wise, use the np.amin 

# Row wise and column wise min
print("Column wise minimum : ", np.amin(arr2, axis=0))
print("Row wise minimum    : ", np.amin(arr2, axis=1))

Column wise minimum :  [1. 2. 3. 4.]
Row wise minimum    :  [1. 3. 5.]


In [None]:
# Reshaping and Flattening Multidimensional arrays
# Reshaping is changing the arrangement of items so that shape of the array changes 
# while maintaining the same number of dimensions.

# Flattening, however, will convert a multi-dimensional array to a flat 1d array. 
# And not any other shape.

# First, let’s reshape the arr2 array from 3×4 to 4×3 shape.

In [261]:
# Reshape a 3x4 array to 4x3 array
arr2.reshape(4, 3)

array([[ 1.,  2.,  3.],
       [ 4.,  3., -1.],
       [-1.,  6.,  5.],
       [ 6.,  7.,  8.]])

In [None]:
# What is the difference between flatten() and ravel()?
# There are 2 popular ways to implement flattening. That is using the flatten() method 
# and the other using the ravel() method.

# The difference between ravel and flatten is, the new array created using ravel is 
# actually a reference to the parent array. 
# So, any changes to the new array will affect the parent as well. 
# But is memory efficient since it does not create a copy.

In [189]:
# Flatten it to a 1d array
arr2.flatten()

array([100.,   2.,   3.,   4.,   3.,   4.,   5.,   6.,   5.,   6.,   7.,
         8.])

In [190]:
# Changing the flattened array does not change parent
b1 = arr2.flatten()  
b1[0] = 105  # changing b1 does not affect arr2
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,   4.,   5.,   6.],
       [  5.,   6.,   7.,   8.]])

In [191]:
b1

array([105.,   2.,   3.,   4.,   3.,   4.,   5.,   6.,   5.,   6.,   7.,
         8.])

In [195]:
arr2

array([[101.,   2.,   3.,   4.],
       [  3.,   4.,   5.,   6.],
       [  5.,   6.,   7.,   8.]])

In [193]:
arr2.ravel() 

array([100.,   2.,   3.,   4.,   3.,   4.,   5.,   6.,   5.,   6.,   7.,
         8.])

In [194]:
# Changing the raveled array changes the parent also.
b2 = arr2.ravel()  
b2[0] = 101  # changing b2 changes arr2 also
arr2


array([[101.,   2.,   3.,   4.],
       [  3.,   4.,   5.,   6.],
       [  5.,   6.,   7.,   8.]])

In [294]:
# create sequences, repetitions 
# Lower limit is 0 be default
print(np.arange(5))  

[0 1 2 3 4]


In [295]:
# 0 to 9
print(np.arange(0, 10))  

[0 1 2 3 4 5 6 7 8 9]


In [296]:
# 0 to 9 with step of 2
print(np.arange(0, 10, 2))  

[0 2 4 6 8]


In [297]:
# 10 to 1, decreasing order
print(np.arange(10, 0, -1))

[10  9  8  7  6  5  4  3  2  1]


In [197]:
# create repeating sequences?
# np.tile will repeat a whole list or array n times. 
# Whereas, np.repeat repeats each item n times.

a = [1,2,3] 

# Repeat whole of 'a' two times
print('Tile:   ', np.tile(a, 4))

# Repeat each element of 'a' two times
print('Repeat: ', np.repeat(a, 3))

Tile:    [1 2 3 1 2 3 1 2 3 1 2 3]
Repeat:  [1 1 1 2 2 2 3 3 3]


In [None]:
# Arrays support vectorised operations, while lists don’t.
# Once an array is created, you cannot change its size. You will have to create a new array 
# or overwrite the existing one.
# Every array has one and only one dtype. All items in it should be of that dtype.
# An equivalent numpy array occupies much less space than a python list of lists.