In [1]:
import numpy as np
import sys

# NumPy is pure gold. It is fast, easy to learn, feature-rich, and 
# therefore at the core of almost all popular scientific packages in the 
# Python universe (including SciPy and Pandas, two most widely used packages
# for data science and statistical modeling). 
# LISTs are good, you can change values but you cannot apply mathematical 
# functions

In [2]:
distance = [10, 12, 18, 16]
time     = [1,   2,  3,  4]
a=distance + time
a

[10, 12, 18, 16, 1, 2, 3, 4]

In [5]:
speed = distance / time
speed

TypeError: unsupported operand type(s) for /: 'list' and 'list'

In [4]:
np_distance = np.array(distance)
np_time     = np.array(time)

In [5]:
np_time

array([1, 2, 3, 4])

In [9]:
np_time
type(np_time)

numpy.ndarray

In [6]:
np_time.dtype

dtype('int32')

In [7]:
speed = np_distance/np_time
speed

array([10.,  6.,  6.,  4.])

In [8]:
speed.dtype

dtype('float64')

In [8]:
# why use numpy
# compare numpy array with standard python LIST type array

In [9]:
for i in range(5):
    print(i)

0
1
2
3
4


In [4]:
data = range(1000)
print(type(data))
data = np.array(data)

<class 'range'>


In [4]:
data = range(1000)

In [22]:
d=np.array(data)
d.dtype

dtype('int32')

In [6]:
len(data)

1000

In [9]:
# PYTHON LIST
data = range(1000)

print(sys.getsizeof(9)*len(data))

# NUMPY ARRAY
data_np = np.arange(1000)
print(data_np.size * data_np.itemsize)

28000
4000


In [12]:
# The above output shows that the memory allocated by list is 28000 whereas the 
# memory allocated by the numpy array is just 4000. 
# From this, you can conclude that there is a major difference between the two and 
# this makes python numpy array as the preferred choice over list.

In [6]:
### Example - getsizeof
print('Integer memory size : ', sys.getsizeof(5) )
## increase size with incearse in digits
print('Integer memory size : ', sys.getsizeof(566666666) )
print('Integer memory size : ', sys.getsizeof(5666666954) )
print('Float   memory size : ', sys.getsizeof(5.05) )
print('Float   memory size : ', sys.getsizeof(5111111111.05858588522) )

print('\nString  memory size : ', sys.getsizeof('5') )
print('String  memory size : ', sys.getsizeof('A') )
print('String  memory size : ', sys.getsizeof('Ab') )
print('String  memory size : ', sys.getsizeof('AAA') )
print('String  memory size : ', sys.getsizeof('AAAA') )
print('String  memory size : ', sys.getsizeof('AAAAA') )

Integer memory size :  28
Integer memory size :  28
Integer memory size :  32
Float   memory size :  24
Float   memory size :  24

String  memory size :  50
String  memory size :  50
String  memory size :  51
String  memory size :  52
String  memory size :  53
String  memory size :  54


In [14]:
a = [1,2,3,4,5]
b= [10,11,12,13,14,15]
x = zip(a,b)
list(x)

[(1, 10), (2, 11), (3, 12), (4, 13), (5, 14)]

In [7]:
import time
import sys
 
SIZE = 1000000
 
L1= range(SIZE)
L2= range(SIZE)
A1= np.arange(SIZE)
A2=np.arange(SIZE)
 
start= time.time()
result=[(x,y) for x,y in zip(L1,L2)]
print((time.time()-start)*1000)
 
start=time.time()
result= A1+A2
print((time.time()-start)*1000)

281.2306880950928
62.493085861206055


In [17]:
SIZE = 5
L1= range(SIZE)
L2= range(SIZE)
[(x,y) for x,y in zip(L1,L2)]

[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]

In [17]:
np_arr = np.arange(1000000)
normal_arr = list(range(1000000))


In [18]:
%%time 
# loop using numpy array
for i in range(5):
    np_arr2 = np_arr * 2

Wall time: 15.6 ms


In [19]:
%%time 
# loop using normal python array, list
for i in range(5):
    normal_arr2 = [x * 2 for x in normal_arr]
    
  
a= 10
b= 60
for i in range(100):
    c= a+b

Wall time: 692 ms


In [20]:
print(np_arr2)

[      0       2       4 ... 1999994 1999996 1999998]


In [21]:
# NumPy-based algorithms are generally 10 to 100 times faster (or more) 
# than their pure Python counterparts and use significantly less memory.

In [22]:
# NDIM

In [2]:
import numpy as np
a = np.array([[1,2,3,4], [4,5,3,6], [4,5,3,6]])
print(a)


print(a.ndim)     # number of dimensions in the array
print(a.itemsize) # So every element occupies 4 byte in the above numpy array.
print(a.size)
print(a.shape)

[[1 2 3 4]
 [4 5 3 6]
 [4 5 3 6]]
2
4
12
(3, 4)


In [23]:
a

array([[[1, 2, 3, 4],
        [4, 5, 3, 6],
        [4, 5, 3, 6]]])

In [25]:
# linespace
# Equally-spaced numbers

In [24]:
np.linspace(1, 16, 4)

array([ 1.,  6., 11., 16.])

In [25]:
np.linspace(0, 5, 5)

array([0.  , 1.25, 2.5 , 3.75, 5.  ])

In [28]:
# Random
# The random module provides functions to generate random numbers 
# (and also statistical distributions) of any given shape.

# numpy.random.randn generates samples from the normal distribution, 
# numpy.random.rand  generates samples from the unifrom (in range [0,1)).

In [10]:
import numpy as np
# Random numbers between [0,1) of shape 2,3
print(np.random.rand(3, 3))

[[0.83588759 0.99041888 0.87626481]
 [0.75561136 0.20940482 0.47169925]
 [0.2999258  0.78819486 0.29514069]]


In [15]:
# Random integers between [0, 10) of shape 2,2
print(np.random.randint(0, 10, size=[3,3]))

[[6 2 8]
 [4 1 7]
 [1 0 3]]


In [26]:
# One random number between [0, 1)
print(np.random.random())

0.3790297363481808


In [22]:
# Random numbers between [0,1) of shape 2,2
print(np.random.random(size=[2, 2]))

[[0.44484246 0.97411764]
 [0.75507261 0.54686442]]


In [30]:
# Pick 10 items from a given list, with equal probability
np.random.seed(1)
print(np.random.choice(['deepak', 'raj', 'rakesh', 'owl', 'monkey'], size=3, replace=True))  

['owl' 'monkey' 'deepak']


In [69]:
# If you want to repeat the same set of random numbers every time, you need to set the seed 
# or the random state. The see can be any value. The only requirement is you must set the seed 
# to the same value every time you want to generate the same set of random numbers.

# Once np.random.RandomState is created, all the functions of the np.random module becomes 
# available to the created randomstate object.

In [31]:
# Create the random state
rn = np.random.RandomState(1)
print(rn.rand(3,2),end="\n\n")
print(rn.rand(2,2),end="\n\n")
new=np.random.RandomState(4)

# Create random numbers between [0,1) of shape 2,2

print(new.rand(3,2))

[[4.17022005e-01 7.20324493e-01]
 [1.14374817e-04 3.02332573e-01]
 [1.46755891e-01 9.23385948e-02]]

[[0.18626021 0.34556073]
 [0.39676747 0.53881673]]

[[0.96702984 0.54723225]
 [0.97268436 0.71481599]
 [0.69772882 0.2160895 ]]


In [87]:
# Set the random seed
np.random.seed(1)

# Create random numbers between [0,1) of shape 2,2
print(np.random.rand(2,2))
print(np.random.rand(2,2))

[[4.17022005e-01 7.20324493e-01]
 [1.14374817e-04 3.02332573e-01]]
[[0.14675589 0.09233859]
 [0.18626021 0.34556073]]


In [29]:
np.random.seed(6)

print(np.random.rand(2,2))
print(np.random.rand(2,2))

[[0.89286015 0.33197981]
 [0.82122912 0.04169663]]
[[0.10765668 0.59505206]
 [0.52981736 0.41880743]]


In [9]:
# How to get the unique items and the counts?
# The np.unique method can be used to get the unique items. 
# If you want the repetition counts of each item, set the return_counts parameter to True.

# Create random integers of size 10 between [0,10)
import numpy as np
np.random.seed(140)

arr_rand = np.random.randint(0, 5, size=10)
print(arr_rand)

[0 0 0 3 3 1 3 2 4 4]


In [8]:
# Get the unique items and their counts
uniqs, counts = np.unique(arr_rand, return_counts= True)
print("Unique items : ", uniqs)
print("Counts       : ", counts)

Unique items :  [0 3 6 7 8 9]
Counts       :  [2 2 2 2 1 1]


In [39]:
# Get the unique items and their counts
uniqs= np.unique(arr_rand, return_counts= False)
print("Unique items : ", uniqs)
#print("Counts       : ", counts)

Unique items :  [0 1 4 8 9]


In [15]:
# Normal distribution with mean=0 and variance=1
data = np.random.randn(3, 3)

In [16]:
data

array([[-0.52102007,  0.73230458, -0.20165116],
       [-1.29997777,  1.10653993,  0.80188955],
       [ 0.17852349, -0.93081149,  1.60763861]])

In [12]:
data + data

array([[ 1.19019479,  0.49618317,  2.56255056],
       [ 3.62755812, -0.81392608, -0.58807088],
       [-4.13039125,  2.07560585,  0.49188484]])

In [94]:
data.shape

(3, 3)

In [97]:
data.dtype

dtype('float64')

In [96]:
type(data)

numpy.ndarray

In [98]:
data.var()

0.46844309652497923

In [99]:
# creating ndarray
data1 = [6, 7.1, 85, 0, 1]

In [100]:
arr1 = np.array(data1)

In [101]:
arr1

array([ 6. ,  7.1, 85. ,  0. ,  1. ])

In [102]:
# nested array
data2 = [[1, 2.3, 3, 4], [5, 6, 7, 8]]

In [111]:
arr2 = np.array(data2)
arr2

array([[1. , 2.3, 3. , 4. ],
       [5. , 6. , 7. , 8. ]])

In [104]:
arr2.shape

(2, 4)

In [105]:
arr2.ndim

2

In [106]:
arr2.dtype

dtype('float64')

In [107]:
q=np.array([[1,2,0,],[0,0,3]])
q

array([[1, 2, 0],
       [0, 0, 3]])

In [53]:
p = np.zeros_like(q)
p

array([[0, 0, 0],
       [0, 0, 0]])

In [None]:
# other methods of creating arrays

In [108]:
# zeros
a = np.zeros(10)
a

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [109]:
b = np.zeros((3, 6))
b

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [112]:
c = np.zeros_like(arr2)
c

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [113]:
# ones
a = np.ones(10)
a

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [114]:
b = np.ones((3, 6))
b

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [115]:
c = np.ones_like(b)
c

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [116]:
# arange
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [117]:
a.dtype

dtype('int32')

In [122]:
# convert the data type
b = a.astype(np.float64)
b

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14.])

In [None]:
# Arithmetic with NumPy Arrays
# Any arithmetic operations between equal-size arrays applies the 
# operation element-wise:

In [26]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])

In [27]:
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [28]:
arr=arr + 10
arr

array([[11., 12., 13.],
       [14., 15., 16.]])

In [None]:
arr + arr

In [None]:
arr - arr

In [29]:
# Comparisons between arrays of the same size yield boolean arrays:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [5]:
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [6]:
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [7]:
arr2 > 5

array([[False, False, False],
       [ True, False,  True]])

In [8]:
arr2[arr2 > 5]

array([ 7., 12.])

In [9]:
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

In [43]:
ac=np.array([12,23,42,12])

In [31]:
# Basic Indexing and Slicing
arr = np.arange(10,20)
arr

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [147]:
arr[5]

15

In [148]:
arr[5:8]

array([15, 16, 17])

In [149]:
arr[7]=70

In [34]:
arr

array([10, 11, 12, 13, 14, 12, 12, 12, 18, 19])

In [33]:
# broadcasting a value to multiple array positions
arr[5:8]=12

In [152]:
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [153]:
arr_slice[2]=20

In [154]:
arr

array([10, 11, 12, 13, 14, 12, 12, 20, 18, 19])

In [28]:
arr_slice

array([12345,    12,    20])

In [26]:
# change values in arr_slice, the mutations are reflected in the original 
# array arr
arr_slice[0] = 12345

In [27]:
arr

array([   10,    11,    12,    13,    14, 12345,    12,    20,    18,
          19])

In [29]:
# using : only means all values
arr_slice[1:2] = 9999

In [30]:
arr_slice

array([12345,  9999,    20])

In [31]:
arr

array([   10,    11,    12,    13,    14, 12345,  9999,    20,    18,
          19])

In [None]:
arr

In [None]:
# if you just assign a portion of an array to another array, the new array you just 
# created actually refers to the parent array in memory.
# That means, if you make any changes to the new array, it will reflect in the parent 
# array as well.

# So to avoid disturbing the parent array, you need to make a copy of it using copy(). 
# All numpy arrays come with the copy() method.

In [155]:
# If you want a copy of a slice of an ndarray instead of a view, you
# will need to explicitly copy the array — for example,
# arr[5:8].copy().
arr_slice1 = arr[5:8].copy()

In [156]:
arr_slice1

array([12, 12, 20])

In [157]:
arr_slice1[:] = 9388

In [158]:
arr_slice1

array([9388, 9388, 9388])

In [159]:
# the original array is not changed
arr

array([10, 11, 12, 13, 14, 12, 12, 20, 18, 19])

In [38]:
# Create a 2d array with 3 rows and 4 columns
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]
print(list2)
arr2 = np.array(list2, dtype='float')
arr2

[[1, 2, 3, 4], [3, 4, 5, 6], [5, 6, 7, 8]]


array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [1]:
import numpy as np
list_arr = np.array([[1.2,2,3.8],[2.,4,9.2]], dtype = 'int')

In [2]:
list_arr

array([[1, 2, 3],
       [2, 4, 9]])

In [8]:
arr2a = list_arr[0:2,0:2]
arr2a


array([[1, 2],
       [2, 4]])

IndexError: index 2 is out of bounds for axis 0 with size 2

In [48]:
arr2a[:1, :1] = 100  # 100 will reflect in arr2
arr2a

array([[100,   2],
       [  2,   4]])

In [49]:
# Copy portion of arr2 to arr2b
arr2b = arr2[:2, :2].copy()
arr2b[:1, :1] = 101  # 101 will not reflect in arr2
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [50]:
arr2b

array([[101.,   2.],
       [  3.,   4.]])

In [None]:
arr2

In [None]:
# Indexing with slices
arr = np.arange(10)

In [None]:
arr

In [None]:
arr[1:6]

In [49]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [50]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [48]:
arr2d[2]

[7, 8, 9]

In [51]:
arr2d[0][2]

3

In [52]:
# expression arr2d[:2] as “select the first two rows of arr2d.”
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [53]:
# first 2 rows and all columns from 1st pos
arr2d[:2, 1:]


array([[2, 3],
       [5, 6]])

In [None]:
arr2d

In [None]:
# 1st row, all columns 0th and 1st
arr2d[1, :2]

In [None]:
arr2d[:2, 2]

In [None]:
arr2d

In [None]:
arr2d[:, 1]

In [None]:
arr2d[:2, 1:] = 0

In [None]:
arr2d

In [1]:
import numpy as np

In [2]:
# Boolean Indexing
names = np.array(['Rajat', 'Maruthi12', 'Dinesh', 'Rajat', 'Bhanu', 'Viswa', 'Kamal'])
data = np.random.randn(7, 4)

In [3]:
names.shape

(7,)

In [4]:
data

array([[ 0.80738165,  0.13697391,  0.03995172,  1.2400832 ],
       [ 1.12333366,  0.84304543,  0.14969404,  1.14761756],
       [ 1.56211039,  1.42919174, -0.56476853, -1.41523243],
       [ 0.994092  , -1.03058151,  0.66193006, -0.56037108],
       [-0.60698247, -1.7973395 , -0.51036095, -1.16003763],
       [ 1.24000513,  0.43753635, -0.78277943,  0.32643319],
       [-0.5652978 , -2.77392258, -0.60382366,  2.4403582 ]])

In [5]:
# Suppose each name corresponds to a row in the data array and we wanted to 
# select all the rows with corresponding name 'Rajat'.
names == 'Rajat'

array([ True, False, False,  True, False, False, False])

In [7]:
data[(2,6),3]

array([-1.41523243,  2.4403582 ])

In [60]:
data[names == 'Rajat']

array([[-0.13508914,  1.05823251, -0.8033125 , -1.30280753],
       [ 0.46212565, -0.97882576, -0.49925557,  1.6780167 ]])

In [None]:
# The boolean array must be of the same length as the array axis it’s 
# indexing.

In [68]:
# what happens if they are of different lengths
names = np.array(['Rajat', 'Maruthi', 'Rajat', 'Bhanu', 'Viswa' ])
data = np.random.randn(7, 4)

In [75]:
a=np.array([['anad','sandeep'],['komal','d4']])
a

array([['anad', 'sandeep'],
       ['komal', 'd4']], dtype='<U7')

In [8]:
data[names == 'Rajat']

array([[ 0.80738165,  0.13697391,  0.03995172,  1.2400832 ],
       [ 0.994092  , -1.03058151,  0.66193006, -0.56037108]])

In [14]:
names = np.array(['Rajat', 'Maruthi', 'Dinesh', 'Rajat', 'Bhanu', 'Viswa', 'Kamal'])
data = np.random.rand(7,4)

array([[0.24267668, 0.23552203, 0.34183445, 0.71250492],
       [0.70577654, 0.57598068, 0.69519404, 0.66891002],
       [0.19045848, 0.25865719, 0.22414758, 0.82526161],
       [0.63442703, 0.50927201, 0.8923701 , 0.17268299],
       [0.02299894, 0.38420888, 0.67698662, 0.47062065],
       [0.65579003, 0.35753625, 0.38685725, 0.99265001],
       [0.55472643, 0.78905374, 0.55657392, 0.94468923]])

In [36]:
data[1,2]=2
data

array([[0.24267668, 0.23552203, 0.34183445, 0.71250492],
       [0.70577654, 0.57598068, 2.        , 2.        ],
       [0.19045848, 0.25865719, 0.22414758, 2.        ],
       [0.63442703, 0.50927201, 0.8923701 , 2.        ],
       [0.02299894, 0.38420888, 0.67698662, 0.47062065],
       [0.65579003, 0.35753625, 0.38685725, 0.99265001],
       [0.55472643, 0.78905374, 0.55657392, 0.94468923]])

In [31]:
data

array([[0.24267668, 0.23552203, 0.34183445, 0.71250492],
       [0.70577654, 0.57598068, 2.        , 2.        ],
       [0.19045848, 0.25865719, 0.22414758, 2.        ],
       [0.63442703, 0.50927201, 0.8923701 , 2.        ],
       [0.02299894, 0.38420888, 0.67698662, 0.47062065],
       [0.65579003, 0.35753625, 0.38685725, 0.99265001],
       [0.55472643, 0.78905374, 0.55657392, 0.94468923]])

In [39]:
data[names == 'Rajat']

array([[0.24267668, 0.23552203, 0.34183445, 0.71250492],
       [0.63442703, 0.50927201, 0.8923701 , 2.        ]])

In [37]:
data[names == 'Rajat', :2]

array([[0.24267668, 0.23552203],
       [0.63442703, 0.50927201]])

In [38]:
data[names == 'Rajat', 2]

array([0.34183445, 0.8923701 ])

In [None]:
# To select everything but 'Rajat', you can either use != or negate 
# the condition using ~

In [14]:
names != 'Rajat'

array([False,  True,  True, False,  True,  True,  True])

In [84]:
data[names != 'Rajat']

array([[-0.20800335, -0.84253577, -1.9278995 , -0.86005425],
       [ 0.97556692, -0.6241064 ,  1.05329376, -0.48015182],
       [-1.12486013,  0.82735984,  0.67400019,  0.46575797],
       [ 0.97761237, -0.30212264, -0.56140621, -3.15586053],
       [-1.93625946,  0.42347678,  0.91949796,  0.54452643]])

In [85]:
print("*******####")
data[~(names == 'Rajat')]

array([[-0.20800335, -0.84253577, -1.9278995 , -0.86005425],
       [ 0.97556692, -0.6241064 ,  1.05329376, -0.48015182],
       [-1.12486013,  0.82735984,  0.67400019,  0.46575797],
       [ 0.97761237, -0.30212264, -0.56140621, -3.15586053],
       [-1.93625946,  0.42347678,  0.91949796,  0.54452643]])

In [40]:
cond = names == 'Rajat'
cond

array([ True, False, False,  True, False, False, False])

In [41]:
data[cond]

array([[0.24267668, 0.23552203, 0.34183445, 0.71250492],
       [0.63442703, 0.50927201, 0.8923701 , 2.        ]])

In [88]:
data[~cond]

array([[-0.20800335, -0.84253577, -1.9278995 , -0.86005425],
       [ 0.97556692, -0.6241064 ,  1.05329376, -0.48015182],
       [-1.12486013,  0.82735984,  0.67400019,  0.46575797],
       [ 0.97761237, -0.30212264, -0.56140621, -3.15586053],
       [-1.93625946,  0.42347678,  0.91949796,  0.54452643]])

In [None]:
# Selecting two of the three names to combine multiple boolean conditions, 
# use boolean arithmetic operators like & (and) and | (or):

In [90]:
# The Python keywords and and or do not work with boolean arrays.
# Use & (and) and | (or) instead.
cond = (names == 'Rajat') | (names == 'Bhanu')

In [91]:
data[cond]

array([[-0.05714863, -0.29276269, -0.37107635,  0.72206575],
       [-1.05910682, -0.0729824 , -0.20625159,  0.45927175],
       [-1.12486013,  0.82735984,  0.67400019,  0.46575797]])

In [42]:
cond = data > 0

In [43]:
data[cond]

array([0.24267668, 0.23552203, 0.34183445, 0.71250492, 0.70577654,
       0.57598068, 2.        , 2.        , 0.19045848, 0.25865719,
       0.22414758, 2.        , 0.63442703, 0.50927201, 0.8923701 ,
       2.        , 0.02299894, 0.38420888, 0.67698662, 0.47062065,
       0.65579003, 0.35753625, 0.38685725, 0.99265001, 0.55472643,
       0.78905374, 0.55657392, 0.94468923])

In [None]:
data[cond] = 0

In [None]:
data

In [None]:
# change data for Rajat = 7
data[names != 'Rajat'] = 7

In [None]:
name1="ABC"
name2= "XYZ"

if(name1 === name2):
    print("OK")
else:
    print("not ok")

In [None]:
data

In [None]:
# Transposing Arrays and Swapping Axes
# Transposing is a special form of reshaping that similarly returns a view on the underlying
# data without copying anything

In [45]:
a = np.array([(8,9,10),(11,12,13)])
print(a)
print("---------")
a=a.reshape(6,1)
print(a)

[[ 8  9 10]
 [11 12 13]]
---------
[[ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]]


In [46]:
arr = np.arange(15)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [47]:
arr = np.arange(15).reshape((5, 3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [48]:
arr.T

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

In [None]:
# Universal Functions: Fast Element-Wise Array Functions


In [101]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [102]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [104]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [105]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [107]:
np.floor(arr)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [106]:
# useful in data anlytics
# is not a number
np.isnan(arr)

array([False, False, False, False, False, False, False, False, False,
       False])

In [108]:
# binary functions
x = np.random.randn(8)
y = np.random.randn(8)

In [113]:
z=np.random.randn(8)

In [109]:
x

array([ 0.41363362, -0.05598195,  0.63597051,  0.67881985, -0.72940908,
        0.31663242,  0.35943378, -0.05388746])

In [110]:
y

array([ 0.36822595, -0.03097939,  0.08499492, -0.02550349, -0.64999163,
       -1.11248628,  0.41966799,  0.44546221])

In [114]:
np.maximum(x, y)

array([ 0.41363362, -0.03097939,  0.63597051,  0.67881985, -0.64999163,
        0.31663242,  0.41966799,  0.44546221])

In [116]:
np.minimum(x, y,z)

array([ 0.36822595, -0.05598195,  0.08499492, -0.02550349, -0.72940908,
       -1.11248628,  0.35943378, -0.05388746])

In [None]:
# Array-Oriented Programming with Arrays
# NumPy arrays enables you to express many kinds of data processing tasks 
# as concise array expressions that might otherwise require writing loops

In [None]:
# This practice of replacing explicit loops with array expressions 
# is commonly referred to as vectorization.

In [None]:
# vectorized array operations will often be one or two (or more) orders
# of magnitude faster than their pure Python equivalents

In [None]:
# to evaluate the function sqrt(x^2 + y^2) 
# across a regular grid of values.

In [117]:
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points

In [119]:
x = np.arange(10)
y = np.arange(10)

In [120]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [121]:
y

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [122]:
np.sqrt(np.add(x ** 2 , y ** 2))

array([ 0.        ,  1.41421356,  2.82842712,  4.24264069,  5.65685425,
        7.07106781,  8.48528137,  9.89949494, 11.3137085 , 12.72792206])

In [None]:
# random number **********************************************

In [123]:
# mean at 0 and standard deviation of 1
# This will return random floats in the half-open interval [0.0, 1.0) 
# following the “continuous uniform” distribution.
np.random.randn(5)

array([-0.9924059 , -0.46892582, -1.4275216 , -0.07190899, -1.12435409])

In [124]:
np.random.random((2,2))

array([[0.96488998, 0.28109545],
       [0.36730077, 0.12298962]])

In [125]:
# Generate A Random Number From The Normal Distribution
np.random.normal()

-3.377301897673844

In [126]:
# Generate Four Random Numbers From The Normal Distribution
np.random.normal(size=4)

array([ 0.37459637, -0.95633897, -1.4055796 ,  0.73984779])

In [127]:
# Random number from distribution with mean 15, variance 2
np.random.normal(15, 2)

13.74958447788593

In [None]:
# an array of 10 points randomly sampled from a normal distribution
# loc=mean, scale=std deviation
np.random.normal(loc=0.0, scale=1.0, size=10)

In [128]:
# To return a 8 by 5 array of random numbers from the same distribution: 
np.random.normal(15, 2, size=(3, 4))

array([[17.32860462, 17.14096983, 16.30003503, 14.20020302],
       [14.53415923, 17.74242383, 12.74153821, 11.20181401],
       [15.00653265, 15.73554288, 14.84373224, 16.04317509]])

In [129]:
# A 5 by 3 array of random numbers from the standard normal distribution 
# with mean 1 and variance 1:

np.random.normal(size=(2, 4))

array([[ 0.18151764, -0.33070701, -0.18474518, -1.56946719],
       [-0.5297613 ,  0.54388703, -0.26960114,  1.66970544]])

In [165]:
# Generate Four Random Integers Between 1 and 100
np.random.randint(high =100, low =1, size =(3,4))

array([[88, 85, 93, 27],
       [49, 73, 84, 30],
       [60, 44, 57, 68]])

In [17]:
# Randomly constructing 1D array
array = np.random.rand(5)
print("1D Array filled with random values : \n", array)

1D Array filled with random values : 
 [0.66875427 0.30678623 0.00891125 0.93981984 0.79342948]


In [None]:
# Randomly constructing 1D array following Gaussian Distribution

# 1D Array
array = np.random.normal(0.0, 1.0, 5)
print("1D Array filled with random values "
      "as per gaussian distribution : \n", array)

# 3D array
array = np.random.normal(0.0, 1.0, (2, 3))
print("\n\n2D Array filled with random values "
      "as per gaussian distribution : \n", array)

In [None]:
# Making random numbers predictable *******************

In [None]:
# Set the state of the random number generator
np.random.seed(42)

In [None]:
# One set of random numbers
first_random_arr = np.random.normal(size=(2, 2))
print(first_random_arr)

# Another set
second_random_arr = np.random.normal(size=(2, 2))
print(second_random_arr)

# Reset the state of the random number generator
np.random.seed(42)

# The same as "first_random_arr" above.
print(np.random.normal(size=(2, 2)))

# The same as "second_random_arr" above.
print(np.random.normal(size=(2, 2)))

In [32]:
# Create an object array to hold numbers as well as strings
arr1d_obj = np.array([[1, 'a'],[1,2]], dtype='object')
arr1d_obj

array([[1, 'a'],
       [1, 2]], dtype=object)

In [33]:
# Convert an array back to a list
arr1d_obj.tolist()

[[1, 'a'], [1, 2]]

In [34]:
a= np.random.randint(1,12,(3,4))
a

array([[11,  9,  7,  5],
       [ 4,  1,  5,  7],
       [ 9, 11,  2,  9]])

In [35]:
a=a.astype(float)

In [37]:
a[2,2] = np.nan

In [38]:
a

array([[11.,  9.,  7.,  5.],
       [ 4.,  1.,  5.,  7.],
       [ 9., 11., nan,  9.]])

In [1]:
# Create a 2d array with 3 rows and 4 columns
import numpy as np
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]

arr2 = np.array(list2, dtype='float')
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [10]:
from numpy import random

In [11]:
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]

arr3 = np.array(random.randn(3,3))
arr3

array([[-0.31086796, -0.34482424,  1.26055563],
       [ 3.33921341,  0.39864238,  0.49688531],
       [ 0.71613985,  0.12134869,  2.62794016]])

In [12]:
arr3[1,1]=np.nan

In [13]:
arr3

array([[-0.31086796, -0.34482424,  1.26055563],
       [ 3.33921341,         nan,  0.49688531],
       [ 0.71613985,  0.12134869,  2.62794016]])

In [2]:
arr2[::-1,]

array([[5., 6., 7., 8.],
       [3., 4., 5., 6.],
       [1., 2., 3., 4.]])

In [3]:
# Reverse the row and column positions

arr2[::-1, ::-1]

array([[8., 7., 6., 5.],
       [6., 5., 4., 3.],
       [4., 3., 2., 1.]])

In [4]:
# represent missing values and infinite?
# Missing values can be represented using np.nan object, while np.inf represents infinite. 
# Let’s place some in arr2d.

# Insert a nan and an inf
arr2[1,1] = np.nan  # not a number
arr2[1,2] = np.inf  # infinite
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., nan, inf,  6.],
       [ 5.,  6.,  7.,  8.]])

In [53]:
arr2.T

array([[ 1.,  3.,  5.],
       [ 2., nan,  6.],
       [ 3., inf,  7.],
       [ 4.,  6.,  8.]])

In [4]:
# Replace nan and inf with -1. 
missing_bool = np.isnan(arr2) | np.isinf(arr2)
print(missing_bool)

arr2[missing_bool] = -1  
#missing_bool = -1  
missing_bool
arr2

[[False False False False]
 [False  True  True False]
 [False False False False]]


array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [None]:
# compute mean, min, max on the ndarray
# mean, max and min
print("Mean value is: ", arr2.mean())
print("Max value is: ", arr2.max())
print("Min value is: ", arr2.min())

In [136]:
# compute the minimum values row wise or column wise, use the np.amin 
 np.amin(arr2, axis=0)
print("Column wise minimum : ", np.amin(arr2, axis=0))
print("Row wise minimum    : ", np.amin(arr2, axis=1))

Column wise minimum :  [ 1. -1. -1.  4.]
Row wise minimum    :  [ 1. -1.  5.]


In [15]:
import numpy as np
#A better approach is to move the one-dimensional innermost loop into your code, external to the iterator. This way, NumPy’s vectorized operations can be used on larger chunks of the elements being visited.# Row wise and column wise min
a = np.arange(6).reshape(2,3)
print(a)
print("########## after nditer")
for x in np.nditer(a,order = "K" , flags=['external_loop']):
    print(x)
 
type(x)



[[0 1 2]
 [3 4 5]]
########## after nditer
[0 1 2 3 4 5]


numpy.ndarray

In [17]:
for x in np.nditer(a,order = "F" , flags=['external_loop']):
    print(x)

[0 3]
[1 4]
[2 5]


In [18]:
a = np.arange(6).reshape(2,3)
it = np.nditer(a, flags=['f_index'])
while not it.finished:
    print("%d <%d>" % (it[0], it.index), end=' ')
    it.iternext()

0 <0> 1 <2> 2 <4> 3 <1> 4 <3> 5 <5> 

In [19]:
it = np.nditer(a, flags=['multi_index'])
while not it.finished:
    print("%d <%s>" % (it[0], it.multi_index), end=' ')
    it.iternext()

0 <(0, 0)> 1 <(0, 1)> 2 <(0, 2)> 3 <(1, 0)> 4 <(1, 1)> 5 <(1, 2)> 

In [20]:
it = np.nditer(a, flags=['multi_index'], op_flags=['writeonly'])
with it:
    while not it.finished:
        it[0] = it.multi_index[1] - it.multi_index[0]
        it.iternext()

In [21]:
a

array([[ 0,  1,  2],
       [-1,  0,  1]])

In [None]:
# Reshaping and Flattening Multidimensional arrays
# Reshaping is changing the arrangement of items so that shape of the array changes 
# while maintaining the same number of dimensions.

# Flattening, however, will convert a multi-dimensional array to a flat 1d array. 
# And not any other shape.

# First, let’s reshape the arr2 array from 3×4 to 4×3 shape.

In [44]:
# Reshape a 3x4 array to 4x3 array
arr2.reshape(4, 3)
arr2.reshape(6,2)

array([[1., 2.],
       [3., 4.],
       [3., 4.],
       [5., 6.],
       [5., 6.],
       [7., 8.]])

In [None]:
# What is the difference between flatten() and ravel()?
# There are 2 popular ways to implement flattening. That is using the flatten() method 
# and the other using the ravel() method.

# The difference between ravel and flatten is, the new array created using ravel is 
# actually a reference to the parent array. 
# So, any changes to the new array will affect the parent as well. 
# But is memory efficient since it does not create a copy.

In [5]:
# Flatten it to a 1d array
arr2.flatten()

array([ 1.,  2.,  3.,  4.,  3., -1., -1.,  6.,  5.,  6.,  7.,  8.])

In [6]:
# Changing the flattened array does not change parent
b1 = arr2.flatten()  
b1[0] = 105  # changing b1 does not affect arr2
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [139]:
b1

array([105.,   2.,   3.,   4.,   3.,  -1.,  -1.,   6.,   5.,   6.,   7.,
         8.])

In [7]:
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [141]:
arr2.ravel() 

array([ 1.,  2.,  3.,  4.,  3., -1., -1.,  6.,  5.,  6.,  7.,  8.])

In [142]:
# Changing the raveled array changes the parent also.
b2 = arr2.ravel()  
b2[0] = 101  # changing b2 changes arr2 also
arr2


array([[101.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

In [None]:
# create sequences, repetitions 
# Lower limit is 0 be default
print(np.arange(5))  

In [None]:
# 0 to 9
print(np.arange(0, 10))  

In [143]:
# 0 to 9 with step of 2
print(np.arange(0, 10, 2))  

[0 2 4 6 8]


In [144]:
# 10 to 1, decreasing order
print(np.arange(10, 0, -1))

[10  9  8  7  6  5  4  3  2  1]


In [145]:
# create repeating sequences?
# np.tile will repeat a whole list or array n times. 
# Whereas, np.repeat repeats each item n times.

a = [1,2,3] 

# Repeat whole of 'a' two times
print('Tile:   ', np.tile(a, 4))

# Repeat each element of 'a' two times
print('Repeat: ', np.repeat(a, 4))

Tile:    [1 2 3 1 2 3 1 2 3 1 2 3]
Repeat:  [1 1 1 1 2 2 2 2 3 3 3 3]


In [None]:
# Arrays support vectorised operations, while lists don’t.
# Once an array is created, you cannot change its size. You will have to create a new array 
# or overwrite the existing one.
# Every array has one and only one dtype. All items in it should be of that dtype.
# An equivalent numpy array occupies much less space than a python list of lists.