Performance comparison between NumPy array and Python list

In [1]:
import numpy as np

In [2]:
# Build the same one million consecutive integers twice:
# once as a plain Python list and once as a NumPy array.
N_ELEMENTS = 1_000_000
my_list = [*range(N_ELEMENTS)]
my_arr = np.arange(N_ELEMENTS)

In [3]:
%time for _ in range(10): my_arr2 = my_arr*2

Wall time: 29 ms


In [4]:
%time for _ in range(10): my_list2 = [x*2 for x in my_list]

Wall time: 1.1 s


One of the key features of NumPy is its N-dimensional array object, or ndarray.

In [5]:
import numpy as np

In [6]:
# Draw a 2x3 array of samples from the standard normal distribution.
# No seed is set in the cells shown here, so the printed values differ between runs.
data = np.random.randn(2,3)
print(data)

[[-0.47256536 -0.05930089 -2.89951521]
 [-0.49096469  1.3384599   1.0536932 ]]


In [7]:
print(data*10)

[[ -4.72565357  -0.5930089  -28.99515206]
 [ -4.90964689  13.38459899  10.53693196]]


In [8]:
print(data*data)

[[2.23318017e-01 3.51659556e-03 8.40718843e+00]
 [2.41046326e-01 1.79147490e+00 1.11026935e+00]]


In [9]:
print(data+data)

[[-0.94513071 -0.11860178 -5.79903041]
 [-0.98192938  2.6769198   2.10738639]]


In [10]:
print(data.shape)

(2, 3)


In [11]:
print(data.dtype)

float64


## Creating ndarrays

In [12]:
# convert list to array
data1 = [6, 7.5 ,8, 0, 1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [13]:
# convert list to multidimensional array
data2 = [[1, 2, 3, 4],[5, 6, 7, 8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [14]:
print(arr2.ndim)
print(arr2.shape)

2
(2, 4)


In [15]:
print(arr1.dtype)
print(arr2.dtype)

float64
int32


In [16]:
# Create an array of zeros: pass an integer for a 1-D length, or a tuple for a multi-dimensional shape
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [17]:
np.zeros((3,6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [18]:
# np.empty allocates the array without initializing it, so the contents are arbitrary (not guaranteed to be 0)
np.empty((2,3,2))

array([[[1.08982428e-311, 3.16202013e-322],
        [0.00000000e+000, 0.00000000e+000],
        [1.11260619e-306, 2.03874325e+184]],

       [[2.44550344e-056, 2.83244153e-032],
        [2.04082124e+184, 4.31583406e-061],
        [2.00561260e-076, 2.27326546e+184]]])

> It’s not safe to assume that `np.empty` will return an array of all zeros. In some cases, it may return uninitialized “garbage” values.

In [19]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### Array creation functions
![](./figs/4-1.jpg)

In [20]:
# Data Types for ndarrays
arr1 = np.array([1,2,3], dtype = np.float64)
arr1.dtype

dtype('float64')

In [21]:
arr2 = np.array([1,2,3], dtype=np.int32)
arr2.dtype

dtype('int32')

![](./figs/4-2.jpg)

In [22]:
# convert or cast an array from one dtype to another 
arr = np.array([1,2,3,4,5])
arr.dtype

dtype('int32')

In [23]:
float_arr = arr.astype(np.float64)
float_arr.dtype

dtype('float64')

In [24]:
# the decimal part is truncated when floating-point numbers are cast to an integer dtype
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr

array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])

In [25]:
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10])

In [26]:
# Convert an array of numeric strings to floating point.
# np.bytes_ is the byte-string type that used to be reachable as np.string_ as well;
# the np.string_ alias was removed in NumPy 2.0, so np.bytes_ works on old and new versions.
numeric_strings = np.array(["1.25", "-9.6", "42"], dtype=np.bytes_)
numeric_strings.astype(float)  # NumPy maps the Python float type to np.float64

array([ 1.25, -9.6 , 42.  ])

In [27]:
int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
int_array.astype(calibers.dtype) # reuse another array's dtype attribute as the target type

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [28]:
# Shorthand type code: "u4" requests an unsigned 4-byte integer, shown as uint32 in the output.
# np.empty does not initialize the buffer, so the displayed values are arbitrary.
# NOTE(review): the variable name looks like a typo for `empty_uint32` — confirm before renaming.
empty_unit32 = np.empty(8, dtype="u4")
empty_unit32

array([         0, 1075314688,          0, 1075707904,          0,
       1075838976,          0, 1072693248], dtype=uint32)

> Calling `astype` always creates a new array (a copy of the data), even
if the new dtype is the same as the old dtype.

## Arithmetic with NumPy Arrays

In [29]:
# Any arithmetic operation between equal-size arrays is applied element-wise:
arr = np.array([[1., 2., 3.],[4., 5., 6.]])
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [30]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [31]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [32]:
# Arithmetic operations with scalars propagate the scalar argument to each element in the array:
1/arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [33]:
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [34]:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [35]:
# Comparing two arrays of the same shape produces a boolean array, element by element.
# `arr` (the 2x3 array defined earlier in this section) is the intended operand; `arr1` is a
# stale 1-D array left over from the dtype examples and only worked here via broadcasting.
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

Operations between differently sized arrays are called broadcasting.

## Basic Indexing and Slicing

In [36]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [37]:
arr[5]

5

In [38]:
arr[5:8]

array([5, 6, 7])

In [39]:
arr[::1]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]:
arr[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

**Any modifications to the sliced array will be reflected in the source array:**

In [41]:
print(arr)
arr_slice = arr[5:8]
print(arr_slice)

[0 1 2 3 4 5 6 7 8 9]
[5 6 7]


In [42]:
arr_slice[:] = 100
print(arr)
print(arr_slice)

[  0   1   2   3   4 100 100 100   8   9]
[100 100 100]


In [43]:
# method for copying the array
arr_slice = arr[5:8].copy()
print(arr_slice)
arr_slice[1]=99
print(arr)
print(arr_slice)

[100 100 100]
[  0   1   2   3   4 100 100 100   8   9]
[100  99 100]


In [44]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]

array([7, 8, 9])

In [45]:
print(arr2d[0][2])
print(arr2d[0,2])

3
3


### Indexing with slices

In [46]:
arr

array([  0,   1,   2,   3,   4, 100, 100, 100,   8,   9])

In [47]:
arr[1:5]

array([1, 2, 3, 4])

In [48]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [49]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [50]:
arr2d[:2,:1]

array([[1],
       [4]])

In [51]:
arr2d[:, :1]

array([[1],
       [4],
       [7]])

In [52]:
arr2d[:2, 1:] = 0
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

## Boolean Indexing

In [53]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [54]:
# numpy.random.randn to generate some random normally distributed data
data = np.random.randn(7, 4)
data

array([[ 0.00587238,  1.12492538,  1.07716733, -0.01927844],
       [-0.86230611, -1.29481779,  0.01583385,  0.70906439],
       [-1.2326439 ,  1.09324126,  1.37540383, -0.15249996],
       [-1.39506145,  0.03826526, -1.61897596, -1.03536667],
       [-1.28247405, -1.7659634 , -0.0405346 ,  1.3453667 ],
       [-0.4719613 , -0.07448135, -1.34848659,  0.97344101],
       [ 0.1679674 ,  0.2129106 ,  1.98464854, -0.36216332]])

In [55]:
names == "Bob"

array([ True, False, False,  True, False, False, False])

In [56]:
data[names=="Bob"]

array([[ 0.00587238,  1.12492538,  1.07716733, -0.01927844],
       [-1.39506145,  0.03826526, -1.61897596, -1.03536667]])

In [57]:
data[names == 'Bob', 2:]

array([[ 1.07716733, -0.01927844],
       [-1.61897596, -1.03536667]])

In [58]:
data[names == 'Bob', 3]

array([-0.01927844, -1.03536667])

In [59]:
# select everything but 'Bob'
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [60]:
# The  ~  operator can be useful when you want to invert a general condition:
data[~(names=="Bob")] 

array([[-0.86230611, -1.29481779,  0.01583385,  0.70906439],
       [-1.2326439 ,  1.09324126,  1.37540383, -0.15249996],
       [-1.28247405, -1.7659634 , -0.0405346 ,  1.3453667 ],
       [-0.4719613 , -0.07448135, -1.34848659,  0.97344101],
       [ 0.1679674 ,  0.2129106 ,  1.98464854, -0.36216332]])

In [61]:
# &(and) and |(or)
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [62]:
data[mask]

array([[ 0.00587238,  1.12492538,  1.07716733, -0.01927844],
       [-1.2326439 ,  1.09324126,  1.37540383, -0.15249996],
       [-1.39506145,  0.03826526, -1.61897596, -1.03536667],
       [-1.28247405, -1.7659634 , -0.0405346 ,  1.3453667 ]])

In [63]:
# to set the negative values in data to 0
data[data<0] = 0
data

array([[0.00587238, 1.12492538, 1.07716733, 0.        ],
       [0.        , 0.        , 0.01583385, 0.70906439],
       [0.        , 1.09324126, 1.37540383, 0.        ],
       [0.        , 0.03826526, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 1.3453667 ],
       [0.        , 0.        , 0.        , 0.97344101],
       [0.1679674 , 0.2129106 , 1.98464854, 0.        ]])

In [64]:
# to set whole rows or columns using a one-dimensional boolean array
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.        , 0.01583385, 0.70906439],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.        , 0.        , 0.97344101],
       [0.1679674 , 0.2129106 , 1.98464854, 0.        ]])

## Fancy Indexing

In [65]:
# Allocate an uninitialized 8x4 array, then overwrite every row with its own row index.
arr = np.empty((8, 4))
for i, row in enumerate(arr):
    # `row` is a view into `arr`, so assigning through it fills the array in place
    row[...] = i
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [66]:
# to select out a subset of the rows in a particular order, you can simply pass a list
# or ndarray of integers specifying the desired order:
arr[[4,3,0,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [67]:
# Using negative indices selects rows from the end:
arr[[-3,-5,-7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [68]:
arr = np.arange(32).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [69]:
# Passing  multiple  index  arrays
arr[[1, 5, 7, 2], [0, 3, 1, 2]] #(1, 0), (5, 3), (7, 1) , (2, 2) 

array([ 4, 23, 29, 10])

## Transposing Arrays and Swapping Axes

In [70]:
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [71]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [72]:
arr.transpose()

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [73]:
# inner matrix product using np.dot
arr = np.random.randn(6,3)
arr

array([[-0.60753121,  1.44044756, -0.4663876 ],
       [ 1.47289532,  0.22328343, -1.25358394],
       [ 0.63393183,  0.76519295, -0.88349538],
       [ 0.79578683,  0.0268543 , -3.02703283],
       [ 0.01072565, -1.09093997,  0.52903828],
       [-0.95145645, -2.56563739,  0.91972994]])

In [74]:
np.dot(arr.T, arr)

array([[ 4.47904545,  2.38959793, -5.40141032],
       [ 2.38959793, 10.48363129, -4.64588722],
       [-5.40141032, -4.64588722, 12.85826661]])

In [75]:
arr = np.arange(16).reshape((2, 2, 4))
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [76]:
'''
For higher dimensional arrays,  transpose  will accept a tuple of axis numbers to per‐
mute the axes
''' 
arr.transpose((1, 0, 2)) # the second axis becomes first, the first becomes second, the last stays in place

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [77]:
# swapaxes
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [78]:
arr.swapaxes(1,2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])