# NumPy Basics #

**Based on the book *Python Data Science Handbook* (2017)**

Importing Library

In [1]:
#import library
import numpy as np

Creating Arrays

In [5]:
# Build arrays from plain Python sequences
l1 = np.array([1, 4, 2, 5, 3])                  # all ints -> integer array
l2 = np.array([3.14, 4, 2, 3])                  # mixed int/float input is upcast to float
l3 = np.array([1, 2, 3, 4], dtype=np.float32)   # element type requested explicitly
# One range per starting value; the nested sequences become the rows of a 2-D array
l4 = np.array([range(start, start + 3) for start in (2, 4, 6)])

# Show the arrays one after another, in definition order
print(l1, l2, l3, l4, sep="\n")

[1 4 2 5 3]
[3.14 4.   2.   3.  ]
[1. 2. 3. 4.]
[[2 3 4]
 [4 5 6]
 [6 7 8]]


In [6]:
#creating array from scratch
# Create a length-10 integer array filled with zeros
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [7]:
# Create a 3x5 floating-point array filled with 1s
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [8]:
# Create a 3x5 array filled with 3.14
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [9]:
# Create an array filled with a linear sequence
# starting at 0 and stepping by 2; the stop value 20 is exclusive,
# so the last element is 18
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [10]:
# Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [21]:
# Create a 3x3 array of uniformly distributed
# random values between 0 and 1
np.random.random((3, 3))


array([[0.96025751, 0.24122147, 0.72610546],
       [0.67407878, 0.69978947, 0.80972423],
       [0.6373618 , 0.40625832, 0.8850862 ]])

In [36]:
# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3, 3))


array([[ 1.00431291, -1.64251991,  1.94772169],
       [-0.07064895, -0.8475787 ,  1.14814977],
       [-0.27555202,  1.63267679,  0.49414708]])

In [116]:
# Create a 10x10 array of random integers in the interval [0, 2)
# (the upper bound is exclusive, so every entry is either 0 or 1)
np.random.randint(0, 2, (10, 10))


array([[1, 0, 0, 0, 0, 1, 0, 1, 0, 1],
       [1, 1, 0, 1, 0, 1, 0, 1, 0, 1],
       [0, 1, 1, 0, 0, 1, 0, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 1, 1, 1, 0],
       [1, 1, 1, 0, 1, 1, 1, 1, 0, 0],
       [1, 0, 1, 0, 1, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
       [1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
       [1, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [1, 0, 1, 1, 0, 0, 1, 1, 1, 0]])

In [118]:
# Create a 3x3 identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [119]:
# Create an uninitialized array of three floating-point values
# (no dtype is passed, so np.empty falls back to its default float dtype).
# The values will be whatever happens to already exist at that
# memory location
np.empty(3)

array([1., 1., 1.])

Array Attributes

In [122]:
np.random.seed(0) # seed for reproducibility
x1 = np.random.randint(10, size=6) # One-dimensional array
x2 = np.random.randint(10, size=(3, 4)) # Two-dimensional array
x3 = np.random.randint(10, size=(3, 4, 5)) # Three-dimensional array

# Inspect the basic attributes of the three-dimensional array
print("x3 ndim: ", x3.ndim) # number of dimensions
print("x3 shape:", x3.shape) # length of each dimension
print("x3 size: ", x3.size) # total number of elements
print("dtype:", x3.dtype) # element type
print("itemsize:", x3.itemsize, "bytes") # size of a single element
print("nbytes:", x3.nbytes, "bytes") # size of the whole array



x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60
dtype: int32
itemsize: 4 bytes
nbytes: 240 bytes


Array Slicing: One-Dimensional Subarrays

In [123]:
x = np.arange(10)

In [124]:
x[:5] # first five elements

array([0, 1, 2, 3, 4])

In [125]:
x[5:] # elements from index 5 onward (index 5 itself is included)

array([5, 6, 7, 8, 9])

In [126]:
x[4:7] # middle subarray

array([4, 5, 6])

In [127]:
x[::2] # every other element

array([0, 2, 4, 6, 8])

In [128]:
x[1::2] 

array([1, 3, 5, 7, 9])

In [129]:
x[::-1] # all elements, reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [130]:
x[5::-2] # reversed every other from index 5

array([5, 3, 1])

In [131]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

Array Slicing: Multi-Dimensional Subarrays

In [132]:
x2[:2, :3]

array([[3, 5, 2],
       [7, 6, 8]])

In [133]:
x2[:3, ::2] 

array([[3, 2],
       [7, 8],
       [1, 7]])

In [134]:
x2[::-1, ::-1]

array([[7, 7, 6, 1],
       [8, 8, 6, 7],
       [4, 2, 5, 3]])

In [135]:
print(x2[:, 0])

[3 7 1]


Array Concatenation

In [137]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
z = [99, 99, 99]

print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


In [138]:
grid = np.array([[1, 2, 3],
                 [4, 5, 6]])

# concatenate along the first axis
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [139]:
# concatenate along the second axis (zero-indexed)
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [140]:
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7],
                 [6, 5, 4]])
# vertically stack the arrays
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [142]:
# horizontally stack the arrays
y = np.array([[99],
               [99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

Reshaping Array

In [143]:
grid = np.arange(1, 10).reshape((3, 3))
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [144]:
x = np.array([1, 2, 3])
# row vector via reshape
x.reshape((1, 3))

array([[1, 2, 3]])

In [145]:
# row vector via newaxis
x[np.newaxis, :]

array([[1, 2, 3]])

In [147]:
# column vector via reshape
x.reshape((3, 1))

array([[1],
       [2],
       [3]])

In [148]:
# column vector via newaxis
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

Splitting of arrays

In [150]:
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [151]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [153]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [154]:
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


The Slowness of Loops

In [155]:
np.random.seed(0)
def compute_reciprocals(values):
    """Return the element-wise reciprocals of ``values`` as a float array.

    The result is filled one element at a time in a Python-level loop on
    purpose: this function is the slow baseline that the vectorized
    expression ``1.0 / values`` is timed against in this notebook.

    Parameters
    ----------
    values : sequence of numbers
        Any sized, iterable collection of scalars (list or 1-D array).

    Returns
    -------
    numpy.ndarray
        Array of ``len(values)`` elements holding ``1.0 / v`` for each ``v``.
    """
    reciprocals = np.empty(len(values))
    for position, value in enumerate(values):
        reciprocals[position] = 1.0 / value
    return reciprocals

values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [156]:
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array)

212 ms ± 3.37 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [158]:
print(compute_reciprocals(values))
print(1.0 / values)
%timeit (1.0 / big_array)

[0.16666667 1.         0.25       0.25       0.125     ]
[0.16666667 1.         0.25       0.25       0.125     ]
3.71 ms ± 966 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [159]:
np.arange(5) / np.arange(1, 6)

array([0.        , 0.5       , 0.66666667, 0.75      , 0.8       ])

In [160]:
x = np.arange(9).reshape((3, 3))
2 ** x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]], dtype=int32)

Array Arithmetic

In [161]:
x = np.arange(4)
print("x =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)
print("x // 2 =", x // 2) # floor division

x = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [0.  0.5 1.  1.5]
x // 2 = [0 0 1 1]


In [162]:
print("-x = ", -x)
print("x ** 2 = ", x ** 2)
print("x % 2 = ", x % 2)


-x =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2 =  [0 1 0 1]


In [163]:
-(0.5*x + 1) ** 2

array([-1.  , -2.25, -4.  , -6.25])

In [166]:
# Using the NumPy ufunc equivalents of the arithmetic operators.
# NOTE: a notebook cell echoes only the value of its last expression,
# so only the result of np.mod(x, 2) is displayed below; the results of
# the preceding calls are computed and then discarded.
np.add(x, 2)            # x + 2
np.subtract(x, 2)       # x - 2
np.negative(x)          # -x
np.multiply(x, 2)       # x * 2
np.divide(x, 2)         # x / 2
np.floor_divide(x, 2)   # x // 2
np.power(x, 2)          # x ** 2
np.mod(x, 2)            # x % 2


array([0, 1, 0, 1], dtype=int32)

In [167]:
x = np.array([-2, -1, 0, 1, 2])
abs(x)

array([2, 1, 0, 1, 2])

In [168]:
np.absolute(x)

array([2, 1, 0, 1, 2])

In [169]:
np.abs(x)

array([2, 1, 0, 1, 2])

In [170]:
x = np.array([3 - 4j, 4 - 3j, 2 + 0j, 0 + 1j])
np.abs(x)

array([5., 5., 2., 1.])

Trigonometric, Exponential, and Logarithmic Functions

In [171]:
theta = np.linspace(0, np.pi, 3)
print("theta = ", theta)
print("sin(theta) = ", np.sin(theta))
print("cos(theta) = ", np.cos(theta))
print("tan(theta) = ", np.tan(theta))

theta =  [0.         1.57079633 3.14159265]
sin(theta) =  [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos(theta) =  [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan(theta) =  [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [172]:
x = [-1, 0, 1]
print("x = ", x)
print("arcsin(x) = ", np.arcsin(x))
print("arccos(x) = ", np.arccos(x))
print("arctan(x) = ", np.arctan(x))

x =  [-1, 0, 1]
arcsin(x) =  [-1.57079633  0.          1.57079633]
arccos(x) =  [3.14159265 1.57079633 0.        ]
arctan(x) =  [-0.78539816  0.          0.78539816]


In [173]:
x = [1, 2, 3]
print("x =", x)
print("e^x =", np.exp(x))
print("2^x =", np.exp2(x))
print("3^x =", np.power(3, x))

x = [1, 2, 3]
e^x = [ 2.71828183  7.3890561  20.08553692]
2^x = [2. 4. 8.]
3^x = [ 3  9 27]


In [174]:
x = [1, 2, 4, 10]
print("x =", x)
print("ln(x) =", np.log(x))
print("log2(x) =", np.log2(x))
print("log10(x) =", np.log10(x))

x = [1, 2, 4, 10]
ln(x) = [0.         0.69314718 1.38629436 2.30258509]
log2(x) = [0.         1.         2.         3.32192809]
log10(x) = [0.         0.30103    0.60205999 1.        ]


In [175]:
x = [0, 0.001, 0.01, 0.1]
print("exp(x) - 1 =", np.expm1(x))
print("log(1 + x) =", np.log1p(x))

exp(x) - 1 = [0.         0.0010005  0.01005017 0.10517092]
log(1 + x) = [0.         0.0009995  0.00995033 0.09531018]


Specialized Functions (scipy.special)

In [176]:
from scipy import special

# Gamma functions (generalized factorials) and related functions
x = [1, 5, 10]
print("gamma(x) =", special.gamma(x))
print("ln|gamma(x)| =", special.gammaln(x))
print("beta(x, 2) =", special.beta(x, 2))

gamma(x) = [1.0000e+00 2.4000e+01 3.6288e+05]
ln|gamma(x)| = [ 0.          3.17805383 12.80182748]
beta(x, 2) = [0.5        0.03333333 0.00909091]


In [177]:
# Error function (integral of Gaussian),
# its complement, and its inverse
x = np.array([0, 0.3, 0.7, 1.0])
print("erf(x) =", special.erf(x))
print("erfc(x) =", special.erfc(x))  # complement: 1 - erf(x)
print("erfinv(x) =", special.erfinv(x))  # inverse of erf; evaluates to inf at x = 1.0

erf(x) = [0.         0.32862676 0.67780119 0.84270079]
erfc(x) = [1.         0.67137324 0.32219881 0.15729921]
erfinv(x) = [0.         0.27246271 0.73286908        inf]


Advanced Ufunc Features

In [178]:
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 10, out=y)
print(y)


[ 0. 10. 20. 30. 40.]


In [179]:
y = np.zeros(10)
np.power(2, x, out=y[::2])
print(y)


[ 1.  0.  2.  0.  4.  0.  8.  0. 16.  0.]


Aggregates

In [180]:
x = np.arange(1, 6)
np.add.reduce(x)

15

In [181]:
np.multiply.reduce(x)

120

In [182]:
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15])

In [183]:
np.multiply.accumulate(x)

array([  1,   2,   6,  24, 120])

In [184]:
x = np.arange(1, 6)
np.multiply.outer(x, x)

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

Summing the Values in an Array

In [185]:
L = np.random.random(100)
sum(L)


50.461758453195614

In [186]:
np.sum(L)

50.46175845319564

In [187]:
big_array = np.random.rand(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)

71.6 ms ± 1.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.06 ms ± 31.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


Minimum and Maximum

In [188]:
min(big_array), max(big_array)

(7.071203171893359e-07, 0.9999997207656334)

In [189]:
np.min(big_array), np.max(big_array)

(7.071203171893359e-07, 0.9999997207656334)

In [190]:
%timeit min(big_array)
%timeit np.min(big_array)

48.1 ms ± 4.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
524 µs ± 42.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


Multidimensional aggregates

In [191]:
M = np.random.random((3, 4))
print(M)

[[0.79832448 0.44923861 0.95274259 0.03193135]
 [0.18441813 0.71417358 0.76371195 0.11957117]
 [0.37578601 0.11936151 0.37497044 0.22944653]]


In [192]:
M.sum()

5.1136763453287335

In [193]:
M.min(axis=0)

array([0.18441813, 0.11936151, 0.37497044, 0.03193135])

In [194]:
M.max(axis=1)

array([0.95274259, 0.76371195, 0.37578601])