# NumPy

NumPy is a Python package that allows efficient handling of, and operations on, large arrays of numerical data. It's one of the most fundamental libraries for quantitative analysis.

In [1]:
import numpy as np

- NumPy Arrays  
- NumPy Indexing and Selection  
- NumPy Operations  
- NumPy Exercise (and Solution)

## Regular ndarray Object

In [2]:
# create a NumPy array by converting (casting) a plain Python list
my_list = [1, 2, 3]
np.array(my_list)

array([1, 2, 3])

In [3]:
type(np.array(my_list))  # the converted object is a numpy.ndarray

numpy.ndarray

In [4]:
# create a NumPy array from Python's built-in range function
a = np.array(range(24))

In [5]:
a  # show the array built from range(24)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [6]:
# create an array with NumPy's own arange function;
# the start value (0) is included, the stop value (5) is not
a1 = np.arange(0, 5)
a1

array([0, 1, 2, 3, 4])

In [7]:
# arange with a step: start at 1, stop before 11, advance by 2
a2 = np.arange(1, 11, 2)
a2

array([1, 3, 5, 7, 9])

In [8]:
# create a one-dimensional array of three zeros (floats, as the output shows)
np.zeros(3)

array([0., 0., 0.])

In [9]:
# same for two-dimensional arrays:
# pass the shape as a tuple to zeros (first entry = rows, second entry = columns)
np.zeros((5,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [10]:
# create a one-dimensional array of four ones
np.ones(4)

array([1., 1., 1., 1.])

In [11]:
# return 50 evenly spaced numbers from 0 to 10 (both endpoints appear in the output)
np.linspace(0, 10, 50)

array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

In [12]:
# build a 3 x 3 matrix (two-dimensional array) from a nested list,
# one inner list per row
my_matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
np_matrix = np.array(my_matrix)

In [13]:
np_matrix # two-dimensional array (3 rows, 3 columns)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [14]:
type(np_matrix)  # a 2-D array is still a regular numpy.ndarray

numpy.ndarray

In [15]:
# reshape the 24-element one-dimensional array `a` into a matrix
# with 4 rows and 6 columns
b = a.reshape((4, 6))

In [16]:
b  # show the reshaped 4 x 6 array

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [17]:
# create a 4 x 4 identity matrix (ones on the diagonal, zeros elsewhere)
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [18]:
# numpy's random library
# useful for modelling things like Monte Carlo simulations
np.random.rand(1) # returns an array holding one random float between zero and one

array([0.59864691])

In [19]:
np.random.rand(5,4) # returns a 5 x 4 matrix with random numbers between zero and one
# uniform distribution: every number has the same odds of being picked

array([[0.57249237, 0.73632973, 0.70270194, 0.68409431],
       [0.08353645, 0.46692956, 0.8053057 , 0.05051517],
       [0.26955109, 0.82259399, 0.61798157, 0.67704771],
       [0.5361833 , 0.66406849, 0.01453682, 0.5512905 ],
       [0.42244457, 0.99406694, 0.21111501, 0.18804817]])

In [20]:
np.random.randn(5) # returns five samples from the standard normal distribution

array([-1.56862902,  1.08850873, -0.21251314,  0.14499748, -0.3803092 ])

In [21]:
np.random.randn(5,5)  # 5 x 5 matrix of samples from the standard normal distribution

array([[ 2.58212528e-02,  1.38084148e-01, -5.90846416e-01,
        -1.22551768e+00, -1.47458960e-01],
       [ 2.60990367e+00,  7.39851684e-01, -2.51204969e+00,
        -2.06590554e+00, -8.70623659e-01],
       [-5.14552485e-01, -5.41388553e-01,  1.17323768e-03,
         1.45886331e+00,  2.26275453e-01],
       [ 2.47950428e+00, -2.20542383e-01,  1.07695422e+00,
        -4.25343714e-01,  4.66243144e-01],
       [-5.99472073e-01, -8.01059103e-01,  1.82146064e+00,
         7.28983650e-01, -1.76967810e-01]])

In [22]:
np.random.randint(1,100) # returns one random integer from the "discrete uniform" distribution; the lower bound (1) is included, the upper bound (100) is excluded

59

In [23]:
np.random.randint(1,100,10)  # third argument: how many random integers to draw (here 10)

array([99, 79, 61, 28, 90, 64,  4,  6, 84, 78])

In [24]:
# useful array attributes and methods, demonstrated on the integers 0..24
arr = np.arange(25)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [25]:
# ten random integers, each at least 0 and below 50;
# no seed is set in the cells shown here, so the values change from run to run
ranarr = np.random.randint(0, 50, 10)
ranarr

array([11, 39,  7, 15, 47, 23, 49,  8, 12, 40])

In [26]:
# return an array with the same data but a different shape
arr.reshape(5,5) # reshape the 25-element array to a 5 by 5 matrix

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [27]:
arr.shape # returns the shape of the one-dimensional array as a one-element tuple

(25,)

In [28]:
arr.reshape(5,5).shape # shape of the 5 by 5 matrix: (rows, columns)

(5, 5)

In [29]:
arr.dtype # data type of the array's elements (int64 in the recorded run)

dtype('int64')

In [30]:
ranarr.max() # largest value in the array

49

In [31]:
ranarr.argmax() # index location of the largest value in the array

6

In [32]:
ranarr.min() # smallest value in the array

7

In [33]:
ranarr.argmin() # index location of the smallest value in the array

2

## NumPy Operations

In [34]:
# the integers 0..9, used as the operand for the arithmetic examples
arr = np.arange(0, 10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
# array addition on an element-by-element basis
arr + arr

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [36]:
# array multiplication on an element-by-element basis (not a matrix product)
arr * arr

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [37]:
# subtraction (element by element)
arr - arr

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [38]:
# division (element by element)
arr / arr # emits a RuntimeWarning (not an error) because the array contains 0
# 0/0 gets replaced with nan



array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [39]:
1/arr # 1/0 gets replaced by infinity (a RuntimeWarning is emitted, not an error)

  1/arr # 1/0 gets replaced by infinity


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111])

In [40]:
# powers: raise every element to the third power
arr ** 3

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [41]:
# numpy comes with array functions that are applied to every element
np.sqrt(arr) # square root of every element

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [42]:
np.exp(arr)  # exponential function: e raised to the power of every element

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [43]:
# what's the maximum value in the array? (function form of arr.max())
np.max(arr)

9

In [44]:
np.sin(arr)  # sine of every element (values are interpreted as radians)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849])

In [45]:
np.log(arr)  # natural logarithm of every element; log(0) gives -inf and triggers a RuntimeWarning

  np.log(arr)


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])

## NumPy for Indexing and Selection

In [50]:
# the integers 0..10, used for the indexing and slicing examples
arr = np.arange(0, 11)

In [54]:
arr  # show the array that the indexing examples operate on

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [51]:
# bracket indexing and selection
# element at index 8 (indices start at 0)
arr[8]

8

In [52]:
# slice notation: the start index (1) is included, the stop index (5) is excluded
arr[1:5]

array([1, 2, 3, 4])

In [53]:
# everything from the beginning up to a certain position, but not including it
arr[:5]

array([0, 1, 2, 3, 4])

In [57]:
# everything from a certain position (inclusive) up to the end
arr[3:]

array([ 3,  4,  5,  6,  7,  8,  9, 10])