# <b>Python for Data Analysis</b>
# 4.  NumPy Basics: Arrays and Vectorized Computation

In [1]:
import numpy as np
import pandas as pd
# Keep rendered DataFrames compact: cap the columns/rows shown and the cell width.
pd.set_option("display.max_columns", 20)
pd.set_option("display.max_rows", 20)
pd.set_option("display.max_colwidth", 80)
# Print arrays with 4 decimal places and without scientific notation for small values.
np.set_printoptions(suppress=True, precision=4)

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels as sm

In [3]:
# One million consecutive integers as a NumPy array; used in the timing comparison below.
my_arr = np.arange(1_000_000)

In [4]:
# The same one million integers as a plain Python list, for the list-based timing.
my_list = list(range(1_000_000))

In [5]:
# Time doubling every element with a single array expression (no Python-level loop).
%timeit my_arr2 = my_arr * 2

1.91 ms ± 176 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
# Time the same doubling done one element at a time in a list comprehension.
%timeit my_list2 = [x * 2 for x in my_list]

84.4 ms ± 5.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 4.1 The NumPy ndarray: A Multidimensional Array Object

In [7]:
# Two rows of three numbers -> a 2 x 3 array. The ints (3, 0, -3) are stored as
# floats alongside the float entries, as the displayed values show.
data = np.array([[1.5, -0.1, 3], [0, -3, 6.5]])
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [8]:
# Multiplying by a scalar applies to every element and produces a new array.
data * 10

array([[ 15.,  -1.,  30.],
       [  0., -30.,  65.]])

In [9]:
# Adding two arrays of the same shape adds the elements position by position.
data + data

array([[ 3. , -0.2,  6. ],
       [ 0. , -6. , 13. ]])

In [10]:
data.shape

(2, 3)

In [11]:
data.dtype

dtype('float64')

### Creating ndarrays

In [12]:
data1 = [6, 7.5, 8, 0, 1]

In [14]:
# Convert the Python list to an ndarray. data1 mixes ints and a float (7.5),
# so every element is stored as a float.
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [15]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [16]:
# A list of equal-length lists becomes a two-dimensional array (here 2 rows x 4 columns).
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [17]:
arr2.ndim

2

In [18]:
arr2.shape

(2, 4)

In [19]:
arr1.dtype

dtype('float64')

In [20]:
arr2.dtype

dtype('int32')

In [26]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [29]:
# For more than one dimension the shape is passed as a tuple: 3 rows x 6 columns.
np.zeros((3, 6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [30]:
# np.empty allocates the array without initializing it: the values displayed are
# arbitrary leftovers, not zeros, and will differ from run to run.
np.empty((2, 3, 2))

array([[[1.1697e-311, 3.1620e-322],
        [0.0000e+000, 0.0000e+000],
        [1.8915e-307, 2.4724e-056]],

       [[2.2529e+184, 2.2156e-056],
        [4.4280e-062, 1.2154e-046],
        [8.4695e+165, 5.1062e-066]]])

In [31]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [32]:
# Check the dtype np.arange picks for integer input. The recorded output is int32;
# NOTE(review): the default integer dtype depends on platform and NumPy version,
# so this may show int64 elsewhere.
arrTest = np.arange(15)
arrTest.dtype

dtype('int32')

In [36]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [39]:
# Build an array of ones that copies arr2's shape (2 x 4) and integer dtype.
np.ones_like(arr2)

array([[1, 1, 1, 1],
       [1, 1, 1, 1]])

In [40]:
# Same shape and dtype as arr2, but uninitialized: the integers shown are arbitrary.
np.empty_like(arr2)

array([[       1,        1,        0,      551],
       [     864,        0, 16777984,    32762]])

In [45]:
# Gotcha: the result takes arr2's integer dtype, so the fill value 6.322 is
# truncated to 6. Pass dtype=float explicitly to keep the fractional part.
np.full_like(arr2, 6.322)

array([[6, 6, 6, 6],
       [6, 6, 6, 6]])

In [45]:
# NOTE(review): this cell repeats the previous one verbatim (same execution count,
# same output) -- probably an accidental copy; consider deleting it.
np.full_like(arr2, 6.322)

array([[6, 6, 6, 6],
       [6, 6, 6, 6]])

### Data Types for ndarrays

In [46]:
# Explicit dtype: integer literals stored as float64.
# Note: this rebinds arr1, which previously held the array built from data1.
arr1 = np.array([1, 2, 3], dtype = np.float64)

In [47]:
# Same values stored as 32-bit integers.
# Note: this rebinds arr2, which previously held the 2 x 4 array built from data2.
arr2 = np.array([1, 2, 3], dtype = np.int32)

In [48]:
arr1.dtype

dtype('float64')

In [49]:
arr2.dtype

dtype('int32')

In [50]:
arr = np.array([1, 2, 3, 4, 5])

In [51]:
arr.dtype

dtype('int32')

In [52]:
# astype returns a new array converted to the requested dtype (integers -> float64 here).
float_arr = arr.astype(np.float64)

In [53]:
float_arr

array([1., 2., 3., 4., 5.])

In [54]:
float_arr.dtype

dtype('float64')

In [56]:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr

array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])

In [57]:
# Float -> int conversion drops the fractional part (truncates toward zero, it does
# not round): 3.7 -> 3, -2.6 -> -2, 12.9 -> 12.
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10])

In [58]:
# Fixed-width byte strings holding numeric text, to be parsed with astype below.
# np.bytes_ replaces np.string_, which was an alias for the same dtype and was
# removed in NumPy 2.0; np.bytes_ works on both NumPy 1.x and 2.x.
numeric_strings = np.array(["1.25", "-9.6", "42"], dtype=np.bytes_)

In [59]:
# Strings that represent numbers can be parsed into a floating-point array with astype.
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

In [60]:
int_array = np.arange(10)

In [61]:
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype = np.float64)

In [62]:
# Reuse another array's dtype instead of naming it: calibers is float64, so the
# integers 0..9 come back as floats.
int_array.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [63]:
# "u4" is the short type code for an unsigned 4-byte integer (displayed as uint32).
zeros_uint32 = np.zeros(8, dtype = "u4")

In [66]:
zeros_uint32

array([0, 0, 0, 0, 0, 0, 0, 0], dtype=uint32)

### Arithmetic with NumPy Arrays

In [68]:
# Rebind arr to a 2 x 3 float array used by the arithmetic examples that follow.
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [69]:
arr.dtype

dtype('float64')

In [70]:
# `*` between equal-shape arrays multiplies element by element; it is not a matrix product.
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [71]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [72]:
# A scalar on the left works too: the division is applied against every element.
1 / arr

array([[1.    , 0.5   , 0.3333],
       [0.25  , 0.2   , 0.1667]])

In [73]:
arr ** 2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [74]:
# Second 2 x 3 float array (rebinding arr2) to compare against arr element by element.
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [75]:
# Comparing equal-shape arrays yields a Boolean array of the same shape.
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

In [76]:
arr2 == arr

array([[False, False, False],
       [False, False, False]])

In [79]:
arr2 != arr

array([[ True,  True,  True],
       [ True,  True,  True]])

### Basic Indexing and Slicing

In [81]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [82]:
arr[5]

5

In [83]:
arr[5:8]

array([5, 6, 7])

In [84]:
# Assigning a scalar to a slice writes it to every selected position (indices 5, 6, 7)
# and modifies arr in place -- re-running earlier cells that display arr will now differ.
arr[5:8] = 12

In [85]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [None]:
# To give an example of this, I first create a slice of arr: