# NumPy

In [2]:
import numpy as np

# Draw a 2x3 array of samples from the standard normal distribution.
# NOTE: the global generator is not seeded here, so the values differ on every run.
data = np.random.standard_normal(size=(2, 3))
data

array([[ 0.00351044, -1.90470935,  0.64871039],
       [-0.87359687, -0.64563624, -1.83887091]])

In [3]:
# shape is a tuple giving the size of each dimension (rows, columns here).
data.shape

(2, 3)

In [4]:
# dtype describes the element type shared by every entry of the array.
data.dtype

dtype('float64')

## Creating ndarrays
#### array() - accepts any sequence-like object

In [7]:
# A plain Python list mixing ints and one float; used below as input to np.array().
data1 = [6, 7.5, 8, 0, 1]

In [9]:
# np.array() converts the list to an ndarray; because one element is a float,
# every element is stored as a float.
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

### zeros(), ones(), and empty()
#### empty() - returns an array without initializing its values, so it may contain arbitrary garbage values.

In [10]:
# An integer argument gives a 1-D array of that length, filled with float zeros.
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [11]:
# Pass a tuple as the shape to build a multi-dimensional array (3 rows x 6 columns).
np.zeros((3,6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [15]:
# Allocates a 2x3x2 array without initializing it: the values shown are whatever
# happened to be in memory, not zeros.
np.empty((2,3,2))

array([[[0.00000000e+000, 1.72723382e-077],
        [2.96439388e-323, 0.00000000e+000],
        [9.76118064e-313, 8.60952352e-072]],

       [[7.78934220e-091, 9.60074698e-071],
        [1.55137760e+184, 1.88793803e+185],
        [3.99910963e+252, 8.34402697e-309]]])

## Data Types for ndarrays

In [17]:
# Request the element type explicitly instead of letting NumPy infer it from the
# integer inputs. NOTE: this rebinds `arr1`, replacing the array created earlier.
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr1.dtype

dtype('float64')

In [19]:
# Same input values, stored as 32-bit integers this time (`arr1` is rebound again).
arr1 = np.array([1, 2, 3], dtype=np.int32)
arr1.dtype

dtype('int32')

## astype() - convert or cast an array to another dtype (casting floats to integers truncates the fractional part)

In [25]:
# Float array with positive and negative fractional values, used to show the cast below.
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr

array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])

In [26]:
# astype() returns a new array; the float -> int32 cast drops the fractional part
# (3.7 -> 3, -2.6 -> -2) rather than rounding.
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

In [27]:
# Byte strings that hold numeric text can be parsed into floats with astype().
# `np.bytes_` is used because the `np.string_` alias was removed in NumPy 2.0;
# on older releases both names refer to the same fixed-width byte-string dtype.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

## Arithmetic with NumPy Arrays

In [30]:
# 2x3 float array used by the arithmetic examples that follow.
arr = np.asarray([(1.0, 2.0, 3.0),
                  (4.0, 5.0, 6.0)])
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [31]:
# `*` multiplies element by element (it is not matrix multiplication).
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [32]:
# Element-wise subtraction between two arrays of the same shape.
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [33]:
# The scalar is applied to every element: this yields the element-wise reciprocal.
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [34]:
# Raising to the power 0.5 takes the square root of each element.
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [35]:
# Second 2x3 float array, compared element by element against `arr` below.
arr2 = np.asarray([(0.0, 4.0, 1.0),
                   (7.0, 2.0, 12.0)])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [36]:
# Comparing same-shaped arrays yields a boolean array of that shape.
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

## Basic Indexing and Slicing

In [13]:
# Start from a 3x2 array of float zeros; the next cell overwrites it in place.
arr = np.zeros(shape=(3, 2), dtype=float)
arr

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [14]:
# Snapshot the contents first: copy() returns an independent array, so the
# in-place write below does not affect `old_values`.
old_values = arr.copy() # copy
arr[:] = 64 # change in place: assigning to the full slice overwrites every element
arr

array([[64., 64.],
       [64., 64.],
       [64., 64.]])

In [16]:
# Still all zeros: the copy was not touched by the in-place assignment to `arr`.
old_values 

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

## Boolean array

In [26]:
# One name per row of `data`; names repeat so they can drive row selection below.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
# 7 rows x 4 columns of standard-normal samples (unseeded, so values change per run).
# NOTE: this rebinds `data`, replacing the 2x3 array from the top of the notebook.
data = np.random.standard_normal(size=(7, 4))
# display() accepts several objects and renders them one after another.
display(names, data)

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

array([[-0.41108684,  0.04692426,  0.58096158,  0.64005975],
       [-1.86535294,  1.29904136,  0.43344688, -0.52963736],
       [-1.81885553,  1.50768453, -0.19794714,  0.66365444],
       [ 0.21012722, -0.02647   ,  0.58826982, -0.32197377],
       [ 1.85121011, -0.39216293,  1.15000898,  0.70698845],
       [ 0.7969093 , -1.08083373,  0.13471007,  0.97238882],
       [-3.08070718, -1.62722331,  0.82346654, -0.62977595]])

### Suppose each name corresponds to a row in the `data` array, and we want to select all the rows whose corresponding name is 'Bob'.

### Check that the two arrays have the same length before selecting: older NumPy versions did not fail when the boolean array's length was wrong (recent versions raise an `IndexError` instead).

In [35]:
# Sanity check: there must be exactly one name per row of `data`.
names.shape[0] == data.shape[0]

True

In [27]:
names == 'Bob' # element-wise comparison: one boolean per entry of `names`

array([ True, False, False,  True, False, False, False])

In [28]:
# Index with the boolean array: keeps only the rows where the mask is True.
data[names == 'Bob']

array([[-0.41108684,  0.04692426,  0.58096158,  0.64005975],
       [ 0.21012722, -0.02647   ,  0.58826982, -0.32197377]])

In [29]:
# A boolean row mask can be combined with a slice: 'Bob' rows, first two columns.
data[names == 'Bob', :2]

array([[-0.41108684,  0.04692426],
       [ 0.21012722, -0.02647   ]])

In [30]:
# ...or with an integer: 'Bob' rows, column 3 only, giving a 1-D result.
data[names == 'Bob', 3]

array([ 0.64005975, -0.32197377])

In [31]:
# `!=` gives the complementary mask: True wherever the name is not 'Bob'.
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [33]:
# `~` inverts a boolean array, so this selects every row that is not 'Bob'.
data[~(names == 'Bob')]

array([[-1.86535294,  1.29904136,  0.43344688, -0.52963736],
       [-1.81885553,  1.50768453, -0.19794714,  0.66365444],
       [ 1.85121011, -0.39216293,  1.15000898,  0.70698845],
       [ 0.7969093 , -1.08083373,  0.13471007,  0.97238882],
       [-3.08070718, -1.62722331,  0.82346654, -0.62977595]])

In [34]:
# Same selection as above, with the mask stored in a variable before inverting it.
condition = names == 'Bob'
data[~condition]

array([[-1.86535294,  1.29904136,  0.43344688, -0.52963736],
       [-1.81885553,  1.50768453, -0.19794714,  0.66365444],
       [ 1.85121011, -0.39216293,  1.15000898,  0.70698845],
       [ 0.7969093 , -1.08083373,  0.13471007,  0.97238882],
       [-3.08070718, -1.62722331,  0.82346654, -0.62977595]])

In [37]:
# Element-wise OR of two masks. The parentheses are required because `|` binds
# more tightly than `==`.
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [38]:
# Rows whose name is 'Bob' or 'Will'.
data[mask]

array([[-0.41108684,  0.04692426,  0.58096158,  0.64005975],
       [-1.81885553,  1.50768453, -0.19794714,  0.66365444],
       [ 0.21012722, -0.02647   ,  0.58826982, -0.32197377],
       [ 1.85121011, -0.39216293,  1.15000898,  0.70698845]])

## Setting values with boolean arrays

In [41]:
# Assigning through a boolean mask modifies `data` in place; the original negative
# values are lost, so re-running earlier cells will show different results.
data[data < 0] = 0 ## every negative entry is overwritten with 0
data

array([[0.        , 0.04692426, 0.58096158, 0.64005975],
       [0.        , 1.29904136, 0.43344688, 0.        ],
       [0.        , 1.50768453, 0.        , 0.66365444],
       [0.21012722, 0.        , 0.58826982, 0.        ],
       [1.85121011, 0.        , 1.15000898, 0.70698845],
       [0.7969093 , 0.        , 0.13471007, 0.97238882],
       [0.        , 0.        , 0.82346654, 0.        ]])

In [42]:
# A 1-D boolean mask selects whole rows: every row whose name is not 'Joe' is set
# to 7 (again in place).
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.        , 1.29904136, 0.43344688, 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.7969093 , 0.        , 0.13471007, 0.97238882],
       [0.        , 0.        , 0.82346654, 0.        ]])

## Fancy Indexing

In [48]:
# Build an 8x4 float array whose row i is filled with the value i.
# np.empty() leaves the memory uninitialized, so every row is written before use.
arr = np.empty((8, 4))
for i, row in enumerate(arr):
    row[:] = i  # `row` is a view into `arr`, so this writes through to the array
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

### To select out a subset of the rows in a particular order, you can simply pass a list or ndarray of integers specifying the desired order.

In [49]:
# Indexing with a list of integers picks whole rows, in the order listed.
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [51]:
# Integers 0..31 laid out row by row as 8 rows x 4 columns, so each element's
# value identifies its position in the examples below.
arr = np.arange(8 * 4).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

### Elements (1, 0), (5, 3), (7, 1), and (2, 2) are selected by the cell below. Regardless of how many dimensions the array has, the result of fancy indexing with one integer array per axis is always one-dimensional.

In [52]:
# Two index lists are paired up position by position: (1, 0), (5, 3), (7, 1), (2, 2).
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

### Fancy indexing, unlike slicing, always `copies` the data into a new array.

## Transposing Arrays and Swapping Axes