# Data Science Basics - Numpy

In [1]:
import numpy as np

## The Basics

In [2]:
# 1-D array (a vector) built from a plain Python sequence
a = np.array((1, 2, 3))
a

array([1, 2, 3])

In [3]:
# 2-D array (a matrix): each inner list becomes one row
b = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
b

array([[1, 2, 3],
       [4, 5, 6]])

In [4]:
# Get the number of dimensions (axes) of each array
a.ndim, b.ndim

(1, 2)

In [5]:
# Get the shape: the length of the array along each dimension
a.shape, b.shape

((3,), (2, 3))

In [6]:
# Get the element data type (dtype) of each array
a.dtype, b.dtype

(dtype('int64'), dtype('int64'))

In [7]:
# Get the size of a single element, in bytes
a.itemsize, b.itemsize

(8, 8)

In [8]:
# Get the total number of elements
a.size, b.size

(3, 6)

In [9]:
# Get the total number of bytes used by the elements (size * itemsize)
a.nbytes, b.nbytes

(24, 48)

## Accessing/Changing specific elements, rows, columns, etc

In [10]:
# 2 x 7 array used for the indexing examples below
c = np.array(
    [
        [1, 2, 3, 4, 5, 6, 7],
        [8, 9, 10, 11, 12, 13, 14],
    ]
)
c

array([[ 1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14]])

In [11]:
# Shape is (rows, columns)
c.shape

(2, 7)

In [12]:
# Get a specific element with [row, column]; indices are zero-based
c[1, 5]

13

In [13]:
# Negative indices count from the end: column -2 is the second-to-last column
c[1, -2]

13

In [14]:
# Get a specific row (":" selects every column)
c[0, :]

array([1, 2, 3, 4, 5, 6, 7])

In [15]:
# Get a specific column (":" selects every row)
c[:, 0]

array([1, 8])

In [16]:
# Fancier way of getting elements: [rowNumber, startIndex:endIndex:stepSize]; endIndex is exclusive
c[0, 1:6:2]

array([2, 4, 6])

In [17]:
# Same slice with a negative end index (-1 is the last column, which is excluded)
c[0, 1:-1:2]

array([2, 4, 6])

In [18]:
# Changing the value of a single element (c is modified in place)
c[1, 5] = 20
c

array([[ 1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 20, 14]])

In [19]:
# Changing the value of a series of elements: one value is written to every row of column 5
c[:, 5] = 20
c

array([[ 1,  2,  3,  4,  5, 20,  7],
       [ 8,  9, 10, 11, 12, 20, 14]])

In [20]:
# Changing the value of a series of elements with different values.
# The right-hand side must have the same structure as the selection (here: one value per row).
c[:, 5] = [30, 40]
c

array([[ 1,  2,  3,  4,  5, 30,  7],
       [ 8,  9, 10, 11, 12, 40, 14]])

In [21]:
## 3D - Example

In [22]:
# 3-D array: 2 blocks, each holding 2 rows of 7 values
d = np.array([
    [[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14]],
    [[15, 16, 17, 18, 19, 20, 21], [22, 23, 24, 25, 26, 27, 28]],
])
d

array([[[ 1,  2,  3,  4,  5,  6,  7],
        [ 8,  9, 10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19, 20, 21],
        [22, 23, 24, 25, 26, 27, 28]]])

In [23]:
# Get a specific element; the suggestion is to work from the outermost index inwards
d[0, 1, 4]

12

In [24]:
# Fancy way: the element at index 4 of every row in every block
d[:, :, 4]

array([[ 5, 12],
       [19, 26]])

In [25]:
# Changing the value of a series of elements with different values.
# The right-hand side must have the same structure as the selection (here 2 x 2).
d[:, :, 4] = [[50, 50], [60, 60]]
d

array([[[ 1,  2,  3,  4, 50,  6,  7],
        [ 8,  9, 10, 11, 50, 13, 14]],

       [[15, 16, 17, 18, 60, 20, 21],
        [22, 23, 24, 25, 60, 27, 28]]])

## Initializing Different Types of Arrays

In [26]:
# All-zeros array (1-D, 5 elements)
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [27]:
# Different shape: 2-D zeros, with the shape passed as a sequence
np.zeros([2, 3])

array([[0., 0., 0.],
       [0., 0., 0.]])

In [28]:
# Different shape: 3-D zeros
np.zeros([2, 3, 3])

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [29]:
# Different shape: 4-D zeros
np.zeros([2, 3, 3, 3])

array([[[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]],


       [[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]]])

In [30]:
# All-ones array (1-D, 5 elements)
np.ones(5)

array([1., 1., 1., 1., 1.])

In [31]:
# Array filled with any other number: takes the shape first and then the fill value
np.full((2, 2), 100)


array([[100, 100],
       [100, 100]])

In [32]:
# full_like takes the shape (and, per the NumPy docs, the dtype) from an existing array.
# The existing values are not copied; the fill value still needs to be provided.
np.full_like(b, 4)

array([[4, 4, 4],
       [4, 4, 4]])

In [33]:
# Random decimal numbers; the dimensions are passed as separate arguments.
# No seed is set in this notebook, so the values differ on every run.
np.random.rand(4, 2, 3)

array([[[0.64705928, 0.32770452, 0.47701602],
        [0.49249965, 0.99594462, 0.64408995]],

       [[0.7441447 , 0.0668687 , 0.2593563 ],
        [0.84820673, 0.65838446, 0.44060854]],

       [[0.94971035, 0.91632397, 0.48123764],
        [0.1313747 , 0.05697527, 0.99993866]],

       [[0.91741478, 0.71103488, 0.51647809],
        [0.59393854, 0.99100477, 0.81744173]]])

In [34]:
# Random decimal numbers with the shape passed as a tuple (here the shape of b)
np.random.random_sample(b.shape)

array([[0.15396701, 0.86368234, 0.3989804 ],
       [0.44296207, 0.18093986, 0.24813604]])

In [35]:
# Random integers below 100 (the upper bound is exclusive) in a 3 x 3 array
np.random.randint(100, size=(3, 3))

array([[12, 75, 83],
       [54, 97, 63],
       [97, 99, 79]])

In [36]:
# Identity matrix. By nature it is a square matrix, so only one size is needed.
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [37]:
# Repeating the elements of an array: takes the array and the number of repeats.
# Without an axis the result is flattened to 1-D.
np.repeat(b, 3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6])

In [38]:
# An axis can also be provided: with axis=1 each element is repeated along its row
np.repeat(b, 3, axis=1)

array([[1, 1, 1, 2, 2, 2, 3, 3, 3],
       [4, 4, 4, 5, 5, 5, 6, 6, 6]])

## Array manipulation

In [39]:
# Start from a 5 x 5 block of ones
output = np.ones(shape=(5, 5))
output

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [40]:
# 3 x 3 block of zeros with a 9 in the centre
z = np.zeros(shape=(3, 3))
z[1, 1] = 9.0
z

array([[0., 0., 0.],
       [0., 9., 0.],
       [0., 0., 0.]])

In [41]:
# Paste z into the middle of output (rows 1-3, columns 1-3)
output[1:4, 1:4] = z
output

array([[1., 1., 1., 1., 1.],
       [1., 0., 0., 0., 1.],
       [1., 0., 9., 0., 1.],
       [1., 0., 0., 0., 1.],
       [1., 1., 1., 1., 1.]])

In [42]:
# Same assignment written with negative end indices (everything except the outer border)
output[1:-1, 1:-1] = z
output

array([[1., 1., 1., 1., 1.],
       [1., 0., 0., 0., 1.],
       [1., 0., 9., 0., 1.],
       [1., 0., 0., 0., 1.],
       [1., 1., 1., 1., 1.]])

## Be careful when copying an array. 

In [43]:
# Here b = a makes "b" point to the same array as "a". Any change made through "b" will also change "a".
# Note: this rebinds "b", which held the 2-D array until now.
b = a

In [44]:
# To avoid that, use "copy" so that "b" refers to a separate array
b = a.copy()

## Mathematics

In [45]:
# Vector used for the element-wise arithmetic examples
e = np.array((1, 2, 3, 4))
e

array([1, 2, 3, 4])

In [46]:
# Add a scalar to every element
e + 2

array([3, 4, 5, 6])

In [47]:
# Subtract a scalar from every element
e - 2

array([-1,  0,  1,  2])

In [48]:
# Multiply every element by a scalar
e * 2

array([2, 4, 6, 8])

In [49]:
# Divide every element by a scalar (the result is floating point)
e / 2

array([0.5, 1. , 1.5, 2. ])

In [50]:
# Raise every element to a power
e ** 2

array([ 1,  4,  9, 16])

In [51]:
# Take the sine of every element (values are interpreted as radians)
np.sin(e)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [52]:
# Take the cosine of every element (values are interpreted as radians)
np.cos(e)

array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])

In [53]:
# Arithmetic with another array: a second 4-element vector
f = np.array((10, 11, 12, 13))
f

array([10, 11, 12, 13])

In [54]:
# Element-wise addition of two arrays
e + f

array([11, 13, 15, 17])

In [55]:
# Element-wise subtraction
f - e

array([9, 9, 9, 9])

In [56]:
# Element-wise multiplication (not a matrix product)
e * f

array([10, 22, 36, 52])

In [57]:
# Element-wise division (the result is floating point)
e / f

array([0.1       , 0.18181818, 0.25      , 0.30769231])

## Linear Algebra

In [58]:
# Left operand for the matrix product: 2 x 3 matrix of ones
g = np.ones(shape=(2, 3))
g

array([[1., 1., 1.],
       [1., 1., 1.]])

In [59]:
# Right operand for the matrix product: 3 x 2 matrix filled with 2
h = np.full(shape=(3, 2), fill_value=2)
h

array([[2, 2],
       [2, 2],
       [2, 2]])

In [60]:
# Matrix multiplication: (2, 3) times (3, 2) gives a (2, 2) result
np.matmul(g, h)

array([[6., 6.],
       [6., 6.]])

In [61]:
# Determinant of the 3 x 3 identity matrix
i = np.identity(n=3)
np.linalg.det(i)

1.0

In [62]:
## Try other stuff like -
# Determinant
# Trace
# Singular Value Decomposition
# Eigenvalues
# Matrix Norm
# Inverse
# Etc.

## Statistics

In [63]:
# Small 2 x 3 sample for the statistics examples
stats = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [64]:
# Minimum of all elements
np.min(stats)

1

In [65]:
# Maximum of all elements
np.max(stats)

6

In [66]:
# Minimum of each row. Use axis=1 to reduce across the columns.
np.min(stats, axis=1)

array([1, 4])

In [67]:
# Maximum of each row. Use axis=1 to reduce across the columns.
np.max(stats, axis=1)

array([3, 6])

In [68]:
# Sum of each row (axis=1)
np.sum(stats, axis=1)

array([ 6, 15])

In [69]:
# Mean of all elements
np.mean(stats)

3.5

## Reorganizing Arrays

In [70]:
# 2 x 4 array that the reshape examples start from
before = np.array(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
    ]
)
before

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [71]:
# Reshape the 2 x 4 array to 4 x 2; the 8 elements keep their order
after = before.reshape(4, 2)
after

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [72]:
# Reshape to three dimensions (2 x 2 x 2); this rebinds "after"
after = before.reshape(2, 2, 2)
after

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [73]:
# Vertically stacking vectors: two 4-element vectors to stack
v1 = np.array((1, 2, 3, 4))
v2 = np.array((5, 6, 6, 8))
(v1, v2)

(array([1, 2, 3, 4]), array([5, 6, 6, 8]))

In [74]:
# Vertical stack: each vector becomes one row of the result
np.vstack([v1, v2])

array([[1, 2, 3, 4],
       [5, 6, 6, 8]])

In [75]:
# Any number of vectors can be stacked, and a vector may appear more than once
np.vstack([v1, v2, v1, v2])

array([[1, 2, 3, 4],
       [5, 6, 6, 8],
       [1, 2, 3, 4],
       [5, 6, 6, 8]])

In [76]:
# Horizontal stack: the 1-D vectors are joined end to end into one longer vector
np.hstack([v1, v2])

array([1, 2, 3, 4, 5, 6, 6, 8])

In [77]:
# Horizontal stack of four vectors
np.hstack([v1, v2, v1, v2])

array([1, 2, 3, 4, 5, 6, 6, 8, 1, 2, 3, 4, 5, 6, 6, 8])

## Miscellaneous

In [78]:
# Load comma-separated data from a file (path is relative to the notebook's working directory)
filedata = np.genfromtxt("data/numpy-example-data", delimiter=",")
print(filedata)

# The same data converted to a different type
print(filedata.astype("int32")) # astype returns a converted copy; filedata itself is not modified

[[ 1.  2.  3.  4. 50.  6.  7.  8.  9.]
 [ 1.  2.  3.  4.  5. 60.  7.  8.  9.]
 [ 1.  2.  3.  4.  5.  6. 70.  8.  9.]]
[[ 1  2  3  4 50  6  7  8  9]
 [ 1  2  3  4  5 60  7  8  9]
 [ 1  2  3  4  5  6 70  8  9]]


## Masking and Advanced Indexing

In [79]:
# Boolean masking: a comparison yields an array of booleans with the same shape
filedata > 50

array([[False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False, False, False],
       [False, False, False, False, False, False,  True, False, False]])

In [80]:
# Values equal to 50 are False in both this mask and the "> 50" mask
filedata < 50

array([[ True,  True,  True,  True, False,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True, False,  True,  True]])

In [81]:
# Find all: with axis=0, True for a column only if every value in that column is > 50
np.all(filedata > 50, axis=0)

array([False, False, False, False, False, False, False, False, False])

In [82]:
# Find any: with axis=0, True for a column if at least one value in that column is > 50
np.any(filedata > 50, axis=0)

array([False, False, False, False, False,  True,  True, False, False])

In [83]:
# Find data in-between: combine two masks with "&"; each comparison needs its own parentheses
((filedata > 50) & (filedata < 100))

array([[False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False, False, False],
       [False, False, False, False, False, False,  True, False, False]])

In [84]:
# Find reverse: negate the whole in-between mask.
# "~" binds tighter than "&", so the combined condition must be wrapped in parentheses;
# without them only (filedata > 50) would be negated.
# For this data the result is the same either way, because every value is below 100.
~((filedata > 50) & (filedata < 100))

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True, False,  True,  True]])

In [85]:
# Indexing with a list of row numbers: selects rows 1 and 2
filedata[[1,2]]

array([[ 1.,  2.,  3.,  4.,  5., 60.,  7.,  8.,  9.],
       [ 1.,  2.,  3.,  4.,  5.,  6., 70.,  8.,  9.]])

In [86]:
# Indexing in a range: row 0 only, columns 0 and 1 (the result stays 2-D)
filedata[0:1,0:2]

array([[1., 2.]])

In [87]:
# Indexing specifics: the two lists are paired up, selecting elements (0, 0) and (1, 2)
filedata[[0,1],[0,2]]

array([1., 3.])