 NumPy and Pandas Basics (ML Relevant)

# NumPy Basics

The NumPy library is used for numerical computation and data analysis. It provides support for large, multi-dimensional arrays and matrices, and is the foundation of most scientific computing in Python. NumPy arrays are similar to Python lists, but they are more efficient and provide additional functionality.

In [73]:
# NumPy is conventionally imported under the alias np
import numpy as np

# 1-D array built from a Python list
myarr1 = np.array([2, 4, 6, 8])

# 4 x 4 array filled with zeros
myarr2 = np.zeros(shape=(4, 4))

# 2 x 4 array (2 rows, 4 columns) filled with ones
myarr3 = np.ones(shape=(2, 4))

# 6 evenly spaced values from 1 to 4.5, both endpoints included
myarr4 = np.linspace(1, 4.5, num=6)

# Values from 1 up to (but not including) 10, in steps of 2
myarr5 = np.arange(1, 10, 2)

# The 8 integers 0..7
myarr6 = np.arange(8)

# 3 x 4 array in which every element is the fill value 9
myarr9 = np.full(shape=(3, 4), fill_value=9)

# 5 x 5 identity matrix: ones on the main diagonal, zeros elsewhere
myarr10 = np.eye(5)

# 3 x 4 array whose memory is allocated but not initialised, so it holds
# arbitrary leftover values; use it when every element is overwritten later
myarr11 = np.empty(shape=(3, 4))

# Print every array, one after another, in creation order
print(
    myarr1,
    myarr2,
    myarr3,
    myarr4,
    myarr5,
    myarr6,
    myarr9,
    myarr10,
    myarr11,
    sep="\n",
)


[2 4 6 8]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[1.  1.7 2.4 3.1 3.8 4.5]
[1 3 5 7 9]
[0 1 2 3 4 5 6 7]
[[9 9 9 9]
 [9 9 9 9]
 [9 9 9 9]]
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
[[4.4e-323 4.4e-323 4.4e-323 4.4e-323]
 [4.4e-323 4.4e-323 4.4e-323 4.4e-323]
 [4.4e-323 4.4e-323 4.4e-323 4.4e-323]]


In [136]:
# With endpoint=False the stop value (-10) is left out: the 6 samples start
# at 0 and are spaced -10/6 apart
myarr = np.linspace(start=0, stop=-10, num=6, endpoint=False)
print(myarr)


[ 0.         -1.66666667 -3.33333333 -5.         -6.66666667 -8.33333333]


In [168]:
# The dtype argument fixes the element type at creation time: the integer
# inputs are stored as 64-bit floats here
myarr = np.array([2, 3, 6, 7, 9], dtype="float64")
print(myarr)

# The 12 integers 0..11, stored explicitly as 64-bit integers
myArr = np.arange(12, dtype="int64")
print(myArr)

[2. 3. 6. 7. 9.]
[ 0  1  2  3  4  5  6  7  8  9 10 11]


## 2 Dimensional Arrays

In [62]:
# The nesting depth of the input list decides the number of dimensions,
# and .shape reports the length of each dimension as a tuple

# Flat list -> 1-D array with 6 elements
a = np.array([1, 2, 3, 4, 5, 6])
print("1D ARRAY ", a.shape)

# List of two 3-element lists -> 2-D array with 2 rows and 3 columns
b = np.array([[1, 2, 3], [4, 5, 6]])
print("2D ARRAY ", b.shape)


1D ARRAY  (6,)
2D ARRAY  (2, 3)


In [59]:
# np.newaxis inserts a new axis of length 1 at the position where it appears
# in the index, turning the 1-D array into a 2-D one

# New axis first: a single row holding all the elements
myarr7 = myarr1[np.newaxis, :]

# New axis last: a single column with one element per row
myarr8 = myarr1[:, np.newaxis]

print(myarr7)
print(myarr7.shape)    # (1, n): 1 row, n columns
print()
print(myarr8)
print(myarr8.shape)    # (n, 1): n rows, 1 column


[[2 4 6 8]]
(1, 4)

[[2]
 [4]
 [6]
 [8]]
(4, 1)


## NumPy Properties

In [66]:
# A 2-D array with 3 rows and 2 columns
myarr = np.array([[1, 2], [2, 3], [3, 4]])

# shape : length of every dimension, outermost first -> (rows, columns) here
# ndim  : number of dimensions (the nesting depth of the brackets)
# size  : total number of elements
# len() : length of the first dimension only, i.e. the number of rows
for label, value in (
    ("Shape of array: ", myarr.shape),
    ("Dimensions: ", myarr.ndim),
    ("Size: ", myarr.size),
    ("Length: ", len(myarr)),
):
    print(label, value)


Shape of array:  (3, 2)
Dimensions:  2
Size:  6
Length:  3


## Slicing and Indexing


In [129]:
array1 = np.array([1, 2, 3, 4, 5, 6])

# Slicing is written start:stop:step. The stop index is exclusive, and any
# part left out defaults to the beginning, the end and a step of 1.
print(
    array1[1:5],      # indices 1, 2, 3, 4
    array1[1:6:2],    # indices 1, 3, 5
    array1[:4],       # from the start up to (not including) index 4
    array1[2:],       # from index 2 through the last element
    array1[:],        # the whole array
    sep="\n",
)
print()

# Indexing picks a single element; negative indices count from the end
print(array1[1], array1[3], array1[-1], sep="\n")


[2 3 4 5]
[2 4 6]
[1 2 3 4]
[3 4 5 6]
[1 2 3 4 5 6]

2
4
6


## Indexing in 2D Array

In [None]:
myarr = np.array([[1, 2], [2, 3], [3, 4]])

# 2-D indexing is written [row, column], both counted from 0
print(
    myarr[2, 1],    # one element: row index 2 (the third row), column index 1
    myarr[:, 1],    # every row of column index 1
    myarr[2, :],    # every column of row index 2
    sep="\n",
)


4
[2 3 4]
[3 4]


## Boolean Indexing

In [140]:
arrayBool = np.array([2, 4, 17, 6, 8, 7, 9, 14, 16])

# Comparing an array with a scalar yields a boolean mask of the same shape:
# True wherever the element is below 10
arrayFinal = arrayBool < 10

# Show the mask, then use it as an index to keep only the True positions
print(arrayFinal, arrayBool[arrayFinal], sep="\n")


[ True  True False  True  True  True  True False False]
[2 4 6 8 7 9]


In [154]:
arrayBool = np.array([2, 4, 17, 6, 8, 7, 9, 14, 16])

# Masks are combined element-wise with & (and) and | (or). Each comparison
# needs its own parentheses because & and | bind tighter than <, >= and ==.
result = (arrayBool >= 4) & (arrayBool <= 10)    # between 4 and 10 inclusive
result1 = (arrayBool < 6) | (arrayBool == 17)    # below 6, or exactly 17
print(result, result1, sep="\n")

# Keep only the elements selected by each mask
print(arrayBool[result], arrayBool[result1], sep="\n")
print()

# A remainder of 0 after dividing by 2 marks the even numbers
evenNum = arrayBool % 2 == 0
print(evenNum, arrayBool[evenNum], sep="\n")


[False  True False  True  True  True  True False False]
[ True  True  True False False False False False False]
[4 6 8 7 9]
[ 2  4 17]

[ True  True False  True  True False False  True  True]
[ 2  4  6  8 14 16]


## Sorting

In [91]:
arr = np.array([3, 1, 7, 5, 9])
print("Original Array: ", arr)

# np.sort returns a sorted copy; arr itself keeps its original order
print("Sorted Array: ", np.sort(arr))

# argsort lists, for each position of the sorted result, the index in arr
# of the element that belongs there
print("Indices that would sort the array: ", arr.argsort())


Original Array:  [3 1 7 5 9]
Sorted Array:  [1 3 5 7 9]
Indices that would sort the array:  [1 0 3 2 4]


## Concatenation

In [99]:
# np.concatenate joins arrays along an existing axis; axis 0 (the rows) is
# the default and is spelled out here

# Two 2 x 3 arrays joined row-wise give a 4 x 3 array
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
finalArr = np.concatenate((arr1, arr2), axis=0)
print("Concatenated Array: ", finalArr)

# The row counts may differ (2 rows and 1 row here) as long as the number
# of columns is the same
arr3 = np.array([[13, 14, 15], [16, 17, 18]])
arr4 = np.array([[19, 20, 21]])
finalArray = np.concatenate((arr3, arr4), axis=0)
print("Concatenated Array: ", finalArray)


Concatenated Array:  [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
Concatenated Array:  [[13 14 15]
 [16 17 18]
 [19 20 21]]


## Mathematical operations

In [28]:
# Two 2 x 3 arrays to operate on
x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[4, 5, 6], [1, 2, 3]])

# +, -, * and / work element by element on arrays of the same shape.
# The dot product needs the inner dimensions to agree, so y is transposed
# to 3 x 2 and the result is 2 x 2.
for label, outcome in (
    ("Addition: ", x + y),
    ("Subtraction: ", x - y),
    ("Multiplication: ", x * y),
    ("Division: ", x / y),
    ("Dot product: ", x.dot(y.T)),
):
    print(label, outcome)


Addition:  [[5 7 9]
 [5 7 9]]
Subtraction:  [[-3 -3 -3]
 [ 3  3  3]]
Multiplication:  [[ 4 10 18]
 [ 4 10 18]]
Division:  [[0.25 0.4  0.5 ]
 [4.   2.5  2.  ]]
Dot product:  [[32 14]
 [77 32]]


In [71]:
# The 6 integers 0..5
x = np.arange(6)
print(x)

# Arithmetic operators act on every element, so multiplying the array by
# itself squares each value
y = x * x
print(y)


[0 1 2 3 4 5]
[ 0  1  4  9 16 25]


In [33]:
# Random numbers: a 4 x 3 array (4 rows, 3 columns) of floats, each drawn
# uniformly from the half-open interval [0, 1); the values differ on every run

print("Random Numbers btw 0 and 1: ", np.random.rand(4, 3))


Random Numbers btw 0 and 1:  [[0.30303329 0.72645068 0.20712169]
 [0.59208061 0.69644353 0.46220174]
 [0.48237016 0.96057833 0.52554289]
 [0.53923346 0.86308229 0.07350583]]


## Reshaping Arrays

In [69]:
# reshape presents the same elements, in the same order, under a new shape;
# the new shape must hold exactly as many elements as the original
myarr = np.array([1, 2, 3, 4, 5, 7])
print(myarr)

# 6 elements fit both 2 rows x 3 columns and 3 rows x 2 columns
print("Reshaped array: ", myarr.reshape((2, 3)))
print("Reshaped array: ", myarr.reshape((3, 2)))
print()

# 16 elements fit 4 x 4, 2 x 8 and 8 x 2
myarr = np.arange(16)
print(myarr)
for rows, cols in ((4, 4), (2, 8), (8, 2)):
    print("Reshaped array: ", myarr.reshape(rows, cols))


[1 2 3 4 5 7]
Reshaped array:  [[1 2 3]
 [4 5 7]]
Reshaped array:  [[1 2]
 [3 4]
 [5 7]]

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
Reshaped array:  [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
Reshaped array:  [[ 0  1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14 15]]
Reshaped array:  [[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]
 [12 13]
 [14 15]]


## vstack and hstack

In [163]:
# vstack stacks arrays vertically: each 1-D input becomes one row of the
# 2-D result
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
arr3 = np.array([7, 8, 9])

result = np.vstack([arr1, arr2, arr3])
print(result)


[[1 2 3]
 [4 5 6]
 [7 8 9]]


ValueError: cannot reshape array of size 9 into shape (4,2)

In [167]:
# hstack joins arrays horizontally: 1-D inputs are chained end to end into
# one longer 1-D array
arr1 = np.array([1, 2])
arr2 = np.array([4, 5])
arr3 = np.array([7, 8])

result = np.hstack([arr1, arr2, arr3])
print(result)
print()

# The 6 chained values regrouped as 3 rows x 2 columns
print("Reshaped: ", result.reshape(3, 2))

[1 2 4 5 7 8]

Reshaped:  [[1 2]
 [4 5]
 [7 8]]


## 3 Dimensional Arrays

In [100]:
# A 3-D array: 3 blocks, each holding 2 rows of 3 columns
array3d = np.array([
    [[1, 2, 3], [7, 8, 9]],
    [[3, 4, 5], [6, 7, 8]],
    [[5, 6, 7], [8, 9, 10]],
])
print(array3d)

[[[ 1  2  3]
  [ 7  8  9]]

 [[ 3  4  5]
  [ 6  7  8]]

 [[ 5  6  7]
  [ 8  9 10]]]


In [109]:
# Properties of the 3-D array
print("Shape of array: ", array3d.shape)    # length of each dimension, outermost first
print("Dimensions: ", array3d.ndim)         # number of dimensions
print("Size: ", array3d.size)               # total number of elements
print("Length: ", len(array3d))             # length of the first dimension only
print()

# Reshaping keeps the elements in the same order and only changes how they
# are grouped, so each new shape must hold the same number of elements
print("Reshaped array: ", array3d.reshape(3, 3, 2))
print("Reshaped array: ", array3d.reshape(2, 3, 3))


Shape of array:  (3, 2, 3)
Dimensions:  3
Size:  18
Length:  3

Reshaed array:  [[[ 1  2]
  [ 3  7]
  [ 8  9]]

 [[ 3  4]
  [ 5  6]
  [ 7  8]]

 [[ 5  6]
  [ 7  8]
  [ 9 10]]]
Reshaped array:  [[[ 1  2  3]
  [ 7  8  9]
  [ 3  4  5]]

 [[ 6  7  8]
  [ 5  6  7]
  [ 8  9 10]]]


## Indexing in 3D Arrays

In [134]:
# A 3-D array: 2 blocks, each holding 2 rows of 3 columns
array3 = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
print(array3)

# 3-D indexing is written [block, row, column], all counted from 0
print(
    array3[0, 1, 2],    # block 0, row 1, column 2
    array3[1, 0, 2],    # block 1, row 0, column 2
    array3[0, 0, 1],    # block 0, row 0, column 1
    array3[1, 1, 0],    # block 1, row 1, column 0
    sep="\n",
)


[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
6
9
2
10


## Conversion from 3D to 2D

In [120]:
# -1 asks reshape to work out that dimension from the element count, so
# this gives a 2-D array with 2 columns and as many rows as needed
array2d = array3d.reshape((-1, 2))
print(array2d)

# A lone -1 collapses everything into a single 1-D array
array1d = array2d.reshape((-1,))
print(array1d)

# Practice: a sorted copy of the flattened values
print(np.sort(array1d))


[[ 1  2]
 [ 3  7]
 [ 8  9]
 [ 3  4]
 [ 5  6]
 [ 7  8]
 [ 5  6]
 [ 7  8]
 [ 9 10]]
[ 1  2  3  7  8  9  3  4  5  6  7  8  5  6  7  8  9 10]
[ 1  2  3  3  4  5  5  6  6  7  7  7  8  8  8  9  9 10]


## Matrices / Matrix

In [172]:
# A 2 x 5 matrix written out as nested lists
arr1 = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
print(arr1)
print()

# Generate the 20 values 1, 4, 7, ..., 58 and lay them out as 4 rows x 5
# columns in one chained expression
arr2 = np.arange(1, 60, 3).reshape((4, 5))
print(arr2)


[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]

[[ 1  4  7 10 13]
 [16 19 22 25 28]
 [31 34 37 40 43]
 [46 49 52 55 58]]


array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [180]:
# Slicing the 2-D matrix. The stop index of a slice is exclusive, and the
# results are printed with a blank line between them.
print(
    arr2[:],         # every row
    arr2[2:],        # row index 2 through the last row
    arr2[1:, :],     # rows from index 1 onward, all columns
    arr2[1:3, :],    # row indices 1 and 2, all columns
    sep="\n\n",
)


[[ 1  4  7 10 13]
 [16 19 22 25 28]
 [31 34 37 40 43]
 [46 49 52 55 58]]

[[31 34 37 40 43]
 [46 49 52 55 58]]

[[16 19 22 25 28]
 [31 34 37 40 43]
 [46 49 52 55 58]]

[[16 19 22 25 28]
 [31 34 37 40 43]]


In [187]:
# Row and column slices can be combined; every stop index is exclusive

# Row index 2 only, column indices 2 and 3
print(arr2[2:3, 2:4], end="\n\n")

# Row indices 0 and 1, column index 2 through the last column
print(arr2[0:2, 2:], end="\n\n")

# Row indices 1 and 2, column indices 3 and 4, stored in `final`
final = arr2[1:3, 3:5]
print(final)


[[37 40]]

[[ 7 10 13]
 [22 25 28]]

[[25 28]
 [40 43]]


In [203]:
# Summary statistics (max, min, sum, mean) of the `final` sub-matrix
print(final)
print()

# Largest value overall, then per column: axis=0 reduces along the rows,
# leaving one result for each column
print("Max: ", final.max())
print("Max at 0 axis: ", final.max(axis=0))

# Smallest value overall, then per row: axis=1 reduces along the columns,
# leaving one result for each row
print("Min: ", final.min())
print("Min at 1 axis: ", final.min(axis=1))

# Sum of all elements
print("Sum: ", final.sum())

# Arithmetic mean of all elements
print("Mean: ", final.mean())


[[25 28]
 [40 43]]

Max:  43
Max at 0 axis:  [40 43]
Min:  25
Min at 1 axis:  [25 40]
Sum:  136
Mode:  34.0


## Broadcasting in Matrices

In [217]:
# The 8 values 0, 5, ..., 35 laid out as 4 rows x 2 columns
arrayF = np.arange(0, 40, 5).reshape((4, 2))
print(arrayF, end="\n\n")

# Broadcasting: the 2-element array is matched against every row, so its
# first value is added to column index 0 and its second to column index 1
print(arrayF + np.array([2, 5]), end="\n\n")
print(arrayF + np.array([5, 8]))


[[ 0  5]
 [10 15]
 [20 25]
 [30 35]]

[[ 2 10]
 [12 20]
 [22 30]
 [32 40]]

[[ 5 13]
 [15 23]
 [25 33]
 [35 43]]


In [222]:
# vstack and hstack given a single 2-D array treat each of its rows as one
# input array

# Stacking the rows vertically rebuilds the same 2-D layout
print("Vstacking: ", np.vstack(arrayF + np.array([2, 5])), end="\n\n")

# Stacking the rows horizontally chains them into one 1-D array
print("Hstacking: ", np.hstack(arrayF + np.array([2, 5])))


Vstacking:  [[ 2 10]
 [12 20]
 [22 30]
 [32 40]]

Hstacking:  [ 2 10 12 20 22 30 32 40]


In [228]:
# Generating random numbers with the standard-library random module
import random

# 20 random integers between 1 and 50; random.randint includes both bounds
randNum = [random.randint(1, 50) for _ in range(20)]
print(randNum)


[6, 15, 12, 22, 7, 4, 14, 14, 3, 12, 20, 6, 14, 48, 47, 37, 27, 23, 39, 9]


In [230]:
# NumPy does this in a single call. `high` is exclusive, so 51 is passed to
# get 20 integers between 1 and 50.
randNum = np.random.randint(low=1, high=51, size=20)
print(randNum)


[19 26 46 39 40 27 20 46 40 30 14  9 42 17 35 45 44 13 46 24]
