<a href="https://colab.research.google.com/github/lanky441/sure_2025_python/blob/main/numpy_matplotlib.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NumPy
- NumPy (Numerical Python) is a Python library.
- NumPy is used for working with arrays.

### If you want to use `numpy` in your code, you need to import the `numpy` library.



In [None]:
import numpy

In [None]:
x = numpy.pi
print(x)

**It is recommended to import the numpy library under the `np` alias in the code**

In [None]:
import numpy as np
x = np.pi
print(x)

# You can use numpy to do lots of mathematical calculations.
### Here are a few examples.

You can get a full list here: https://numpy.org/doc/stable/reference/routines.math.html

In [None]:
pi = np.pi
print(np.sin(pi/2))
print(np.cos(pi/3))
print(np.tan(pi/4))

**Be careful about numerical precision. By default, floating-point results are accurate only up to about the 15th decimal place!**

In [None]:
print(np.sin(np.deg2rad(30)))

### Explore at home
### F-string

In [None]:
# How do we print a number rounded to a certain number of decimal places?
# Introducing f-strings: a string literal prefixed with `f`

print(f"Hello World!")

In [None]:
age = 31
print("I am", age, "years old.") #without using f-string
print(f"I am {age} years old.") #using f-string

In [None]:
x = np.sin(np.deg2rad(30))
print(f"sin(30) = {x}")
print(f"sin(30) = {x:.5f}") # prints up to the 5th decimal place (rounded)
print(f"sin(30) = {x:.3f}") # prints up to the 3rd decimal place (rounded)

# Create a NumPy ndarray Object
But the most useful feature of NumPy is the NumPy array.

NumPy is used to work with arrays. The array object in NumPy is called `ndarray`. Working with numpy arrays is much easier and much faster.

We can create a NumPy ndarray object by using the `array()` function.

In [None]:
# pass a python list to numpy array() function to create a numpy array
arr = np.array([1, 2, 3, 4, 5])

print(arr)
print(len(arr))
print(type(arr))

In [None]:
print(arr.ndim)
print(arr.shape)

In [None]:
# You can also create a python list and make it a numpy array
list1 = [1, 2, 3, 4, 5]
arr = np.array(list1)

print(arr)
print(type(arr))
print(type(list1))

In [None]:
# You can also create numpy arrays with more than one dimension
# Here is an example for creating a 2D array (2D matrix)
arr2 = np.array([[1, 2, 3], [4, 5, 6]])

print(arr2)
print(type(arr2))
print(arr2.ndim) # dimension of the array
print(arr2.shape) # shape of the array

In [None]:
# Check at home
# You can make arrays with even higher dimensions
# But 2D numpy arrays will probably be most useful for you
# Let us create a 3D array: 2 blocks, each with 2 rows and 3 columns

arr3 = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

print(arr3)
print(arr3.ndim) # dimension of the array
print(arr3.shape) # shape of the array, here (2, 2, 3)

# It may be a little confusing to understand the shape of 3 or higher dimensional arrays
# We will mostly stick to 1 or 2D arrays for the rest of the workshop

# Some quick ways to create numpy arrays

In [None]:
# Creating an array of zeros with a specified shape
zeros_arr = np.zeros((2, 3))
print("Array of zeros with specified shape (2x3):")
print(zeros_arr)

In [None]:
print(np.zeros(5))

In [None]:
# Creating an array of ones with a specified shape
ones_arr = np.ones((3, 2))
print("Array of ones with specified shape (3x2):")
print(ones_arr)

In [None]:
# Creating an array with a range of values
range_arr = np.arange(0, 10, 2)  # Start: 0, Stop: 10 (exclusive), Step: 2
print("Array with a range of values (start=0, stop=10, step=2):")
print(range_arr)
print(type(range_arr))

In [None]:
# Remember: `np.arange` creates a numpy array, but python's built-in `range` creates a `range` object
arr = range(0, 10, 2)
print(arr)
print(type(arr))

In [None]:
# Creating an array of evenly spaced values
linspace_arr = np.linspace(0, 20, 11)  # Start: 0, Stop: 20, Num: 11
print("Array of evenly spaced values (start=0, stop=20, num=11):")
print(linspace_arr)

# Basic array operation

We perform basic array operations like addition, subtraction, multiplication, and division on NumPy arrays.

In [None]:
# Creating two NumPy arrays of same shape
arr1 = np.array([1, 2, 3, 4, 5])
print("Shape of arr1: ", arr1.shape)

arr2 = np.array([6, 7, 8, 9, 10])
print("Shape of arr2: ", arr2.shape)

In [None]:
# Addition of two arrays
addition_result = arr1 + arr2
print("Array Addition:")
print(addition_result)

In [None]:
# Remember what would have happened if they were python lists
list1 = [1, 2, 3, 4, 5]
list2 = [6, 7, 8, 9, 10]

print("Python List Addition:")
print(list1 + list2)

In [None]:
# Subtraction of two arrays
subtraction_result = arr2 - arr1
print("Array Subtraction:")
print(subtraction_result)

In [None]:
# Element-wise multiplication of two arrays
multiplication_result = arr1 * arr2
print("Array Multiplication:")
print(multiplication_result)

In [None]:
# Element-wise division of two arrays
division_result = arr2 / arr1
print("Array Division:")
print(division_result)

In [None]:
# What if the arrays are of different shapes?
# Shapes (5,) and (3,) are not compatible, so the addition below
# is expected to fail with a ValueError (this cell is a demonstration of the error)
arr_5 = np.array([1, 2, 3, 4, 5])
arr_3 = np.array([6, 7, 8])

print(arr_5 + arr_3)

# Broadcasting

Broadcasting lets NumPy apply an operation between arrays of different (but compatible) shapes, for example between a single number and every element of an array.

In [None]:
# Adding a scalar to all the elements of an array
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1 + 2)

In [None]:
arr1 = np.array([1,2,3,4,5])
print(arr1**2)

In [None]:
# You can do the same for arrays of higher dimension
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2 + 10)

# Array Manipulation


In [None]:
# Reshaping an array
arr4 = np.array([1, 2, 3, 4, 5, 6])
reshaped_arr = arr4.reshape(2, 3)

print("Original array:")
print(arr4)
print("Reshaped array:")
print(reshaped_arr)

In [None]:
# Transposing an array
transposed_arr = reshaped_arr.T
print("Transposed array:")
print(transposed_arr)

In [None]:
# Flattening an array: `flatten()` returns a 1D copy of the array's elements
flattened_arr = reshaped_arr.flatten()
print("Flattened array:")
print(flattened_arr)

# Indexing and slicing

Remember how we indexed and sliced Python lists? NumPy arrays are indexed and sliced the same way.

In [None]:
# Accessing individual elements using indexing
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1[0])
print(arr1[2])
print(arr1[-1])

In [None]:
# slicing: arr[start:stop:step] includes index `start` but excludes index `stop`
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1[1:4]) # from index 1 (included) to index 4 (excluded)
print(arr1[:3]) # from index 0 (included) to index 3 (excluded)
print(arr1[3:]) # from index 3 (included) to the end
print(arr1[1:5:2]) # from index 1 (included) to index 5 (excluded) with a step of 2
print(arr1[-3:]) # last 3 items

In [None]:
# you can choose some particular indices
arr = np.array([1, 2, 3, 4, 5])
print(arr[2])
# note the double brackets: the inner list holds the indices to pick.
# Index the array created in this cell (`arr`) so the cell does not depend
# on a variable left over from an earlier cell.
print(arr[[0, 3, 4]])

In [None]:
# comparing an array with a value gives a boolean array (a "mask"):
# True where the element satisfies the condition, False otherwise
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1>2)

In [None]:
# You can choose items from an array based on a condition
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1[arr1>2])

In [None]:
# you can use two conditions
arr1 = np.array([1, 2, 6, 3, 9, 4, 5, 6, 7, 8, 9, 1])
print(arr1[(arr1>2) & (arr1<7)]) # note that you have to use `&` symbol and not `and`

## slicing 2D arrays

In [None]:
# Creating a 2D NumPy array
arr2 = np.array([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])

print("Array shape = ", arr2.shape)
print(arr2)

In [None]:
# Accessing individual elements of a 2D array using indexing
print(arr2[0, 0])
print(arr2[1, 2])

In [None]:
# You can also access individual rows
print(arr2[1])

In [None]:
# Slicing rows and columns
print("Slice of Rows (2nd and 3rd rows):")
print(arr2[1:, :])  # Rows from index 1 to the end

In [None]:
print("Slice of Columns (2nd and 3rd columns):")
print(arr2[:, 1:])  # Columns from index 1 to the end

In [None]:
# Slicing with a step
print("Sliced Array with Step (Every 2nd element in the 1st row):")
print(arr2[0, ::2])  # Every 2nd element in the 1st row

In [None]:
# Negative indexing and slicing
print("Negative Indexing and Slicing:")
print(arr2[-2:, -2:])  # Last 2 rows and last 2 columns

In [None]:
# Modifying elements using indexing
arr2[1, 1] = 10
print("Modified Array:")
print(arr2)

# Array Functions and Axis

In [None]:
# Use a numpy array (not a plain python list) so this cell matches the rest of
# the section; the numpy functions below give the same results either way.
arr1 = np.array([0, 1, 2, 3, 4, 5, 7, 8, 9])

# Calculating Sum, Mean, Median, Minimum, and Maximum
print("Sum = ", np.sum(arr1))
print("Mean = ", np.mean(arr1))
print("Median = ", np.median(arr1))
print("Minimum = ", np.min(arr1))
print("Maximum = ", np.max(arr1))

In [None]:
# What about 2D arrays
# Creating a 2D NumPy array
arr2 = np.array([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])

In [None]:
# Sum of all elements in the array
print("Sum of all elements in the array:", np.sum(arr2))

In [None]:
# Sum along columns (axis=0)
print("Sum along columns (axis=0):")
print(np.sum(arr2, axis=0))

In [None]:
# Sum along rows (axis=1)
print("Sum along rows (axis=1):")
print(np.sum(arr2, axis=1))

In [None]:
# Similarly, other functions accept `axis`:
#   axis=0 collapses the rows    -> one result per column
#   axis=1 collapses the columns -> one result per row

# Mean of each column (axis=0)
print("\nMean of each column (axis=0):")
print(np.mean(arr2, axis=0))

# Mean of each row (axis=1)
print("\nMean of each row (axis=1):")
print(np.mean(arr2, axis=1))

# Maximum of each column (axis=0)
print("\nMaximum of each column (axis=0):")
print(np.max(arr2, axis=0))

# Maximum of each row (axis=1)
print("\nMaximum of each row (axis=1):")
print(np.max(arr2, axis=1))

# Go through this if you are interested
# Numpy data types

Numpy has its own data types.

In [None]:
arr = np.array([1, 2, 3, 4])
# print the data type of the array elements
print(arr.dtype)
print(type(arr[0]))

In [None]:
arr = np.array(['apple', 'banana', 'cherry'])

print(arr.dtype)
print(arr.dtype.name)
print(type(arr[1]))

In [None]:
# By default, you CANNOT create a numpy array with different data types
arr = np.array([1, 2, 'a', 'b'])
print(arr) # It converts everything to string

print(arr.dtype.name)

print(type(arr[0]))
print(type(arr[2]))

In [None]:
# However, you can create numpy array with different data types by explicitly mentioning the data types to be 'object'
arr = np.array([1, 2, 'a', 'b'], dtype='object')
print(arr)
print(arr.dtype)
print(type(arr[0]))
print(type(arr[2]))

# Exercises

In [None]:
# 1. Create a 1D NumPy array containing the integers from 0 to 9.


In [None]:
# 2. Create a 3x3 NumPy array with all elements initialized to one.


In [None]:
# 3. Create a NumPy array with values ranging from 10 to 20, and extract all odd numbers from it.

In [None]:
# 4. Create a 2x3 numpy array and multiply each element by 5.

In [None]:
# 5. Create a 3x2 numpy array and compute the sum of all elements.

In [None]:
# 6. Create a 2x2 numpy array and calculate the mean along each row

In [None]:
# 7. For this 4x3 array, calculate the maximum value for each column
arr = np.array([[1, 2, 5], [7, 2, 0], [3, 4, 0], [1, 9, 5]])
print(arr)


# Plotting with matplotlib.pyplot

In [None]:
# import the function
import matplotlib.pyplot as plt

In [None]:
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

# Let us plot y vs x
plt.plot(x, y)
plt.show()

In [None]:
# Add label for the x and y axis
plt.plot(x, y)
plt.xlabel('number')
plt.ylabel('square of the number')
plt.show()

In [None]:
# Add a title for the plot
plt.plot(x, y)
plt.xlabel('number')
plt.ylabel('square of the number')
plt.title('Square of Numbers')
plt.show()

In [None]:
# Now let's do it again with a python list
# NOTE: a python list does not support the `**` operator, so the line
# `y = x**2` is expected to fail with a TypeError (this cell demonstrates the error)
x = [1, 2, 3, 4, 5]
y = x**2

print(y)

In [None]:
# Now let us try with numpy array
x = np.array([1, 2, 3, 4, 5])
y = x**2

print(y)

In [None]:
plt.plot(x, x**2)
plt.xlabel('x')
plt.ylabel('x^2')
plt.show()

In [None]:
# Explore Marker and line style

plt.plot(x, x**2, marker='o', ls="--")
plt.xlabel('x')
plt.ylabel('x^2')
plt.show()

In [None]:
# Different marker and ls to explore
# marker = [none, ., o, x, s, +, ^, v]
# ls = [-, --, -., :, none]

plt.plot(x, x**2, marker='^', ls=":")
plt.xlabel('x')
plt.ylabel('x^2')
plt.show()

## Scatter plot

In [None]:
# We can also plot just the points
x = np.arange(20)
y = x**2
plt.plot(x, y, marker='o', ls='none') # `ls` stands for `linestyle`
plt.xlabel('x')
plt.ylabel('x^2')
plt.show()

In [None]:
# we can achieve the same using scatter plot
x = np.arange(20)
y = x**2
plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('x^2')
plt.show()

# Multiple plots in a figure

In [None]:
# Multiple lines in a single plot
x = np.arange(11)
y1 = 10 * x**2
y2 = x**3

plt.plot(x, y1)
plt.plot(x, y2)

plt.xlabel('x')
plt.ylabel('f(x)')

plt.show()

In [None]:
# But which one is 10x^2 and which one is x^3?
# Use label to distinguish
x = np.arange(11)
y1 = 10 * x**2
y2 = x**3

plt.plot(x, y1, label='10x^2')
plt.plot(x, y2, label='x^3')

plt.xlabel('x')
plt.ylabel('f(x)')

plt.show()

In [None]:
# Still no labels for the lines!!
# You need to use `plt.legend`
plt.plot(x, y1, label='10x^2')
plt.plot(x, y2, label='x^3')

plt.xlabel('x')
plt.ylabel('f(x)')

plt.legend()

plt.show()

In [None]:
# You can choose the color for your lines
plt.plot(x, y1, label='10x^2', color='red')
plt.plot(x, y2, label='x^3', color='green')

plt.xlabel('x')
plt.ylabel('f(x)')

plt.legend()

plt.show()

In [None]:
# If you want to show the points you are plotting in addition with the lines
# and choose different styles for the lines

plt.plot(x, y1, label='10x^2', color='red', marker='+', ls='--')
plt.plot(x, y2, label='x^3', color='green', marker='x', ls='-.')

plt.xlabel('x')
plt.ylabel('f(x)')

plt.legend()

plt.show()

In [None]:
# you can put a grid in the plot
plt.plot(x, y1, label='10x^2', color='red')
plt.plot(x, y2, label='x^3', color='green')

plt.xlabel('x')
plt.ylabel('f(x)')

plt.legend()

plt.grid()

plt.show()

In [None]:
# you can plot only for certain ranges of x and y
plt.plot(x, y1, label='10x^2', color='red')
plt.plot(x, y2, label='x^3', color='green')

plt.xlabel('x')
plt.ylabel('f(x)')

plt.legend()

plt.grid()

plt.xlim(0, 5) #(lower, upper)
plt.ylim(0, 100) # (lower, upper)

plt.show()

In [None]:
# use `np.linspace` to create x samples (100 evenly spaced points from 0 to 10)
x = np.linspace(0, 10, 100)

plt.plot(x, np.sin(x), label='sin x')
plt.plot(x, np.cos(x), label='cos x')

plt.xlabel('x')
plt.ylabel('f(x)')

plt.legend()

plt.grid()

plt.show()

## histograms

In [None]:
# you can plot the distribution of a parameter using histogram
# first, let us generate some random data
# If you want to know more about random generator, visit https://www.w3schools.com/python/numpy/numpy_random.asp
x = np.random.randn(50)
print(x)

In [None]:
# Now let us plot the histogram (default number of bins)
plt.hist(x)
plt.xlabel('x')
plt.ylabel('number of occurrences')
plt.show()

In [None]:
# you can choose the number of bins
plt.hist(x, bins=7) # number of bins = 7
plt.xlabel('x')
plt.ylabel('number of occurrences')
plt.show()

In [None]:
# if you want certain bin edges of your choice
# 6 edges define 5 bins: [-2,-1), [-1,0), [0,1), [1,2), [2,3]
plt.hist(x, bins=[-2, -1, 0, 1, 2, 3], color='green', edgecolor='red') # number of bins will be 5
plt.xlabel('x')
plt.ylabel('number of occurrences')
plt.show()

# Exercises

In [None]:
# Here is an array for the marks obtained by the students of a class
# Do the following
# 1. What is the average mark obtained by the students?
# 2. How many students got more than or equal to 80?
# 3. How many students got more than or equal to 40 but less than 80
# 4. What is the maximum score?

# This creates a numpy array of 200 integers ranging from [0, 100)
# marks = np.random.randint(0, 100, 200) # Not very realistic

# This creates 100 marks normally distributed around 60 (standard deviation 15).
# Out-of-range values are clipped to [0, 100] instead of being removed, so that
# a mark of exactly 100 stays valid, no mark is negative, and the array always
# holds exactly 100 marks (one per student).
# `astype(np.int32)` then converts the marks to integers (the decimals are dropped).
marks = np.clip(np.random.normal(60, 15, 100), 0, 100).astype(np.int32)

In [None]:
# Use the same array above to do the following
# Let us assume the marks are sorted by the roll number of the students (1, 2, 3, ...)
# 1. Plot the marks (using a red dotted line and indicate the points by a square) obtained by the students with roll numbers from 20 (included) to 40 (included)
# 2. x-axis should show the roll number, y-axis should show the marks

In [None]:
# Scatter plot the same thing as above using plt.scatter

In [None]:
# Use the same array `marks` to do the following
# 1. Plot a histogram of the marks obtained by the students
# 2. Use 20 bins
# 3. Put x and y labels

In [None]:
# Use the same array above
# Assume the following grades: 0-20, 20-45, 45-70, 70-90, 90-100
# Plot a histogram that shows how many students are within each of the above grades


# bar plot

In [None]:
# Sometimes it is very useful when you have discrete values of the object along the x-axis
fruits = ["apple", "banana", "cherry", "kiwi", "mango"]
counts = [4, 6, 7, 2, 2]

plt.bar(fruits, counts)
plt.xlabel('fruits')
plt.ylabel('counts')
plt.show()

In [None]:
# You can show multiple pieces of information in one bar plot
# Play with all the options
fruits = ['apple', 'blueberry', 'cherry', 'orange']
counts = [40, 100, 30, 55]
# One legend label per bar. A label starting with an underscore is left out of
# the legend, so '_red' keeps the second red bar from adding a duplicate
# 'red' entry.
bar_labels = ['red', 'blue', '_red', 'orange']
bar_colors = ['red', 'blue', 'red', 'orange']

plt.bar(fruits, counts, width=0.8, label=bar_labels, color=bar_colors)

plt.ylabel('fruit supply')
plt.title('Fruit supply by kind and color')
plt.legend(title='Fruit color')

plt.show()