# Lab 1: Introduction to OpenCV

The goal of this first lab is to present a small introduction to image processing using OpenCV. In each section, you can find:
* a small example - analyse the code and try it
* some exercises

In [None]:
# Requirements for this tutorial
! pip install opencv-python
! pip install numpy

In [None]:
# If you prefer, you can convert this notebook to a Python script by
# uncommenting the following commands and running this cell. They are kept
# commented out so that "Run All" does not re-export the notebook every time.
# ! pip install nbconvert
# ! jupyter nbconvert --to script 01-introduction.ipynb

In [2]:
import cv2
import numpy as np
import os

# Directory (relative to the working directory) the example images such as 'ml.jpg' are read from
dataDir = './data'

### 1. Images – read, write and display; ROIs

In [3]:
# Opening an image
img_path = os.path.join(dataDir, 'ml.jpg')
img = cv2.imread(img_path)

# cv2.imread does not raise when the file is missing or unreadable: it returns
# None, which would otherwise only surface as an obscure error in cv2.imshow
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Showing the image
cv2.imshow("ml.jpg", img)

# Waiting for user to press a key to close the image
cv2.waitKey(0)

# Close the window after user pressed a key
cv2.destroyWindow("ml.jpg")

In [None]:
# Report the dimensions of the loaded image: its shape unpacks into
# (number of rows, number of columns, number of channels)
h, w, c = img.shape
print(f'height: {h}', f'width: {w}', f'channels: {c}', sep='\n')

In [None]:
# Saving the image loaded above under a new name, in bmp format
# (the path is relative, so the file is written to the current working directory)
cv2.imwrite('ml_new.bmp', img)

Exercise 1.1 - Read any other color image from a file and display it. As the mouse cursor moves over the image, show the coordinates and the RGB components of the pixel under the cursor. When the user clicks the mouse, let them modify the RGB components of the selected pixel.

In [4]:
# Color written into a pixel when the user clicks on it. It is kept in
# (R, G, B) order, which is how on_trackbar_change rebuilds it from the sliders.
new_color = (0, 0, 0)  # Initial color

def mouse_callback(event, x, y, flags, param):
    """Handle mouse events on the "ml.jpg" window.

    On mouse move, shows a copy of the global ``img`` with a marker on the
    pixel under the cursor and a text label with its coordinates and RGB
    components. On a left click, writes the global ``new_color`` into the
    clicked pixel of ``img`` and redisplays it.

    Args:
        event: OpenCV mouse event code (compared against ``cv2.EVENT_*``).
        x: Column of the cursor position.
        y: Row of the cursor position.
        flags: Unused.
        param: Unused.
    """
    global img
    if img is None:
        return

    # Ignore positions outside the image: a negative coordinate would silently
    # wrap around with NumPy indexing, and one past the edge raises IndexError.
    height, width = img.shape[:2]
    if not (0 <= x < width and 0 <= y < height):
        return

    if event == cv2.EVENT_MOUSEMOVE:
        # Pixels are stored in B, G, R order; reverse them so the values
        # actually match the "RGB" label shown to the user.
        pixel_rgb = img[y, x][::-1]
        # Draw on a copy so the marker and text never end up in the image itself
        img_with_marker = img.copy()
        # Draw a circle to indicate the current pixel under the cursor
        cv2.circle(img_with_marker, (x, y), 5, (0, 255, 0), -1)
        # Keep the label inside the image near the left and bottom borders
        text_x = max(x - 100, 0)
        text_y = y + 20 if y + 20 < height else y - 10
        # Display coordinates and RGB components as text
        cv2.putText(img_with_marker, f"X: {x}, Y: {y}, RGB: {pixel_rgb}", (text_x, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1)
        cv2.imshow("ml.jpg", img_with_marker)
    elif event == cv2.EVENT_LBUTTONDOWN:
        # new_color is (R, G, B); the image expects (B, G, R)
        r, g, b = new_color
        img[y, x] = (b, g, r)
        # Update image with the modified pixel
        cv2.imshow("ml.jpg", img)

# Trackbar callback shared by the three color sliders
def on_trackbar_change(value):
    """Rebuild the global ``new_color`` from the 'Color Picker' trackbars.

    ``value`` (passed in by OpenCV for the slider that moved) is not used:
    all three sliders are re-read so the color is always a full
    (R, G, B) tuple.
    """
    global new_color
    new_color = tuple(
        cv2.getTrackbarPos(channel, 'Color Picker') for channel in ('R', 'G', 'B')
    )

# Opening an image
img_path = os.path.join(dataDir, 'ml.jpg')
img = cv2.imread(img_path)

# cv2.imread returns None (it does not raise) when the file cannot be read;
# fail here with a clear message instead of deep inside cv2.imshow
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Showing the image
cv2.imshow("ml.jpg", img)

# Set mouse callback function
cv2.setMouseCallback("ml.jpg", mouse_callback)

# Create a separate window for the color picker; cv2.WINDOW_NORMAL makes it resizable
cv2.namedWindow('Color Picker', cv2.WINDOW_NORMAL)
#cv2.resizeWindow('Color Picker', 200, 100)  # Set the window size (width, height)
# One slider per color component, each ranging from 0 to 255 and starting at 0
cv2.createTrackbar('R', 'Color Picker', 0, 255, on_trackbar_change)
cv2.createTrackbar('G', 'Color Picker', 0, 255, on_trackbar_change)
cv2.createTrackbar('B', 'Color Picker', 0, 255, on_trackbar_change)

# Keep both windows open until a key is pressed, then close everything
cv2.waitKey(0)
cv2.destroyAllWindows()

Exercise 1.2 - Allow the user to select a region of interest (ROI) in the image, by clicking on two points that identify two opposite corners of the selected ROI, and save the ROI into another file.

In [6]:
import cv2
import numpy as np
import os

# Global variables to store image and clicked points
# (shared between roi_mouse_callback and the script at the end of this cell)
img = None  # image shown in the "Image" window; assigned from cv2.imread below
point1 = None  # first clicked corner as (x, y); None until the first click
point2 = None  # second clicked corner as (x, y); None until the second click
click_count = 0  # clicks made in the current selection; reset to 0 after the second one

def roi_mouse_callback(event, x, y, flags, param):
    """Collect two clicked corners of a region of interest on the "Image" window.

    The first left click stores ``point1``; the second stores ``point2`` and
    shows the final rectangle. Between the two clicks, moving the mouse shows
    a preview rectangle from ``point1`` to the cursor. All drawing happens on
    copies, so the global ``img`` is never modified.

    Args:
        event: OpenCV mouse event code (compared against ``cv2.EVENT_*``).
        x: Column of the cursor position.
        y: Row of the cursor position.
        flags: Unused.
        param: Unused.
    """
    global point1, point2, click_count

    if event == cv2.EVENT_LBUTTONDOWN:
        click_count += 1
        if click_count == 1:
            point1 = (x, y)
            # A new selection has started: forget the previous second corner,
            # otherwise closing the window now would save a ROI that mixes the
            # new first corner with the old second one.
            point2 = None
        elif click_count == 2:
            point2 = (x, y)
            click_count = 0

            # Draw rectangle around the selected ROI
            roi_img = img.copy()
            if point1 is not None:
                cv2.rectangle(roi_img, point1, point2, (0, 255, 0), 2)
            cv2.imshow("Image", roi_img)
    elif event == cv2.EVENT_MOUSEMOVE and click_count == 1:
        # Only copy the image when a preview is actually drawn
        roi_img = img.copy()
        cv2.rectangle(roi_img, point1, (x, y), (0, 255, 0), 2)
        cv2.imshow("Image", roi_img)

def save_roi(image, point1, point2, output_file):
    """Crop the rectangle spanned by two corner points and write it to a file.

    Args:
        image: Image array indexed as ``image[row, column]``.
        point1: One corner of the region as ``(x, y)``.
        point2: The opposite corner as ``(x, y)``; the corners may be given
            in any order.
        output_file: Destination path passed to ``cv2.imwrite``.

    Raises:
        ValueError: If the selection has zero width or height inside the image.
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    height, width = image.shape[:2]

    # Sort each axis so the corners can come in any order, and clamp to the
    # image bounds: a negative index would wrap around in a NumPy slice.
    x_start, x_end = (min(max(v, 0), width) for v in sorted((point1[0], point2[0])))
    y_start, y_end = (min(max(v, 0), height) for v in sorted((point1[1], point2[1])))

    # Extract ROI from the original image
    roi = image[y_start:y_end, x_start:x_end]
    if roi.size == 0:
        raise ValueError(
            f"Selected ROI is empty: corners {point1} and {point2} span no pixels"
        )

    # Save ROI to another file; imwrite signals failure through its return value
    if not cv2.imwrite(output_file, roi):
        raise OSError(f"Could not write ROI to {output_file}")
    print(f"ROI saved to {output_file}")

# Open an image
dataDir = './data'
image_path = os.path.join(dataDir, 'ml.jpg')
img = cv2.imread(image_path)

# cv2.imread returns None (it does not raise) when the file cannot be read;
# fail here with a clear message instead of deep inside cv2.imshow
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Display the image
cv2.imshow("Image", img)

# Set mouse callback function for selecting ROI
cv2.setMouseCallback("Image", roi_mouse_callback)

# Wait for user to select ROI (any key press ends the selection)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Check if both points are selected
if point1 is not None and point2 is not None:
    # Save ROI to another file
    output_file = "roi.jpg"
    save_roi(img, point1, point2, output_file)
else:
    print("ROI selection canceled.")


ROI saved to roi.jpg


### 2. Images – representation, grayscale and color, color spaces

In [16]:
# Create a uniform single-channel image of 100 rows x 200 columns with every
# pixel at intensity 100 (on the 0-255 scale this is a dark gray, not white)
m = np.full((100, 200, 1), 100, dtype=np.uint8)

# Show it, wait for a key press, then close that window
cv2.imshow('Grayscale image', m)
cv2.waitKey(0)
cv2.destroyWindow('Grayscale image')

In [25]:
# Draw both diagonals of the image in place, at full intensity (255) and 5 px thick
cv2.line(m, pt1=(0, 0), pt2=(200, 100), color=255, thickness=5)
cv2.line(m, pt1=(200, 0), pt2=(0, 100), color=255, thickness=5)

# Show the result, wait for a key press, then close that window
cv2.imshow('Grayscale image with diagonals', m)
cv2.waitKey(0)
cv2.destroyWindow('Grayscale image with diagonals')

Exercise 2.1 - Create a color image with 100(lines)x200(columns) pixels with yellow color; draw the two diagonals of the image, one in red color, the other in blue color. Display the image.

In [28]:
# Create a solid yellow color image of 100 rows x 200 columns
height, width = 100, 200
yellow_color = (0, 255, 255)  # Yellow color in BGR format
color_img = np.empty((height, width, 3), dtype=np.uint8)
color_img[:] = yellow_color  # broadcast the same BGR triple to every pixel

# Draw one diagonal per color, each 5 px thick (colors are given in BGR order)
cv2.line(color_img, pt1=(0, 0), pt2=(width, height), color=(0, 0, 255), thickness=5)  # Red diagonal
cv2.line(color_img, pt1=(width, 0), pt2=(0, height), color=(255, 0, 0), thickness=5)  # Blue diagonal

# Show the result until a key is pressed, then close all windows
cv2.imshow('Color image with diagonals', color_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Exercise 2.2 - Read any color image, display it in one window, convert it to grayscale, display the grayscale image in another window, and save the grayscale image to a different file.

In [None]:
# TODO

Exercise 2.3 - Split the 3 RGB channels and show each channel in a separate window. Add a constant value to one of the channels, merge the channels into a new color image and show the resulting image.

In [None]:
# TODO

Exercise 2.4 - Convert the image to HSV, split the 3 HSV channels and show each channel in a separate window. Add a constant value to saturation channel, merge the channels into a new color image and show the resulting image.

In [None]:
# TODO

### 3. Video – acquisition and simple processing

In [23]:
# Define a VideoCapture Object (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): inside a notebook exit() stops the
    # kernel, which throws away the state of every other cell.
    raise RuntimeError("Cannot open camera")

frame_nr = 0
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # If frame is read correctly ret is True
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        # Display the resulting frame
        cv2.imshow('webcam', frame)

        # Poll the keyboard exactly once per frame. With two separate
        # waitKey calls, whichever call happens to receive the key press
        # consumes it, so 's' and 'q' were each missed about half of the time.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('s'):
            # Save the current frame and show it until a key is pressed
            frame_name = 'frame' + str(frame_nr) + '.png'
            cv2.imwrite(frame_name, frame)
            cv2.imshow("Saved frame: " + frame_name, frame)
            cv2.waitKey(0)
            cv2.destroyWindow("Saved frame: " + frame_name)
        elif key == ord('q'):
            # Quit
            break

        frame_nr += 1
finally:
    # Release the camera and close the windows even if the loop is
    # interrupted or raises, so the device is not left locked.
    cap.release()
    cv2.destroyAllWindows()

Exercise 3.1 - Using the previous example as the baseline, implement a script that acquires the video from the webcam, converts it to grayscale, and shows the frames in binary format (i.e. the intensity of each pixel is 0 or 255); use a threshold value of 128.

In [None]:
# TODO

Exercise 3.2 - Implement a simple detection/tracking algorithm for colored objects, using the following steps:
1) take each frame of the video;
2) convert from BGR to HSV color-space;
3) threshold the HSV image for a range of color values (creating a binary image);
4) extract the objects of the selected range (with a bitwise AND operation, using as operands the original and the binary image).

In [None]:
# TODO