In [1]:
import cv2
import numpy as np

### Video

In [2]:
cap = cv2.VideoCapture("city.mp4")

# 0 for the default webcam
# Path to a video file for playback
# 1, 2, ... for external cameras
# IP

In [3]:
ret, frame = cap.read()     # tuple type

In [4]:
frame.shape     # 3 dimensions

(1080, 1920, 3)

In [5]:
while True:
    ret, frame = cap.read()
    
    if frame is None:
        break

    frame = cv2.resize(frame, (500, 300))
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) == ord('q'):      # waitKey() for speed. can put the number (getting greater, getting slower)
        break
    
cap.release()    
cv2.destroyAllWindows()

2025-01-28 17:44:37.873 Python[39377:12742631] +[IMKClient subclass]: chose IMKClient_Modern
2025-01-28 17:44:37.873 Python[39377:12742631] +[IMKInputSession subclass]: chose IMKInputSession_Modern


In [6]:
ord('a') #ASCII

97

### FPS (Frame Per Second)

In [7]:
desired_fps = 10

video_path = "city.mp4"
cap = cv2.VideoCapture(video_path)

original_fps = int(cap.get(cv2.CAP_PROP_FPS))       # get original fps from original video
frame_interval = int(original_fps / desired_fps)

frame_count = 0
while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    # Skip frames to match the desired FPS
    if frame_count % frame_interval == 0:
        cv2.imshow("Frame", frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        
    frame_count += 1
    
cap.release()
cv2.destroyAllWindows()

### Drawing Shapes

In [8]:
# blank image
# np.zeros((height, width, channels), dtype)
image = np.zeros((400, 400, 3), dtype=np.uint8)

# cv2.line(base image, pt1, pt2, colour, line thickness)
cv2.line(image, (50, 50), (350, 350), (255, 255, 255), thickness=3)

# draw rectangle line(pt1: top-left corner, pt2: bottom-right corner)
cv2.rectangle(image, (100, 100), (300,300), (0, 0, 255), thickness=5)

# cv2.circle(image, centre(centre of circle x,y), radius(should be int), colour, thickness)
cv2.circle(image, (200, 200), 100, (0, 255, 0), thickness=5)

cv2.imshow("line example", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Text

In [9]:
# blank image
# np.zeros((height, width, channels), dtype)
image = np.zeros((400, 400, 3), dtype=np.uint8)

# cv2.putText(image, text, org(start point: left-bottom), font, font_scale(the greater the bigger), colour, thickness, line_type)
cv2.putText(image, "Hello OpenCV!", (50, 200), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), thickness=2, lineType=cv2.LINE_AA)

cv2.imshow("text example", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Point Operator
- The value of each pixel in the output depends only on the value of the same pixel in the input (and possibly some global information or some parameters)
- Each pixel is processed independently
- No influence from neighboring pixels
- e.g., Contrast Adjustment, Thresholding, Brightness Adjustment

#### Addition and Substraction
- Using arithmetic operations in Numpy does not work when values can go above 255 or lower than 0
- OpenCV's saturated arithmetic ensures that values above 255 are set to 255, and values below 0 are clamped to 0
- Use OpenCV functions instead

In [10]:
cat = cv2.imread("Cat.jpg")

# make brighter
# cat = cv2.add(cat, 110)

# only for last channel
# cat = cv2.add(cat[:,:,2], 110)

# make darker
cat = cv2.subtract(cat, 50)

cv2.imshow("cat", cat)
cv2.waitKey(0)
cv2.destroyAllWindows()

#### Multiply and Divide

In [15]:
cat = cv2.imread("Cat.jpg")

# make bright colors brighter - use scale > 1
# make dark colors darker - use scale < 1
cat = cv2.multiply(cat, np.ones(cat.shape, dtype="uint8"), scale=0.5)

cv2.imshow("cat", cat)
cv2.waitKey(0)
cv2.destroyAllWindows()

#### Thresholding
- A technique to segment an image by converting it into binary form
- Separates foreground (object) from background based on intensity values
- Object detection and segmentation
- Preprocessing for OCR and feature extraction

In [18]:
cat = cv2.imread("Cat.jpg")

#  retval, dst = cv2.threshold(image(gray scale is recommended), thresh(threshold value), maxval, type)
_, cat = cv2.threshold(cv2.cvtColor(cat, cv2.COLOR_BGR2GRAY), 127, 255, cv2.THRESH_BINARY)

cv2.imshow("cat", cat)
cv2.waitKey(0)
cv2.destroyAllWindows()

#### Linear Blend

In [21]:
img1 = cv2.imread("Dog and Cat.jpg")
img1 =cv2.resize(src=img1, dsize=(720,1028))

img2 = cv2.imread("Cat.jpg")
img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))
img3 = cv2.addWeighted(img1, 0.6, img2, 0.4, 0)     # img1: forefront, img2: background

cv2.imshow('shape', img3)
cv2.waitKey(0)

2025-01-28 18:10:32.389 Python[39377:12742631] _TIPropertyValueIsValid called with 16 on nil context!
2025-01-28 18:10:32.389 Python[39377:12742631] imkxpc_getApplicationProperty:reply: called with incorrect property value 16, bailing.
2025-01-28 18:10:32.389 Python[39377:12742631] Text input context does not respond to _valueForTIProperty:


100

: 