In [1]:
# Core libraries for computer vision
import cv2  # OpenCV - the main computer vision library
import numpy as np  # Numerical operations on arrays/images

# Libraries for display in Jupyter Notebook
from PIL import Image, ImageDraw, ImageFont  # Python Imaging Library
from IPython.display import display, clear_output
import IPython.display

print("All libraries imported successfully!")
print(f"OpenCV version: {cv2.__version__}")

All libraries imported successfully!
OpenCV version: 4.12.0


In [2]:
def plot_images(frame_size=(400, 250), columns=2, **kwargs):
    """
    Display multiple images in a labelled grid and return the composite.

    Every image is converted to 8-bit RGB, resized to ``frame_size`` and
    pasted under its label on a white canvas, filling the grid row by row.

    Args:
        frame_size: (width, height) each image is resized to.
        columns: Number of columns in the grid (values below 1 are treated as 1).
        **kwargs: Images keyed by label. Each image is a NumPy array, either
            2-D / single-channel (grayscale) or in OpenCV BGR channel order.
            Non-uint8 images are multiplied by 255; floating-point images are
            clipped to [0, 1] first so out-of-range values cannot wrap around.

    Returns:
        The combined PIL image, or None when no images were given.

    Example:
        plot_images(**{"Original": img1, "Blurred": img2, "Edges": img3})
    """
    if not kwargs:
        print("No images provided!")
        return None

    tile_width, tile_height = frame_size
    images = []
    labels = []

    for label, frame in kwargs.items():
        # Ensure images are uint8
        if frame.dtype != np.uint8:
            if np.issubdtype(frame.dtype, np.floating):
                # Without clipping, values outside [0, 1] overflow in the cast.
                frame = np.clip(frame, 0.0, 1.0)
            frame = (frame * 255).astype(np.uint8)

        # Treat an (H, W, 1) array like a plain 2-D grayscale image
        if frame.ndim == 3 and frame.shape[2] == 1:
            frame = frame.reshape(frame.shape[:2])

        # Handle grayscale images
        if frame.ndim == 2:
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)

        # Convert BGR (OpenCV order) to RGB (PIL order), then resize
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, (tile_width, tile_height))

        images.append(Image.fromarray(frame))
        labels.append(label)

    # Calculate grid dimensions
    cols = max(1, columns)
    rows = int(np.ceil(len(images) / cols))

    # Create a white canvas; each grid cell is a label strip plus the image
    label_height = 40
    cell_height = tile_height + label_height
    combined_img = Image.new(
        "RGB", (tile_width * cols, rows * cell_height), (255, 255, 255)
    )

    draw = ImageDraw.Draw(combined_img)
    try:
        font = ImageFont.truetype("arial.ttf", 18)
    except OSError:
        # Arial is not installed everywhere; fall back to Pillow's built-in font.
        font = ImageFont.load_default()

    for idx, (img_pil, label) in enumerate(zip(images, labels)):
        row, col = divmod(idx, cols)
        x_offset = col * tile_width
        y_offset = row * cell_height

        # Draw label
        draw.text((x_offset + 10, y_offset + 10), label, fill="black", font=font)

        # Paste image below its label
        combined_img.paste(img_pil, (x_offset, y_offset + label_height))

    display(combined_img)

    return combined_img

In [3]:
def gamma_correction(image, gamma=1.2):
    """Apply gamma correction to an image through a 256-entry lookup table.

    Each intensity ``i`` is mapped to ``((i / 255) ** (1 / gamma)) * 255``,
    truncated to uint8, so gamma > 1 brightens mid-tones and gamma < 1
    darkens them.

    Args:
        image: Image handed to ``cv2.LUT``; the 256-entry table implies
            8-bit pixel values.
        gamma: Gamma value; must be strictly positive.

    Returns:
        The remapped image as returned by ``cv2.LUT``.

    Raises:
        ValueError: If ``gamma`` is not strictly positive (zero would divide
            by zero, negative values produce a meaningless table).
    """
    # "not gamma > 0" also rejects NaN, which would slip past "gamma <= 0".
    if not gamma > 0:
        raise ValueError(f"gamma must be a positive number, got {gamma!r}")

    inv_gamma = 1.0 / gamma
    table = np.array([
        ((i / 255.0) ** inv_gamma) * 255
        for i in np.arange(0, 256)
    ]).astype("uint8")
    return cv2.LUT(image, table)


def intensity_normalization(image):
    """Stretch the image intensities with min-max normalisation.

    Calls ``cv2.normalize`` with ``NORM_MINMAX`` and the target range
    alpha=0, beta=255, and returns its result unchanged.
    """
    return cv2.normalize(
        image,
        None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
    )

In [4]:
def binary_threshold(image):
    """Apply a fixed binary threshold (level 127, max value 255) to a BGR image.

    The image is converted to grayscale, thresholded with THRESH_BINARY,
    and the single-channel result is expanded back to 3-channel BGR.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, result); only the result is needed.
    mask = cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)[1]
    return cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

def otsu_threshold(image):
    """Apply binary thresholding with the THRESH_OTSU flag to a BGR image.

    The image is converted to grayscale, thresholded, and the
    single-channel result is expanded back to 3-channel BGR.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # The two flag values are combined into a single threshold type.
    threshold_type = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    mask = cv2.threshold(grayscale, 127, 255, threshold_type)[1]
    return cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

def adaptive_threshold(image):
    """Apply Gaussian adaptive thresholding to a BGR image.

    The image is converted to grayscale, thresholded with
    ADAPTIVE_THRESH_GAUSSIAN_C (block size 11, constant 2, max value 255),
    and the single-channel result is expanded back to 3-channel BGR.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    mask = cv2.adaptiveThreshold(
        grayscale,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

In [5]:
def erosion(image):
    """Erode the grayscale version of a BGR image and return it as BGR.

    Uses a 3x3 rectangular structuring element for a single iteration.
    Erosion shrinks bright areas, which removes small white regions.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    rect_3x3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    shrunk = cv2.erode(grayscale, rect_3x3, iterations=1)
    return cv2.cvtColor(shrunk, cv2.COLOR_GRAY2BGR)

def dilation(image):
    """Dilate the grayscale version of a BGR image and return it as BGR.

    Uses a 5x5 rectangular structuring element for a single iteration.
    Like the other morphology helpers in this notebook, the input is first
    converted to grayscale; dilating the 3-channel image directly made the
    final GRAY2BGR conversion fail, because that conversion needs a
    single-channel source.

    Args:
        image: 3-channel BGR image.

    Returns:
        The dilated grayscale image expanded to 3-channel BGR.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

    # Dilation - grows white regions and fills small dark gaps
    dilated = cv2.dilate(gray, kernel, iterations=1)
    return cv2.cvtColor(dilated, cv2.COLOR_GRAY2BGR)

def closing(image):
    """Apply morphological closing to the grayscale version of a BGR image.

    Uses MORPH_CLOSE with a 3x3 rectangular structuring element and
    returns the result expanded to 3-channel BGR.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    rect_3x3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    # Closing = dilation followed by erosion; it fills small dark holes.
    result = cv2.morphologyEx(grayscale, cv2.MORPH_CLOSE, rect_3x3)
    return cv2.cvtColor(result, cv2.COLOR_GRAY2BGR)

def opening(image):
    """Apply morphological opening to the grayscale version of a BGR image.

    Uses MORPH_OPEN with a 3x3 rectangular structuring element and
    returns the result expanded to 3-channel BGR.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    rect_3x3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    # Opening = erosion followed by dilation; it removes small white specks.
    result = cv2.morphologyEx(grayscale, cv2.MORPH_OPEN, rect_3x3)
    return cv2.cvtColor(result, cv2.COLOR_GRAY2BGR)

In [None]:
# Frame-by-frame segmentation of a video clip:
# blur -> gamma -> normalise -> threshold -> morphology -> area filter.
# Press 'q' in any OpenCV window to stop playback early.
cap = cv2.VideoCapture('Mallet_videos/IMG_9105.MOV')

if not cap.isOpened():
    # Raise instead of calling exit(): inside a notebook exit() ends the
    # kernel session rather than just stopping this cell.
    raise RuntimeError("Error opening video file. Check the path and file format.")
print("Video opened successfully.")

desired_width = 640
desired_height = 1000
display_size = (desired_width, desired_height)

# The reported FPS can be 0 when the backend cannot determine it, which would
# divide by zero; a delay of 0 would also make waitKey block until a key press.
fps = cap.get(cv2.CAP_PROP_FPS)
frame_delay_ms = max(1, int(1000 / fps)) if fps > 0 else 33

# Loop-invariant structuring element for the morphology step.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

alpha = 0.9  # Contrast control (1.0-3.0 range is common for manual adjust)
beta = 0     # Brightness control

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # --------------------------------------
        # Resize (BGR, 3-channel)
        # --------------------------------------
        resized_frame = cv2.resize(
            frame, display_size, interpolation=cv2.INTER_AREA
        )

        # --------------------------------------
        # Convert to LAB and pick the working channel
        # --------------------------------------
        lab_img = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2LAB)
        L, A, B = cv2.split(lab_img)

        # --------------------------------------
        # Single-channel processing starts here (LAB "B" channel)
        # --------------------------------------
        img1 = cv2.GaussianBlur(B, (7, 7), 0)
        img2 = gamma_correction(img1, gamma=1.5)
        img = intensity_normalization(img2)
        img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

        # --------------------------------------
        # Thresholding
        # --------------------------------------

        # Binary threshold
        _, binary = cv2.threshold(
            img, 140, 255, cv2.THRESH_BINARY
        )

        # Otsu threshold
        _, otsu = cv2.threshold(
            img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )

        # Adaptive threshold (displayed for comparison only)
        adaptive = cv2.adaptiveThreshold(
            img,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            41,
            7
        )

        # Combine Binary & Otsu: keep pixels that pass both thresholds
        combined = cv2.bitwise_and(binary, otsu)

        # --------------------------------------
        # Morphology (1-channel only)
        # --------------------------------------
        opened = cv2.morphologyEx(combined, cv2.MORPH_OPEN, kernel, iterations=2)
        # NOTE(review): the second pass is also MORPH_OPEN, as in the original
        # cell; a MORPH_CLOSE pass was tried there and left commented out.
        refined = cv2.morphologyEx(opened, cv2.MORPH_OPEN, kernel, iterations=1)

        # Enforce clean binary
        _, refined = cv2.threshold(refined, 127, 255, cv2.THRESH_BINARY)

        # --------------------------------------
        # Connected Components (FINAL MASK)
        # --------------------------------------
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
            refined, connectivity=8
        )

        # Keep components whose pixel area lies in [100, 5000]; label 0 is
        # skipped, exactly as the per-label loop starting at 1 did.
        areas = stats[:, cv2.CC_STAT_AREA]
        keep = (areas >= 100) & (areas <= 5000)
        keep[0] = False
        final_mask = np.where(keep[labels], 255, 0).astype(np.uint8)

        # --------------------------------------
        # Apply mask to original frame
        # --------------------------------------
        masked_output = cv2.bitwise_and(
            resized_frame, resized_frame, mask=final_mask
        )

        # --------------------------------------
        # Visualization
        # --------------------------------------
        cv2.imshow("Original", resized_frame)
        cv2.imshow("Binary", binary)
        cv2.imshow("Otsu", otsu)
        cv2.imshow("Adaptive", adaptive)
        # cv2.imshow("combined", combined)
        # cv2.imshow("Refined", refined)
        cv2.imshow("Final Mask", final_mask)
        cv2.imshow("Masked Output", masked_output)

        if cv2.waitKey(frame_delay_ms) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the windows, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()




Video opened successfully.


In [7]:
# import cv2
# import numpy as np

# cap = cv2.VideoCapture("Mallet_videos/IMG_9105.MOV")
# fps = cap.get(cv2.CAP_PROP_FPS)


# while cap.isOpened():
#     ret, frame = cap.read()
#     if not ret:
#         break

#     frame = cv2.resize(frame, (640, 1000))

#     # -----------------------------
#     # Downscale
#     # -----------------------------
#     small = cv2.resize(frame, None, fx=0.5, fy=0.5)

#     # -----------------------------
#     # Convert to HSV
#     # -----------------------------
#     hsv = cv2.cvtColor(small, cv2.COLOR_BGR2HSV)
#     H, S, V = cv2.split(hsv)

#     # -----------------------------
#     # Prepare K-Means data (H + S ONLY)
#     # -----------------------------
#     Z = np.stack((H.flatten(), S.flatten()), axis=1)
#     Z = np.float32(Z)

#     # -----------------------------
#     # K-Means
#     # -----------------------------
#     criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
#     K = 4   # fewer clusters work better in HS space

#     _, labels, centers = cv2.kmeans(
#         Z,
#         K,
#         None,
#         criteria,
#         10,
#         cv2.KMEANS_RANDOM_CENTERS
#     )

#     # -----------------------------
#     # Select mallet cluster
#     # (highest saturation)
#     # -----------------------------
#     cluster_sat = []
#     for i in range(K):
#         sat = np.mean(Z[labels.flatten() == i][:, 1])
#         cluster_sat.append(sat)

#     mallet_cluster = np.argmax(cluster_sat)

#     # -----------------------------
#     # Binary mask
#     # -----------------------------
#     labels_2d = labels.reshape(small.shape[:2])
#     mask_small = np.zeros_like(labels_2d, dtype=np.uint8)
#     mask_small[labels_2d == mallet_cluster] = 255

#     mask = cv2.resize(
#         mask_small,
#         (frame.shape[1], frame.shape[0]),
#         interpolation=cv2.INTER_NEAREST
#     )

#     # -----------------------------
#     # Morphological cleanup
#     # -----------------------------
#     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
#     mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
#     mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

#     # -----------------------------
#     # Apply mask
#     # -----------------------------
#     result = cv2.bitwise_and(frame, frame, mask=mask)

#     # -----------------------------
#     # Display
#     # -----------------------------
#     cv2.imshow("Original", frame)
#     cv2.imshow("Mallet Mask", mask)
#     cv2.imshow("Segmented Mallet", result)



#     if cv2.waitKey(int(1000 / fps)) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()


In [8]:
# cap = cv2.VideoCapture('Mallet_videos/IMG_9113.MOV')

# if not cap.isOpened():
#     print("Error opening video file. Check the path and file format.")
#     exit()
# else:
#     print("Video opened successfully.")

# desired_width = 640
# desired_height = 1000
# display_size = (desired_width, desired_height)

# fps = cap.get(cv2.CAP_PROP_FPS)

# # CLAHE object for contrast enhancementfinal_mask
# clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

# while cap.isOpened():
#     ret, frame = cap.read()
#     if not ret:
#         break

#     # --------------------------------------
#     # Resize (BGR, 3-channel)
#     # --------------------------------------
#     resized_frame = cv2.resize(
#         frame, display_size, interpolation=cv2.INTER_AREA
#     )
#     h, w = resized_frame.shape[:2]
#     small = cv2.resize(frame, None, fx=0.5, fy=0.5)
#     Z = small.reshape((-1,3))
#     # convert to np.float32
#     Z = np.float32(Z)
#     # define criteria, number of clusters(K) and apply kmeans()
#     criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
#     K = 8
#     ret,label,center=cv2.kmeans(Z,K,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
#     # Now convert back into uint8, and make original image
#     center = np.uint8(center)
#     res = center[label.flatten()]
#     res2 = res.reshape((small.shape))
#     cv2.imshow('Kmeans',res2)
#     # --------------------------------------
#     # Convert to LAB (ONLY cvtColor used)
#     # --------------------------------------
#     lab_img = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2LAB)
#     L, A, B = cv2.split(lab_img)
#     lab_img2= cv2.cvtColor(resized_frame, cv2.COLOR_BGR2HSV)
#     H, S, V = cv2.split(lab_img2)
#     B2, G, R = cv2.split(resized_frame)

#     # --------------------------------------
#     # Single-channel processing starts here
#     # --------------------------------------
#     img1 = cv2.GaussianBlur(B, (7, 7), 0)


#     img2 = gamma_correction(img1, gamma=1.5)
#     img = intensity_normalization(img2)
    
#     alpha = 0.9 # Contrast control (1.0-3.0 range is common for manual adjust)
#     beta = 0    # Brightness control

#     img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)
#     img = img.astype(np.uint8)

#     # --------------------------------------
#     # THRESHOLDING (INLINE, SAFE)
#     # --------------------------------------

#     # Binary threshold
#     _, binary = cv2.threshold(
#         img, 140, 255, cv2.THRESH_BINARY
#     )

#     # Otsu threshold
#     _, otsu = cv2.threshold(
#         img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
#     )

#     # Adaptive threshold
#     adaptive = cv2.adaptiveThreshold(
#         img,
#         255,
#         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
#         cv2.THRESH_BINARY,
#         41,
#         7
#     )

#     # Combine Binary & Otsu
#     combined = cv2.bitwise_and(binary, otsu)

#     # --------------------------------------
#     # Morphology (1-channel only)
#     # --------------------------------------
#     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

    
    
#     opened = cv2.morphologyEx(combined, cv2.MORPH_OPEN, kernel, iterations=2)
#     closed = cv2.morphologyEx(opened, cv2.MORPH_OPEN, kernel, iterations=1)
#     # closed = cv2.morphologyEx(opened, cv2.MORPH_CLOSE, kernel, iterations=1)
    
  

#     # Enforce clean binary
#     _,closed = cv2.threshold(closed, 127, 255, cv2.THRESH_BINARY)
    
#     closed = closed.astype(np.uint8)

#     # --------------------------------------
#     # Connected Components (FINAL MASK)
#     # --------------------------------------
#     num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
#         closed, connectivity=8
#     )

#     final_mask = np.zeros_like(closed)
#     for i in range(1, num_labels):
#         area = stats[i, cv2.CC_STAT_AREA]
#         if area >= 100 and area <= 5000:  # Minimum area threshold
#             final_mask[labels == i] = 255   

    

    
    


#     # --------------------------------------
#     # Apply mask to original frame
#     # --------------------------------------
#     masked_output = cv2.bitwise_and(
#         resized_frame, resized_frame, mask=final_mask
#     )

#     # --------------------------------------
#     # Visualization
#     # --------------------------------------
    

        
    

#     cv2.imshow("Original", resized_frame)
#     # cv2.imshow("Binary", binary)
#     # cv2.imshow("Otsu", otsu)
#     # cv2.imshow("combined", combined)
#     # cv2.imshow("Refined", closed)
#     cv2.imshow("Final Mask", final_mask)
#     cv2.imshow("Masked Output", masked_output)


#     if cv2.waitKey(int(1000 / fps)) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()

In [9]:
# K = 2 # Number of clusters (colors)
# # Define criteria: stop after 10 iterations or if the epsilon (accuracy) is 1.0
# criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
# # Open the video file (replace 'input_video.mp4' with your video file path)
# cap = cv2.VideoCapture('Mallet_videos/IMG_9117.MOV')

# if not cap.isOpened():
#     print("Error opening video file. Check the path and file format.")
#     exit()
# else:
#     print("Video opened successfully.")

# desired_width = 640
# desired_height = 1000
# display_size = (desired_width, desired_height)

# fps = cap.get(cv2.CAP_PROP_FPS)

# while cap.isOpened():
#     ret, frame = cap.read()
#     if not ret:
#         break

#     # 1. Reshape the frame for K-Means: from (height, width, 3) to (pixels, 3)
#     pixel_values = frame.reshape((-1, 3))
#     # Convert to float32 data type, which is required by the OpenCV kmeans function
#     pixel_values = np.float32(pixel_values)

#     # 2. Apply K-Means Clustering
#     # 'attempts' is the number of times the algorithm is executed with different initial labels
#     _, labels, centers = cv2.kmeans(pixel_values, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

#     # 3. Reconstruct the segmented image from the centers and labels
#     # Convert centers back to uint8
#     centers = np.uint8(centers)
#     # Map labels to their respective cluster center colors
#     segmented_data = centers[labels.flatten()]
#     # Reshape the data back to the original image dimensions
#     segmented_frame = segmented_data.reshape((frame.shape))

#     # Display the original and segmented frames
#     cv2.imshow('Original Video', frame)
#     cv2.imshow('K-Means Segmented Video', segmented_frame)

#     # Break the loop if 'q' is pressed
#     if cv2.waitKey(int(1000 / fps)) & 0xFF == ord('q'):
#         break

# # Release everything when the job is finished
# cap.release()
# cv2.destroyAllWindows()

In [10]:
# cap = cv2.VideoCapture('Mallet_videos/IMG_9117.MOV')

# if not cap.isOpened():
#     print("Error opening video file. Check the path and file format.")
#     exit()
# else:
#     print("Video opened successfully.")

# desired_width = 640
# desired_height = 1000
# display_size = (desired_width, desired_height)

# fps = cap.get(cv2.CAP_PROP_FPS)

# while cap.isOpened():
#     ret, frame = cap.read()
#     if not ret:
#         break

#     # --------------------------------------
#     # Resize (BGR, 3-channel)
#     # --------------------------------------
#     resized_frame = cv2.resize(
#         frame, display_size, interpolation=cv2.INTER_AREA
#     )
#     hsv = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2HSV)
#     H, S, V = cv2.split(hsv)

#     lower_orange = np.array([5, 80, 80])
#     upper_orange = np.array([25, 255, 255])

#     color_mask = cv2.inRange(hsv, lower_orange, upper_orange)

#     _, sat_mask = cv2.threshold(S, 60, 255, cv2.THRESH_BINARY)
#     color_mask = cv2.bitwise_and(color_mask, sat_mask)


#     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
#     clean = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, kernel, iterations=1)
#     clean = cv2.morphologyEx(clean, cv2.MORPH_CLOSE, kernel, iterations=2)

#     num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(clean, 8)

#     final_mask = np.zeros_like(clean)
#     for i in range(1, num_labels):
#         area = stats[i, cv2.CC_STAT_AREA]
#         if 500 < area < 20000:
#             final_mask[labels == i] = 255

#     edges = cv2.Canny(V, 50, 150)
#     edges = cv2.dilate(edges, None)

#     final_mask = cv2.bitwise_or(final_mask, edges)

#     cv2.imshow("Final Mask", final_mask)
#     if cv2.waitKey(int(1000 / fps)) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()




