In [None]:
# Computer Vision Assignment 1
# Sumanth Hegde
# 2023PAI9041
# Cohort 2





# Answer 1

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from skimage import io
from sklearn.metrics import mean_squared_error

def kmeans_quantization(image, k):
    """Quantize ``image`` to ``k`` colours using k-means clustering.

    Every pixel is replaced by the centre of the cluster it was assigned to.

    Args:
        image: Image array of shape (H, W, C) -- e.g. RGB or RGBA -- or
            (H, W) for a single-channel image.
        k: Number of clusters, i.e. the size of the quantized palette.

    Returns:
        Array with the same shape as ``image`` in which each pixel holds the
        (unrounded) cluster-centre value of its cluster.
    """
    image = np.asarray(image)

    # One row per pixel, one column per channel. The channel count is read
    # from the image instead of being fixed at 3, so RGBA or grayscale input
    # is not silently regrouped into wrong 3-value "pixels".
    channels = image.shape[-1] if image.ndim == 3 else 1
    pixels = image.reshape((-1, channels))

    # Fixed random_state keeps the clustering reproducible between runs.
    kmeans = KMeans(n_clusters=k, random_state=0).fit(pixels)

    # Look up each pixel's centroid through its cluster label, then restore
    # the original image layout.
    quantized_pixels = kmeans.cluster_centers_[kmeans.labels_]
    return quantized_pixels.reshape(image.shape)

def calculate_mse(original_image, quantized_image):
    """Return the mean squared error between two images.

    Both inputs are flattened and compared element by element, so they must
    contain the same number of values.

    Args:
        original_image: Reference image (any numeric dtype).
        quantized_image: Image to compare against the reference.

    Returns:
        The mean of the squared element-wise differences, as a float.

    Raises:
        ValueError: If the inputs are empty or differ in number of elements.
    """
    # Promote to float64 before subtracting: with two unsigned-integer images
    # the difference would otherwise wrap around instead of going negative.
    original = np.asarray(original_image, dtype=np.float64).ravel()
    quantized = np.asarray(quantized_image, dtype=np.float64).ravel()

    if original.size != quantized.size:
        raise ValueError(
            "Images must contain the same number of values: "
            f"{original.size} != {quantized.size}"
        )
    if original.size == 0:
        raise ValueError("Cannot compute the MSE of empty images")

    return float(np.mean((original - quantized) ** 2))

def main(image_path):
    """Quantize an image with k-means for several k and show the results.

    Reads the image at ``image_path``, quantizes it for k = 1, 2, 4, 8, 16,
    prints the list of MSE values, and displays the original next to every
    quantized version in one figure.

    Args:
        image_path: Path of the image to read.
    """
    # Read the RGB image
    original_image = io.imread(image_path)

    # Quantize for each k and record the error against the original
    k_values = [1, 2, 4, 8, 16]
    mse_values = []
    quantized_images = []
    for k in k_values:
        quantized_image = kmeans_quantization(original_image, k)
        quantized_images.append(quantized_image)
        mse_values.append(calculate_mse(original_image, quantized_image))
    print(mse_values)

    # Use ONE grid for all panels (original + one per k). Mixing a 1x2 grid
    # for the original with a 2x5 grid for the quantized images makes the
    # panels overlap each other.
    n_panels = len(k_values) + 1
    n_cols = (n_panels + 1) // 2
    fig, axes = plt.subplots(2, n_cols, figsize=(4 * n_cols, 8))
    axes = axes.ravel()

    axes[0].imshow(original_image)
    axes[0].set_title('Original Image')

    panels = zip(axes[1:], k_values, quantized_images, mse_values)
    for ax, k, quantized_image, mse in panels:
        # Cluster centres are floats; cast to 8-bit for display
        ax.imshow(quantized_image.astype(np.uint8))
        ax.set_title(f'k={k}\nMSE={mse:.2f}')

    # Hide the axes of every panel, including any unused trailing cell
    for ax in axes:
        ax.axis('off')

    fig.tight_layout()
    plt.show()

# Let us use the 2nd image.
# Raw string: the backslash in the Windows path must stay a literal backslash
# ("\R" is not a valid escape sequence and triggers a warning otherwise).
image_path = r'F:\RGB image 2.jpg'
main(image_path)


K-means clustering is a widely used method for image quantization, which involves reducing the number of colors in an image to a specified number of clusters (k).



Our observation - 

From the output of the code above, we can see that the MSE decreases as the value of k increases, which suggests choosing a high value of k. However, if the value of k is too high, quantization may no longer provide the desired compression benefits. Therefore, selecting a suitable value of k (a trade-off between reconstruction error and palette size) is very important.
Also, the MSE values for the tested k values (1, 2, 4, 8, 16) are generally high. This suggests that other image quantization methods could also be considered, such as the Median Cut algorithm, Octree quantization, uniform quantization, neural networks, and error diffusion (dithering).



Effectiveness of K-Means Image Quantization - 

1) Color Reduction Efficiency - K-means is highly effective at reducing the number of colors to a predefined number of k colors. This can dramatically decrease the image size while retaining the visual essence of the original image.
2) Adaptability - K-means adapts to the specific color distribution of the image, ensuring that the selected colors (centroids of the clusters) are representative of the original image palette.
3) MSE Reduction - K-means minimizes the within-cluster sum of squared distances, which is exactly the squared error between the original pixels and their quantized colors, so it directly reduces the MSE between the original and the quantized image.


Limitations of K-Means Image Quantization -

1) Selection of K - Too small a value of k may lead to a high MSE due to oversimplification of the color palette, while too large a value may not provide the desired compression benefits. Finding the optimal k often requires domain knowledge or experimentation.
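2) Initial Centroid Sensitivity - K-means is sensitive to the initial placement of centroids: different initializations can produce different color palettes for the same image.
3) Local Minima - The algorithm may converge to a local minimum rather than the global minimum, which can cause a higher MSE than the best achievable for the chosen k.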
4) Computationally Intensive - For large images or high values of k, k-means can be computationally intensive due to the iterative nature of the algorithm. This can be a limitation in scenarios requiring real-time processing.
5) Uniformity Assumption - K-means implicitly assumes roughly spherical clusters of similar size and density, which might not match the color distribution of every image.

In [None]:
# Install NumPy and OpenCV, which the cells below import as `numpy` and `cv2`.
pip install numpy opencv-python


In [None]:
# Answer 2


import cv2
import numpy as np

# Load the image. Raw string: the backslash in the Windows path must stay a
# literal backslash ("\c" is not a valid escape sequence).
image_path = r"F:\connected component.jpg"
original_image = cv2.imread(image_path)

# cv2.imread reports failure by returning None rather than raising, so fail
# here with a clear message instead of a cryptic error in cvtColor.
if original_image is None:
    raise FileNotFoundError(f"Unable to read image: {image_path}")

# Convert to grayscale
gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)

# Apply binary thresholding to create a binary image
_, binary_image = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)

# Find connected components ---- we will use '8 connectivity'
num_labels, labels_im, stats, centroids = cv2.connectedComponentsWithStats(
    binary_image, connectivity=8, ltype=cv2.CV_32S
)

# Create an output image to draw the labels (background stays black)
output_image = np.zeros(original_image.shape, np.uint8)

# Label 0 is the background, so real components start at label 1.
# Colours are drawn from 50..255 so no component is nearly as dark as the
# black background.
for label in range(1, num_labels):
    output_image[labels_im == label] = np.random.randint(50, 256, size=(3,))

# Draw the bounding boxes only after every component has been filled, so a
# later fill cannot paint over an earlier box.
for label in range(1, num_labels):
    # stats row layout: left, top, width, height, area
    x, y, w, h = (int(value) for value in stats[label, :4])
    # The box covers columns x..x+w-1 and rows y..y+h-1
    cv2.rectangle(output_image, (x, y), (x + w - 1, y + h - 1), (0, 255, 0), 2)

# Show the original and output images
cv2.imshow("Original Image", original_image)
cv2.imshow("Connected Components", output_image)

cv2.waitKey(0)
cv2.destroyAllWindows()
    

In [None]:
# Answer 3


import cv2
import numpy as np
import matplotlib.pyplot as plt

# Raw strings keep every backslash literal. In a normal string literal the
# "\r" in "F:\reduce_lena_256.tif" is a carriage return, which corrupts the
# output path.
lena_gray_512_path = r"F:\lena_gray_512.tif"
lena_gray_256_path = r"F:\lena_gray_256.tif"
reduce_lena_256_path = r"F:\reduce_lena_256.tif"

# Load the original Lena image and the 256x256 reference image
lena_gray_512 = cv2.imread(lena_gray_512_path, cv2.IMREAD_GRAYSCALE)
lena_gray_256 = cv2.imread(lena_gray_256_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None on failure, so check BEFORE using either image
if lena_gray_512 is None or lena_gray_256 is None:
    print("Error: Unable to load images.")
else:
    # Downsample the Lena image to 256x256
    reduce_lena_256 = cv2.resize(lena_gray_512, (256, 256))

    # Save the reduced image; imwrite returns False when the write fails
    if not cv2.imwrite(reduce_lena_256_path, reduce_lena_256):
        print("Warning: could not save", reduce_lena_256_path)

    # Compute PSNR between the reduced and reference images
    psnr = cv2.PSNR(lena_gray_256, reduce_lena_256)
    print("PSNR between reduced_lena_256.tif and lena_gray_256.tif:", psnr)

    # Signed difference in floating point. Subtracting the uint8 images
    # directly wraps around modulo 256 and gives a meaningless MSE.
    difference = lena_gray_256.astype(np.float64) - reduce_lena_256.astype(np.float64)

    # Compute MSE between the reduced and reference images
    mse = np.mean(difference ** 2)
    print("Mean Squared Error (MSE) between the images:", mse)

    # Plot the difference between the images
    plt.imshow(difference, cmap='gray')
    plt.title('MSE Difference between Images')
    plt.colorbar()
    plt.show()

    # Resize the reduced image to original dimensions using different interpolation methods
    nearest_neighbour = cv2.resize(reduce_lena_256, (512, 512), interpolation=cv2.INTER_NEAREST)
    bilinear = cv2.resize(reduce_lena_256, (512, 512), interpolation=cv2.INTER_LINEAR)
    bicubic = cv2.resize(reduce_lena_256, (512, 512), interpolation=cv2.INTER_CUBIC)

    # Display the original and the three upsampled images in a 2x2 grid
    fig, axs = plt.subplots(2, 2, figsize=(10, 10))
    panels = [
        ('Original Image', lena_gray_512),
        ('Nearest Neighbour Interpolation', nearest_neighbour),
        ('Bilinear Interpolation', bilinear),
        ('Bicubic Interpolation', bicubic),
    ]
    for ax, (title, panel_image) in zip(axs.flat, panels):
        ax.imshow(panel_image, cmap='gray')
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
# Answer 4


import cv2
import matplotlib.pyplot as plt
import numpy as np

def apply_filters(image_path):
    """Denoise, sharpen and equalize a grayscale image, showing each stage.

    The image is read as grayscale, passed through a Gaussian blur, a median
    blur and a bilateral filter, combined with the noisy input in a weighted
    sum (the step labelled unsharp masking), and finally histogram-equalized.
    The input and the three intermediate results are shown side by side.

    Args:
        image_path: Path of the noisy image to read.

    Returns:
        None. Prints an error message and returns early when the image
        cannot be read.
    """
    noisy_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if noisy_image is None:
        print("Error: Could not read the image.")
        return

    # Noise reduction chain: each filter works on the previous filter's output
    smoothed = cv2.GaussianBlur(noisy_image, (3, 3), 9)
    smoothed = cv2.medianBlur(smoothed, 15)
    filtered_image = cv2.bilateralFilter(smoothed, 9, 50, 50)

    # Sharpening step: 1.05 * filtered - 0.10 * noisy
    unsharp_mask = cv2.addWeighted(filtered_image, 1.05, noisy_image, -0.10, 0)

    # Contrast enhancement by histogram equalization
    equalized_image = cv2.equalizeHist(unsharp_mask)

    # (title, image) for every panel, in display order
    stages = (
        ('Input Image', noisy_image),
        ('Filtered Image', filtered_image),
        ('Sharpened Image (Unsharp Masking)', unsharp_mask),
        ('Equalized Image', equalized_image),
    )

    plt.figure(figsize=(20, 5))
    for position, (title, stage_image) in enumerate(stages, start=1):
        plt.subplot(1, 4, position)
        plt.imshow(stage_image, cmap='gray')
        plt.title(title)
        plt.axis('off')

    plt.show()

# Specify the image path and run the filtering pipeline on it.
# The path uses a forward slash, so it contains no backslash escapes.
image_path = "F:/Noisy image.jpg"
apply_filters(image_path)


Let us look at each of the filters used in the code above - 

1. Gaussian Blur Filter - Reduces Gaussian noise and smooths out details in the image.

2. Median Blur Filter - Removes salt-and-pepper noise, which appears as isolated bright and dark pixels in the image.

3. Bilateral Filter - Preserves edges while reducing noise by applying a non-linear filter based on both spatial distance and intensity difference.

4. Unsharp Masking - Enhances overall image sharpness by emphasizing edges and details.

5. Histogram Equalization - Enhances the contrast of the image by redistributing pixel intensity values to cover the entire dynamic range more evenly.

In [None]:
# Answer 5


import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the image. Raw string: the backslash in the Windows path must stay a
# literal backslash ("\l" is not a valid escape sequence).
image_path = r"F:\lake.tif"
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None on failure; stop with a clear message
if image is None:
    raise FileNotFoundError(f"Unable to read image: {image_path}")

# Compute first order derivative along 'x'
dx = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)

# Compute first order derivative along 'y'
dy = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)

# Compute gradient image magnitude
gradient_magnitude = cv2.magnitude(dx, dy)

# Thresholding to output edge map
threshold_value = 50
_, edge_map_first_order = cv2.threshold(gradient_magnitude, threshold_value, 255, cv2.THRESH_BINARY)

# Edge map computed using second order derivative.
# convertScaleAbs takes the absolute value and SATURATES to 0..255. A plain
# np.uint8(...) cast wraps values above 255, so strong responses would fall
# below the threshold and be lost.
laplacian = cv2.Laplacian(image, cv2.CV_64F)
_, edge_map_second_order = cv2.threshold(cv2.convertScaleAbs(laplacian), threshold_value, 255, cv2.THRESH_BINARY)

# Edge map using LoG (Laplacian of Gaussian)
# Apply Gaussian blur
image_blurred = cv2.GaussianBlur(image, (3, 3), 0)
# Apply Laplacian
log = cv2.Laplacian(image_blurred, cv2.CV_64F)
_, edge_map_log = cv2.threshold(cv2.convertScaleAbs(log), threshold_value, 255, cv2.THRESH_BINARY)

# Canny edge detector
canny_edge = cv2.Canny(image, 100, 200)

# Display the results. The signed float64 derivative images are converted
# with convertScaleAbs for display, so negative and large responses do not
# wrap around.
main_panels = [
    ('Original Image', image),
    ('First Order Derivative along x', cv2.convertScaleAbs(dx)),
    ('First Order Derivative along y', cv2.convertScaleAbs(dy)),
    ('Gradient Image Magnitude', cv2.convertScaleAbs(gradient_magnitude)),
    ('Edge Map First Order', edge_map_first_order),
    ('Edge Map Second Order', edge_map_second_order),
]

plt.figure(figsize=(15, 12))
for position, (title, panel_image) in enumerate(main_panels, start=1):
    plt.subplot(2, 3, position)
    plt.imshow(panel_image, cmap='gray')
    plt.title(title)
    plt.axis('off')

plt.show()

# Display additional edge maps
extra_panels = [
    ('Edge Map LoG', edge_map_log),
    ('Canny Edge Detector', canny_edge),
]

plt.figure(figsize=(15, 5))
for position, (title, panel_image) in enumerate(extra_panels, start=1):
    plt.subplot(1, 2, position)
    plt.imshow(panel_image, cmap='gray')
    plt.title(title)
    plt.axis('off')

plt.show()


In [None]:
# Answer 6 


import numpy as np

# Define the image data: a 3x3 array of 8-bit pixel values
image_data = np.array([
    [167, 144, 159],
    [140, 135, 154],
    [135, 148, 148]
], dtype=np.uint8)

# Get the image dimensions
height, width = image_data.shape

# Create an empty list to store the bit planes
bit_planes = []

# Loop through each bit position, from the LSB (i = 0) up to the MSB (i = 7)
for i in range(8):
    # Create an empty bit plane
    bit_plane = np.zeros((height, width), dtype=np.uint8)

    # Extract bit i of each pixel and store it in the bit plane
    for y in range(height):
        for x in range(width):
            # Shift the pixel value right by i places, then mask with 1 to keep only bit i
            bit_value = (image_data[y, x] >> i) & 1

            # Scale the 0/1 bit to 0/255 before storing it in the bit plane
            bit_plane[y, x] = bit_value * 255

    # Append the bit plane to the list
    bit_planes.append(bit_plane)

# Print the bit planes; the printed index is i+1, so plane 1 is the LSB
# plane and plane 8 is the MSB plane
for i, bit_plane in enumerate(bit_planes):
    print(f"Bit plane {i+1}:")
    print(bit_plane)


This code performs bit-plane slicing, separating the image into its constituent bit planes, and prints each bit plane. Each bit plane holds one bit position of every pixel's 8-bit intensity value, shown as 0 (bit clear) or 255 (bit set).



Let us understand the above code, step-by-step :

1. Import NumPy: 'import numpy as np' imports the NumPy library under the alias 'np'. 

2. Define the image data: The image data is defined as a 3x3 NumPy array with pixel intensity values ranging from 0 to 255. Each pixel represents grayscale intensity, where 0 is black and 255 is white.

3. Get the image dimensions: 'height, width = image_data.shape' retrieves the height and width of the image data array.

4. Create an empty list to store the bit planes: 'bit_planes = []' initializes an empty list where the bit planes of the image will be stored.

5. Loop through each bit plane (from LSB to MSB): A loop iterates from 0 to 7, representing the bit positions from the Least Significant Bit (LSB, bit 0) to the Most Significant Bit (MSB, bit 7).

6. Create an empty bit plane: 'bit_plane = np.zeros((height, width), dtype=np.uint8)' creates an empty bit plane of the same size as the input image, initialized with zeros.

7. Extract the current bit from each pixel and set the corresponding bit in the bit plane: Nested loops iterate over each pixel in the image.

8. Extract the current bit: '(image_data[y, x] >> i) & 1' shifts the pixel intensity value 'image_data[y, x]' by 'i' bits to the right and performs a bitwise AND operation with 1. This extracts the ith bit from the pixel intensity value.

9. Set the corresponding bit in the bit plane: The extracted bit value is multiplied by 255 to set it to either 0 or 255 (representing black or white) and assigned to the corresponding location in the bit plane.

10. Append the bit plane to the list: 'bit_planes.append(bit_plane)' adds the generated bit plane to the list of bit planes.

11. Print the bit planes: The code iterates over the generated bit planes and prints them one by one, along with their corresponding bit plane index.