[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nils-holmberg/socs-qmd/blob/main/jnb/lab5_cv2.ipynb)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# image dataset

In [None]:
!gdown https://drive.google.com/uc?id=1sBsckDIyQJ-zTsEpjCntYMIJcbBpF3wk

In [None]:
!unzip content-images.zip

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Read the sample image with OpenCV.
image_path = 'images/ibs-92.jpg'  # Replace with your image path
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None;
    # fail here with a clear message instead of inside cvtColor below.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Initialize ORB detector
orb = cv2.ORB_create()

# Detect ORB keypoints and descriptors in the grayscale version of the image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
keypoints, descriptors = orb.detectAndCompute(gray_image, None)

# Draw keypoints (in green) on the original color image
orb_image = cv2.drawKeypoints(image, keypoints, None, color=(0, 255, 0), flags=0)

# Display the color image with ORB features; matplotlib expects RGB order,
# so convert from OpenCV's BGR before plotting.
plt.figure(figsize=(6, 6))
plt.imshow(cv2.cvtColor(orb_image, cv2.COLOR_BGR2RGB))
plt.title('ORB Features on Color Image')
plt.axis('off')
plt.show()


# color clustering

In [None]:
from sklearn.cluster import KMeans
from collections import Counter
from matplotlib import pyplot as plt
import cv2

In [None]:
# Utility function, rgb to hex
def rgb2hex(rgb):
    """Convert an RGB triple to a ``#rrggbb`` hex colour string.

    Args:
        rgb: Indexable with at least three numeric channel values
            (red, green, blue) on a 0-255 scale. Floats are accepted.

    Returns:
        Lower-case hex string such as ``"#ff0000"``.
    """
    channels = []
    for value in (rgb[0], rgb[1], rgb[2]):
        # Round instead of truncating so float noise (e.g. 199.99999999999997)
        # does not shift a channel down by one, then clamp so each channel is
        # always exactly two hex digits.
        channels.append(min(255, max(0, int(round(float(value))))))
    return "#{:02x}{:02x}{:02x}".format(*channels)
print(rgb2hex([255, 0, 0]))

In [None]:
def image_color_cluster(path, k=6, resize=None):
    """Cluster an image's pixels by colour and show the result as a pie chart.

    Args:
        path: Path of the image file to analyse.
        k: Number of KMeans colour clusters.
        resize: Optional ``(width, height)`` to downscale the image to before
            clustering, e.g. ``(64, 64)``, which speeds up KMeans considerably.
            ``None`` (default) clusters the full-resolution image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path``.
    """
    # load image (OpenCV returns None rather than raising on failure)
    img_bgr = cv2.imread(path)
    if img_bgr is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # optionally shrink the image to speed up processing time
    if resize is not None:
        img_rgb = cv2.resize(img_rgb, resize, interpolation=cv2.INTER_AREA)

    # reshape the image to be a list of pixels
    img_list = img_rgb.reshape((-1, 3))

    # cluster the pixels and assign labels
    clt = KMeans(n_clusters=k)
    labels = clt.fit_predict(img_list)

    # count how many pixels fall into each cluster
    label_counts = Counter(labels.tolist())
    cluster_ids = list(label_counts)

    # Look every colour up by cluster id, in the same order as the counts,
    # so each wedge's colour and its hex label describe the same cluster.
    center_colors = clt.cluster_centers_
    ordered_colors = [center_colors[i] / 255 for i in cluster_ids]
    color_labels = [rgb2hex(center_colors[i]) for i in cluster_ids]
    wedge_sizes = [label_counts[i] for i in cluster_ids]

    # plot one wedge per cluster, sized by pixel count
    plt.figure(figsize=(14, 8))
    plt.pie(wedge_sizes, labels=color_labels, colors=ordered_colors, startangle=90)
    plt.axis('equal')
    plt.show()

In [None]:
# Cluster the sample image's pixels into 10 colour groups and show the pie chart.
image_path = 'images/ibs-92.jpg'  # Replace with your image path
image_color_cluster(image_path, k=10)

# video dataset

In [None]:
!gdown https://drive.google.com/uc?id=1T9S32UwmDnUd6YbxRYfyG7-gud8jh2Y_


In [None]:
import cv2
import os

# Path to the video file
video_path = 'content-video.mp4'

# Directory to save frames (created if missing)
save_dir = 'images-video'
os.makedirs(save_dir, exist_ok=True)

# Create a VideoCapture object
cap = cv2.VideoCapture(video_path)

# Check if video opened successfully. Raise instead of calling exit(), which
# would shut down the notebook kernel and discard all state.
if not cap.isOpened():
    cap.release()
    raise IOError(f"Error: Could not open video: {video_path}")

try:
    frame_idx = 0
    while True:
        ret, frame = cap.read()

        # Break the loop if there are no more frames
        if not ret:
            break

        # Save every 100th frame
        if frame_idx % 100 == 0:
            # Filename with the frame index zero-padded to 5 digits
            save_path = os.path.join(save_dir, f'frame_{frame_idx:05d}.jpg')
            # cv2.imwrite returns False on failure instead of raising
            if cv2.imwrite(save_path, frame):
                print(f'Saved frame {frame_idx:05d}')
            else:
                print(f'Failed to save frame {frame_idx:05d}')

        frame_idx += 1
finally:
    # Release the VideoCapture object even if the loop is interrupted
    cap.release()


# image ocr

In [None]:
!pip install -q easyocr


In [None]:
import easyocr

In [None]:
# Alternative language list (kept for reference):
#reader = easyocr.Reader(['ch_tra', 'en', 'sv'])
# Create the OCR reader for the language codes listed here ('en' only).
reader = easyocr.Reader(['en'])

In [None]:
# Run OCR on one extracted video frame and show the full result.
result = reader.readtext('images-video/frame_00400.jpg')
result

In [None]:
# Same frame with detail=0 (per the EasyOCR docs this returns a simplified,
# text-only result; confirm against the installed version).
reader.readtext('images-video/frame_00400.jpg', detail=0)

# image normalization

In [None]:
import cv2
import os
import numpy as np

def resize_and_crop(img, size):
    """Resize an image so its shorter side equals ``size``, then centre-crop.

    Args:
        img: Image array whose first two axes are (height, width). Both
            grayscale (2-D) and multi-channel (3-D) arrays are accepted.
        size: Side length, in pixels, of the square output.

    Returns:
        The central ``size`` x ``size`` region of the resized image.
    """
    # Only the first two axes are needed; slicing (rather than unpacking
    # three values) lets single-channel images through as well.
    h, w = img.shape[:2]

    # Scale so the shorter side becomes exactly `size`, keeping aspect ratio
    if h > w:
        new_h, new_w = size * h / w, size
    else:
        new_h, new_w = size, size * w / h

    new_h, new_w = int(new_h), int(new_w)
    # cv2.resize takes the target size as (width, height)
    resized_img = cv2.resize(img, (new_w, new_h))

    # Crop the center of the image
    startx = new_w//2 - size//2
    starty = new_h//2 - size//2
    return resized_img[starty:starty+size, startx:startx+size]

def normalize_image(img):
    """Scale pixel intensities from the 0-255 range to the [0, 1] range."""
    max_intensity = 255.0
    return img / max_intensity

directory = 'images-video'
output_directory = 'images-normalize'

os.makedirs(output_directory, exist_ok=True)

# Sorted so frames are processed in a reproducible order
for filename in sorted(os.listdir(directory)):
    # Case-insensitive extension test, matching the later histogram cells
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    img = cv2.imread(os.path.join(directory, filename))
    if img is None:
        # cv2.imread returns None for files it cannot decode
        print(f'Skipping unreadable image: {filename}')
        continue

    img = resize_and_crop(img, 256)
    normalized_img = normalize_image(img)

    # Convert the normalized image back to 8-bit format. Round before the
    # cast: astype(np.uint8) truncates, so a value that lands just below an
    # integer after the float round trip would otherwise lose one level.
    img_to_save = np.clip(np.rint(normalized_img * 255), 0, 255).astype(np.uint8)

    # Save the normalized image under the same file name
    output_path = os.path.join(output_directory, filename)
    cv2.imwrite(output_path, img_to_save)


# color histograms

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the image
image_path = 'images/ibs-92.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None when the file is missing or unreadable
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Calculate color histograms for each channel.
# OpenCV loads images in BGR order: channel 0 is blue, 1 is green, 2 is red.
hist_b = cv2.calcHist([image], [0], None, [256], [0, 256])
hist_g = cv2.calcHist([image], [1], None, [256], [0, 256])
hist_r = cv2.calcHist([image], [2], None, [256], [0, 256])

# Normalize histograms so each one sums to 1 (relative frequencies)
hist_r /= hist_r.sum()
hist_g /= hist_g.sum()
hist_b /= hist_b.sum()

# Create subplots for the original image and histograms
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Plot the original image (converted to RGB for matplotlib)
axes[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[0].set_title('Original Image')
axes[0].axis('off')

# Plot the color histograms using Seaborn
sns.lineplot(x=np.arange(256), y=hist_r.squeeze(), color='red', ax=axes[1], label='Red')
sns.lineplot(x=np.arange(256), y=hist_g.squeeze(), color='green', ax=axes[1], label='Green')
sns.lineplot(x=np.arange(256), y=hist_b.squeeze(), color='blue', ax=axes[1], label='Blue')
axes[1].set_title('Color Histograms')
axes[1].set_xlabel('Pixel Value')
axes[1].set_ylabel('Frequency')
axes[1].legend()

# Show the plots
plt.tight_layout()
plt.show()


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def compute_histogram(image, bins=256):
    """Return the three per-channel histograms of ``image`` as one flat vector.

    Each of channels 0, 1 and 2 is binned into ``bins`` buckets over the
    value range [0, 256); the three histograms are concatenated in that order.
    """
    per_channel = []
    for channel in (0, 1, 2):
        channel_hist = cv2.calcHist([image], [channel], None, [bins], [0, 256])
        per_channel.append(channel_hist)
    stacked = np.concatenate(per_channel)
    return stacked.flatten()

def calculate_similarity(hist_list):
    """Build the pairwise correlation matrix for a list of histograms.

    Entry ``[i, j]`` is ``cv2.compareHist`` with the ``HISTCMP_CORREL``
    method applied to histograms ``i`` and ``j``.
    """
    count = len(hist_list)
    scores = np.zeros((count, count))

    for row, hist_a in enumerate(hist_list):
        for col, hist_b in enumerate(hist_list):
            scores[row, col] = cv2.compareHist(hist_a, hist_b, cv2.HISTCMP_CORREL)

    return scores

# Directory containing images (use 'images' for the original still images)
image_directory = 'images-normalize'  # Replace with your directory path

# Load and process images. os.listdir returns names in arbitrary order, so
# sort them to make the heatmap's row/column indices reproducible.
image_files = sorted(
    f for f in os.listdir(image_directory)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)
histograms = []

for file in image_files:
    image_path = os.path.join(image_directory, file)
    image = cv2.imread(image_path)
    # Files OpenCV cannot decode come back as None and are skipped
    if image is not None:
        hist = compute_histogram(image)
        histograms.append(hist)

# Calculate the similarity matrix, rounded to one decimal
similarity_matrix = calculate_similarity(histograms)
similarity_matrix = np.around(similarity_matrix, decimals=1)

# Plotting the similarity matrix (pass annot=True to print the values)
plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, annot=False, cmap='coolwarm')
plt.title('Image Histogram Similarity Matrix')
plt.xlabel('Image Index')
plt.ylabel('Image Index')
plt.show()


In [None]:
#!rm images-normalize/ibs-*

In [None]:
import os
import cv2
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform

# Reuses `compute_histogram` and `calculate_similarity` from the earlier
# histogram-similarity cell; run that cell first.

# Directory containing images (use 'images' for the original still images)
image_directory = 'images-normalize'  # Replace with your directory path

# Load and process images in a reproducible (sorted) order
image_files = sorted(
    f for f in os.listdir(image_directory)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)
histograms = []
loaded_files = []  # names of the images that were actually read

for file in image_files:
    image_path = os.path.join(image_directory, file)
    image = cv2.imread(image_path)
    if image is not None:
        histograms.append(compute_histogram(image))
        # Track the name alongside the histogram so the dendrogram labels
        # stay aligned even when an unreadable file is skipped.
        loaded_files.append(file)

# Calculate the similarity matrix
similarity_matrix = calculate_similarity(histograms)

# Turn correlation into a distance and tidy it up: exact symmetry, a zero
# diagonal and no tiny negatives caused by floating-point noise.
distance_matrix = 1 - similarity_matrix
distance_matrix = (distance_matrix + distance_matrix.T) / 2
np.fill_diagonal(distance_matrix, 0)
distance_matrix = np.clip(distance_matrix, 0, None)

# Perform hierarchical clustering. linkage() treats a 2-D array as raw
# observation vectors, so precomputed distances must be passed in condensed
# (1-D) form, which squareform produces.
Z = linkage(squareform(distance_matrix, checks=False), method='average')

# Plot dendrogram
plt.figure(figsize=(12, 8))
dendrogram(Z, labels=loaded_files, orientation='right')
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Distance')
plt.ylabel('Image')
plt.show()


# clustering, explorative

In [None]:
!gdown https://drive.google.com/uc?id=1U3yly6qUlBBcWYfkMlRpZ47I6DjcRUna

In [None]:
!unzip mnist-42k.zip

In [None]:
!pip install -q supervision umap-learn

In [None]:
import base64

def image_to_data_uri(image_path):
    """Read an image file and return it as a base64 ``data:`` URI.

    The MIME type is guessed from the file extension. Files with an unknown
    or non-image extension fall back to ``image/jpeg``.

    Args:
        image_path: Path of the image file to encode.

    Returns:
        String of the form ``data:<mime>;base64,<payload>``.
    """
    import mimetypes  # local import keeps this cell self-contained

    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("ascii")
    return f"data:{mime_type};base64,{encoded_image}"

In [None]:
import os
import cv2
import supervision as sv
import numpy as np

# Root of the unzipped dataset: one sub-directory per class under "train"
SOURCE_DIR = os.path.join("MNIST-42000-images-1", "train")
PER_CLASS_IMAGE_COUNT = 500

labels = []
train = []
images = []
image_paths = []

class_ids = sorted(os.listdir(SOURCE_DIR))

for class_id in class_ids:
    source_subdir = os.path.join(SOURCE_DIR, class_id)
    # Ignore stray files sitting next to the class directories
    if not os.path.isdir(source_subdir):
        continue
    for image_path in sv.list_files_with_extensions(source_subdir)[:PER_CLASS_IMAGE_COUNT]:
        image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None for unreadable files; skip them
            # instead of crashing on image.flatten() below.
            continue
        labels.append(class_id)
        images.append(image)
        image_paths.append(str(image_path))
        # Feature vector: the raw pixels flattened to 1-D
        train.append(image.flatten())

# cached images, keyed by plain-str path (built before the list below is
# converted to a NumPy array, so the keys are not NumPy string scalars)
image_data_uris = {path: image_to_data_uri(path) for path in image_paths}

# class associated with image
labels = np.array(labels)
# features extracted from image
train = np.array(train)
# local image path
image_paths = np.array(image_paths)

In [None]:
import random
# Draw 9 of the loaded images at random (without replacement; requires at
# least 9 entries in `images`) and show them in a 3x3 grid.
images_sample = random.sample(images, 9)
sv.plot_images_grid(images_sample, grid_size=(3, 3))

In [None]:
from sklearn.manifold import TSNE
import time

# Project the flattened images down to 3 dimensions with t-SNE and report
# the wall-clock time. random_state=0 fixes the seed passed to TSNE.
start = time.time()
tsne = TSNE(n_components = 3, random_state=0)
projections = tsne.fit_transform(train)
end = time.time()
print(f"generating projections with T-SNE took: {(end-start):.2f} sec")

In [None]:
import json

import plotly.graph_objects as go
import plotly.express as px
import numpy as np

from typing import Dict
from pathlib import Path
# `display` and `HTML` live in IPython.display; importing them from
# IPython.core.display is deprecated and fails on recent IPython releases.
from IPython.display import display, HTML


def display_projections(
    labels: np.ndarray,
    projections: np.ndarray,
    image_paths: np.ndarray,
    image_data_uris: Dict[str, str],
    show_legend: bool = False,
    show_markers_with_text: bool = True
) -> None:
    """Render a 3-D scatter plot that shows a point's image when clicked.

    Args:
        labels: Class label for every point; one trace is drawn per unique label.
        projections: 2-D array whose first three columns are used as the
            x, y and z coordinates.
        image_paths: Image path for every point, attached as ``customdata``
            and used as the lookup key into ``image_data_uris``.
        image_data_uris: Mapping from image path to a ``data:`` URI.
        show_legend: Whether to show the plot legend.
        show_markers_with_text: If True, draw the label text next to each marker.
    """
    # Create a separate trace for each unique label
    unique_labels = np.unique(labels)
    traces = []
    for unique_label in unique_labels:
        mask = labels == unique_label
        customdata_masked = image_paths[mask]
        trace = go.Scatter3d(
            x=projections[mask][:, 0],
            y=projections[mask][:, 1],
            z=projections[mask][:, 2],
            mode='markers+text' if show_markers_with_text else 'markers',
            text=labels[mask],
            customdata=customdata_masked,
            name=str(unique_label),
            marker=dict(size=8),
            hovertemplate="<b>class: %{text}</b><br>path: %{customdata}<extra></extra>"
        )
        traces.append(trace)

    # Create the 3D scatter plot
    fig = go.Figure(data=traces)
    fig.update_layout(
        scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'),
        width=1000,
        height=1000,
        showlegend=show_legend
    )

    # Convert the chart to an HTML div string and add an ID to the div
    plotly_div = fig.to_html(full_html=False, include_plotlyjs=False, div_id="scatter-plot-3d")

    # Serialise the path -> data-URI map as JSON so it is a valid JavaScript
    # literal whatever the paths contain (a Python dict repr is not: quotes,
    # backslashes or NumPy string keys break it). Keys are coerced to plain
    # str, and "</" is escaped so the payload cannot close the <script> tag.
    image_data_json = json.dumps(
        {str(path): uri for path, uri in image_data_uris.items()}
    ).replace("</", "<\\/")

    # JavaScript that shows the clicked point's image in the corner panel.
    # The lookup table is defined once, not rebuilt on every click.
    javascript_code = f"""
    <script>
        var imageDataURIs = {image_data_json};

        function displayImage(imagePath) {{
            var imageElement = document.getElementById('image-display');
            var placeholderText = document.getElementById('placeholder-text');
            imageElement.src = imageDataURIs[imagePath];
            imageElement.style.display = 'block';
            placeholderText.style.display = 'none';
        }}

        // Get the Plotly chart element by its ID
        var chartElement = document.getElementById('scatter-plot-3d');

        // Add a click event listener to the chart element
        chartElement.on('plotly_click', function(data) {{
            var customdata = data.points[0].customdata;
            displayImage(customdata);
        }});
    </script>
    """

    # Create an HTML template including the chart div and JavaScript code.
    # NOTE(review): the "plotly-latest" CDN alias may not track the plotly.js
    # version matching the installed plotly package - confirm, or pin a version.
    html_template = f"""
    <!DOCTYPE html>
    <html>
        <head>
            <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
            <style>
                #image-container {{
                    position: fixed;
                    top: 0;
                    left: 0;
                    width: 200px;
                    height: 200px;
                    padding: 5px;
                    border: 1px solid #ccc;
                    background-color: white;
                    z-index: 1000;
                    box-sizing: border-box;
                    display: flex;
                    align-items: center;
                    justify-content: center;
                    text-align: center;
                }}
                #image-display {{
                    width: 100%;
                    height: 100%;
                    object-fit: contain;
                }}
            </style>
        </head>
        <body>
            {plotly_div}
            <div id="image-container">
                <img id="image-display" src="" alt="Selected image" style="display: none;" />
                <p id="placeholder-text">Click on a data entry to display an image</p>
            </div>
            {javascript_code}
        </body>
    </html>
    """

    # Display the HTML template in the Jupyter Notebook
    display(HTML(html_template))

In [None]:
# Interactive 3-D view of the current `projections` (t-SNE if the cell above ran last).
display_projections(
    labels=labels,
    projections=projections,
    image_paths=image_paths,
    image_data_uris=image_data_uris
)

In [None]:
import umap
import time

# Project the same features to 3 dimensions with UMAP and time it.
# NOTE: this overwrites `projections` from the t-SNE cell; no random seed is
# passed to UMAP here.
start = time.time()
projections = umap.UMAP(n_components=3).fit_transform(train)
end = time.time()
print(f"generating projections with UMAP took: {(end-start):.2f} sec")

In [None]:
# Interactive 3-D view of the current `projections` (UMAP if the cell above ran last).
display_projections(
    labels=labels,
    projections=projections,
    image_paths=image_paths,
    image_data_uris=image_data_uris
)

# image embeddings

In [None]:
from transformers import CLIPModel, CLIPProcessor
from PIL import Image

# Step 1: The transformers library must already be installed for the imports
# above to work; if it is missing, run:
# pip install transformers

# Step 2: Load the CLIP model and its matching processor from the same checkpoint
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Step 3: Load your image and let the processor turn it into model inputs
# (return_tensors="pt" requests PyTorch tensors)
image = Image.open("images-normalize/frame_00000.jpg")  # Replace with your image path
inputs = processor(images=image, return_tensors="pt")

# Step 4: Generate image embeddings
# NOTE(review): called outside torch.no_grad(); the next cell calls
# .detach() before converting the result to NumPy.
embeddings = model.get_image_features(**inputs)

# The 'embeddings' variable now contains the image embeddings; show its type
type(embeddings)

In [None]:
# Step 1: Convert the tensor to a NumPy array
# .cpu() moves it to host memory if it is on a GPU; .detach() drops the
# autograd graph, which .numpy() requires.
embeddings_np = embeddings.cpu().detach().numpy()

# Step 2: Create a Pandas DataFrame from the embedding array
# (`pd` is imported in the first cell of the notebook)
df = pd.DataFrame(embeddings_np)

# Now 'df' is a Pandas DataFrame containing your embeddings
df.head()

In [None]:
from PIL import Image
import requests

from transformers import CLIPProcessor, CLIPModel

# Load the CLIP model and processor (the same checkpoint as the embedding
# cell, loaded again so this cell can run on its own).
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Download a sample image and open it straight from the response stream.
# NOTE(review): no timeout or HTTP status check on this request.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Encode the two candidate captions together with the image
inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)

outputs = model(**inputs)
logits_per_image = outputs.logits_per_image # this is the image-text similarity score
probs = logits_per_image.softmax(dim=1) # softmax across the captions gives per-caption probabilities

print(probs)

# image object detection

In [None]:
!pip install -q transformers
!pip install -q timm


In [None]:
from transformers import pipeline
# Build an object-detection pipeline; no checkpoint is named, so the
# library's default for this task is used.
# NOTE: this rebinds `model`, replacing the CLIP model bound to the same
# name in the embedding cells above.
model = pipeline("object-detection")

In [None]:
import glob

# Run the detector on every extracted frame (in sorted filename order) and
# keep one record per frame: its path plus the raw detection result.
frame_paths = sorted(glob.glob('images-video/frame_*.jpg'))
rows_list = [
  {"image": frame_path, "result": model(frame_path)}
  for frame_path in frame_paths
]

df = pd.DataFrame(rows_list)
df.head()

In [None]:
# Flatten to one row per detection: the frame path first, then every
# field of that detection's dictionary.
rows_list = [
  {"image": row.image, **detection}
  for _, row in df.iterrows()
  for detection in row.result
]

df_res = pd.DataFrame(rows_list)
df_res.head()

In [None]:
# Bar chart of how many detections each label received across all frames.
df_res.label.value_counts().plot(kind='bar')

In [None]:
# Show only the detections that belong to one specific frame.
df_res[df_res.image=="images-video/frame_00100.jpg"]

In [None]:
import cv2

frame_path = 'images-video/frame_00100.jpg'
img = cv2.imread(frame_path)
if img is None:
  # cv2.imread returns None when the file is missing or unreadable
  raise FileNotFoundError(f"Could not read image: {frame_path}")

# Draw a green rectangle for every detection recorded for this frame.
for index, row in df_res[df_res.image == frame_path].iterrows():
  box = row.box
  # Read the corners by key instead of relying on the dict's value order
  top_left = (box["xmin"], box["ymin"])
  bottom_right = (box["xmax"], box["ymax"])
  cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)

# Colab replacement for cv2.imshow, which needs a GUI window
from google.colab.patches import cv2_imshow
cv2_imshow(img)
# Outside Colab, use one of these instead:
#cv2.imshow("display", img)
#cv2.imwrite("objects.png", img)

# image content inference

In [None]:
!pip install -q mediapipe

In [None]:
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt

# MediaPipe pose solution and drawing helpers
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Read an image
image_path = 'images-video/frame_00400.jpg'  # Replace with the path to your image
image = cv2.imread(image_path)

# Check if image is loaded
if image is None:
    print("Error: Image not found.")
else:
    # The `with` block closes the Pose object even if processing fails
    with mp_pose.Pose(static_image_mode=True, model_complexity=1, enable_segmentation=True) as pose:
        # Convert the BGR image to RGB before handing it to MediaPipe
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Process the image and find the pose
        results = pose.process(image_rgb)

    # Draw pose landmarks on the (still BGR) image
    if results.pose_landmarks:
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

    # Display the image. `image` is BGR, so convert exactly once to RGB for
    # matplotlib; converting twice would swap red and blue in the figure.
    plt.figure(figsize=(10, 10))
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()
