Classify images in bag using trained CNN
==============================================================

This notebook looks at all the images in a bagfile and classifies them. It needs pre-specified target variables; if you don't have any, you can select a target with the target select notebook.

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import quaternion
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from torch import nn, optim
from torchvision import datasets, models, transforms

import torch
from lightglue import SuperPoint
from lightglue.utils import load_image

import scripts.query_image as query_image
import scripts.save_patch as save_patch
import scripts.divide_data as divide_data
import scripts.points_and_polygons as points_and_polygons


In [2]:
# Parameters
# ROS topic names rewritten without slashes: the leading "/" is dropped and the
# remaining "/" separators become "_" (e.g. "/gnc/ekf" -> "gnc_ekf").
ros_topic_pose = "_".join("/gnc/ekf".strip("/").split("/"))
ros_topic_image = "_".join("/hw/cam_sci/info".strip("/").split("/"))

# Target Pose
# Position of the target as a 3-element array.
# NOTE(review): presumably (x, y, z) in metres in the world frame — confirm.
target_position = np.array([-0.84, 0.6, -0.81])
# Orientation of the target as a quaternion built from three Euler angles; the
# last angle is converted from 180 degrees to radians.
# NOTE(review): the numpy-quaternion docs describe from_euler_angles as taking
# (alpha, beta, gamma) in a z-y-z convention, not roll/pitch/yaw. With the first
# two angles at zero this is still a 180 degree turn about z, but confirm the
# convention before entering a non-zero roll or pitch here.
target_attitude = quaternion.from_euler_angles(0, 0, np.radians(180))  # order is roll, pitch, yaw (per original author)

# Query constraints handed to the image query.
# NOTE(review): units are not stated in this notebook — presumably metres for
# the distances and degrees for the angle; confirm against scripts.query_image.
min_distance = 0.2
max_distance = 1.0
max_angle = 30

# Target size; the target is pointed to in the x-axis, so its extent is given
# along y and z.
target_size_y, target_size_z = 0.05, 0.07

# Input data: the bag to query, the directory holding its extracted images and
# the reference ("base") image inside that directory.
bag = '20240321_2254_survey_bsharp7_1.fix_all.bag'
image_path = 'data/bags/2024-03-21_tim/bsharp/Fixed/isaac_sci_cam_image_delayed/'
base_image_path = image_path + '1711061089.228.jpg'

# Four corner points of the target, wrapped in an outer list (one entry per target).
# NOTE(review): presumably (x, y) pixel coordinates in the base image — confirm.
target_corners = [[(1838, 1531), (2050, 1531), (1838, 1683), (2050, 1694)]]

In [3]:
# Query the bag for images that see the target within the configured distance
# and angle limits. Each returned element carries the image filename under 'img'.
result = query_image.query_image_of_bag(
    target_position,
    target_attitude,
    ros_topic_pose,
    ros_topic_image,
    max_distance,
    min_distance,
    max_angle,
    target_size_y,
    target_size_z,
    bag,
)

Connected to isaac database
From database got 35 matches
From first filtering got 25 matches
Query successful, got 25 matches


In [4]:
# Use an Nvidia CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# SuperPoint is the keypoint extractor: evaluation mode, at most 2048 keypoints.
extractor = SuperPoint(max_num_keypoints=2048)
extractor = extractor.eval().to(device)

# Extract the base image's features once; they are reused for every match below.
base_image = load_image(base_image_path)
feats_base_image = extractor.extract(base_image.to(device))

In [5]:
# --- Classifier configuration -------------------------------------------------
classes = ['off', 'on']  # image classes, indexed by the network's output position
model_name = 'switch_model_cnn.pt'  # saved model name

# --- Build the classifier once -------------------------------------------------
# Constructing DenseNet and reading the checkpoint does not depend on the image,
# so it is done a single time here instead of once per loop iteration.
# weights=None: the strict load_state_dict below overwrites every parameter and
# buffer, so fetching the pretrained ImageNet weights first would be wasted work.
model = models.densenet121(weights=None)
model.classifier = nn.Sequential(
    nn.Linear(1024, 256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 2),
    nn.LogSoftmax(dim=1),
)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load(model_name, map_location=device))
model.to(device)
model.eval()  # inference mode: disables dropout, uses batch-norm running stats

# Preprocessing applied to every extracted patch before it is classified.
test_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

# --- Classify every queried image -----------------------------------------------
classification_dictionary = {}  # image filename -> predicted class name
classification_confidence = {}  # image filename -> probability of the predicted class
corners = points_and_polygons.identify_corners(target_corners)

# Go through all the images returned by the query and store the classification in a dictionary.
for element in result:
    # Align the queried image with the base image, then cut out the target patch.
    transformed_image = save_patch.match_images_and_transform(
        base_image_path, image_path + element['img'], feats_base_image
    )
    # Corner order A, B, D, C is kept exactly as in the original call.
    extracted_image_patch = save_patch.extract_image(
        transformed_image,
        [corners[0]['A'], corners[0]['B'], corners[0]['D'], corners[0]['C']],
    )

    # NOTE(review): assumes the patch is an RGB array; if save_patch returns
    # OpenCV-style BGR data the channels must be swapped first — confirm.
    image = Image.fromarray(extracted_image_patch)

    # Add the batch dimension and move the tensor to the model's device.
    image_tensor = test_transforms(image).float().unsqueeze(0).to(device)

    # No gradients are needed for inference; skipping them saves memory and time.
    with torch.no_grad():
        output = model(image_tensor)

    # The network ends in LogSoftmax, so exp() recovers the class probabilities.
    probabilities = torch.exp(output)

    # Get the predicted class and its probability
    best_probability, predicted = torch.max(probabilities, 1)
    confidence = best_probability.item()

    classification_dictionary[element['img']] = classes[predicted.item()]
    classification_confidence[element['img']] = confidence

In [6]:
# Show the image filename -> predicted class mapping built by the classification loop.
print(classification_dictionary)

{'1711061763.930.jpg': 'on', '1711061764.596.jpg': 'on', '1711061765.480.jpg': 'on', '1711061765.897.jpg': 'on', '1711061766.355.jpg': 'on', '1711061766.814.jpg': 'on', '1711061767.228.jpg': 'on', '1711061767.646.jpg': 'on', '1711061768.056.jpg': 'on', '1711061768.557.jpg': 'on', '1711061769.035.jpg': 'on', '1711061769.627.jpg': 'on', '1711061770.228.jpg': 'on', '1711061770.889.jpg': 'on', '1711061771.349.jpg': 'on', '1711061771.777.jpg': 'on', '1711061772.410.jpg': 'on', '1711061772.826.jpg': 'on', '1711061773.284.jpg': 'on', '1711061773.960.jpg': 'on', '1711061774.668.jpg': 'on', '1711061775.167.jpg': 'on', '1711061775.784.jpg': 'on', '1711061776.409.jpg': 'on', '1711061776.826.jpg': 'on'}
