In [128]:
# Imports 
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from pycocotools.coco import COCO
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cv2
import torch.optim as optim
import random
from PIL import Image
from matplotlib import patches
from torchvision.transforms import functional as F
import os
import pandas as pd
import math
import torch.nn.functional as F  # Make sure this import is here
import torch.nn as nn
import shutil  # Used for copying files
from torchvision.transforms import ToTensor

# Open the csv file, get all the image name and let the faster RCNN predict what the class it is and stuff
# Create a coco dataset with all the images and bounding box
# Use the images and bounding box to predict the lat lon for different model

In [155]:
## This is required for visualization of the images with bounding box
# class COCODataset(Dataset):
#     def __init__(self, annotation_file, image_dir, transforms=None):
#         self.coco = COCO(annotation_file)
#         self.image_dir = image_dir
#         self.transforms = transforms
#         self.ids = list(self.coco.imgs.keys())  # List of image IDs

#     def __getitem__(self, index):
#         # Get the image ID and load the associated annotations
#         img_id = self.ids[index]
#         ann_ids = self.coco.getAnnIds(imgIds=img_id)
#         anns = self.coco.loadAnns(ann_ids)
#         image_info = self.coco.loadImgs(img_id)[0]
#         path = image_info['file_name']

#         # Load the image using OpenCV
#         img = cv2.imread(os.path.join(self.image_dir, path))
#         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

#         num_objs = len(anns)
#         boxes = []
#         labels = []
        
#         # Extract the bounding boxes and category labels
#         for i in range(num_objs):
#             xmin = anns[i]['bbox'][0]
#             ymin = anns[i]['bbox'][1]
#             xmax = xmin + anns[i]['bbox'][2]
#             ymax = ymin + anns[i]['bbox'][3]
#             boxes.append([xmin, ymin, xmax, ymax])
#             labels.append(anns[i]['category_id'])

#         # Convert boxes and labels to tensors
#         boxes = torch.as_tensor(boxes, dtype=torch.float32)
#         if boxes.ndim == 1:
#             boxes = boxes.unsqueeze(0)
#         labels = torch.as_tensor(labels, dtype=torch.int64)
#         image_id = torch.tensor([img_id])
        
#         # Calculate the area of the boxes
#         if boxes.size(0) > 0:  # Check if there are any boxes
#             area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
#         else:
#             area = torch.tensor([])

#         # Set the crowd flag to 0 (no crowd annotations in this case)
#         iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

#         # Build the target dictionary
#         target = {}
#         target["boxes"] = boxes
#         target["labels"] = labels
#         target["image_id"] = image_id
#         target["area"] = area
#         target["iscrowd"] = iscrowd

#         # Apply transformations if specified
#         if self.transforms:
#             img = self.transforms(img)

#         return img, target

#     def __len__(self):
#         return len(self.ids)

class COCODataset(Dataset):
    def __init__(self, coco_file, images_dir):
        self.images_dir = images_dir
        self.coco_annotations = self.load_coco_annotations(coco_file)
        self.bounding_boxes, self.image_files = self.process_data(self.coco_annotations)

    def load_coco_annotations(self, file_path):
        with open(file_path, 'r') as f:
            coco_data = json.load(f)
        return coco_data

    def process_data(self, coco_data):
        max_instances_per_class = 2
        num_classes = 9
        data_points = 4
        input_size = num_classes * max_instances_per_class * data_points
        bounding_boxes = []
        image_files = []

        for image_info in coco_data['images']:
            image_id = image_info['id']

            input_vector = [0] * input_size
            annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] == image_id]
            for ann in annotations:
                class_id = ann['category_id']
                bbox = ann['bbox']
                instance_index = sum(1 for a in annotations if a['category_id'] == class_id) - 1
                if instance_index < max_instances_per_class:
                    start_index = (class_id * max_instances_per_class + instance_index) * 4
                    length = 1280.0
                    width = 720.0
                    bbox[0] = bbox[0] / length
                    bbox[1] = bbox[1] / width
                    bbox[2] = bbox[2] / length
                    bbox[3] = bbox[3] / width
                    input_vector[start_index:start_index + 4] = bbox

            bounding_boxes.append(input_vector)
            image_files.append(image_info['file_name'])

        return bounding_boxes, image_files

    def __len__(self):
        return len(self.bounding_boxes)

    def __getitem__(self, idx):
        bounding_box = torch.tensor(self.bounding_boxes[idx], dtype=torch.float32)
        image_file = self.image_files[idx]
        image_path = os.path.join(self.images_dir, image_file)
        return bounding_box, image_path


# Model (FFNN, LSTM) Architecture for Lat and Lon Prediction

# FFNN Architecture with first 3 digits

In [88]:
# model = torch.load("ffnn_location_no_shortened_digits.pt")
# print(model)
class LatLonModelWith3Digits(nn.Module):
    def __init__(self):
        super(LatLonModelWith3Digits, self).__init__()
        self.fc1 = nn.Linear(72, 40)
        self.bn1 = nn.BatchNorm1d(40)
        self.fc2 = nn.Linear(40, 2)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        x = F.leaky_relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# FFNN Architecture without first 3 digits

# LSTM Architecture with first 3 digits

# LSTM Architecture without first 3 digits

In [119]:
import pandas as pd

# Function to open the CSV file, correct the image names, and extract swift lat/lon data
def open_csv(filepath):
    """Opens CSV, returns modified filenames and corresponding swift lat/lon."""
    df = pd.read_csv(filepath, encoding='utf-8')  # Ensure correct encoding

    corrected_image = []
    swift_data = []

    # Making the correct format and extract swift_lat, swift_lon
    for index, row in df.iterrows():
        image = row["timestamp"]
        if isinstance(image, str):  # Ensure the value is a string
            corrected_img_name = image.replace(":", "_").strip()  # Replace ":" and remove leading/trailing spaces
            # Appending darker, darkest image
            darker_img = corrected_img_name + "_darker.jpg"
            darkest_img = corrected_img_name + "_darkest.jpg"
            corrected_image.append((corrected_img_name + ".jpg", row["swift_latitude"], row["swift_longitude"]))
            corrected_image.append((darker_img, row["swift_latitude"], row["swift_longitude"]))
            corrected_image.append((darkest_img, row["swift_latitude"], row["swift_longitude"]))
        else:
            continue
    
    return corrected_image

csv_path = "test_5/position_data_logger.csv"
corrected_image = open_csv(csv_path)
# print(corrected_image)

In [130]:
# Load the Faster RCNN model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1)

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one (for 9 classes + background)
num_classes = 9 + 1  # 9 classes + background
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the model state dict (from previously saved model)
model_save_path = "faster_rcnn.pth"
model.load_state_dict(torch.load(model_save_path))
model.eval()

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Categories for COCO format
categories = [{"id": i, "name": f"class_{i}"} for i in range(1, 10)]  # Modify with actual category names if you have them

def load_image(file_path):
    image = Image.open(file_path).convert("RGB")
    transform = ToTensor()  # Create a ToTensor transformation
    return transform(image).unsqueeze(0)  # Convert image to tensor and add batch dimension

# Function to annotate images in COCO format
def annotate_images_in_folder(found_images, folder_path, output_json_path, threshold=0.5):
    image_id = 1
    annotation_id = 1
    coco_annotations = {
        "images": [],
        "annotations": [],
        "categories": categories
    }
    # print(found_images)
    # Loop through found images, checking if they exist in the folder
    for image_info in found_images:
        filename, swift_latitude, swift_longitude = image_info 
        file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(file_path):
            # print(f"Image not found: {filename}")
            continue  # Skip if the image is not found

        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            # Load and preprocess the image
            image = load_image(file_path).to(device)

            # Predict using Faster R-CNN
            with torch.no_grad():
                prediction = model(image)

            # Get image dimensions
            image_np = image.squeeze().permute(1, 2, 0).cpu().numpy()
            height, width, _ = image_np.shape

            # Append image details to COCO annotations
            coco_annotations["images"].append({
                "id": image_id,
                "file_name": filename,
                "width": int(width),
                "height": int(height),
                "swift_latitude": float(swift_latitude),  # Add swift latitude
                "swift_longitude": float(swift_longitude)  # Add swift longitude
            })

            # Get the predictions
            boxes = prediction[0]['boxes'].cpu().numpy()
            scores = prediction[0]['scores'].cpu().numpy()
            labels = prediction[0]['labels'].cpu().numpy()

            # Create a list of (score, label, box) tuples and sort by score descending
            predictions = sorted(zip(scores, labels, boxes), key=lambda x: x[0], reverse=True)

            # Keep track of the count of predictions per label type (to limit to 2 per class)
            label_counts = {}

            # Add top 2 predictions for each class
            for score, label, box in predictions:
                if score >= threshold:
                    if label not in label_counts:
                        label_counts[label] = 0
                    if label_counts[label] < 2:
                        xmin, ymin, xmax, ymax = box
                        width = xmax - xmin
                        height = ymax - ymin
                        coco_annotations["annotations"].append({
                            "id": annotation_id,
                            "image_id": image_id,
                            "category_id": int(label),
                            "bbox": [float(xmin), float(ymin), float(width), float(height)],
                            "score": float(score)
                        })
                        label_counts[label] += 1
                        annotation_id += 1

            image_id += 1

    # Save the annotations to a JSON file
    with open(output_json_path, 'w') as f:
        json.dump(coco_annotations, f, indent=4)

# Example usage:
folder_path = 'test_5/captured_images'  # The folder where the images are stored
output_json_path = 'test_5/New_2BBox_Each_Class_Annotation.json'

# Annotate images and create a COCO-style dataset
annotate_images_in_folder(corrected_image, folder_path, output_json_path)
print("Finished creating COCO-style annotations.")

Finished creating COCO-style annotations.


# Checking to make sure the model is predicting the bbox right

In [146]:
transform = transforms.Compose([transforms.ToTensor()])

# Initialize dataset and dataloader
annotation_file = 'test_5/New_2BBox_Each_Class_Annotation.json'
image_dir = 'test_5_found_image'
dataset = COCODataset(annotation_file, image_dir, transforms=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=lambda x: tuple(zip(*x)))
# Function to visualize a batch of images
def visualize_batch(images, targets):
    fig, axes = plt.subplots(1, len(images), figsize=(20, 5))
    for i, (img, target) in enumerate(zip(images, targets)):
        img = img.permute(1, 2, 0).numpy()  # Convert tensor to numpy array
        img = (img * 255).astype(np.uint8)  # Convert to uint8
        axes[i].imshow(img)
        boxes = target['boxes'].numpy()
        for box in boxes:
            xmin, ymin, xmax, ymax = box
            rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, color='red')
            axes[i].add_patch(rect)
        axes[i].axis('off')
    plt.show()

# Get a batch of data
images, targets = next(iter(dataloader))

# Visualize the batch
visualize_batch(images, targets)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


ValueError: too many values to unpack (expected 2)

# Loading LatLonModelWith3Digits

In [163]:
lat_lon_model_with_3digits = LatLonModelWith3Digits()
lat_lon_model_with_3digits.load_state_dict(torch.load('ffnn_location_no_shortened_digits.pt', weights_only=True))  # Load the saved weights
lat_lon_model_with_3digits.eval()  # Set the model to evaluation mode

# Making predictions and saving them
annotation_file = 'test_5/New_2BBox_Each_Class_Annotation.json'
found_image_dir = "test_5_found_image"
lat_lon_dataset_with_3digits = COCODataset(annotation_file, found_image_dir)
lat_lon_test_dataloader = DataLoader(lat_lon_dataset_with_3digits, batch_size=10, shuffle=False)

print(f"Total images in the dataset: {len(lat_lon_dataset_with_3digits)}")
torch.set_printoptions(precision=8)

all_predictions = []
all_targets = []
image_paths = []  # Uncommented to track image paths
with torch.no_grad():
    for inputs, targets in lat_lon_test_dataloader:
        # Predict outputs
        outputs = lat_lon_model_with_3digits(inputs)
        
        # print(outputs)
        all_predictions.append(outputs)
        all_targets.append(targets)
        
        image_paths.append(targets)  
print(all_predictions)
flat_predictions = torch.cat(all_predictions, dim=0).cpu().numpy()  # Converts to numpy array
flat_image_paths = [path for batch in image_paths for path in batch]  # Flatten image paths if batched

# Check if all images were processed
print(f"Number of batches processed: {len(all_predictions)}")
print(f"Number of predictions made: {sum([len(batch) for batch in all_predictions])}")
# print(all_targets)
# Optionally, print or check specific outputs
# for i, prediction in enumerate(all_predictions):
#     print(f"Batch {i}: {prediction}")

data = {
    "image_path" : flat_image_paths,
    "predicted_lat" : flat_predictions[:, 0],
    "predicted_lon": flat_predictions[:, 1]
}

# Creating a csv file for lat and lon with 3 digits 
df = pd.DataFrame(data)
df.to_csv("predictions_for_testing/new_lat_lon_model_with_3digits.csv", index=False, float_format="%.8f")

print(f"Total image paths processed: {len(image_paths)}")

Total images in the dataset: 833
[tensor([[ 35.77074432, -78.67474365],
        [ 35.77074814, -78.67473602],
        [ 35.77061462, -78.67485046],
        [ 35.77061462, -78.67485046],
        [ 35.77060318, -78.67469788],
        [ 35.77045441, -78.67483521],
        [ 35.77024460, -78.67494202],
        [ 35.77035904, -78.67494202],
        [ 35.77040863, -78.67486572],
        [ 35.77042007, -78.67484283]]), tensor([[ 35.77040863, -78.67487335],
        [ 35.77040863, -78.67487335],
        [ 35.77040482, -78.67484283],
        [ 35.77041626, -78.67483521],
        [ 35.77053070, -78.67481232],
        [ 35.77052307, -78.67481232],
        [ 35.77044678, -78.67482758],
        [ 35.77045441, -78.67481995],
        [ 35.77044296, -78.67482758],
        [ 35.77044678, -78.67482758]]), tensor([[ 35.77046585, -78.67481995],
        [ 35.77046204, -78.67481995],
        [ 35.77045441, -78.67482758],
        [ 35.77064514, -78.67475891],
        [ 35.77049637, -78.67493439],
        [ 35