<a href="https://colab.research.google.com/github/shinnakayama/GLMM-course/blob/master/transshipment_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os

!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

# create a folder to mount
temp_folder_name = 'temp/'
if not os.path.exists(temp_folder_name):
    os.mkdir(temp_folder_name)

# mount
my_bucket = 'planet_imagery'
!gcsfuse --implicit-dirs {my_bucket} {temp_folder_name}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1022  100  1022    0     0  15336      0 --:--:-- --:--:-- --:--:-- 15484
OK
86 packages can be upgraded. Run 'apt list --upgradable' to see them.
[1;33mW: [0mhttp://packages.cloud.google.com/apt/dists/gcsfuse-bionic/InRelease: Key is stored in legacy trusted.gpg keyring (/etc/apt/trusted.gpg), see the DEPRECATION section in apt-key(8) for details.[0m
[1;33mW: [0mSkipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)[0m
gcsfuse is already the newest version (2.4.0).
0 upgraded, 0 newly installed, 0 to remove and 86 not upgraded.
{"timestamp":{"seconds":1722554243,"nanos":249333428},"severity":"INFO","message":"Start gcsfuse/2.4.0 (Go version go1.22.4) for app \"\" using mount point: /conten

In [3]:
import os
import json
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.rpn import AnchorGenerator, RPNHead
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np
import pandas as pd
import contextlib
import io
from collections import Counter
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [4]:
class CustomDataset(Dataset):
    def __init__(self, json_file, root_dir, transforms=None):
        with open(json_file) as f:
            self.data = json.load(f)
        self.root_dir = root_dir
        self.transforms = transforms

        self.imgs = self.data['images']
        self.annotations = self.data['annotations']
        self.categories = self.data['categories']

        self.img_to_anns = {img['id']: [] for img in self.imgs}
        for ann in self.annotations:
            self.img_to_anns[ann['image_id']].append(ann)

        self.imgs = [img for img in self.imgs if self.img_to_anns[img['id']]]

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img_info = self.imgs[idx]
        img_path = os.path.join(self.root_dir, img_info['file_name'])
        img = Image.open(img_path).convert("RGB")

        annots = self.img_to_anns[img_info['id']]

        boxes = []
        labels = []
        for annot in annots:
            xmin, ymin, width, height = annot['bbox']
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            labels.append(annot['category_id'])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        image_id = torch.tensor([img_info['id']])
        area = torch.as_tensor([annot['area'] for annot in annots], dtype=torch.float32)
        iscrowd = torch.as_tensor([annot['iscrowd'] for annot in annots], dtype=torch.int64)

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = image_id

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

In [7]:
def calculate_mean_std(root_dir):
    means = []
    stds = []
    transform = transforms.ToTensor()
    for img_file in tqdm(os.listdir(root_dir)):
        img_path = os.path.join(root_dir, img_file)
        image = Image.open(img_path).convert("RGB")
        tensor = transform(image)
        means.append(tensor.mean(dim=[1, 2]))
        stds.append(tensor.std(dim=[1, 2]))

    mean = torch.stack(means).mean(dim=0)
    std = torch.stack(stds).mean(dim=0)

    return mean, std

root_dir = 'temp/majuro_2022/chopped'
mean, std = calculate_mean_std(root_dir)

100%|██████████| 41/41 [00:10<00:00,  3.82it/s]


In [8]:
def get_transform(train, mean, std):
    transforms_list = []
    transforms_list.append(transforms.ToTensor())
    if train:
        transforms_list.append(transforms.RandomHorizontalFlip(0.5))
        transforms_list.append(transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2))
    transforms_list.append(transforms.Normalize(mean=mean.tolist(), std=std.tolist()))
    return transforms.Compose(transforms_list)

In [9]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [10]:
def visualize_image(img, target, prediction):
    img = img.permute(1, 2, 0).cpu().numpy()
    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(img)

    # Plot ground truth boxes
    for box in target['boxes']:
        xmin, ymin, xmax, ymax = box.cpu().numpy()
        width, height = xmax - xmin
        height = ymax - ymin
        rect = patches.Rectangle((xmin, ymin), width, height, linewidth=2, edgecolor='g', facecolor='none')
        ax.add_patch(rect)

    # Plot predicted boxes
    for box in prediction['boxes']:
        xmin, ymin, xmax, ymax = box.cpu().numpy()
        width, height = xmax - xmin
        height = ymax - ymin
        rect = patches.Rectangle((xmin, ymin), width, height, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

    plt.show()

In [11]:
def evaluate(model, data_loader, device, coco_gt):
    model.eval()
    coco_results = []
    with torch.no_grad():
        for images, targets in data_loader:
            images = list(img.to(device) for img in images)
            outputs = model(images)
            for target, output in zip(targets, outputs):
                image_id = target["image_id"].item()
                boxes = output["boxes"].cpu().numpy()
                scores = output["scores"].cpu().numpy()
                labels = output["labels"].cpu().numpy()

                for box, score, label in zip(boxes, scores, labels):
                    xmin, ymin, xmax, ymax = box
                    width = xmax - xmin
                    height = ymax - ymin
                    coco_results.append({
                        "image_id": int(image_id),
                        "category_id": int(label),
                        "bbox": [float(xmin), float(ymin), float(width), float(height)],
                        "score": float(score)
                    })

    with open("results.json", "w") as f:
        json.dump(coco_results, f, indent=4)

    # Suppress COCOeval output
    with contextlib.redirect_stdout(io.StringIO()):
        coco_dt = coco_gt.loadRes("results.json")
        coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
        coco_eval.params.iouThrs = np.array([0.5])  # Set IoU threshold to 0.5
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

    # Prepare AP per class
    precisions = coco_eval.eval['precision']
    classes = [cat['name'] for cat in coco_gt.loadCats(coco_gt.getCatIds())]
    ap_per_class = {}
    for idx, class_name in enumerate(classes):
        # Calculate AP at IoU threshold 0.5
        ap = np.mean(precisions[:, :, idx, 0, -1])
        ap_per_class[class_name] = ap

    ap_table = pd.DataFrame(list(ap_per_class.items()), columns=['Class', 'AP'])
    mAP = coco_eval.stats[0]

    return mAP, ap_table, coco_results

In [12]:
def count_classes(dataset):
    class_counts = Counter()
    for img, target in dataset:
        labels = target['labels'].numpy()
        class_counts.update(labels)
    return class_counts

In [13]:
def visualize_results(model_path, dataset, device):
    # Load the trained model
    model = fasterrcnn_resnet50_fpn_v2(weights=None)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    num_classes = len(dataset.categories) + 1
    model.roi_heads.box_predictor = torch.nn.Linear(in_features, num_classes)
    model.load_state_dict(torch.load(model_path))
    model.to(device)
    model.eval()

    # DataLoader for the dataset
    data_loader = DataLoader(dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

    # Visualize the test images with predictions
    for images, targets in data_loader:
        images = list(img.to(device) for img in images)
        outputs = model(images)
        for img, target, output in zip(images, targets, outputs):
            visualize_image(img, target, output)

In [15]:
def main():
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load the custom dataset
    dataset = CustomDataset(json_file='instances_default.json', root_dir='temp/majuro_2022/chopped', transforms=get_transform(train=True, mean=mean, std=std))

    # Determine split sizes
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size

    # Split the dataset into train and test set
    dataset_train, dataset_test = random_split(dataset, [train_size, test_size])

    # Count the number of each class in the training dataset
    class_counts_train = count_classes(dataset_train)
    print("Class counts in the training dataset:")
    for class_id, count in class_counts_train.items():
        class_name = next(cat['name'] for cat in dataset.categories if cat['id'] == class_id)
        print(f"{class_name}: {count}")

    # Count the number of each class in the test dataset
    class_counts_test = count_classes(dataset_test)
    print("\nClass counts in the test dataset:")
    for class_id, count in class_counts_test.items():
        class_name = next(cat['name'] for cat in dataset.categories if cat['id'] == class_id)
        print(f"{class_name}: {count}")

    # Update transforms for the test set
    dataset_test.dataset.transforms = get_transform(train=False, mean=mean, std=std)

    data_loader = DataLoader(dataset_train, batch_size=2, shuffle=True, num_workers=4, collate_fn=collate_fn)
    data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=4, collate_fn=collate_fn)

    # Load a pre-trained model with specific weights
    weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
    model = fasterrcnn_resnet50_fpn_v2(weights=weights)

    # Define custom anchor generator with sizes and aspect ratios
    anchor_generator = AnchorGenerator(
        sizes=((4,), (8,), (16,), (32,), (64,)),
        aspect_ratios=((0.5, 2.0, 4.0),) * 5
    )

    # Replace the RPN anchor generator with the custom one
    model.rpn.anchor_generator = anchor_generator

    # Optionally replace the RPN head if needed
    model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])

    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained head with a new one (num_classes includes the background)
    num_classes = len(dataset.categories) + 1
    model.roi_heads.box_predictor = torch.nn.Linear(in_features, num_classes)

    model.to(device)

    # Construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    num_epochs = 10

    # Load COCO ground truth for evaluation
    coco_gt = COCO('instances_default.json')

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, targets in tqdm(data_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            running_loss += losses.item()

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

        lr_scheduler.step()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(data_loader):.4f}")

        # Evaluate every 3 epochs
        if (epoch + 1) % 3 == 0:
            mAP, ap_table, _ = evaluate(model, data_loader_test, device, coco_gt)
            print(f"Epoch [{epoch+1}/{num_epochs}], mAP: {mAP:.4f}")
            print(ap_table)

    # Final evaluation after all epochs
    mAP, ap_table, predictions = evaluate(model, data_loader_test, device, coco_gt)
    print(f"Final Evaluation, mAP: {mAP:.4f}")
    print(ap_table)

    # Save the model
    torch.save(model.state_dict(), "fasterrcnn_model.pth")

    # Now, you can call visualize_results when you want to visualize the results
    # Example: visualize_results("fasterrcnn_model.pth", dataset_test, device)

if __name__ == "__main__":
    main()

Class counts in the training dataset:
vessel1: 92
vessel2: 24
vessel3: 14

Class counts in the test dataset:
vessel2: 8
vessel1: 28
vessel3: 2


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:02<00:00, 66.0MB/s]


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


Training Epoch 1/10:   0%|          | 0/15 [00:03<?, ?it/s]


ValueError: too many values to unpack (expected 2)