In [None]:
# Install the required packages for YOLOv8 and Comet ML
!pip install ultralytics comet_ml torch torchvision

In [None]:
# Set your Comet API key.
# `!export` runs in a throw-away subshell, so the variable never reaches this kernel;
# write it into the kernel's own environment instead. The key is prompted for rather
# than stored in the notebook: any key that was ever committed to a notebook should be
# treated as leaked and rotated.
import os
from getpass import getpass

if not os.environ.get("COMET_API_KEY"):
    os.environ["COMET_API_KEY"] = getpass("Comet API key: ")

In [None]:
import comet_ml

# Name of the Comet project passed to `comet_ml.init`.
COMET_PROJECT_NAME = "rt-detr"
comet_ml.init(project_name=COMET_PROJECT_NAME)

In [None]:
from tqdm.auto import tqdm

import os
import requests
import zipfile
import cv2
import matplotlib.pyplot as plt
import glob
import numpy as np
import random
import torch



def set_random_seed(seed):
  """Seed Python's `random`, NumPy and PyTorch, and configure cuDNN for determinism.

  Args:
    seed: Integer seed handed to every random number generator.
  """
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  # Disable the cuDNN auto-tuner and request deterministic kernels.
  cudnn = torch.backends.cudnn
  cudnn.benchmark = False
  cudnn.deterministic = True


set_random_seed(42)

In [None]:
!mkdir /content/Defect_Data

In [None]:
# Download dataset.
def download_file(url, save_name):
    """Download ``url`` to ``save_name`` unless that file already exists.

    The payload is streamed into ``<save_name>.part`` and only renamed to
    ``save_name`` once the transfer has finished, so an interrupted download
    is never mistaken for a complete file on the next run.

    Args:
        url: Address of the file to fetch.
        save_name: Destination path on disk.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if os.path.exists(save_name):
        print('File already present')
        return

    print("Downloading file")
    block_size = 1024
    partial_name = f"{save_name}.part"
    try:
        # The context manager releases the connection even if the transfer fails.
        with requests.get(url, stream=True, timeout=30) as response:
            # Fail loudly instead of saving an HTML error page as the archive.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            with open(partial_name, 'wb') as f, tqdm(
                total=total_size or None,  # unknown length -> open-ended progress bar
                unit='iB',
                unit_scale=True
            ) as progress_bar:
                for data in response.iter_content(block_size):
                    f.write(data)
                    progress_bar.update(len(data))
        os.replace(partial_name, save_name)
    finally:
        # Left over only when the download did not complete.
        if os.path.exists(partial_name):
            os.remove(partial_name)

# Source archive and the local path it is stored under.
# NOTE(review): the query string carries an export key — confirm it is meant to be public.
DATASET_URL = 'https://app.roboflow.com/ds/Gi7A06UnbM?key=fl4u4rF4Eq'
DATASET_ZIP = '/content/Defect_Data/defects.zip'

download_file(DATASET_URL, DATASET_ZIP)

Downloading file


  0%|          | 0.00/66.2M [00:00<?, ?iB/s]

In [None]:
# Unzip the data file
def unzip(zip_file=None, extract_to="./"):
    """Extract every member of ``zip_file`` into ``extract_to``.

    A missing, unreadable or corrupt archive is reported with a message
    instead of raising, so the notebook keeps running; any other exception
    (including a keyboard interrupt) propagates.

    Args:
        zip_file: Path of the archive to extract.
        extract_to: Destination directory; defaults to the current working
            directory.
    """
    if zip_file is None:
        print("Invalid file")
        return
    try:
        with zipfile.ZipFile(zip_file) as z:
            z.extractall(extract_to)
    except (zipfile.BadZipFile, OSError) as err:
        # Include the cause so a wrong path can be told apart from a corrupt archive.
        print(f"Invalid file: {err}")
        return
    print("Extracted all")


unzip('/content/Defect_Data/defects.zip')

Extracted all


In [None]:
# Move the test/train/valid folders (presumably produced by the unzip step) into the dataset folder.
# NOTE(review): the sources no longer exist after the first run, so re-running this cell will presumably fail — confirm.
%mv /content/test /content/train /content/valid /content/Defect_Data/

In [None]:
from ultralytics import RTDETR

model_name = "rtdetr-l.pt"
# Use the rewritten dataset config: new_data.yaml is the copy of data.yaml whose
# train/val/test entries point at /content/Defect_Data (it is written by the YAML
# cell that runs before training).
dataset_name = "/content/new_data.yaml"

# Load a COCO-pretrained RT-DETR-l model
model = RTDETR(model_name)

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/rtdetr-l.pt to 'rtdetr-l.pt'...


100%|██████████| 63.4M/63.4M [00:00<00:00, 256MB/s]


In [None]:
# @title
from ultralytics import settings

# Set the `comet` entry of the Ultralytics settings to True.
comet_flag = {'comet': True}
settings.update(comet_flag)

In [None]:
# @title
import locale

# Force UTF-8 as the reported preferred encoding.
# The replacement keeps the optional `do_setlocale` argument of the real function,
# so callers that use `locale.getpreferredencoding(False)` do not hit a TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [None]:
# @title
import yaml

# Image folders of each split inside /content/Defect_Data.
split_dirs = {
    'train': '/content/Defect_Data/train/images',
    'val': '/content/Defect_Data/valid/images',
    'test': '/content/Defect_Data/test/images',
}

# Load the original dataset config and override its split locations.
with open('data.yaml', 'r') as read_file:
  content = yaml.safe_load(read_file)
content.update(split_dirs)

# Store the adjusted config under a new name; data.yaml itself is left untouched.
with open('new_data.yaml','w') as dump_file:
  yaml.dump(content,dump_file)



# Training


In [None]:
# Training configuration, forwarded unchanged to `model.train`.
# NOTE(review): `save=False` next to `save_period=10` looks contradictory — confirm
# against the Ultralytics trainer whether periodic checkpoints are actually written.
train_args = dict(
    data=dataset_name,
    project='RT-DETR',
    epochs=50,
    imgsz=640,
    batch=10,
    workers=2,
    seed=42,
    deterministic=True,
    plots=True,
    save_period=10,
    save=False,
    optimizer='SGD',
    verbose=False,
    cos_lr=True,
)
results = model.train(**train_args)

In [None]:
from ultralytics import RTDETR

# Reload the checkpoint written by the training run.
model = RTDETR('/content/RT-DETR/train/weights/last.pt')
# new_data.yaml is the dataset config whose split paths point at /content/Defect_Data.
dataset_name = '/content/new_data.yaml'

# Validate the model with the dataset config given explicitly.
metrics = model.val(data = dataset_name)

# Show every summary metric: bare expressions above the last line of a cell are
# evaluated but never displayed, so they are gathered into one mapping instead.
{
    'map50-95': metrics.box.map,
    'map50': metrics.box.map50,
    'map75': metrics.box.map75,
    'maps': metrics.box.maps,  # map50-95 of each category
}

Ultralytics YOLOv8.0.225 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla T4, 15102MiB)
rt-detr-l summary: 498 layers, 31989905 parameters, 0 gradients


[34m[1mval: [0mScanning /content/Defect_Data/valid/labels.cache... 83 images, 15 backgrounds, 0 corrupt: 100%|██████████| 83/83 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:07<00:00,  1.19s/it]


                   all         83        290       0.87      0.819      0.904      0.547
                bridge         83         79      0.913      0.927      0.977      0.559
                   gap         83         85      0.792      0.576       0.77      0.393
                  sraf         83        126      0.906      0.952      0.967      0.689
Speed: 0.8ms preprocess, 53.1ms inference, 0.0ms loss, 7.2ms postprocess per image
Results saved to [1mruns/detect/val[0m


array([    0.55866,     0.39342,     0.68902])

In [None]:
import pandas as pd

# Per-class validation metrics, transcribed from the validation table printed above.
# NOTE(review): the target lives on Google Drive, but no mount call is visible in
# this part of the notebook — confirm the drive is mounted before this cell runs.
metric_columns = ['Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95']
metric_rows = [
    ('all', 83, 290, 0.87, 0.819, 0.904, 0.547),
    ('bridge', 83, 79, 0.913, 0.927, 0.977, 0.559),
    ('gap', 83, 85, 0.792, 0.576, 0.77, 0.393),
    ('sraf', 83, 126, 0.906, 0.952, 0.967, 0.689),
]
validation_result = pd.DataFrame(metric_rows, columns=metric_columns)

validation_result.to_csv('/content/drive/MyDrive/metrics/rtdetrl_new_val_.csv')

In [None]:
%cp /content/RT-DETR/train/weights/rtdetr_l_new.pt /content/drive/MyDrive/models/

In [None]:
# glob returns paths in arbitrary order; sort them so the image indices in the
# inference log refer to the same files on every run.
test_img = sorted(glob.glob('/content/Defect_Data/test/images/*.jpg'))

In [None]:
from PIL import Image


# Run the detector on every test image with conf = 0.45.
test_results = model(test_img, conf=0.45)  # results list

# Show the results: `plot()` gives a BGR array, so the channel axis is reversed
# to obtain the RGB order PIL expects before the image is shown.
for r in test_results:
    Image.fromarray(r.plot()[..., ::-1]).show()



0: 640x640 4 srafs, 1: 640x640 3 bridges, 2: 640x640 6 gaps, 3: 640x640 (no detections), 4: 640x640 3 gaps, 5: 640x640 7 gaps, 6: 640x640 (no detections), 7: 640x640 3 gaps, 8: 640x640 4 srafs, 9: 640x640 10 gaps, 10: 640x640 5 bridges, 11: 640x640 4 srafs, 12: 640x640 4 bridges, 13: 640x640 4 srafs, 14: 640x640 4 srafs, 15: 640x640 4 srafs, 16: 640x640 4 srafs, 17: 640x640 2 bridges, 18: 640x640 6 bridges, 19: 640x640 3 bridges, 20: 640x640 3 bridges, 21: 640x640 5 srafs, 22: 640x640 2 bridges, 23: 640x640 5 srafs, 24: 640x640 6 bridges, 25: 640x640 (no detections), 26: 640x640 9 gaps, 27: 640x640 6 srafs, 28: 640x640 1 bridge, 29: 640x640 5 bridges, 30: 640x640 4 srafs, 31: 640x640 10 gaps, 32: 640x640 4 srafs, 33: 640x640 4 srafs, 34: 640x640 5 bridges, 35: 640x640 3 bridges, 36: 640x640 6 gaps, 1249.5ms
Speed: 1.7ms preprocess, 33.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)


In [None]:
# Copy a folder of inference outputs to Google Drive.
# NOTE(review): no visible cell writes to /content/runs/detect/RTDETRl/ (the saving
# predict call comes later and the folder shown afterwards is runs/detect/predict) —
# confirm where this directory comes from.
%cp -r /content/runs/detect/RTDETRl/ /content/drive/MyDrive/inference_outputs/RTDETR/

In [None]:


# Run inference on all test images and save the annotated images to disk.
# The returned list is bound to a name so the cell does not dump the repr of
# every result object as its output.
saved_predictions = model.predict(test_img, save=True, imgsz=640, conf=0.5)


In [None]:
from IPython.display import Image, display

def visualize(INFER_DIR):
    """Display every ``*.jpg`` found directly inside ``INFER_DIR``.

    Args:
        INFER_DIR: Directory that holds the saved prediction images.
    """
    # Sorted so the images appear in the same order on every run.
    infer_images = sorted(glob.glob(f"{INFER_DIR}/*.jpg"))
    print(infer_images)
    for pred_image in infer_images:
        display(Image(filename=pred_image))
        print('\n')


visualize('/content/runs/detect/predict')

# Import model from Google Drive and check reproducibility


In [None]:
from ultralytics import RTDETR

# Load the checkpoint stored on Google Drive and validate it again.
model = RTDETR('/content/drive/MyDrive/models/rtdetr_l_new.pt')

metrics = model.val(data = '/content/new_data.yaml')

# Show every summary metric: bare expressions above the last line of a cell are
# evaluated but never displayed, so they are gathered into one mapping instead.
{
    'map50-95': metrics.box.map,
    'map50': metrics.box.map50,
    'map75': metrics.box.map75,
    'maps': metrics.box.maps,  # map50-95 of each category
}

Ultralytics YOLOv8.0.226 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla T4, 15102MiB)
rt-detr-l summary: 498 layers, 31989905 parameters, 0 gradients
Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


100%|██████████| 755k/755k [00:00<00:00, 31.4MB/s]
[34m[1mval: [0mScanning /content/Defect_Data/valid/labels... 83 images, 15 backgrounds, 0 corrupt: 100%|██████████| 83/83 [00:00<00:00, 1652.66it/s]

[34m[1mval: [0mNew cache created: /content/Defect_Data/valid/labels.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:07<00:00,  1.28s/it]


                   all         83        290       0.87      0.819      0.904      0.547
                bridge         83         79      0.913      0.927      0.977      0.559
                   gap         83         85      0.792      0.576       0.77      0.393
                  sraf         83        126      0.906      0.952      0.967      0.689
Speed: 4.8ms preprocess, 62.6ms inference, 0.0ms loss, 1.0ms postprocess per image
Results saved to [1mruns/detect/val4[0m


array([    0.55866,     0.39342,     0.68902])

In [None]:
# Validate a second checkpoint from Google Drive on the same dataset config.
# NOTE(review): the recorded output of this cell reports an "rt-detr-l summary"
# with the same layer/parameter counts as the L model — confirm that
# rtdetr_x.pt really holds the X variant.
model_x = RTDETR('/content/drive/MyDrive/models/rtdetr_x.pt')

metrics_x = model_x.val(data = '/content/new_data.yaml')

# Show every summary metric: bare expressions above the last line of a cell are
# evaluated but never displayed, so they are gathered into one mapping instead.
{
    'map50-95': metrics_x.box.map,
    'map50': metrics_x.box.map50,
    'map75': metrics_x.box.map75,
    'maps': metrics_x.box.maps,  # map50-95 of each category
}

Ultralytics YOLOv8.0.226 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla T4, 15102MiB)
rt-detr-l summary: 498 layers, 31989905 parameters, 0 gradients


[34m[1mval: [0mScanning /content/Defect_Data/valid/labels.cache... 83 images, 15 backgrounds, 0 corrupt: 100%|██████████| 83/83 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:07<00:00,  1.23s/it]


                   all         83        290      0.872       0.84      0.907       0.53
                bridge         83         79       0.92      0.949      0.985      0.555
                   gap         83         85      0.786      0.612      0.769      0.396
                  sraf         83        126      0.911       0.96      0.969      0.638
Speed: 5.1ms preprocess, 49.1ms inference, 0.0ms loss, 2.5ms postprocess per image
Results saved to [1mruns/detect/val5[0m


array([    0.55454,      0.3959,     0.63825])

# Activation maps visualization


In [None]:

import torch
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import cv2 as cv
import argparse
from torchvision import models, transforms


# load the model
# `pretrained=True` is deprecated in torchvision; the explicit weights enum selects
# the same ImageNet checkpoint.
# NOTE(review): this rebinds `model`, which earlier cells use for the RT-DETR
# detector — the visualizations below are of ResNet50, not of the trained detector.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
print(model)
model_weights = [] # we will save the conv layer weights in this list
conv_layers = [] # we will save the conv layers in this list
# get all the model children as list
model_children = list(model.children())

In [None]:

# Start from empty lists so that re-running this cell does not append every
# layer a second time.
model_weights = []
conv_layers = []

# Collect the conv layers that are direct children of the model, plus those that
# are direct children of each block inside a top-level nn.Sequential stage.
# Convolutions nested any deeper than that are not visited.
for child in model_children:
    if type(child) == nn.Conv2d:
        conv_layers.append(child)
    elif type(child) == nn.Sequential:
        for block in child:
            conv_layers.extend(
                layer for layer in block.children() if type(layer) == nn.Conv2d
            )

# Keep the weights in a list parallel to `conv_layers`.
model_weights = [layer.weight for layer in conv_layers]
counter = len(conv_layers)
print(f"Total convolutional layers: {counter}")

Total convolutional layers: 49


In [None]:

# Print every collected conv layer next to the shape of its weight tensor.
for conv, weight in zip(conv_layers, model_weights):
    print(f"CONV: {conv} ====> SHAPE: {weight.shape}")

In [None]:

# visualize the first conv layer filters
plt.figure(figsize=(20, 17))
for i, filter in enumerate(model_weights[0]):
    plt.subplot(8, 8, i+1) # (8, 8) because in conv0 we have 7x7 filters and total of 64 (see printed shapes)
    plt.imshow(filter[0, :, :].detach(), cmap='gray')
    plt.axis('off')
    plt.savefig('/content/filter.png')
plt.show()

In [None]:
# read and visualize an image
image_path = '/content/3_b.jpg'
img = cv.imread(image_path)
if img is None:
    # cv.imread returns None instead of raising when the file is missing or unreadable;
    # fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {image_path}")
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
plt.imshow(img)
plt.show()
# define the transforms
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
])
# apply the transforms (the image is already a NumPy array, no extra copy needed)
img = transform(img)
print(img.size())
# unsqueeze to add a batch dimension
img = img.unsqueeze(0)
print(img.size())

In [None]:
# Feed the image through the collected conv layers one after another, keeping
# every intermediate output.
# NOTE(review): only the conv layers are chained — whatever sits between them in
# the network is skipped — so confirm these maps are what should be visualized.
# Gradients are not needed for plotting, so no autograd graph is built.
with torch.no_grad():
    results = [conv_layers[0](img)]
    for conv in conv_layers[1:]:
        # pass the result from the last layer to the next layer
        results.append(conv(results[-1]))
# `outputs` is a second name for the same list, not a copy
outputs = results

In [None]:
# Delete every PNG in the current working directory before the per-layer images are written.
# NOTE(review): if the working directory is /content this presumably also removes
# /content/filter.png saved by the filter plot — confirm that is intended.
!rm -r *.png

In [None]:
# For every conv-layer output, save its first feature map as /content/layer_<index>.png.
for num_layer, layer_output in enumerate(outputs):
    plt.figure(figsize=(30, 30))
    layer_viz = layer_output[0, :, :, :].data  # index 0 selects the single image in the batch
    print(layer_viz.size())
    # Only the first map of each layer is drawn and written to disk.
    for feature_map in layer_viz[:1]:
        plt.imshow(feature_map, cmap='gray')
        plt.axis("off")

        print(f"Saving layer {num_layer} feature maps...")
        plt.savefig(f"/content/layer_{num_layer}.png")
        plt.close()