In [1]:
!pip install -r requirements.txt

Collecting matplotlib>=3.2.2
  Downloading matplotlib-3.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting numpy<1.24.0,>=1.18.5
  Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting opencv-python>=4.1.1
  Downloading opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (63.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.0/63.0 MB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting scipy>=1.4.1
  Downloading scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.6/3

In [2]:
# Cloning the YOLOv7 repo
!git clone https://github.com/WongKinYiu/yolov7.git

Cloning into 'yolov7'...
remote: Enumerating objects: 1197, done.[K
remote: Total 1197 (delta 0), reused 0 (delta 0), pack-reused 1197 (from 1)[K
Receiving objects: 100% (1197/1197), 74.23 MiB | 16.23 MiB/s, done.
Resolving deltas: 100% (520/520), done.


In [15]:
import os
import xml.etree.ElementTree as ET

annotations_dir = 'NEUDET/ANNOTATIONS'
labels_dir = 'NEUDET/labels'
images_dir = 'NEUDET/IMAGES'

if not os.path.exists(labels_dir):
    os.makedirs(labels_dir)

class_names = ['crazing', 'inclusion', 'patches', 'pitted_surface', 'rolled_in_scale', 'scratches']
class_map = {name: idx for idx, name in enumerate(class_names)}

def convert_annotation(xml_file, img_width=200, img_height=200):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    annotation_lines = []
    for obj in root.findall('object'):
        cls = obj.find('name').text.strip().lower()
        if cls not in class_map:
            continue  # skip unknown class
        cls_id = class_map[cls]
        xmlbox = obj.find('bndbox')
        xmin = float(xmlbox.find('xmin').text)
        ymin = float(xmlbox.find('ymin').text)
        xmax = float(xmlbox.find('xmax').text)
        ymax = float(xmlbox.find('ymax').text)

        # Convert to YOLO format
        x_center = ((xmin + xmax) / 2.0) / img_width
        y_center = ((ymin + ymax) / 2.0) / img_height
        width = (xmax - xmin) / img_width
        height = (ymax - ymin) / img_height
        annotation_lines.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
    return annotation_lines

# Process each .xml in ANNOTATIONS
for filename in os.listdir(annotations_dir):
    if filename.endswith('.xml'):
        xml_path = os.path.join(annotations_dir, filename)
        yolo_lines = convert_annotation(xml_path)
        base_name = os.path.splitext(filename)[0]
        txt_path = os.path.join(labels_dir, base_name + '.txt')
        with open(txt_path, 'w') as f:
            f.write("\n".join(yolo_lines))

In [16]:
###################################
# 5) Verify label files and remove corrupted
###################################
def is_valid_label_file(file_path):
    """
    Check if a YOLO-format label file is valid.
    A valid file has each non-empty line split into exactly 5 values
    and no duplicate bounding boxes.
    Empty files are allowed (no object).
    """
    try:
        with open(file_path, 'r') as f:
            lines = [line.strip() for line in f if line.strip()]
        if not lines:
            return True  # empty => OK
        boxes = []
        for line in lines:
            parts = line.split()
            if len(parts) != 5:
                print(f"[INVALID] {file_path} => line '{line}' does not have 5 values.")
                return False
            try:
                box = tuple(float(x) for x in parts)
            except:
                print(f"[INVALID] {file_path} => cannot convert line '{line}' to float.")
                return False
            boxes.append(box)
        if len(boxes) != len(set(boxes)):
            print(f"[INVALID] {file_path} => contains duplicate bounding boxes.")
            return False
        return True
    except Exception as e:
        print(f"[ERROR] {file_path} => {e}")
        return False

valid_label_files = []
for filename in os.listdir(labels_dir):
    if filename.endswith('.txt'):
        file_path = os.path.join(labels_dir, filename)
        if is_valid_label_file(file_path):
            valid_label_files.append(filename)
        else:
            print(f"Skipping corrupted label file: {filename}")

# Remove corrupted from disk
for filename in os.listdir(labels_dir):
    if filename.endswith('.txt'):
        file_path = os.path.join(labels_dir, filename)
        if not is_valid_label_file(file_path):
            os.remove(file_path)
            print(f"Removed corrupted file: {filename}")

print(f"Valid label files: {len(valid_label_files)}")

[INVALID] NEUDET/labels/crazing_120.txt => contains duplicate bounding boxes.
Skipping corrupted label file: crazing_120.txt
[INVALID] NEUDET/labels/inclusion_62.txt => contains duplicate bounding boxes.
Skipping corrupted label file: inclusion_62.txt
[INVALID] NEUDET/labels/patches_198.txt => contains duplicate bounding boxes.
Skipping corrupted label file: patches_198.txt
[INVALID] NEUDET/labels/crazing_120.txt => contains duplicate bounding boxes.
Removed corrupted file: crazing_120.txt
[INVALID] NEUDET/labels/inclusion_62.txt => contains duplicate bounding boxes.
Removed corrupted file: inclusion_62.txt
[INVALID] NEUDET/labels/patches_198.txt => contains duplicate bounding boxes.
Removed corrupted file: patches_198.txt
Valid label files: 1797


In [17]:
###################################
# 6) Split into train/val/test
###################################
import shutil
import random

output_base = './NEUDET_split'
os.makedirs(output_base, exist_ok=True)
for phase in ['train', 'val', 'test']:
    os.makedirs(os.path.join(output_base, phase, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_base, phase, 'labels'), exist_ok=True)

# Gather images
image_files = [f for f in os.listdir(images_dir) if f.endswith('.jpg')]
random.shuffle(image_files)

n = len(image_files)
train_split = int(0.8 * n)
val_split = int(0.9 * n)

train_files = image_files[:train_split]
val_files = image_files[train_split:val_split]
test_files = image_files[val_split:]

def copy_files(file_list, phase):
    for file in file_list:
        # Copy image
        shutil.copy(os.path.join(images_dir, file),
                    os.path.join(output_base, phase, 'images', file))
        # Copy label
        label_file = os.path.splitext(file)[0] + '.txt'
        src_label = os.path.join(labels_dir, label_file)
        dst_label = os.path.join(output_base, phase, 'labels', label_file)
        if os.path.exists(src_label):
            shutil.copy(src_label, dst_label)

copy_files(train_files, 'train')
copy_files(val_files, 'val')
copy_files(test_files, 'test')

In [18]:
###################################
# 7) Create the data/neu_det.yaml inside yolov7
###################################
%cd yolov7

!mkdir -p data
with open('data/neu_det.yaml', 'w') as f:
    f.write("""train: ../NEUDET_split/train/images
val: ../NEUDET_split/val/images
test: ../NEUDET_split/test/images

nc: 6
names: [crazing, inclusion, patches, pitted_surface, rolled_in_scale, scratches]
""")

/workspace/yolov7


In [19]:
###################################
# 8) Append custom modules to models/common.py
###################################
custom_code = r"""
# ----- Custom Modules for SS-YOLO -----
import torch
import torch.nn as nn

class DWConv(nn.Module):

    def __init__(self, c1, c2, k=3, s=1):
        super(DWConv, self).__init__()
        self.dw = nn.Conv2d(c1, c1, k, s, k//2, groups=c1, bias=False)
        self.bn1 = nn.BatchNorm2d(c1)
        self.act1 = nn.ReLU(inplace=True)
        self.pw = nn.Conv2d(c1, c2, 1, 1, 0, bias=False)
        self.bn2 = nn.BatchNorm2d(c2)
    def forward(self, x):
        x = self.act1(self.bn1(self.dw(x)))
        x = self.bn2(self.pw(x))
        return x

class DSimSPPF(nn.Module):

    def __init__(self, c1, c2, k=5):
        super(DSimSPPF, self).__init__()
        self.conv1 = nn.Conv2d(c1, c1, 1, 1, 0, bias=False)
        self.bn1 = nn.BatchNorm2d(c1)
        self.act1 = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=k, stride=1, padding=k//2)
        self.conv2 = nn.Conv2d(c1*4, c1*4, 3, 1, 1, groups=c1*4, bias=False)
        self.point = nn.Conv2d(c1*4, c2, 1, 1, 0, bias=False)
        self.bn2 = nn.BatchNorm2d(c2)
        self.act2 = nn.ReLU(inplace=True)
    def forward(self, x):
        x = self.act1(self.bn1(self.conv1(x)))
        y1 = self.pool(x)
        y2 = self.pool(y1)
        y3 = self.pool(y2)
        y = torch.cat([x, y1, y2, y3], dim=1)
        y = self.conv2(y)
        y = self.act2(self.bn2(self.point(y)))
        return y

class SimAM_Module(nn.Module):

    def __init__(self, lambda_val=0.1):
        super(SimAM_Module, self).__init__()
        self.lambda_val = lambda_val
    def forward(self, x):
        n, c, h, w = x.shape
        x_mean = x.mean(dim=[2,3], keepdim=True)
        d = (x - x_mean).pow(2)
        var = d.sum(dim=[2,3], keepdim=True) / (h*w - 1 + 1e-6)
        e_inv = d / (4 * (var + self.lambda_val)) + 0.5
        return x * torch.sigmoid(e_inv)
"""

# Append to common.py
with open('models/common.py', 'a') as f:
    f.write('\n' + custom_code + '\n')

In [20]:
###################################
# 9) Create ssyolo.yaml in cfg/training
###################################
!mkdir -p cfg/training
ssyolo_code = r"""
# SS-YOLO model configuration (MobileNetv3 backbone + SimAM + D-SimSPPF)
nc: 6
depth_multiple: 1.0
width_multiple: 1.0

# Usually, 3 anchors per scale
anchors:
  - [12,16, 19,36, 40,28]   # P3/8
  - [36,75, 76,55, 72,146]  # P4/16
  - [142,110, 192,243, 459,401]  # P5/32

backbone:
  [[-1, 1, Conv, [16, 3, 2]],          
   [-1, 1, Conv, [16, 3, 1]],          
   [[-2, -1], 1, Shortcut, [0]],       
   [-1, 1, Conv, [64, 1, 1]],          
   [-1, 1, DWConv, [24, 3, 2]],        
   [-1, 1, Conv, [72, 1, 1]],          
   [-1, 1, DWConv, [24, 3, 1]],        
   [[4, -1], 1, Shortcut, [0]],        
   [-1, 1, Conv, [72, 1, 1]],          
   [-1, 1, DWConv, [40, 5, 2]],        
   [-1, 1, Conv, [120, 1, 1]],         
   [-1, 1, DWConv, [40, 5, 1]],        
   [[9, -1], 1, Shortcut, [0]],        
   [-1, 1, Conv, [120, 1, 1]],         
   [-1, 1, DWConv, [40, 5, 1]],        
   [[12, -1], 1, Shortcut, [0]],       
   [-1, 1, Conv, [240, 1, 1]],         
   [-1, 1, DWConv, [80, 3, 2]],        
   [-1, 1, Conv, [200, 1, 1]],         
   [-1, 1, DWConv, [80, 3, 1]],        
   [[17, -1], 1, Shortcut, [0]],       
   [-1, 1, Conv, [184, 1, 1]],         
   [-1, 1, DWConv, [80, 3, 1]],        
   [[20, -1], 1, Shortcut, [0]],       
   [-1, 1, Conv, [184, 1, 1]],         
   [-1, 1, DWConv, [80, 3, 1]],        
   [[23, -1], 1, Shortcut, [0]],       
   [-1, 1, Conv, [480, 1, 1]],         
   [-1, 1, DWConv, [112, 3, 1]],       
   [-1, 1, Conv, [672, 1, 1]],         
   [-1, 1, DWConv, [112, 3, 1]],       
   [[28, -1], 1, Shortcut, [0]],       
   [-1, 1, Conv, [672, 1, 1]],         
   [-1, 1, DWConv, [160, 5, 2]],       
   [-1, 1, Conv, [960, 1, 1]],         
   [-1, 1, DWConv, [160, 5, 1]],       
   [[33, -1], 1, Shortcut, [0]],       
   [-1, 1, Conv, [960, 1, 1]],         
   [-1, 1, DWConv, [160, 5, 1]],       
   [[36, -1], 1, Shortcut, [0]],       
   [-1, 1, Conv, [960, 1, 1]]          
  ]

head:
  [[40, 1, DSimSPPF, [960, 960, 5]],           
   [41, 1, SimAM_Module, [0.1]],       
   [42, 1, Conv, [256, 1, 1]],         
   [43, 1, nn.Upsample, [None, 2, 'nearest']],  
   [26, 1, Conv, [256, 1, 1]],         
   [[45, 44], 1, Concat, [1]],         
   [46, 1, Conv, [256, 1, 1]],         
   [46, 1, Conv, [128, 3, 1]],         
   [48, 1, Conv, [128, 3, 1]],         
   [49, 1, Conv, [128, 1, 1]],         
   [50, 1, nn.Upsample, [None, 2, 'nearest']],  
   [15, 1, Conv, [128, 1, 1]],         
   [[52, 51], 1, Concat, [1]],         
   [53, 1, Conv, [128, 1, 1]],         
   [53, 1, Conv, [64, 3, 1]],          
   [55, 1, Conv, [64, 3, 1]],          
   [56, 1, Conv, [64, 3, 1]],          
   [57, 1, Conv, [64, 3, 1]],          
   [58, 1, Conv, [128, 1, 1]],         
   [59, 1, Conv, [128, 3, 2]],         
   [[60, 50], 1, Concat, [1]],         
   [61, 1, Conv, [256, 1, 1]],         
   [62, 1, Conv, [128, 3, 1]],         
   [63, 1, Conv, [128, 3, 1]],         
   [64, 1, Conv, [256, 3, 2]],         
   [[65, 43], 1, Concat, [1]],         
   [66, 1, Conv, [256, 1, 1]],         
   [67, 1, Conv, [128, 3, 1]],         
   [68, 1, Conv, [128, 3, 1]],         
   [69, 1, SimAM_Module, [0.1]],       
   [[59, 64, 69], 1, IDetect, [nc, anchors]]
  ]
"""
with open('cfg/training/ssyolo.yaml', 'w') as f:
    f.write(ssyolo_code)

%cd ..

/workspace


In [21]:
###################################
# 10) Verify model architecture
###################################
%cd yolov7
from models.yolo import Model

# IMPORTANT: Use ch=3 for RGB, since the paper uses 3-channel input
model = Model("cfg/training/ssyolo.yaml", ch=3, nc=6)
print(model)
%cd ..

/workspace/yolov7
Model(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (1): Conv(
      (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (2): Shortcut()
    (3): Conv(
      (conv): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (4): DWConv(
      (dw): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
      (bn1): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (pw): Conv2d(64, 24,

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [4]:
cd yolov7


/workspace/yolov7


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [7]:
!python test.py \
  --weights runs/train/ssyolo_scratch_neu/weights/best.pt \
  --data data/neu_det.yaml \
  --img 640 \
  --task test


Namespace(weights=['runs/train/ssyolo_scratch_neu/weights/best.pt'], data='data/neu_det.yaml', batch_size=32, img_size=640, conf_thres=0.001, iou_thres=0.65, task='test', device='', single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project='runs/test', name='exp', exist_ok=False, no_trace=False, v5_metric=False)
YOLOR 🚀 v0.1-128-ga207844 torch 2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4090, 24107.0625MB)

Fusing layers... 
IDetect.fuse
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Model Summary: 240 layers, 9097222 parameters, 0 gradients, 17.5 GFLOPS
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

[34m[1mtest: [0mScanning '../NEUDET_split/test/labels' images and labels... 180 found, 0 m[0m
[34m[1mtest: [0mNew cache created: ../NEUDET_split/test/labels.cache
               Class      Images      Labels           P           R      mAP@.5
                 