In [1]:
!pip install torch-pruning ultralytics -q --upgrade

In [2]:
!ls /kaggle/input/last_last/pytorch/default/1/best.pt



/kaggle/input/last_last/pytorch/default/1/best.pt


In [16]:
!ls /kaggle/input/fishai-sample/annotations_s


images	labels


In [6]:
from ultralytics import YOLO
model = YOLO('/kaggle/input/last_last/pytorch/default/1/best.pt')
# for name, module in model.named_modules():
#     print(name)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [7]:

# ignored_layers = []
# for name, module in model.named_modules():
#     if 'dfl' in name:
#         ignored_layers.append(module)



In [8]:
# indexs = [2, 4, 6, 8, 12, 15, 18, 21]

# def get_model_groups(model, bottleneck_index):
#         return [
#             [
#                 f"model.{i}.m.{n}.cv2.conv"
#                 for n in range(len(model.module[i].m if hasattr(model, "module") else model[i].m))
#             ]
#             + [f"model.{i}.cv1.conv"]
#             for i in bottleneck_index
#         ]
# groups = get_model_groups(model,indexs)

In [9]:
# groups

In [10]:
from torch import nn
from ultralytics.nn.modules import Detect, C2f, Conv, Bottleneck

def infer_shortcut(bottleneck):
    c1 = bottleneck.cv1.conv.in_channels
    c2 = bottleneck.cv2.conv.out_channels
    return c1 == c2 and hasattr(bottleneck, 'add') and bottleneck.add
class C2f_v2(nn.Module):
    # CSP Bottleneck with 2 convolutions
    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        self.c = int(c2 * e)  # hidden channels
        self.cv0 = Conv(c1, self.c, 1, 1)
        self.cv1 = Conv(c1, self.c, 1, 1)
        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))

    def forward(self, x):
        # y = list(self.cv1(x).chunk(2, 1))
        y = [self.cv0(x), self.cv1(x)]
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))
def replace_c2f_with_c2f_v2(module):
    for name, child_module in module.named_children():
        if isinstance(child_module, C2f):
            # Replace C2f with C2f_v2 while preserving its parameters
            shortcut = infer_shortcut(child_module.m[0])
            c2f_v2 = C2f_v2(child_module.cv1.conv.in_channels, child_module.cv2.conv.out_channels,
                            n=len(child_module.m), shortcut=shortcut,
                            g=child_module.m[0].cv2.conv.groups,
                            e=child_module.c / child_module.cv2.conv.out_channels)
            transfer_weights(child_module, c2f_v2)
            setattr(module, name, c2f_v2)
        else:
            replace_c2f_with_c2f_v2(child_module)

def transfer_weights(c2f, c2f_v2):
    c2f_v2.cv2 = c2f.cv2
    c2f_v2.m = c2f.m

    state_dict = c2f.state_dict()
    state_dict_v2 = c2f_v2.state_dict()

    # Transfer cv1 weights from C2f to cv0 and cv1 in C2f_v2
    old_weight = state_dict['cv1.conv.weight']
    half_channels = old_weight.shape[0] // 2
    state_dict_v2['cv0.conv.weight'] = old_weight[:half_channels]
    state_dict_v2['cv1.conv.weight'] = old_weight[half_channels:]

    # Transfer cv1 batchnorm weights and buffers from C2f to cv0 and cv1 in C2f_v2
    for bn_key in ['weight', 'bias', 'running_mean', 'running_var']:
        old_bn = state_dict[f'cv1.bn.{bn_key}']
        state_dict_v2[f'cv0.bn.{bn_key}'] = old_bn[:half_channels]
        state_dict_v2[f'cv1.bn.{bn_key}'] = old_bn[half_channels:]

    # Transfer remaining weights and buffers
    for key in state_dict:
        if not key.startswith('cv1.'):
            state_dict_v2[key] = state_dict[key]

    # Transfer all non-method attributes
    for attr_name in dir(c2f):
        attr_value = getattr(c2f, attr_name)
        if not callable(attr_value) and '_' not in attr_name:
            setattr(c2f_v2, attr_name, attr_value)

    c2f_v2.load_state_dict(state_dict_v2)

In [11]:
import torch

from ultralytics import YOLO
import torch_pruning as tp

from ultralytics.nn.modules import Detect


def prune(model):

    for name, param in model.model.named_parameters():
        param.requires_grad = True
    replace_c2f_with_c2f_v2(model.model)
    model.model.eval()
    example_inputs = torch.randn(1, 3, 640, 640).to(model.device)
    imp = tp.importance.MagnitudeImportance(p=2)  

    ignored_layers = []
    unwrapped_parameters = []

    modules_list = list(model.model.modules())
    for i, m in enumerate(modules_list):
        if isinstance(m, (Detect,)):
            ignored_layers.append(m)

    iterative_steps =1 
    pruner = tp.pruner.MagnitudePruner(
        model.model,
        example_inputs,
        importance=imp,
        iterative_steps=iterative_steps,
        ch_sparsity=0.75,  
        ignored_layers=ignored_layers,
        unwrapped_parameters=unwrapped_parameters
    )
    base_macs, base_nparams = tp.utils.count_ops_and_params(model.model, example_inputs)
    pruner.step()

    pruned_macs, pruned_nparams = tp.utils.count_ops_and_params(pruner.model, example_inputs)
    # print(model.model)
    print("Before Pruning: MACs=%f G, #Params=%f G" % (base_macs / 1e9, base_nparams / 1e9))
    print("After Pruning: MACs=%f G, #Params=%f G" % (pruned_macs / 1e9, pruned_nparams / 1e9))
prune(model)




Before Pruning: MACs=128.968017 G, #Params=0.068185 G
After Pruning: MACs=18.232305 G, #Params=0.008156 G


In [15]:
model.predict(torch.rand(1,3,640,640),verbose=False)
None

In [17]:
!ls /kaggle/input/fishai-sample/annotations_s


images	labels


In [18]:
str_data = """
train: /kaggle/input/fishai-sample/annotations_s/images/train  # Path to training images
val: /kaggle/input/fishai-sample/annotations_s/images/val      # Path to validation images

labels:
  train: /kaggle/input/fishai-sample/annotations_s/labels/train  # Path to training labels
  val: /kaggle/input/fishai-sample/annotations_s/labels/val      # Path to validation labels


names:
  0: Long snouted lancetfish
  1: Roudie scolar
  2: Marlin
  3: Swordfish
  4: Great barracuda
  5: Thresher shark
  6: Lancetfish
  7: Pomfret
  8: Tuna
  9: Opah
  10: Pelagic stingray
  11: Mahi mahi
  12: Striped marlin
  13: Wahoo
  14: No fish
  15: Human
  16: Skipjack tuna
  17: Yellowfin tuna
  18: Unknown
  19: Blue marlin
  20: Oilfish
  21: Bigeye tuna
  22: Snake mackerel
  23: Indo Pacific sailfish
  24: Water
  25: Sickle pomfret
  26: Albacore
  27: Brama
  28: Black marlin
  29: Shortbill spearfish
  30: Mola mola
  31: Rainbow runner
  32: Escolar
  33: Shark
"""
with open('test.yaml','w') as f:
    f.write(str_data)


In [19]:
results = model.train(data="test.yaml", epochs=1, imgsz=640,device=[0],batch=0.95,augment=False)

Ultralytics 8.3.49 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/kaggle/input/last_last/pytorch/default/1/best.pt, data=test.yaml, epochs=1, time=None, patience=100, batch=0.95, imgsz=640, save=True, save_period=-1, cache=False, device=[0], workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, sho

100%|██████████| 755k/755k [00:00<00:00, 21.6MB/s]



                   from  n    params  module                                       arguments                     
  0                  -1  1      2320  ultralytics.nn.modules.conv.Conv             [3, 80, 3, 2]                 
  1                  -1  1    115520  ultralytics.nn.modules.conv.Conv             [80, 160, 3, 2]               
  2                  -1  3    436800  ultralytics.nn.modules.block.C2f             [160, 160, 3, True]           
  3                  -1  1    461440  ultralytics.nn.modules.conv.Conv             [160, 320, 3, 2]              
  4                  -1  6   3281920  ultralytics.nn.modules.block.C2f             [320, 320, 6, True]           
  5                  -1  1   1844480  ultralytics.nn.modules.conv.Conv             [320, 640, 3, 2]              
  6                  -1  6  13117440  ultralytics.nn.modules.block.C2f             [640, 640, 6, True]           
  7                  -1  1   3687680  ultralytics.nn.modules.conv.Conv             [640

100%|██████████| 5.35M/5.35M [00:00<00:00, 105MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /kaggle/input/fishai-sample/annotations_s/labels/train... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:03<00:00, 278.42it/s]






[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mAutoBatch: [0mComputing optimal batch size for imgsz=640 at 95.0% CUDA memory utilization.
[34m[1mAutoBatch: [0mCUDA:0 (Tesla P100-PCIE-16GB) 15.89G total, 0.62G reserved, 0.59G allocated, 14.68G free
      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output


  check_for_updates()


    68185350       258.3         1.497         66.68         156.1        (1, 3, 640, 640)                    list
    68185350       516.6         2.147         74.49         113.2        (2, 3, 640, 640)                    list
    68185350        1033         3.825         132.2         176.4        (4, 3, 640, 640)                    list
    68185350        2066         6.564         251.5         305.9        (8, 3, 640, 640)                    list
    68185350        4133        12.721         402.1         541.9       (16, 3, 640, 640)                    list
[34m[1mAutoBatch: [0mUsing batch-size 17 for CUDA:0 14.64G/15.89G (92%) ✅


[34m[1mtrain: [0mScanning /kaggle/input/fishai-sample/annotations_s/labels/train... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:01<00:00, 778.39it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/input/fishai-sample/annotations_s/labels/val... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:03<00:00, 259.10it/s]






Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000263, momentum=0.9) with parameter groups 97 weight(decay=0.0), 104 weight(decay=0.00053125), 103 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1      14.3G      3.654      5.158      3.389         75        640: 100%|██████████| 59/59 [01:38<00:00,  1.67s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [00:19<00:00,  1.54it/s]

                   all       1000       3516          0.612      0.332      0.367       0.24






1 epochs completed in 0.036 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 136.8MB
Optimizer stripped from runs/detect/train/weights/best.pt, 136.8MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.49 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
Model summary (fused): 268 layers, 68,156,310 parameters, 0 gradients, 257.6 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [00:19<00:00,  1.57it/s]

                   all       1000       3516         0.592      0.343      0.308      0.221





Speed: 0.1ms preprocess, 17.8ms inference, 0.0ms loss, 0.1ms postprocess per image
Results saved to [1mruns/detect/train[0m


In [21]:
model.save('result.pt')

In [22]:
from IPython.display import FileLink
FileLink('result.pt')