In [1]:
import os
import sys

# CRITICAL: Pre-load PyTorch libraries before importing DCNv3
# This must be done BEFORE any DCNv3 import
import torch

# Method 1: Pre-load libc10.so using ctypes
import ctypes
torch_lib_path = os.path.join(os.path.dirname(torch.__file__), 'lib')
libc10_path = os.path.join(torch_lib_path, 'libc10.so')

try:
    # Force load PyTorch's C10 library
    ctypes.CDLL(libc10_path, mode=ctypes.RTLD_GLOBAL)
    print(f"Pre-loaded libc10.so from: {libc10_path}")
except Exception as e:
    print(f"Could not pre-load libc10.so: {e}")

# Set CUDA workspace config for deterministic behavior
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

from ultralytics import YOLO

# Check GPU availability
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("GPU not available, using CPU instead.")

# Verify DCNv3 is working (use DCNv3_pkg to import the Python class)
try:
    from DCNv3_pkg import DCNv3
    print("DCNv3 module loaded successfully from DCNv3_pkg")
    print(f"   DCNv3 class: {DCNv3}")
except ImportError as e:
    print(f"DCNv3 import failed: {e}")
    print("\nThe DCNv3 Python wrapper classes should be installed.")
    print("   Check that DCNv3_pkg directory exists in site-packages")


Pre-loaded libc10.so from: /home/james/miniconda3/envs/yolov8-dcn/lib/python3.10/site-packages/torch/lib/libc10.so
DCNv3_AVAILABLE=True
Using GPU: NVIDIA GeForce RTX 3060
DCNv3 module loaded successfully from DCNv3_pkg
   DCNv3 class: <class 'DCNv3_modules.dcnv3.DCNv3'>
DCNv3_AVAILABLE=True
Using GPU: NVIDIA GeForce RTX 3060
DCNv3 module loaded successfully from DCNv3_pkg
   DCNv3 class: <class 'DCNv3_modules.dcnv3.DCNv3'>




In [2]:
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())

2.0.1+cu118
11.8
True


In [None]:
import os

# Force all caches to F drive
os.environ["TORCH_HOME"] = "F:/caches/torch"
os.environ["HF_HOME"] = "F:/caches/huggingface"
os.environ["PIP_CACHE_DIR"] = "F:/caches/pip"

# Make sure folders exist
for path in [os.environ["TORCH_HOME"], os.environ["HF_HOME"], os.environ["PIP_CACHE_DIR"]]:
    os.makedirs(path, exist_ok=True)

print("‚úÖ Using custom cache directories:")
print("TORCH_HOME:", os.environ["TORCH_HOME"])
print("HF_HOME:", os.environ["HF_HOME"])
print("PIP_CACHE_DIR:", os.environ["PIP_CACHE_DIR"])

In [6]:
import os

print(os.path.exists("real_100data_linux.yaml"))

True


In [5]:
# Train Vanilla Yolov8n model
import os

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

model = YOLO("ultralytics/cfg/models/v8/yolov8n.yaml")

results = model.train(
    data="real_100data_linux.yaml",
    project=project_dir,
    name="yolov8-vanilla",
    epochs=300,
    patience=50,
    imgsz=640,
    batch=16,
)

print(f"‚úÖ Training completed. Check {project_dir}/yolov8-vanilla for results.")

Ultralytics 8.3.223 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3060, 11901MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=real_100data_linux.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=300, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=ultralytics/cfg/models/v8/yolov8n.yaml, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8-vanilla, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, over

In [9]:
import pandas as pd

# Path to your results.csv
csv_path = r"/media/james/Lexar/YOLO_outputs/yolov8-vanilla/results.csv"
df = pd.read_csv(csv_path)

# Best epoch based on mAP50
best_epoch = df["metrics/mAP50(B)"].idxmax()
last_epoch = len(df) - 1

# Best epoch based on mAP50-95
best_epoch_map5095 = df["metrics/mAP50-95(B)"].idxmax()
best_map5095_value = df["metrics/mAP50-95(B)"].max()

print(f"Best epoch (mAP50): {best_epoch}")
print(f"Best epoch (mAP50-95): {best_epoch_map5095} (value: {best_map5095_value})")
print(f"Last epoch: {last_epoch + 1}")

Best epoch (mAP50): 293
Best epoch (mAP50-95): 287 (value: 0.66992)
Last epoch: 300


In [6]:
# Train dcnv2-liu model
import os

from ultralytics import YOLO

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Use your custom YOLOv8-DCN model
model = YOLO("ultralytics/cfg/models/v8/dcnv2-yolov8-liu.yaml")

# Train the model
results = model.train(
    data="100data_linux.yaml",  # your dataset config
    project=project_dir,  # where runs will be stored
    name="dcnv2-yolov8-liu",  # experiment name
    epochs=300,  # match your baseline
    patience=50,  # early stopping
    imgsz=640,  # image size
    batch=16,  # adjust if GPU runs OOM
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv2-yolov8-liu for results.")

Ultralytics 8.3.223 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3060, 11901MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=100data_linux.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=300, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=ultralytics/cfg/models/v8/dcnv2-yolov8-liu.yaml, momentum=0.937, mosaic=1.0, multi_scale=False, name=dcnv2-yolov8-liu, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto

In [10]:
import pandas as pd

# Path to your results.csv
csv_path = r"/media/james/Lexar/YOLO_outputs/dcnv2-yolov8-liu/results.csv"
df = pd.read_csv(csv_path)

# Best epoch based on mAP50
best_epoch = df["metrics/mAP50(B)"].idxmax()
last_epoch = len(df) - 1

# Best epoch based on mAP50-95
best_epoch_map5095 = df["metrics/mAP50-95(B)"].idxmax()
best_map5095_value = df["metrics/mAP50-95(B)"].max()

print(f"Best epoch (mAP50): {best_epoch}")
print(f"Best epoch (mAP50-95): {best_epoch_map5095} (value: {best_map5095_value})")
print(f"Last epoch: {last_epoch + 1}")

Best epoch (mAP50): 299
Best epoch (mAP50-95): 292 (value: 0.67884)
Last epoch: 300


In [5]:
# Train dcnv2-neck-fpn model
import os

from ultralytics import YOLO

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Use your custom YOLOv8-DCN model
model = YOLO("ultralytics/cfg/models/v8/dcnv2-yolov8-neck-fpn.yaml")

# Train the model
results = model.train(
    data="100data_linux.yaml",  # your dataset config
    project=project_dir,  # where runs will be stored
    name="dcnv2-yolov8-neck-fpn",  # experiment name
    epochs=300,  # match your baseline
    patience=50,  # early stopping
    imgsz=640,  # image size
    batch=16,  # adjust if GPU runs OOM
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv2-yolov8-liu for results.")

Ultralytics 8.3.223 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3060, 11901MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=100data_linux.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=300, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=ultralytics/cfg/models/v8/dcnv2-yolov8-neck-fpn.yaml, momentum=0.937, mosaic=1.0, multi_scale=False, name=dcnv2-yolov8-neck-fpn, nbs=64, nms=False, opset=None, optimize=False, opti

In [6]:
# Train dcnv2-neck-pan model
import os

from ultralytics import YOLO

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Use your custom YOLOv8-DCN model
model = YOLO("ultralytics/cfg/models/v8/dcnv2-yolov8-neck-pan.yaml")

# Train the model
results = model.train(
    data="100data_linux.yaml",  # your dataset config
    project=project_dir,  # where runs will be stored
    name="dcnv2-yolov8-neck-pan",  # experiment name
    epochs=300,  # match your baseline
    patience=50,  # early stopping
    imgsz=640,  # image size
    batch=16,  # adjust if GPU runs OOM
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv2-yolov8-neck-pan for results.")

Ultralytics 8.3.223 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3060, 11901MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=100data_linux.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=300, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=ultralytics/cfg/models/v8/dcnv2-yolov8-neck-pan.yaml, momentum=0.937, mosaic=1.0, multi_scale=False, name=dcnv2-yolov8-neck-pan, nbs=64, nms=False, opset=None, optimize=False, opti

In [7]:
# Train dcnv2-neck-full model
import os

from ultralytics import YOLO

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Use your custom YOLOv8-DCN model
model = YOLO("ultralytics/cfg/models/v8/dcnv2-yolov8-neck-full.yaml")

# Train the model
results = model.train(
    data="100data_linux.yaml",  # your dataset config
    project=project_dir,  # where runs will be stored
    name="dcnv2-yolov8-neck-full",  # experiment name
    epochs=300,  # match your baseline
    patience=50,  # early stopping
    imgsz=640,  # image size
    batch=16,  # adjust if GPU runs OOM
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv2-yolov8-neck-full for results.")

Ultralytics 8.3.223 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3060, 11901MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=100data_linux.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=300, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=ultralytics/cfg/models/v8/dcnv2-yolov8-neck-full.yaml, momentum=0.937, mosaic=1.0, multi_scale=False, name=dcnv2-yolov8-neck-full, nbs=64, nms=False, opset=None, optimize=False, op

In [2]:
# Train dcnv3-liu model
import os

from ultralytics import YOLO

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Use your custom YOLOv8-DCN model
model = YOLO("ultralytics/cfg/models/v8/dcnv3-yolov8-liu.yaml")

if torch.cuda.is_available():
    model.model = model.model.cuda()
    print("üü¢ Model moved to CUDA:", torch.cuda.get_device_name(0))
else:
    print("‚ö†Ô∏è CUDA not available ‚Äî DCNv3 will not run on CPU!")

# Train the model
results = model.train(
    data="100data_linux.yaml",  # your dataset config
    project=project_dir,  # where runs will be stored
    name="dcnv3-yolov8-liu",  # experiment name
    epochs=300,  # match your baseline
    patience=50,  # early stopping
    imgsz=640,  # image size
    batch=16,  # adjust if GPU runs OOM
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv3-yolov8-liu for results.")



ValueError: groups must be a positive integer

In [3]:
# Quick sanity check: ensure DCNv3 nn.Module wrapper is importable
try:
    import DCNv3  # Python wrapper class around the CUDA op
    import inspect
    try:
        sig = inspect.signature(DCNv3.__init__)
    except Exception:
        sig = None
    print("‚úÖ DCNv3 wrapper available from 'dcnv3' package.")
    if sig:
        print("DCNv3.__init__ signature:", sig)
except Exception as e:
    print("‚ùå DCNv3 wrapper not found. Please install the Python package that provides 'from dcnv3 import DCNv3'.")
    print("Hint: pip install dcnv3  # InternImage DCNv3 Python wrapper")
    print("Error:", e)


‚úÖ DCNv3 wrapper available from 'dcnv3' package.
DCNv3.__init__ signature: (*args, **kwargs)


In [1]:
# Create a permanent fix by adding library path to conda environment activation
import os
import torch

conda_env_path = "/home/james/miniconda3/envs/yolov8-dcn"
activate_script = os.path.join(conda_env_path, "etc/conda/activate.d/env_vars.sh")
torch_lib_path = os.path.join(os.path.dirname(torch.__file__), 'lib')

print("üìù Creating permanent environment variable script...")
print(f"   Script location: {activate_script}")

# Create the activate.d directory if it doesn't exist
os.makedirs(os.path.dirname(activate_script), exist_ok=True)

# Write the environment variable script
script_content = f"""#!/bin/bash
# Auto-generated: Add PyTorch lib path for DCNv3
export LD_LIBRARY_PATH="{torch_lib_path}:$LD_LIBRARY_PATH"
"""

try:
    with open(activate_script, 'w') as f:
        f.write(script_content)
    
    # Make it executable
    os.chmod(activate_script, 0o755)
    
    print("‚úÖ Environment activation script created!")
    print("\nüìã What this does:")
    print("   - Automatically sets LD_LIBRARY_PATH when you activate the conda environment")
    print("   - No need to set it manually in notebooks anymore")
    print("\nüîÑ To apply:")
    print("   1. Deactivate current environment: conda deactivate")
    print("   2. Reactivate: conda activate yolov8-dcn")
    print("   3. Restart Jupyter kernel")
    
except Exception as e:
    print(f"‚ùå Failed to create script: {e}")
    print("\nüîß Manual alternative:")
    print(f"   Create file: {activate_script}")
    print(f"   Contents: export LD_LIBRARY_PATH=\"{torch_lib_path}:$LD_LIBRARY_PATH\"")

üìù Creating permanent environment variable script...
   Script location: /home/james/miniconda3/envs/yolov8-dcn/etc/conda/activate.d/env_vars.sh
‚úÖ Environment activation script created!

üìã What this does:
   - Automatically sets LD_LIBRARY_PATH when you activate the conda environment
   - No need to set it manually in notebooks anymore

üîÑ To apply:
   1. Deactivate current environment: conda deactivate
   2. Reactivate: conda activate yolov8-dcn
   3. Restart Jupyter kernel


In [2]:
# Train dcnv3-liu model with proper device handling
from ultralytics import YOLO
import torch
import os

# Check CUDA availability first
if not torch.cuda.is_available():
    raise RuntimeError("‚ö†Ô∏è CUDA not available ‚Äî DCNv3 requires GPU!")

print(f"‚úÖ Using GPU: {torch.cuda.get_device_name(0)}")

# Build model on CPU (safe initialization for DCNv3)
model = YOLO("ultralytics/cfg/models/v8/dcnv3-yolov8-liu.yaml", task="detect")

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Let Ultralytics handle device placement during training
results = model.train(
    data="100data_linux.yaml",
    project=project_dir,
    name="dcnv3-yolov8-liu",
    epochs=300,
    patience=50,
    imgsz=640,
    batch=16,
    device=0,  # Specify GPU device (0 = first GPU)
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv3-yolov8-liu for results.")


‚úÖ Using GPU: NVIDIA GeForce RTX 3060




ValueError: groups must be a positive integer

## üîß Install DCNv3 Python Wrapper Manually (Terminal Commands)

The setup.py only installs the CUDA kernels but not the Python wrapper classes. Since the notebook approach keeps failing due to import issues, use these terminal commands instead:

### **Step 1: Clone InternImage and Copy Python Files**

```bash
# Activate your conda environment
conda activate yolov8-dcn

# Set compiler variables (GCC-10 for CUDA 11.5 compatibility)
export CC=/usr/bin/gcc-10
export CXX=/usr/bin/g++-10
export CUDAHOSTCXX=/usr/bin/g++-10

# Find your site-packages directory
SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
echo "Site-packages: $SITE_PACKAGES"

# Clone InternImage to temporary location
cd /tmp
rm -rf InternImage  # Clean up if exists
git clone --depth 1 https://github.com/OpenGVLab/InternImage.git

# Find DCNv3 installation location (might be in .egg)
DCNv3_LOC=$(find $SITE_PACKAGES -name "DCNv3" -type d | head -1)
PARENT_DIR=$(dirname "$DCNv3_LOC")
echo "DCNv3 location: $DCNv3_LOC"
echo "Parent directory: $PARENT_DIR"

# Copy Python wrapper files
cp -r /tmp/InternImage/detection/ops_dcnv3/functions "$PARENT_DIR/DCNv3_functions"
cp -r /tmp/InternImage/detection/ops_dcnv3/modules "$PARENT_DIR/DCNv3_modules"

echo "‚úÖ Copied Python files"
```

### **Step 2: Fix Import Statements**

```bash
# Fix imports in dcnv3.py
sed -i 's|from \.\.functions import DCNv3Function, dcnv3_core_pytorch|from DCNv3_functions.dcnv3_func import DCNv3Function, dcnv3_core_pytorch|g' \
    "$PARENT_DIR/DCNv3_modules/dcnv3.py"

# Fix imports in modules/__init__.py
sed -i 's|from \.dcnv3 import|from DCNv3_modules.dcnv3 import|g' \
    "$PARENT_DIR/DCNv3_modules/__init__.py"

# Fix imports in functions/__init__.py
sed -i 's|from \.dcnv3_func import|from DCNv3_functions.dcnv3_func import|g' \
    "$PARENT_DIR/DCNv3_functions/__init__.py"

echo "‚úÖ Fixed import statements"
```

### **Step 3: Create DCNv3/__init__.py**

```bash
# Create the main __init__.py
cat > "$DCNv3_LOC/__init__.py" << 'EOF'
# DCNv3 Package Wrapper
# Import Python wrapper classes with fixed absolute imports
from DCNv3_modules.dcnv3 import DCNv3, DCNv3_pytorch
from DCNv3_functions.dcnv3_func import DCNv3Function

__all__ = ['DCNv3', 'DCNv3_pytorch', 'DCNv3Function']
EOF

echo "‚úÖ Created DCNv3/__init__.py"
```

### **Step 4: Test the Installation**

```bash
# Test imports
python -c "from DCNv3_functions.dcnv3_func import DCNv3Function; print('‚úÖ DCNv3Function:', DCNv3Function)"
python -c "from DCNv3_modules.dcnv3 import DCNv3; print('‚úÖ DCNv3 class:', DCNv3)"
python -c "from DCNv3 import DCNv3; print('‚úÖ DCNv3 package:', DCNv3)"

echo ""
echo "üéâ DCNv3 is installed! Now restart your Jupyter kernel."
```

### **Alternative: Clean Reinstall from Scratch**

If the above doesn't work, try a complete reinstall:

```bash
conda activate yolov8-dcn

# Set compiler
export CC=/usr/bin/gcc-10
export CXX=/usr/bin/g++-10
export CUDAHOSTCXX=/usr/bin/g++-10

# Completely remove old installation
pip uninstall -y DCNv3
rm -rf $(python -c "import site; print(site.getsitepackages()[0])")/DCNv3*

# Clone and manually install
cd /tmp
rm -rf InternImage
git clone https://github.com/OpenGVLab/InternImage.git
cd InternImage/detection/ops_dcnv3

# Install with proper compiler
pip install -e . -v

# Check if Python files were installed
python -c "from DCNv3 import DCNv3; print('‚úÖ Success:', DCNv3)"
```

**After running these commands, restart your Jupyter kernel and run the first cell in the notebook!**

## üß™ Test DCNv3 Model Loading

Before training, let's verify the model loads correctly with the fixes applied.

In [2]:
# Test loading the DCNv3 model
from ultralytics import YOLO
import torch

print("Testing DCNv3 model loading...")
print(f"CUDA available: {torch.cuda.is_available()}")

try:
    model = YOLO("ultralytics/cfg/models/v8/dcnv3-yolov8-liu.yaml", task="detect")
    print("Model architecture built successfully")
    print(f"   Model device: {next(model.model.parameters()).device}")
    print(f"   Number of parameters: {sum(p.numel() for p in model.model.parameters()):,}")
except Exception as e:
    print(f"Model loading failed: {e}")
    import traceback
    traceback.print_exc()


Testing DCNv3 model loading...
CUDA available: True
Model architecture built successfully
   Model device: cuda:0
   Number of parameters: 2,958,942
Model architecture built successfully
   Model device: cuda:0
   Number of parameters: 2,958,942


## Summary of Fixes Applied

### Completed Fixes:

1. **DCNv3 Compilation**: Compiled with GCC-10 for CUDA 11.5 compatibility
2. **Python Wrapper Installation**: Manually installed DCNv3_functions and DCNv3_modules
3. **Import Resolution**: 
   - Fixed circular imports 
   - Created DCNv3_pkg wrapper to avoid naming conflict with .so file
   - Updated imports to use `from DCNv3_pkg import DCNv3`
4. **Channel Validation**: 
   - Fixed `DCNv3Bottleneck`: `c_ = max(1, int(c2 * e))` to prevent zero channels
   - Fixed `DCNv3C2f`: `self.c = max(1, int(c2 * e))` to prevent zero channels
5. **Model Parsing**: Added `DCNv3C2f` to `base_modules` in parse_model
6. **CUDA Initialization**: Made model initialization CUDA-aware (moves to GPU during stride computation)
7. **Channel Format Fix**: **KEY FIX** - DCNv3 expects (N, H, W, C) format but PyTorch uses (N, C, H, W)
   - Added `permute()` operations in DCNv3Conv.forward() to convert between formats
   - This fixed the "mat1 and mat2 shapes cannot be multiplied" error

### Current Status:

The model now:
- Builds the architecture successfully
- Imports DCNv3 from DCNv3_pkg correctly
- Initializes on CUDA automatically when available
- Handles channel format conversion correctly
- **Ready for training**

### Next Steps:

1. **Restart Jupyter kernel** (if not already done)
2. **Run first cell** to verify imports work
3. **Run test cell** to confirm model loads
4. **Run training cell** to start training

## Ready to Train

If the test cell above shows "Model architecture built successfully", you're ready to train!

### Training Cell

Use the cell below or modify the existing training cells (cells 12-15) to train your DCNv3 model.

In [3]:
# Train DCNv3 model - FINAL VERSION
from ultralytics import YOLO
import torch
import os

# Check CUDA availability first
if not torch.cuda.is_available():
    raise RuntimeError("CUDA not available - DCNv3 requires GPU")

print(f"Using GPU: {torch.cuda.get_device_name(0)}")

# Build model
model = YOLO("ultralytics/cfg/models/v8/dcnv3-yolov8-liu.yaml", task="detect")

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Train with automatic GPU placement
results = model.train(
    data="100data_linux.yaml",
    project=project_dir,
    name="dcnv3-yolov8-liu",
    epochs=300,
    patience=50,
    imgsz=640,
    batch=16,
    device=0,  # Specify GPU device (0 = first GPU)
)

print(f"Training completed. Check {project_dir}/dcnv3-yolov8-liu for results.")


Using GPU: NVIDIA GeForce RTX 3060
New https://pypi.org/project/ultralytics/8.3.226 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.223 üöÄ Python-3.10.19 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060, 11901MiB)
New https://pypi.org/project/ultralytics/8.3.226 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.223 üöÄ Python-3.10.19 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060, 11901MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=100data_linux.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=300, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0

In [4]:
# Train dcnv3-neck-fpn model
import os

from ultralytics import YOLO

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Use your custom YOLOv8-DCN model
model = YOLO("ultralytics/cfg/models/v8/dcnv3-yolov8-neck-fpn.yaml")

# Train the model
results = model.train(
    data="100data_linux.yaml",  # your dataset config
    project=project_dir,  # where runs will be stored
    name="dcnv3-yolov8-neck-fpn",  # experiment name
    epochs=300,  # match your baseline
    patience=50,  # early stopping
    imgsz=640,  # image size
    batch=16,  # adjust if GPU runs OOM
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv3-yolov8-neck-fpn for results.")

Ultralytics 8.3.223 üöÄ Python-3.10.19 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060, 11901MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=100data_linux.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=300, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=ultralytics/cfg/models/v8/dcnv3-yolov8-neck-fpn.yaml, momentum=0.937, mosaic=1.0, multi_scale=False, name=dcnv3-yolov8-neck-fpn, nbs=64, nms=False, opset=None, optimize=False, optimiz

Exception in thread Thread-21 (_pin_memory_loop):
Traceback (most recent call last):
  File "/home/james/miniconda3/envs/yolov8-dcn/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/james/miniconda3/envs/yolov8-dcn/lib/python3.10/threading.py", line 953, in run
Exception in thread Thread-22 (_pin_memory_loop):
Traceback (most recent call last):
  File "/home/james/miniconda3/envs/yolov8-dcn/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/james/miniconda3/envs/yolov8-dcn/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/home/james/miniconda3/envs/yolov8-dcn/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 51, in _pin_memory_loop
    do_one_step()
  File "/home/james/miniconda3/envs/yolov8-dcn/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 28, in do_one_step
    r = in_queue.get(timeout=MP_STATUS_CHECK_IN

[K      1/300      2.36G      4.503      6.259      4.237        114        640: 0% ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/598 6.7it/s 0.1s<1:29
[K      1/300      2.36G      4.503      6.259      4.237        114        640: 0% ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/598 6.7it/s 0.1s<1:29


RuntimeError: Pin memory thread exited unexpectedly

In [None]:
# Train dcnv3-neck-pan model
import os

from ultralytics import YOLO

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Use your custom YOLOv8-DCN model
model = YOLO("ultralytics/cfg/models/v8/dcnv3-yolov8-neck-pan.yaml")

# Train the model
results = model.train(
    data="100data_linux.yaml",  # your dataset config
    project=project_dir,  # where runs will be stored
    name="dcnv3-yolov8-neck-pan",  # experiment name
    epochs=300,  # match your baseline
    patience=50,  # early stopping
    imgsz=640,  # image size
    batch=16,  # adjust if GPU runs OOM
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv3-yolov8-neck-pan for results.")

In [None]:
# Train dcnv3-neck-full model
import os

from ultralytics import YOLO

# Pick up environment variable or default
project_dir = os.getenv("YOLO_OUTPUT", "/media/james/Lexar/YOLO_outputs")

# Use your custom YOLOv8-DCN model
model = YOLO("ultralytics/cfg/models/v8/dcnv3-yolov8-neck-full.yaml")

# Train the model
results = model.train(
    data="100data_linux.yaml",  # your dataset config
    project=project_dir,  # where runs will be stored
    name="dcnv3-yolov8-neck-full",  # experiment name
    epochs=300,  # match your baseline
    patience=50,  # early stopping
    imgsz=640,  # image size
    batch=16,  # adjust if GPU runs OOM
)

print(f"‚úÖ Training completed. Check {project_dir}/dcnv3-yolov8-neck-full for results.")