In [None]:
import cv2
import numpy as np


In [2]:
def preprocess_image(img_path):
    """Turn a raw frame into a binary mask and its background-flattened image.

    Steps: min-max stretch to the 16-bit range, reduce to 8 bits, bilateral
    denoise, CLAHE contrast enhancement, subtraction of a heavily blurred
    background estimate, then a binary threshold placed at 1.5x the median
    histogram bin of the flattened image.

    Args:
        img_path: Location of the image (``str`` or path-like). It is read
            with ``cv2.IMREAD_ANYDEPTH``.

    Returns:
        tuple: ``(binary, subtracted)`` where ``subtracted`` is the 8-bit
        image after background removal and ``binary`` is its 0/255 mask.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    img_raw = cv2.imread(str(img_path), cv2.IMREAD_ANYDEPTH)
    if img_raw is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside normalize.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # Request a 16-bit destination explicitly. Without dtype, normalize keeps
    # the source depth, so an 8-bit input would saturate at 255 and the
    # division below would wipe the image to zeros.
    img_norm = cv2.normalize(
        img_raw, None, 0, 65535, cv2.NORM_MINMAX, dtype=cv2.CV_16U
    )

    # 65536 levels -> 256 levels (integer division truncates like the cast).
    img_8bit = (img_norm // 256).astype(np.uint8)

    # Edge-preserving noise reduction.
    bilateral_filtered = cv2.bilateralFilter(img_8bit, 9, 75, 75)

    # Adaptive contrast enhancement.
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    clahe_img = clahe.apply(bilateral_filtered)

    # Background estimation (very wide Gaussian) and saturating subtraction:
    # pixels darker than the local background clamp to 0.
    background = cv2.GaussianBlur(clahe_img, (101, 101), 0)
    subtracted = cv2.subtract(clahe_img, background)

    # Dynamic threshold: first histogram bin whose cumulative share reaches
    # 50 % (the median intensity), scaled by 1.5.
    # NOTE(review): if at least half of the pixels clamp to 0 above, the
    # median bin is 0 and every non-zero pixel passes - confirm intended.
    hist = cv2.calcHist([subtracted], [0], None, [256], [0, 256])
    hist = hist / hist.sum()
    cumulative_hist = np.cumsum(hist)
    median_bin = np.searchsorted(cumulative_hist, 0.5)
    _, binary = cv2.threshold(
        subtracted, float(median_bin) * 1.5, 255, cv2.THRESH_BINARY
    )

    return binary, subtracted

In [10]:
import pandas as pd
from pathlib import Path
import cv2
import shutil
import yaml

def prepare_yolo_dataset(csv_path, image_dir, output_dir='yolo_dataset'):
    """Convert CSV box annotations plus raw frames into a YOLO dataset folder.

    For every distinct value in the ``image`` column that exists under
    ``image_dir``, the frame is min-max scaled to 8 bits, written as JPEG to
    ``images/<split>/`` and a matching ``labels/<split>/<stem>.txt`` is
    written with one ``class cx cy w h`` line per annotation row. A
    ``dataset.yaml`` describing the layout is written last.

    Args:
        csv_path: CSV with columns ``image``, ``bbox_x``, ``bbox_y``,
            ``bbox_width``, ``bbox_height`` and ``object_type``.
        image_dir: Directory holding the files named in ``image``.
        output_dir: Root of the dataset to create (default ``'yolo_dataset'``).

    Raises:
        KeyError: If an ``object_type`` value is not a key of ``class_map``.

    Notes:
        Missing or unreadable images are skipped with a warning.
    """
    # Local imports keep this cell self-contained.
    import warnings
    import zlib

    annotations = pd.read_csv(csv_path)
    image_root = Path(image_dir)
    output_root = Path(output_dir)

    # Create YOLO directory structure.
    for split_name in ('train', 'val'):
        (output_root / 'images' / split_name).mkdir(parents=True, exist_ok=True)
        (output_root / 'labels' / split_name).mkdir(parents=True, exist_ok=True)

    # Class mapping
    class_map = {'star': 0, 'streak': 1}  # Adjust based on your CSV

    for img_name, group in annotations.groupby('image'):
        img_path = image_root / img_name
        if not img_path.exists():
            warnings.warn(f"Skipping {img_name}: not found in {image_root}")
            continue

        img = cv2.imread(str(img_path), cv2.IMREAD_ANYDEPTH)
        if img is None:
            # cv2.imread returns None instead of raising on decode failure.
            warnings.warn(f"Skipping {img_name}: OpenCV could not read it")
            continue
        img_h, img_w = img.shape[:2]

        # 80/20 split keyed on a stable checksum. The built-in hash() of a
        # str is salted per interpreter process, so it would reshuffle the
        # split on every kernel restart and leak images between train and val.
        checksum = zlib.crc32(str(img_name).encode('utf-8'))
        split = 'val' if checksum % 5 == 0 else 'train'

        # Ask for an 8-bit result explicitly: JPEG stores 8-bit samples only.
        img_8bit = cv2.normalize(
            img, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
        )
        dest_img = output_root / 'images' / split / f'{img_path.stem}.jpg'
        if not cv2.imwrite(str(dest_img), img_8bit):
            warnings.warn(f"Skipping {img_name}: could not write {dest_img}")
            continue

        # Create YOLO format labels (normalized cx, cy, w, h). Normalise by
        # the real frame size instead of assuming every image is 4500x4500.
        label_path = output_root / 'labels' / split / f'{img_path.stem}.txt'
        with open(label_path, 'w') as f:
            for _, row in group.iterrows():
                x_center = (row['bbox_x'] + row['bbox_width'] / 2) / img_w
                y_center = (row['bbox_y'] + row['bbox_height'] / 2) / img_h
                width = row['bbox_width'] / img_w
                height = row['bbox_height'] / img_h

                f.write(f"{class_map[row['object_type']]} {x_center} {y_center} {width} {height}\n")

    # Create dataset.yaml; order the names by class id so that list position
    # and id agree even if class_map is edited out of order.
    class_names = [
        name for name, _ in sorted(class_map.items(), key=lambda item: item[1])
    ]
    data = {
        'path': str(output_root.absolute()),
        'train': 'images/train',
        'val': 'images/val',
        'names': class_names,
        'nc': len(class_map)
    }

    with open(output_root / 'dataset.yaml', 'w') as f:
        yaml.dump(data, f)

In [15]:
from ultralytics import YOLO
import cv2
import numpy as np

class AstroYOLO:
    """Wrapper around an Ultralytics YOLO model for large astronomical frames.

    Bundles image preprocessing, training, tiled inference and a NumPy
    non-maximum suppression used to merge detections from overlapping tiles.
    """

    def __init__(self):
        # Set by train(); predict_large_image() requires it to be populated.
        self.model = None
        # One CLAHE operator shared by every preprocess_image() call.
        self.clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))

    def preprocess_image(self, img_path):
        """Read a frame and return its 0/255 thresholded 8-bit mask.

        Steps: min-max stretch to the 16-bit range, reduce to 8 bits,
        bilateral denoise, CLAHE, then a binary threshold at twice the median
        histogram bin.

        Args:
            img_path: Location of the image (``str`` or path-like).

        Returns:
            numpy.ndarray: Single-channel ``uint8`` mask.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``img_path``.
        """
        img_raw = cv2.imread(str(img_path), cv2.IMREAD_ANYDEPTH)
        if img_raw is None:
            # cv2.imread returns None on failure instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # Explicit 16-bit destination: without dtype an 8-bit source would
        # saturate at 255 and vanish in the division below.
        img_norm = cv2.normalize(
            img_raw, None, 0, 65535, cv2.NORM_MINMAX, dtype=cv2.CV_16U
        )
        img_8bit = (img_norm // 256).astype(np.uint8)

        # Edge-preserving noise reduction, then adaptive contrast enhancement.
        bilateral_filtered = cv2.bilateralFilter(img_8bit, 9, 50, 50)
        clahe_img = self.clahe.apply(bilateral_filtered)

        # Dynamic threshold: twice the first bin whose cumulative share
        # reaches 50 %.
        hist = cv2.calcHist([clahe_img], [0], None, [256], [0, 256])
        hist = hist / hist.sum()
        cumulative_hist = np.cumsum(hist)
        median_bin = np.searchsorted(cumulative_hist, 0.5)
        _, binary = cv2.threshold(
            clahe_img, float(median_bin) * 2, 255, cv2.THRESH_BINARY
        )

        return binary

    def train(self, data_yaml, epochs=100, imgsz=1024,
              model_cfg='custom_yolov8.yaml'):
        """Build the model from ``model_cfg`` and train it on ``data_yaml``.

        Args:
            data_yaml: Path to the dataset description file.
            epochs: Number of training epochs.
            imgsz: Training image size handed to Ultralytics.
            model_cfg: Model definition or weights file given to ``YOLO``.

        Returns:
            Whatever ``YOLO.train`` returns.

        Notes:
            ``preprocess`` is not among the documented Ultralytics train
            settings and the attribute it pointed at (``self.preprocess``)
            never existed, so it is no longer passed. Any custom
            preprocessing has to be baked into the dataset images.
        """
        self.model = YOLO(model_cfg)

        # NOTE(review): rect=True appears to switch mosaic off inside the
        # Ultralytics dataset builder - confirm which of the two is wanted.
        results = self.model.train(
            data=data_yaml,
            epochs=epochs,
            imgsz=imgsz,
            batch=8,
            augment=True,
            degrees=45,
            translate=0.1,
            scale=0.5,
            mosaic=1.0,
            pretrained=True,
            optimizer='AdamW',
            lr0=0.001,
            close_mosaic=10,
            overlap_mask=False,
            rect=True,
        )
        return results

    @staticmethod
    def _tile_origins(length, tile_size, stride):
        """Return tile start offsets covering ``length`` without slivers."""
        if length <= tile_size:
            return [0]
        origins = list(range(0, length - tile_size + 1, stride))
        if origins[-1] + tile_size < length:
            # Final tile sits flush with the border instead of hanging over.
            origins.append(length - tile_size)
        return origins

    def predict_large_image(self, img_path, tile_size=1024, overlap=128):
        """Detect objects in a large frame by tiling, then merge with NMS.

        Args:
            img_path: Image to analyse; it is run through
                ``preprocess_image`` first.
            tile_size: Edge length of the square tiles, also used as the
                inference size.
            overlap: Pixels shared by neighbouring tiles.

        Returns:
            Rows of ``[x1, y1, x2, y2, conf, cls]`` in full-image
            coordinates as a NumPy array, or ``[]`` when nothing is found.

        Raises:
            RuntimeError: If no model has been trained or assigned.
            ValueError: If ``overlap`` is negative or not below ``tile_size``.
        """
        if self.model is None:
            raise RuntimeError(
                "No model available: call train() or assign self.model first"
            )
        if not 0 <= overlap < tile_size:
            raise ValueError("overlap must satisfy 0 <= overlap < tile_size")

        # NOTE(review): inference runs on the thresholded mask; verify the
        # training images were prepared the same way.
        img = self.preprocess_image(img_path)
        h, w = img.shape[:2]
        stride = tile_size - overlap

        detections = []
        for y_offset in self._tile_origins(h, tile_size, stride):
            for x_offset in self._tile_origins(w, tile_size, stride):
                tile = img[y_offset:y_offset + tile_size,
                           x_offset:x_offset + tile_size]
                if tile.ndim == 2:
                    # The detector takes 3-channel input; replicate the mask.
                    tile = np.stack((tile,) * 3, axis=-1)

                tile_result = self.model.predict(
                    tile, imgsz=tile_size, conf=0.25
                )
                for box in tile_result[0].boxes:
                    # Convert tile coordinates to original image coordinates.
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    detections.append([
                        x1 + x_offset,
                        y1 + y_offset,
                        x2 + x_offset,
                        y2 + y_offset,
                        box.conf.item(),
                        box.cls.item()
                    ])

        # Objects inside the overlap bands are reported by several tiles.
        return self.non_max_suppression(detections)

    @staticmethod
    def non_max_suppression(boxes, iou_thresh=0.5):
        """Greedy, class-agnostic NMS over ``[x1, y1, x2, y2, score, ...]`` rows.

        Args:
            boxes: Sequence of detections; column 4 is the score.
            iou_thresh: A box is dropped when its IoU with an already kept,
                higher-scoring box exceeds this value.

        Returns:
            The surviving rows as a NumPy array ordered by descending score,
            or ``[]`` when ``boxes`` is empty.
        """
        if len(boxes) == 0:
            return []

        boxes = np.array(boxes)
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        scores = boxes[:, 4]

        # Continuous coordinates: no "+1" pixel convention, which would give
        # disjoint float boxes a spurious positive overlap.
        areas = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            best, rest = order[0], order[1:]
            keep.append(best)

            inter_w = np.maximum(
                0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest])
            )
            inter_h = np.maximum(
                0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest])
            )
            inter = inter_w * inter_h
            union = areas[best] + areas[rest] - inter

            # Degenerate zero-area pairs have union 0; treat them as IoU 0.
            iou = np.divide(
                inter, union, out=np.zeros_like(inter), where=union > 0
            )
            order = rest[iou <= iou_thresh]

        return boxes[keep]

In [16]:
# Prepare dataset.
# The image directory is written with forward slashes: a backslash path is a
# single (non-existent) file name on Linux/macOS, which would make every
# image get skipped, while forward slashes also resolve on Windows.
prepare_yolo_dataset('centroids.csv', 'Datasets/Padded_Raw', 'yolo_dataset')

# Train model on the dataset description written by the step above.
astro_yolo = AstroYOLO()
results = astro_yolo.train('yolo_dataset/dataset.yaml', epochs=100, imgsz=1024)

# Save model
astro_yolo.model.export(format='onnx')  # Optional: export to ONNX

TypeError: Detect.__init__() takes from 1 to 3 positional arguments but 4 were given