In [None]:
# Install required packages (uncomment to run).
# %pip installs into the environment of the running kernel, unlike a bare !pip.
# %pip install -r requirements.txt
# %pip install anomalib pytorch-lightning
# Optional: pillow-heif if you have HEIC images
# %pip install pillow-heif

In [None]:
# Configuration - set paths here
from pathlib import Path
import os
from dotenv import load_dotenv

# Load .env if present
load_dotenv()


def _env(name, default, cast=str):
    """Read the environment variable ``name`` and convert it with ``cast``.

    A variable that is unset, empty or whitespace-only falls back to
    ``default``. Without this, an entry such as ``BATCH_SIZE=`` in a .env file
    would make ``int('')`` raise, and an empty ``SEGMENTED_DIR`` would silently
    resolve to the current working directory.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return cast(default)
    return cast(raw)


# Folder containing your already-segmented/cropped nut images (change as needed)
SEGMENTED_DIR = _env('SEGMENTED_DIR', 'segmented_nuts', Path)  # default folder name
DATASET_DIR = _env('DATASET_DIR', 'dataset_for_anomalib', Path)
OUTPUT_DIR = _env('ANOMALIB_OUTPUT', 'anomalib_results', Path)

# Training parameters
TRAIN_RATIO = _env('TRAIN_RATIO', 0.7, float)
BATCH_SIZE = _env('BATCH_SIZE', 16, int)
IMAGE_SIZE = _env('IMAGE_SIZE', 224, int)
MAX_EPOCHS = _env('MAX_EPOCHS', 3, int)

# Echo the absolute paths so a wrong working directory is obvious immediately.
print('SEGMENTED_DIR ->', SEGMENTED_DIR.resolve())
print('DATASET_DIR ->', DATASET_DIR.resolve())
print('OUTPUT_DIR ->', OUTPUT_DIR.resolve())

## Step 1 — Inspect input folder
This cell checks whether the folder contains mask files (image+mask pairs) or already-cropped images.

In [None]:
# Create the input folder if it is missing so the scan below cannot fail on a
# fresh checkout; parents=True also supports a nested SEGMENTED_DIR.
SEGMENTED_DIR.mkdir(parents=True, exist_ok=True)

# Suffixes are compared in lower case so files such as IMG_001.JPG are found too.
IMAGE_SUFFIXES = {'.png', '.jpg', '.jpeg'}

candidate_files = sorted(
    p for p in SEGMENTED_DIR.iterdir()
    if p.is_file() and p.suffix.lower() in IMAGE_SUFFIXES
)

# A mask is any image whose stem ends in "_mask" (png, jpg or jpeg); this is
# the same rule Step 2 uses to skip mask files. Masks are kept out of `images`
# so the two counts printed below do not overlap.
mask_files = [p for p in candidate_files if p.stem.lower().endswith('_mask')]
images = [p for p in candidate_files if not p.stem.lower().endswith('_mask')]

print(f'Found {len(images)} image files and {len(mask_files)} mask files in {SEGMENTED_DIR.name}')

if len(mask_files) > 0:
    print('Detected mask files. Assuming folder contains image+mask pairs.')
    INPUT_MODE = 'image_mask_pairs'
else:
    print('No mask files detected. Assuming folder contains cropped nut images ready for dataset split.')
    INPUT_MODE = 'cropped_images'

# Display the detected mode as the cell output.
INPUT_MODE

## Step 2 — Build Anomalib Dataset
If your folder contains masks, we'll pair images and masks and produce cropped images. If it already contains cropped images, we'll create train/test splits directly.

In [None]:
# Import local builders
from anomalib_dataset_builder import AnomalibDatasetBuilder
from segmentation_cropper import SegmentationCropper

# Pixels of context kept around the mask's bounding box when cropping.
CROP_PADDING = 8
# Mask extensions tried, in priority order, for every image.
MASK_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _find_mask(img_path):
    """Return the mask file belonging to ``img_path``, or ``None`` if there is none.

    For ``MYIMAGE.png`` the expected mask is ``MYIMAGE_mask.png``, then
    ``MYIMAGE_mask.jpg`` / ``.jpeg``. As a fallback, any file in the same folder
    whose stem equals ``MYIMAGE_mask`` (case-insensitive, any extension) is
    accepted. Paths are checked directly instead of globbing so that stems
    containing glob metacharacters such as ``[`` cannot break the lookup.
    """
    mask_stem = f"{img_path.stem}_mask"
    for ext in MASK_EXTENSIONS:
        candidate = img_path.with_name(mask_stem + ext)
        if candidate.is_file():
            return candidate
    # Sorted so the choice is deterministic when several files match.
    fallback = sorted(
        p for p in img_path.parent.iterdir()
        if p.is_file() and p.stem.lower() == mask_stem.lower()
    )
    return fallback[0] if fallback else None


# Create dataset dir
DATASET_DIR.mkdir(parents=True, exist_ok=True)

if INPUT_MODE == 'image_mask_pairs':
    # Imported once here (not per loop iteration) and only in this branch, so
    # the cropped-images path does not require OpenCV to be importable.
    import cv2
    import numpy as np

    print('Using SegmentationCropper to produce cropped images from image+mask pairs...')
    # NOTE(review): this instance is never used below; it is kept only because
    # its constructor may have side effects - confirm and remove if it has none.
    cropper = SegmentationCropper(output_base='cropped_nuts')
    # The cropper expects Roboflow-like JSON results; we will instead pair images and masks directly here.
    # Simple pairing: for each image file MYIMAGE.png, look for MYIMAGE_mask.png and crop region of mask.
    cropped_base = Path('cropped_nuts')
    cropped_base.mkdir(parents=True, exist_ok=True)
    metadata = []
    skipped = 0
    for img_path in sorted(SEGMENTED_DIR.glob('*.*')):
        if img_path.name.lower().endswith(('_mask.png','_mask.jpg','_mask.jpeg')):
            continue
        mask_path = _find_mask(img_path)
        if mask_path is None:
            # No mask found - skip
            skipped += 1
            continue
        # Load images; cv2.imread returns None for unreadable / non-image files.
        img = cv2.imread(str(img_path))
        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
        if img is None or mask is None:
            skipped += 1
            continue
        # The bounding box is computed in mask coordinates, so the mask must
        # have the image's size; nearest-neighbour keeps the mask binary.
        if mask.shape[:2] != img.shape[:2]:
            mask = cv2.resize(mask, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_NEAREST)
        binary = (mask > 127).astype(np.uint8)
        # [-2] selects the contour list on both OpenCV 3.x (3 return values)
        # and 4.x (2 return values) and avoids clobbering IPython's `_`.
        contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if not contours:
            skipped += 1
            continue
        # Crop around the largest region: the first contour returned is
        # arbitrary and may be a small noise speck rather than the nut.
        x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        x0, y0 = max(0, x - CROP_PADDING), max(0, y - CROP_PADDING)
        x1, y1 = min(img.shape[1], x + w + CROP_PADDING), min(img.shape[0], y + h + CROP_PADDING)
        crop = img[y0:y1, x0:x1]
        out_name = f"{img_path.stem}.png"
        out_path = cropped_base / out_name
        # imwrite reports failure through its return value instead of raising.
        if not cv2.imwrite(str(out_path), crop):
            print(f'Warning: could not write {out_path}')
            skipped += 1
            continue
        metadata.append({'source':str(img_path), 'cropped': str(out_path)})
    print(f'Created {len(metadata)} cropped images into {cropped_base}')
    if skipped:
        print(f'Skipped {skipped} files (no mask, unreadable, empty mask or write failure)')
    if not metadata:
        # Fail here with a clear message instead of handing an empty crop set
        # to the dataset builder.
        raise RuntimeError(
            f'No image+mask pairs could be cropped from {SEGMENTED_DIR}; '
            'check the mask naming (MYIMAGE_mask.png) and the mask contents.'
        )
    # Use dataset builder on cropped_nuts
    split_source = cropped_base
else:
    print('Using provided cropped images to build dataset splits...')
    split_source = SEGMENTED_DIR

# Both input modes end with the same train/test split step.
builder = AnomalibDatasetBuilder(str(DATASET_DIR))
counts = builder.create_splits(str(split_source), train_ratio=TRAIN_RATIO)
print('Dataset splits:', counts)

## Step 3 — Train Anomalib Model
This cell trains a PatchCore model on the dataset created in Step 2 and then evaluates it on the test split. Adjust `MAX_EPOCHS`, `BATCH_SIZE` and `IMAGE_SIZE` in the configuration cell above if needed.

In [None]:
from anomalib_trainer import AnomalibTrainer

# Every tunable value comes from the configuration cell.
trainer = AnomalibTrainer(
    dataset_root=str(DATASET_DIR),
    model_name='patchcore',
    backbone='wide_resnet50_2',
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    max_epochs=MAX_EPOCHS,
    output_dir=str(OUTPUT_DIR),
)

# Setup steps, kept in their original order.
trainer.setup_model()
trainer.setup_trainer()
trainer.save_config()
dm = trainer.setup_datamodule()

# Fit on the datamodule, then run the test pass on the same datamodule.
trainer.train(dm)
results = trainer.test(dm)
print('Training finished. Results:', results)

## Notes
- This notebook assumes your segmented/cropped images are correctly centered on nuts.
- If training fails due to missing packages, run the pip install cell above.
- For HEIC inputs, install `pillow-heif` or convert to PNG/JPG before running.