# Dataset

In [1]:
import os
import json
import shutil
import cv2


def clean_coco_json(json_path, images_dir, output_path):
    """Write a copy of a COCO annotation file restricted to images present on disk.

    Parameters:
    - json_path: Path to the source COCO annotations JSON file
    - images_dir: Directory whose file names decide which image entries are kept
    - output_path: Destination of the filtered JSON (written with 4-space indent)

    Image entries whose ``file_name`` is not listed in ``images_dir`` are
    dropped, together with every annotation that points at a dropped image.
    All other top-level keys are written back unchanged.
    """
    with open(json_path, 'r') as src:
        coco = json.load(src)

    files_on_disk = set(os.listdir(images_dir))

    kept_images = [entry for entry in coco['images'] if entry['file_name'] in files_on_disk]
    kept_ids = {entry['id'] for entry in kept_images}
    kept_annotations = [ann for ann in coco['annotations'] if ann['image_id'] in kept_ids]

    coco['images'] = kept_images
    coco['annotations'] = kept_annotations

    with open(output_path, 'w') as dst:
        json.dump(coco, dst, indent=4)

    print(f"Очищений JSON файл збережено за адресою: {output_path}")


# Keep only the test annotations whose images exist in the test image folder;
# the filtered copy is written relative to the current working directory.
clean_coco_json(
    '/kaggle/input/livecell/livecell_coco_test.json',
    '/kaggle/input/livecell/images/images/livecell_test_images',
    './livecell_coco_test.json',
)

Очищений JSON файл збережено за адресою: ./livecell_coco_test.json


In [2]:
# Roots shared by the input paths below.
livecell_root = '/kaggle/input/livecell'
livecell_images_root = f'{livecell_root}/images/images'

# Source images: train and val are read from the same folder.
image_dir_train = f'{livecell_images_root}/livecell_train_val_images'
image_dir_val = f'{livecell_images_root}/livecell_train_val_images'
image_dir_test = f'{livecell_images_root}/livecell_test_images'

# COCO annotation files; the test file is read from the working directory
# rather than from the input dataset.
coco_annotation_file_train = f'{livecell_root}/livecell_coco_train.json'
coco_annotation_file_val = f'{livecell_root}/livecell_coco_val.json'
coco_annotation_file_test = '/kaggle/working/livecell_coco_test.json'

# Output folders (relative to the working directory) for YOLO labels ...
output_label_dir_train = 'dataset/labels/train'
output_label_dir_val = 'dataset/labels/val'
output_label_dir_test = 'test_dataset/test/labels'

# ... and for the resized images that go with them.
output_image_dir_train = 'dataset/images/train'
output_image_dir_val = 'dataset/images/val'
output_image_dir_test = 'test_dataset/test/images'

# Dataset description file, class list and the (width, height) images are resized to.
output_file = 'dataset/data.yaml'
categories = ['cell']
target_img_size = (512, 512)

In [3]:
import os
import json
from pathlib import Path


def convert_coco_to_yolo(coco_annotation_file, output_label_dir, categories, target_img_size=None):
    """
    Convert COCO annotations to YOLO format.

    One ``<image stem>.txt`` file is written per image entry (empty when the
    image has no annotation of a selected category). Each line is
    ``class_id x_center y_center width height`` with coordinates normalised
    to the 0-1 range by the image's own width and height.

    Parameters:
    - coco_annotation_file: Path to COCO annotations JSON file
    - output_label_dir: Directory to save YOLO formatted labels (created if missing)
    - categories: List of category names to include; a category's position in
      this list is the YOLO class id written to the label files
    - target_img_size: Tuple (width, height) the images will be resized to,
      default is None (no resize). Kept for interface compatibility:
      normalised coordinates do not depend on it, because the resize scale
      and the division by the target size cancel out.
    """

    with open(coco_annotation_file) as f:
        coco_data = json.load(f)

    # Class ids follow the order of `categories` (the order used for the
    # dataset's `names` list), so they always stay below len(categories).
    class_index = {name: i for i, name in enumerate(categories)}
    category_map = {
        cat['id']: class_index[cat['name']]
        for cat in coco_data['categories']
        if cat['name'] in class_index
    }
    os.makedirs(output_label_dir, exist_ok=True)

    # Group the selected annotations by image once instead of rescanning the
    # full annotation list for every image. File order is preserved per image.
    annotations_by_image = {}
    for ann in coco_data['annotations']:
        if ann['category_id'] in category_map:
            annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    for img in coco_data['images']:
        img_width, img_height = img['width'], img['height']

        label_output_path = os.path.join(output_label_dir, Path(img['file_name']).stem + '.txt')
        with open(label_output_path, 'w') as label_file:
            for ann in annotations_by_image.get(img['id'], []):
                # COCO bbox is [x_min, y_min, width, height] in pixels.
                x, y, width, height = ann['bbox']
                x_center = (x + width / 2) / img_width
                y_center = (y + height / 2) / img_height
                norm_width = width / img_width
                norm_height = height / img_height
                class_id = category_map[ann['category_id']]
                label_file.write(f"{class_id} {x_center} {y_center} {norm_width} {norm_height}\n")


# Generate the YOLO label files for every split from its COCO annotation file.
for _annotation_file, _label_dir in (
    (coco_annotation_file_train, output_label_dir_train),
    (coco_annotation_file_val, output_label_dir_val),
    (coco_annotation_file_test, output_label_dir_test),
):
    convert_coco_to_yolo(_annotation_file, _label_dir, categories, target_img_size)

In [4]:
def create_test_yaml_file(dataset_root="/kaggle/working/test_dataset/test"):
    """Write the dataset YAML used to evaluate on the test split.

    The train, val and test entries all point at the same ``images`` folder
    under ``dataset_root``.

    Parameters:
    - dataset_root: Directory that holds the ``images`` folder; the file is
      written there as ``dataset.yaml``. The directory is created if missing.

    Returns:
    - Path of the written YAML file.
    """
    yaml_content = f"""
path: {dataset_root}
train: images
val: images
test: images
nc: 1  # Кількість класів
names: ['cell']
"""
    os.makedirs(dataset_root, exist_ok=True)
    yaml_path = os.path.join(dataset_root, "dataset.yaml")
    # The content contains non-ASCII text, so do not rely on the platform's
    # default encoding.
    with open(yaml_path, "w", encoding="utf-8") as file:
        file.write(yaml_content)
    return yaml_path

# Write the test-split dataset YAML; the returned path is shown as the cell output.
create_test_yaml_file()

'/kaggle/working/test_dataset/test/dataset.yaml'

In [5]:
def copy_and_resize_images(image_dir, label_dir, output_image_dir, target_img_size):
    """Resize the ``.tif`` image behind every label file and save it as ``.jpg``.

    For each ``<stem>.txt`` in ``label_dir`` the image ``<stem>.tif`` is read
    from ``image_dir``, resized to ``target_img_size`` and written to
    ``output_image_dir`` as ``<stem>.jpg``.

    Parameters:
    - image_dir: Directory containing the source ``.tif`` images
    - label_dir: Directory containing the YOLO ``.txt`` label files
    - output_image_dir: Directory for the resized images (created if missing)
    - target_img_size: Tuple (width, height) passed to ``cv2.resize``

    Images that are missing or cannot be decoded are skipped; a one-line
    summary is printed when that happens.
    """
    os.makedirs(output_image_dir, exist_ok=True)
    skipped = []

    for label_file in os.listdir(label_dir):
        # Split the extension off instead of using str.replace, which would
        # also rewrite '.txt' / '.tif' occurring elsewhere in a name or path.
        stem, ext = os.path.splitext(label_file)
        if ext != '.txt':
            continue

        src_image_path = os.path.join(image_dir, stem + '.tif')
        if not os.path.exists(src_image_path):
            skipped.append(src_image_path)
            continue

        img = cv2.imread(src_image_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            skipped.append(src_image_path)
            continue

        resized_img = cv2.resize(img, target_img_size)
        cv2.imwrite(os.path.join(output_image_dir, stem + '.jpg'), resized_img)

    if skipped:
        print(f"Skipped {len(skipped)} missing or unreadable image(s), e.g. {skipped[0]}")


# Produce the resized JPEG copies for every split, driven by its label folder.
for _image_dir, _label_dir, _output_image_dir in (
    (image_dir_train, output_label_dir_train, output_image_dir_train),
    (image_dir_val, output_label_dir_val, output_image_dir_val),
    (image_dir_test, output_label_dir_test, output_image_dir_test),
):
    copy_and_resize_images(_image_dir, _label_dir, _output_image_dir, target_img_size)

In [6]:
import os
import yaml

def create_data_yaml(train_images_dir, val_images_dir, test_images_dir, class_names, output_file):
    """Dump a dataset description to ``output_file`` as block-style YAML.

    Parameters:
    - train_images_dir / val_images_dir / test_images_dir: Image folders
      stored under the ``train`` / ``val`` / ``test`` keys
    - class_names: Class names, stored under ``names``; their count goes to ``nc``
    - output_file: Path of the YAML file to write
    """
    config = dict(
        train=train_images_dir,
        val=val_images_dir,
        test=test_images_dir,
        nc=len(class_names),
        names=class_names,
    )

    with open(output_file, 'w') as stream:
        yaml.dump(config, stream, default_flow_style=False)


# The test images are written to test_dataset/test/images (see
# output_image_dir_test); nothing in this notebook creates
# dataset/images/test, so point the `test` entry at the folder that exists.
create_data_yaml('/kaggle/working/dataset/images/train',
                 '/kaggle/working/dataset/images/val',
                 '/kaggle/working/test_dataset/test/images', categories, output_file)

print(f"data.yaml created at {output_file}")


data.yaml created at dataset/data.yaml


# Model

In [7]:
import shutil
import os
import sys
from colorama import Fore
import torch


    
class SetupPipline:
    """Prepare the offline environment: install pycocotools, expose ultralytics.

    Construction performs the setup as a side effect and stores a status
    string for each tool; ``display()`` prints those statuses.

    Attributes:
    - pycocotools: Status string returned by the pycocotools install step
    - ultralytics: Status string returned by the ultralytics path step
    """

    def __init__(self, display: bool = True):
        # NOTE: `display` is accepted for backward compatibility but is not
        # used; call .display() explicitly to print the statuses.
        self.pycocotools = self.__pycocotools()
        self.ultralytics = self.__ultralytics()
        torch.backends.cudnn.benchmark = True

    @staticmethod
    def __ultralytics() -> str:
        """Make the bundled ultralytics sources importable via ``sys.path``."""
        ultralytics_dir = "/kaggle/input/hubmap-tools-ultralytics-and-pycocotools/ultralytics/ultralytics"
        # Guard against piling up duplicate entries when the cell is re-run.
        if ultralytics_dir not in sys.path:
            sys.path.append(ultralytics_dir)
        return "successfully"

    @staticmethod
    def __pycocotools() -> str:
        """Copy the bundled pycocotools sources to the working dir and install them.

        Returns a status string: "successfully" when the packages directory is
        already present or at least one install command exits with 0,
        otherwise a description containing the exit codes.
        """
        packages_dir = "/kaggle/working/packages"
        if os.path.exists(packages_dir):
            # Assumes a previous run already completed the install; this keeps
            # the status a string instead of None when the cell is re-run.
            return "successfully"

        shutil.copytree("/kaggle/input/hubmap-tools-ultralytics-and-pycocotools/pycocotools/pycocotools", packages_dir)
        os.chdir("/kaggle/working/packages/pycocotools-2.0.6/")
        try:
            exit_codes = [
                os.system("python setup.py install"),
                os.system("pip install . --no-index --find-links /kaggle/working/packages/"),
            ]
        finally:
            # Always return to the working directory, even if a command raises.
            os.chdir("/kaggle/working")

        if 0 in exit_codes:
            return "successfully"
        return f"with errors (exit codes: {exit_codes})"

    def display(self) -> None:
        """Print the stored install statuses (green text, then reset to white)."""
        print(Fore.GREEN+f"\nPycocotools was installed {self.pycocotools}")
        print(f"Ultralytics was installed {self.ultralytics}"+Fore.WHITE)

In [8]:
# Constructing the object runs the setup steps (pycocotools install, ultralytics path).
pipline = SetupPipline()

!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()
!!

        ********************************************************************************
        Please avoid running ``setup.py`` and ``easy_install``.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://github.com/pypa/setuptools/issues/917 for details.
        ********************************************************************************

!!
  self.initialize_options()
[2;36m[06/26/24 09:26:52][0m[2;36m [0m[1;31mERROR   [0m listing git files failed - pretending     ]8;id=98965

Compiling pycocotools/_mask.pyx because it changed.
[1/1] Cythonizing pycocotools/_mask.pyx


./common/maskApi.c: In function 'rleToBbox':
  151 |     uint h, w, xs, ys, xe, ye, xp, cc; siz j, m;
      |                                ^~
./common/maskApi.c: In function 'rleFrPoly':
  197 |   for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
      |   ^~~
./common/maskApi.c:197:54: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'for'
  197 |   for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
      |                                                      ^
  198 |   for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
      |   ^~~
./common/maskApi.c:198:54: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'for'
  198 |   for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
      |                                                      ^
./common/maskApi.c: In function 'rleToString':
  243 |       if(more) c |= 0x20; c+=48; s[p++]=c;
      |       ^~


Looking in links: /kaggle/working/packages/
Processing /kaggle/working/packages/pycocotools-2.0.6
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (pyproject.toml): started
  Building wheel for pycocotools (pyproject.toml): finished with status 'done'
  Created wheel for pycocotools: filename=pycocotools-2.0.6-cp310-cp310-linux_x86_64.whl size=109429 sha256=dedd8b2234da05f5b31ce33268238550fea68251927d8cc697b14ba9c1a22a50
  Stored in directory: /root/.cache/pip/wheels/b7/83/32/99474500256e64154dfc568319411b6ff49e96e50f30d9474f
Successfully built pycocotools
Installing collected packages: pycocotools
  Attempting uninstall: pyco

In [9]:
# Print the status strings recorded while the setup object was constructed.
pipline.display()

[32m
Pycocotools was installed successfully
Ultralytics was installed successfully[37m


In [10]:
from pycocotools import _mask as coco_mask 
from ultralytics import YOLO

In [11]:
from ultralytics import YOLO
import torch
import os

In [None]:
from ultralytics import YOLO
import torch
import math

def main():
    """Train a YOLOv8x model on the prepared dataset and return the model object.

    The training settings are grouped by purpose below and passed to
    ``model.train`` as keyword arguments.
    """
    run_identity = dict(project="live-cell", name="yolov8x")
    reproducibility = dict(deterministic=True, seed=43)
    data_and_checkpoints = dict(
        data="/kaggle/working/dataset/data.yaml",
        save=True,
        save_period=5,
        pretrained=True,
        imgsz=512,
    )
    schedule = dict(epochs=50, batch=8, workers=8, val=True)
    optimisation = dict(
        lr0=0.01,
        patience=30,
        optimizer="AdamW",
        momentum=0.9,
        weight_decay=0.01,
        close_mosaic=3,
    )
    runtime = dict(amp=True, cache=True)

    train_args = {
        **run_identity,
        **reproducibility,
        **data_and_checkpoints,
        **schedule,
        **optimisation,
        **runtime,
    }

    model = YOLO("yolov8x")
    model.train(**train_args)
    return model

# Run training; the returned `model` is used later by the prediction cell.
model = main()


Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt to yolov8x.pt...
100%|██████████| 131M/131M [00:01<00:00, 95.4MB/s] 
New https://pypi.org/project/ultralytics/8.2.42 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.120 🚀 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla P100-PCIE-16GB, 16276MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=yolov8x.pt, data=/kaggle/working/dataset/data.yaml, epochs=50, patience=30, batch=8, imgsz=512, save=True, save_period=5, cache=True, device=None, workers=8, project=live-cell, name=yolov8x, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=43, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=3, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks with YOLOv8n...
Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to yolov8n.pt...
100%|██████████| 6.23M/6.23M [00:00<00:00, 22.5MB/s]
[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mScanning /kaggle/working/dataset/labels/train... 3188 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3188/3188 [00:07<00:00, 454.18it/s]
[34m[1mtrain: [0mNew cache created: /kaggle/working/dataset/labels/train.cache
[34m[1mtrain: [0mCaching images (2.3GB True): 100%|██████████| 3188/3188 [00:02<00:00, 1222.07it/s]
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))
  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/working/dataset/labels/val... 569 images, 0 backgrounds, 0 corrupt: 100%|██████████| 569/569 [00:01<00:00, 324.91it/s]
[34m[1mval: [0mNew cache cre

In [None]:
from ultralytics import YOLO

def evaluate_model_on_test_set():
    """Validate the best training checkpoint against the test dataset YAML.

    Loads ``best.pt`` from the training run directory, runs ``val`` with the
    test dataset description and prints the resulting ``results_dict``.
    """
    weights = "/kaggle/working/live-cell/yolov8x/weights/best.pt"
    dataset_yaml = "/kaggle/working/test_dataset/test/dataset.yaml"

    metrics = YOLO(weights).val(data=dataset_yaml)
    print(metrics.results_dict)

# Evaluate the best checkpoint on the test split and print its metrics dict.
evaluate_model_on_test_set()


In [None]:
import os
import cv2
import matplotlib.pyplot as plt
from ultralytics import YOLO

def predict_and_show_images(model, image_paths, output_dir):
    """Run the detector on each image, draw the predicted boxes, save and show it.

    Parameters:
    - model: Callable detector; ``model(img)`` must return an iterable of
      results, each exposing ``.boxes`` whose items provide ``.xyxy[0]``
    - image_paths: Paths of the images to process
    - output_dir: Directory for the annotated copies (created if missing);
      each copy keeps the base name of its source image

    Images that cannot be read are reported and skipped.
    """
    os.makedirs(output_dir, exist_ok=True)

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for a missing or undecodable file;
            # skip it instead of handing None to the model.
            print(f"Could not read image {img_path}, skipping")
            continue

        results = model(img)

        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                # Colour is given in BGR order, the layout cv2.imread produces.
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)

        output_path = os.path.join(output_dir, os.path.basename(img_path))
        cv2.imwrite(output_path, img)

        plt.figure(figsize=(10, 10))
        # matplotlib expects RGB, so convert before displaying.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

In [None]:
# Draw predictions for two sample test images with the model returned by training.
predict_and_show_images(model, ['/kaggle/working/test_dataset/test/images/SHSY5Y_Phase_A10_1_00d04h00m_3.jpg',
                                '/kaggle/working/test_dataset/test/images/SkBr3_Phase_G3_1_03d04h00m_3.jpg'],
                       'predict')

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO

def segment_object_within_bbox(img, bbox):
    """Mask the region of ``img`` inside ``bbox`` with an Otsu threshold, in place.

    The crop is converted to grayscale and thresholded; pixels outside the
    resulting mask are set to zero and the crop is written back into ``img``.

    Parameters:
    - img: Image array indexed as ``img[y, x]``; modified in place
    - bbox: Tuple ``(x1, y1, x2, y2)`` of integer pixel coordinates

    Returns None. A box that is empty after clamping to the image bounds
    leaves ``img`` untouched.
    """
    img_height, img_width = img.shape[:2]
    x1, y1, x2, y2 = bbox

    # Clamp to the image: negative indices would wrap around in array
    # slicing and select the wrong region.
    x1, x2 = max(0, x1), min(img_width, x2)
    y1, y2 = max(0, y1), min(img_height, y2)
    if x2 <= x1 or y2 <= y1:
        # Nothing to segment; an empty crop would make cv2.cvtColor fail.
        return

    obj = img[y1:y2, x1:x2]

    gray = cv2.cvtColor(obj, cv2.COLOR_BGR2GRAY)

    # NOTE(review): with THRESH_OTSU OpenCV picks the threshold itself, so the
    # 180 passed here is presumably ignored - confirm against the OpenCV docs.
    _, mask = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    object_contour_mask = np.zeros_like(mask)
    cv2.drawContours(object_contour_mask, contours, -1, (255), thickness=cv2.FILLED)

    # Keep only mask pixels that also lie inside the filled outer contours.
    mask = cv2.bitwise_and(mask, object_contour_mask)

    # Expand to three channels so the mask can be AND-ed with the colour crop.
    color_mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

    segmented = cv2.bitwise_and(obj, color_mask)

    img[y1:y2, x1:x2] = segmented


def predict_and_segment_images(model, image_paths, output_dir):
    """Run the detector on each image and segment every predicted box in place.

    Parameters:
    - model: Callable detector; ``model(img)`` must return an iterable of
      results, each exposing ``.boxes`` whose items provide ``.xyxy[0]``
    - image_paths: Paths of the images to process
    - output_dir: Directory for the processed copies (created if missing);
      each copy keeps the base name of its source image

    Each box is passed to ``segment_object_within_bbox``; the modified image
    is then saved and displayed. Images that cannot be read are reported and
    skipped.
    """
    os.makedirs(output_dir, exist_ok=True)

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for a missing or undecodable file;
            # skip it instead of handing None to the model.
            print(f"Could not read image {img_path}, skipping")
            continue

        for result in model(img):
            boxes = result.boxes

            if not boxes:
                print(f"No boxes found for image {img_path}")
                continue

            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                segment_object_within_bbox(img, (x1, y1, x2, y2))

        output_path = os.path.join(output_dir, os.path.basename(img_path))
        cv2.imwrite(output_path, img)

        plt.figure(figsize=(10, 10))
        # matplotlib expects RGB, so convert before displaying.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()


# Load the best checkpoint from the training run; this rebinds the global `model`.
best_model_path = "/kaggle/working/live-cell/yolov8x/weights/best.pt"
model = YOLO(best_model_path)
        
# Segment the predicted boxes on two sample test images and save the results under 'predict-seg'.
predict_and_segment_images(model, ['/kaggle/working/test_dataset/test/images/SHSY5Y_Phase_A10_1_00d04h00m_3.jpg',
                                   '/kaggle/working/test_dataset/test/images/SkBr3_Phase_G3_1_03d04h00m_3.jpg'],
                           'predict-seg')
