In [None]:
# -*- coding: utf-8 -*-
"""
bccd.ipynb: Automatically generated by Colab - Customized!
Original file is located at: https://colab.research.google.com/drive/1vme1jryl4LhO9JVjGSO8CddH3bFhjO-w
"""
# Download the BCCD dataset by cloning its GitHub repository into ./BCCD_Dataset.
# Later cells read it from /content/BCCD_Dataset, so the working directory is
# assumed to be /content (the Colab default) — TODO confirm when run elsewhere.
!git clone 'https://github.com/Shenggan/BCCD_Dataset.git'

Cloning into 'BCCD_Dataset'...
remote: Enumerating objects: 800, done.[K
remote: Total 800 (delta 0), reused 0 (delta 0), pack-reused 800 (from 1)[K
Receiving objects: 100% (800/800), 7.39 MiB | 16.03 MiB/s, done.
Resolving deltas: 100% (378/378), done.


In [None]:
# Extraction of data labels from .xml file to dataframe
import shutil
import pandas as pd
import os, sys, random
import xml.etree.ElementTree as ET
from glob import glob
import pandas as pd
from sklearn import preprocessing, model_selection
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import patches

# Collect the annotation files in sorted order so the numeric image ids
# assigned below are the same on every run.
annotations = sorted(glob('/content/BCCD_Dataset/BCCD/Annotations/*.xml'))

# One record per annotated object (bounding box); every object found in the
# same XML file shares that file's `prev_filename` / `filename` pair.
records = []
for cnt, file in enumerate(annotations):
  # The image name is the annotation's base name with a .jpg extension.
  # splitext only strips the final extension, so stems containing extra
  # dots are kept intact.
  stem = os.path.splitext(os.path.basename(file))[0]
  prev_filename = stem + '.jpg'
  # Sequential id-based name derived from the file's position in the list
  filename = str(cnt) + '.jpg'

  parsedXML = ET.parse(file)
  for node in parsedXML.getroot().iter('object'):
    blood_cells = node.find('name').text
    xmin = int(node.find('bndbox/xmin').text)
    xmax = int(node.find('bndbox/xmax').text)
    ymin = int(node.find('bndbox/ymin').text)
    ymax = int(node.find('bndbox/ymax').text)

    records.append([prev_filename, filename, blood_cells, xmin, xmax, ymin, ymax])

data = pd.DataFrame(records, columns=['prev_filename', 'filename', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax'])

# Persist the flat table; it is read back from this path further down.
data.to_csv('/content/blood_cell_detection.csv', index=False)

# Pixel dimensions used as divisors when normalising box coordinates.
# NOTE(review): hard-coded — assumes every image is 640x480; confirm against the dataset.
img_width, img_height = 640, 480

def width(df):
  """Return the horizontal extent (xmax - xmin) of one box as an int.

  `df` is a single annotation record exposing `xmin` and `xmax`
  attributes, e.g. the row handed over by `DataFrame.apply(..., axis=1)`.
  """
  right, left = df.xmax, df.xmin
  return int(right - left)
def height(df):
  """Return the vertical extent (ymax - ymin) of one box as an int.

  `df` is a single annotation record exposing `ymin` and `ymax`
  attributes, e.g. the row handed over by `DataFrame.apply(..., axis=1)`.
  """
  bottom, top = df.ymax, df.ymin
  return int(bottom - top)
def x_center(df):
  """Return the box centre along x in pixels, truncated to an int.

  `df` is a single annotation record that already carries a `width`
  value next to `xmin`.
  """
  half_width = df.width / 2
  return int(df.xmin + half_width)
def y_center(df):
  """Return the box centre along y in pixels, truncated to an int.

  `df` is a single annotation record that already carries a `height`
  value next to `ymin`.
  """
  half_height = df.height / 2
  return int(df.ymin + half_height)
def w_norm(df):
  """Express a horizontal pixel measurement as a fraction of `img_width`.

  Despite its name, `df` is the value being scaled (a scalar when used
  through `Series.apply`), not a DataFrame.
  """
  fraction = df / img_width
  return fraction
def h_norm(df):
  """Express a vertical pixel measurement as a fraction of `img_height`.

  Despite its name, `df` is the value being scaled (a scalar when used
  through `Series.apply`), not a DataFrame.
  """
  fraction = df / img_height
  return fraction

# Reload the flat annotation table from the CSV written earlier in this cell.
df = pd.read_csv('/content/blood_cell_detection.csv')

# Encode each cell-type name as an integer class id and show the class order.
le = preprocessing.LabelEncoder().fit(df['cell_type'])
print(le.classes_)
labels = le.transform(df['cell_type'])
df['labels'] = labels

# Pixel-space box geometry, computed row by row. The order matters: the
# centre helpers read the `width` / `height` columns created just before.
for column, row_fn in (('width', width),
                       ('height', height),
                       ('x_center', x_center),
                       ('y_center', y_center)):
  df[column] = df.apply(row_fn, axis=1)

# Normalised geometry: horizontal values are divided by the image width,
# vertical values by the image height.
for column, source, scale_fn in (('x_center_norm', 'x_center', w_norm),
                                 ('width_norm', 'width', w_norm),
                                 ('y_center_norm', 'y_center', h_norm),
                                 ('height_norm', 'height', h_norm)):
  df[column] = df[source].apply(scale_fn)


['Platelets' 'RBC' 'WBC']
['Platelets' 'RBC' 'WBC']


In [None]:
def segregate_data(df, img_path, label_path, train_img_path, train_label_path):
  """Write one YOLO label file and copy one image for every image in `df`.

  Args:
    df: DataFrame with one row per bounding box. Must provide the columns
      `filename`, `prev_filename`, `labels`, `x_center_norm`,
      `y_center_norm`, `width_norm` and `height_norm`.
    img_path: Folder that holds the source images named by `prev_filename`.
    label_path: Not read by this function; kept so existing callers that
      pass it positionally keep working.
    train_img_path: Destination folder for the copied images. Despite the
      name it is used for whichever subset the caller passes in.
    train_label_path: Destination folder for the generated `.txt` files.

  Each label file holds one line per box:
  `class x_center y_center width height`, using the normalised columns.
  """
  # Make the function usable on its own, without the caller pre-creating folders.
  os.makedirs(train_img_path, exist_ok=True)
  os.makedirs(train_label_path, exist_ok=True)

  yolo_columns = ['labels', 'x_center_norm', 'y_center_norm', 'width_norm', 'height_norm']

  # A single groupby pass replaces re-filtering the whole frame once per image.
  for _, boxes in df.groupby('filename', sort=False):
    # Every row of a group describes the same image, so any row's
    # `prev_filename` identifies the source file.
    source_name = boxes['prev_filename'].iloc[0]

    # Float matrix with one row per box; "%d" below prints the class id as an integer.
    yolo_rows = boxes[yolo_columns].to_numpy(dtype=float)

    # Strip only the final extension so the label stem matches the image
    # stem even when the name contains additional dots.
    stem = os.path.splitext(source_name)[0]
    txt_filename = os.path.join(train_label_path, stem + ".txt")
    np.savetxt(txt_filename, yolo_rows, fmt=["%d", "%f", "%f", "%f", "%f"])

    shutil.copyfile(os.path.join(img_path, source_name), os.path.join(train_img_path, source_name))

In [None]:
import os
import pandas as pd
from sklearn.model_selection import KFold
# === K-Fold Cross Validation Integration ===

!git clone 'https://github.com/ultralytics/yolov5.git'
!pip install -qr '/content/yolov5/requirements.txt'  # install dependencies

# Ensure DataFrame has enough data
if len(df) < 2:
    raise ValueError(f"Insufficient data: {len(df)} samples found. At least 2 are required.")

# Disable wandb
os.environ['WANDB_MODE'] = 'disabled'

# shuffle=False, random_state=None -> used
# shuffle=True, random_state=42 -> NOT used
kf = KFold(n_splits=5, shuffle=False, random_state=None)
fold_results = []

for fold, (train_index, valid_index) in enumerate(kf.split(df)):
    print(f"\n=== Fold {fold + 1} ===")

    # Eğitim ve doğrulama veri setlerini ayırma
    train_df = df.iloc[train_index]
    valid_df = df.iloc[valid_index]

    # Eğitim ve doğrulama dosyalarını hazırlama
    train_img_path = f"/content/bcc/images/train/fold_{fold}"
    valid_img_path = f"/content/bcc/images/valid/fold_{fold}"
    train_label_path = f"/content/bcc/labels/train/fold_{fold}"
    valid_label_path = f"/content/bcc/labels/valid/fold_{fold}"

    # Klasör oluşturma
    os.makedirs(train_img_path, exist_ok=True)
    os.makedirs(valid_img_path, exist_ok=True)
    os.makedirs(train_label_path, exist_ok=True)
    os.makedirs(valid_label_path, exist_ok=True)

    # Veriyi klasörlere ayırma
    segregate_data(train_df, "/content/BCCD_Dataset/BCCD/JPEGImages/",
                   "/content/BCCD_Dataset/BCCD/Annotations/",
                   train_img_path, train_label_path)
    segregate_data(valid_df, "/content/BCCD_Dataset/BCCD/JPEGImages/",
                   "/content/BCCD_Dataset/BCCD/Annotations/",
                   valid_img_path, valid_label_path)

    # YOLOv5 için eğitim dosyasını oluşturma
    yolo_data_yaml = f'bcc_fold_{fold}.yaml'
    with open(yolo_data_yaml, 'w') as f:
        f.write(f"train: {train_img_path}\n")
        f.write(f"val: {valid_img_path}\n\n")
        f.write("nc: 3\n")  # Sınıf sayısı
        f.write("names: ['Platelets', 'RBC', 'WBC']\n")

    # YOLOv5 Modelini Eğitme
    print(f"\nTraining starts (Fold {fold + 1})...")
    !python yolov5/train.py --img 640 --batch 16 --epochs 100 --data {yolo_data_yaml} --cfg /content/yolov5/models/yolov5l.yaml --name BCCM_fold_{fold}

    # Fold sonuçlarını değerlendirme
    print(f"\nTraining completed for Fold {fold + 1}.")

    # Eğitim sonuçlarını okuma
    results_file = f'yolov5/runs/train/BCCM_fold_{fold}/results.csv'
    print(os.listdir(f'yolov5/runs/train/BCCM_fold_{fold}/'))

    file_exists = os.path.exists # config

    if file_exists(results_file):
        results = pd.read_csv(results_file)
        print(f"Columns in Fold {fold + 1}: {results.columns.tolist()}")

        results.columns = results.columns.str.strip()
        last_epoch_results = results.iloc[-1]

        # Column names from results.csv
        precision = last_epoch_results.get("metrics/precision", None)
        recall = last_epoch_results.get("metrics/recall", None)

        mAP50 = last_epoch_results.get("metrics/mAP_0.5", None)
        mAP5095 = last_epoch_results.get("metrics/mAP_0.5:0.95", None)

        if None not in [precision, recall, mAP50, mAP5095]:
          fold_results.append({
              'fold': fold + 1,
              'precision': precision,
              'recall': recall,
              'mAP50': mAP50,
              'mAP50-95': mAP5095
          })
          print(f"Fold {fold + 1} - Precision: {precision}, Recall: {recall}, mAP@0.5: {mAP50}, mAP@0.5:0.95: {mAP5095}")
        else:
          print(f"Some metrics are missing in Fold {fold + 1} results.")
    else:
      print(f"Results file not found for Fold {fold + 1}.")

# Average each metric over the folds that produced results and print a summary
if not fold_results:
    print("No results available for averages.")
else:
    results_df = pd.DataFrame(fold_results)

    # Mean of every metric column, keyed by column name
    metric_means = {
        column: results_df[column].mean()
        for column in ('precision', 'recall', 'mAP50', 'mAP50-95')
    }
    avg_precision = metric_means['precision']
    avg_recall = metric_means['recall']
    avg_map50 = metric_means['mAP50']
    avg_map5095 = metric_means['mAP50-95']

    print("\n=== Final Cross-Validation Results ===")
    print(f"Overall Average Precision: {avg_precision:.4f}")
    print(f"Overall Average Recall: {avg_recall:.4f}")
    print(f"Overall Average mAP@0.5: {avg_map50:.4f}")
    print(f"Overall Average mAP@0.5:0.95: {avg_map5095:.4f}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
      86/99      10.5G     0.0306     0.1421   0.001226        169        640: 100% 18/18 [00:02<00:00,  6.72it/s]
                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100% 3/3 [00:00<00:00,  3.64it/s]
                   all         86        977      0.778      0.926 