In [1]:
import os
import pydicom
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from ultralytics import YOLO
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns

# -------------------------------------------------

# Weights file handed to YOLO(...) when main() loads the model.
MODEL_PATH = './yolov8m_med_final/final_best_model/weights/best.pt'
# Root folder searched recursively for *.dcm files; main() uses the name of each
# file's parent directory as its ground-truth class.
TEST_DATA_DIR = Path('./datatest')
# Destination for results.csv / results.xlsx, the metric plots and the
# 'detected_pathologies' image folder.
OUTPUT_DIR = Path('./test_results')
# Fallback window used when a DICOM file has no WindowCenter / WindowWidth.
# NOTE(review): 40 / 400 looks like a soft-tissue CT window in HU - confirm.
DEFAULT_WINDOW_CENTER = 40 # generally accepted defaults
DEFAULT_WINDOW_WIDTH = 400

In [2]:
def apply_windowing(dcm_image, window_center, window_width):
    """Map a DICOM image to 8-bit grayscale through an intensity window.

    The pixel data is first rescaled with ``RescaleSlope`` / ``RescaleIntercept``
    when both are present, then clipped to
    ``[center - width / 2, center + width / 2]`` and stretched linearly to 0..255.

    Args:
        dcm_image: Dataset-like object exposing ``pixel_array``, supporting
            ``'Tag' in dcm_image`` checks and attribute access for
            ``RescaleSlope`` / ``RescaleIntercept``.
        window_center: Center of the intensity window, in rescaled units.
            Anything accepted by ``float()`` is allowed.
        window_width: Full width of the intensity window. Widths below 1
            (including zero, negative and NaN values) are raised to 1 so the
            normalization never divides by zero.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the same shape as
        ``dcm_image.pixel_array``. Fractional values are truncated, not rounded.
    """
    pixel_array = dcm_image.pixel_array
    if 'RescaleSlope' in dcm_image and 'RescaleIntercept' in dcm_image:
        pixel_array = pixel_array * float(dcm_image.RescaleSlope) + float(dcm_image.RescaleIntercept)

    center = float(window_center)
    width = float(window_width)
    # Written as a negated comparison on purpose: NaN fails ``>=`` and is
    # therefore replaced as well. A zero width would otherwise yield 0/0 = NaN
    # and an undefined uint8 cast.
    if not width >= 1.0:
        width = 1.0

    min_val = center - width / 2
    max_val = center + width / 2
    windowed_image = np.clip(pixel_array, min_val, max_val)
    normalized_image = ((windowed_image - min_val) / (max_val - min_val)) * 255
    return normalized_image.astype(np.uint8)

def _first_window_value(raw_value, fallback):
    """Reduce a DICOM window attribute value to one usable scalar.

    Args:
        raw_value: Value returned by ``dcm.get(...)``; may be a scalar, a
            ``pydicom.multival.MultiValue`` or ``None``.
        fallback: Value to use when ``raw_value`` carries no usable entry.

    Returns:
        The first entry of a multi-valued attribute, the scalar itself, or
        ``fallback`` when the value is ``None``, an empty string or an empty
        multi-value.
    """
    if isinstance(raw_value, pydicom.multival.MultiValue):
        # Several window presets may be stored; only the first one is used.
        raw_value = raw_value[0] if len(raw_value) > 0 else None
    if raw_value is None or raw_value == '':
        return fallback
    return raw_value


def _predict_record(model, dcm_path):
    """Run the model on one DICOM file and build its results-table row.

    Args:
        model: Loaded model object exposing ``predict``.
        dcm_path: Path to the DICOM file; its parent folder name is taken as
            the ground-truth class.

    Returns:
        ``(record, result)`` where ``record`` is a dict with the keys ``uid``,
        ``actual_class``, ``predicted_class`` and ``confidence_score``, and
        ``result`` is the first element returned by ``model.predict``.
    """
    actual_class = dcm_path.parent.name
    dcm = pydicom.dcmread(dcm_path)
    uid = dcm.SOPInstanceUID

    window_center = _first_window_value(dcm.get('WindowCenter'), DEFAULT_WINDOW_CENTER)
    window_width = _first_window_value(dcm.get('WindowWidth'), DEFAULT_WINDOW_WIDTH)

    processed_image_8bit = apply_windowing(dcm, window_center, window_width)
    bgr_image = cv2.cvtColor(processed_image_8bit, cv2.COLOR_GRAY2BGR)

    result = model.predict(bgr_image, verbose=False)[0]

    # Class decision and the confidence used for ROC-AUC: an image counts as
    # 'pathology' as soon as at least one mask is returned.
    predicted_class = 'normal'
    confidence_score = 0.0  # default confidence for "normal"
    if result.masks is not None and len(result.masks) > 0:
        predicted_class = 'pathology'
        # Several objects may be found; the highest confidence is kept.
        confidence_score = result.boxes.conf.max().item()

    record = {
        'uid': uid,
        'actual_class': actual_class,
        'predicted_class': predicted_class,
        'confidence_score': confidence_score
    }
    return record, result


def _save_confusion_matrix_plot(cm, labels, path):
    """Render the confusion matrix as an annotated heatmap and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=labels, yticklabels=labels,
                annot_kws={"size": 16}, ax=ax)
    ax.set_title('Матрица ошибок (Confusion Matrix)', fontsize=16)
    ax.set_ylabel('Истинный класс (True Label)', fontsize=12)
    ax.set_xlabel('Предсказанный класс (Predicted Label)', fontsize=12)
    fig.tight_layout()
    fig.savefig(path)
    plt.close(fig)


def _report_roc(df, path):
    """Print ROC-AUC for the results table and save the ROC curve to ``path``.

    Rows whose ``actual_class`` is neither ``'normal'`` nor ``'pathology'`` are
    left out, and the computation is skipped when fewer than two ground-truth
    classes remain, because the ROC curve is undefined in that case.
    """
    print("\n--- Расчет ROC-AUC ---")
    y_true_binary = df['actual_class'].map({'normal': 0, 'pathology': 1})
    known = y_true_binary.notna()
    if not known.all():
        # Unmapped folder names become NaN, which roc_curve rejects.
        print(f"Пропущено строк с неизвестным истинным классом: {int((~known).sum())}")
    y_true_binary = y_true_binary[known].astype(int)
    y_scores = df.loc[known, 'confidence_score']

    if y_true_binary.nunique() < 2:
        print("ROC-AUC не определён: в данных присутствует только один истинный класс.")
        return

    fpr, tpr, _ = roc_curve(y_true_binary, y_scores)
    roc_auc = auc(fpr, tpr)

    print(f"ROC-AUC: {roc_auc:.4f}")

    # --- ROC curve ---
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic (ROC) Curve')
    ax.legend(loc="lower right")
    fig.savefig(path)
    plt.close(fig)


def main():
    """Evaluate the model on every DICOM file under ``TEST_DATA_DIR``.

    For each ``*.dcm`` file the image is windowed, passed to the model and
    classified as ``'pathology'`` (at least one mask returned) or ``'normal'``.
    Annotated images of predicted pathologies are written to
    ``OUTPUT_DIR / 'detected_pathologies'``. The per-image table is saved as
    ``results.csv`` and ``results.xlsx``; accuracy, the confusion matrix and
    ROC-AUC are printed, and both plots are saved as PNG files in ``OUTPUT_DIR``.

    Files that raise during processing are reported and skipped. When no file
    is processed successfully, the (empty) tables are still written and metric
    computation is skipped.
    """
    output_pathologies_dir = OUTPUT_DIR / 'detected_pathologies'
    output_pathologies_dir.mkdir(parents=True, exist_ok=True)
    model = YOLO(MODEL_PATH)
    dcm_files = list(TEST_DATA_DIR.glob('**/*.dcm'))
    print(f"Найдено {len(dcm_files)} изображений для тестирования.")

    results_list = []

    for dcm_path in tqdm(dcm_files, desc="Обработка изображений"):
        try:
            record, result = _predict_record(model, dcm_path)
            # The row is stored before the image is written, so a failed image
            # export does not drop the prediction from the metrics.
            results_list.append(record)

            # Images are saved only when a pathology was detected.
            if record['predicted_class'] == 'pathology':
                visualized_image = result.plot()
                output_image_path = output_pathologies_dir / f"{record['uid']}_{record['actual_class']}_pred_pathology.png"
                cv2.imwrite(str(output_image_path), visualized_image)

        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f"\nВ файле {dcm_path} ошибка: {e}")

    # Explicit columns keep the table schema stable even with zero rows.
    df = pd.DataFrame(
        results_list,
        columns=['uid', 'actual_class', 'predicted_class', 'confidence_score'],
    )
    csv_path = OUTPUT_DIR / 'results.csv'
    excel_path = OUTPUT_DIR / 'results.xlsx'
    df.to_csv(csv_path, index=False)
    df.to_excel(excel_path, index=False)

    if df.empty:
        print("\nНет успешно обработанных изображений — метрики не рассчитываются.")
        return

    print("\n--- Оценка производительности модели (на основе порога 0.25 по умолчанию) ---")
    y_true = df['actual_class']
    y_pred = df['predicted_class']
    labels = ['normal', 'pathology']
    print(f"Общая точность (Accuracy): {accuracy_score(y_true, y_pred):.2%}")
    print("\nМатрица ошибок (Confusion Matrix):")
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    print(pd.DataFrame(cm, index=['Actual Normal', 'Actual Pathology'], columns=['Predicted Normal', 'Predicted Pathology']))

    _save_confusion_matrix_plot(cm, labels, OUTPUT_DIR / 'confusion_matrix.png')
    _report_roc(df, OUTPUT_DIR / 'roc_curve.png')

In [3]:
# Run the whole evaluation: inference over the test set, export of the results
# tables, and printing/saving of the metrics and plots.
main()

Найдено 100 изображений для тестирования.


Обработка изображений:   0%|          | 0/100 [00:00<?, ?it/s]

  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)



--- Оценка производительности модели (на основе порога 0.25 по умолчанию) ---
Общая точность (Accuracy): 77.00%

Матрица ошибок (Confusion Matrix):
                  Predicted Normal  Predicted Pathology
Actual Normal                   49                    1
Actual Pathology                22                   28

--- Расчет ROC-AUC ---
ROC-AUC: 0.7712
