In [1]:
import pandas as pd

from error_report import save_detection_error_report
from error_visualizer import visualize_errors_from_csv
from model2_final import main

#### Data Preprocessing and Feature Engineering

In [2]:
# Root folder of the ORALMED v10 images; handed to main() as `images_dir` and to the
# error visualizer as `images_root` further down.
db_path = '../data/ORALMED_v10/Images'

# ROI table (comma-separated); handed to main() as `df_roi`.
df_mask = pd.read_csv('../data/ORALMED_v10/oralmedROI_v10.csv', sep=',')

# Parse the workbook once and pull both sheets out of it instead of opening the same
# .xlsx file twice. With a list for `sheet_name`, read_excel returns a dict keyed by
# sheet name.
_oralmed_sheets = pd.read_excel(
    '../data/ORALMED_v10/oralmedDS_v10.xlsx',
    sheet_name=['Paciente', 'Imagen'],
)
df_info = _oralmed_sheets['Paciente']   # sheet "Paciente"
df_ds = _oralmed_sheets['Imagen']       # sheet "Imagen"; handed to main() as `df_meta`

# Tab-separated mapping table.
injury_mapping = pd.read_csv('../data/mapping.csv', sep='\t')


#### Model Training
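First model run: classification and segmentation at the basic granularity level. The cell below has `mode_type` set to `"test"`, so it evaluates a previously saved checkpoint rather than training a new model.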

In [3]:
# Launch the model2_final pipeline. Keyword order is unchanged from the original cell;
# the notes next to each argument list the alternatives recorded for that option.
main(
    # ---- run / model selection ----
    mode_type="test",                       # one of: "train", "test"
    model_type="base",                      # one of: "base", "custom_roi"

    # ---- inputs and outputs ----
    images_dir=db_path,
    df_roi=df_mask,
    df_meta=df_ds,
    mapping_target_label=None,              # None -> no superinjury_label is applied
    output_dir="./base_per_class_analysis_whealthy",
    label_field="outcome_label",            # one of: "injury_label", "outcome_label"

    # ---- dataset split ----
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15,

    # ---- augmentation and class balancing ----
    augmentation_type="light",              # "base" (i.e. none), "light" or "strong"
    downsampling_flag=False,
    # balance_skew lies in [0.0, 1.0]: 0.0 = perfectly balanced,
    # 1.0 = original counts (no downsampling).
    balance_skew=0.8,
    # balance_max_ratio must be >= 1.0 when set. After downsampling it enforces
    # majority/minority count <= max_ratio by capping each class at
    # cap = floor(N_min * max_ratio).
    balance_max_ratio=None,
    keep_one_bbox=True,
    selection_mode='largest_area',

    # ---- optimisation ----
    batch_size=4,
    epochs=60,
    lr=0.001,

    # ---- thresholds ----
    score_thr=0.00,
    mask_thr=0.50,
    iou_thr=0.50,
    dice_thr=0.50,
    img_acc_score_thr=0.00,

    # ---- runtime ----
    num_workers=2,
    pretrained=True,
    use_amp=True,

    # ---- model selection and scheduling ----
    optimize_metric=None,                   # None (= "val"), "dice", "iou" or "ap"
    segm_every=10,                          # interval, in epochs, between segmentation-metric computations
    scheduler_type="cosine",                # one of: "cosine", "plateau"

    # ---- sampling and loss weighting ----
    # sampler_alpha: smoothing factor that softens oversampling; it controls the
    # class weights used by the weighted random sampler.
    sampler_alpha=0.6,
    # max_class_weights: suggested range 2.0 - 5.0; upper bound on the weight a class
    # can receive in the loss function of the CustomROI Mask-RCNN model.
    max_class_weights=3.5,
    # dice_alpha: 0.0 = pure Dice loss, 1.0 = pure BCE/Focal loss,
    # values in between blend the two.
    dice_alpha=0.0,
    use_bce=False,                          # True -> BCE loss for the mask
    use_focal=True,                         # True -> Focal loss for the mask
    focal_alpha=0.25,
    focal_gamma=2.0,

    # ---- regularisation ----
    label_smoothing=0.05,
    dropout_alpha=0.2,
)

[prepare_structured_dataset_df] Wrote per-image bbox counts to: base_per_class_analysis_whealthy/n_bbox_dist
Classes: {1: 'benign', 2: 'healthy', 3: 'malignant', 4: 'potentially malignant'}
{'benign': 1, 'healthy': 2, 'malignant': 3, 'potentially malignant': 4}
Saved histogram to base_per_class_analysis_whealthy/plots/case_label_distribution.png
Saved counts to base_per_class_analysis_whealthy/plots/case_label_distribution.csv
Patient db size:  1742
train size:  1219 | val size:  261 | test size:  262
images -> train 3471 | val 881 | test 772

Class weights (per label):
1: 0.165898
2: 3.242508
3: 0.426771
4: 0.164823
cuda
Base Mask-RCNN model selected...
Entering test mode...
[test] Loading checkpoint: base_per_class_analysis_whealthy/best_model.pth
GT (val):  base_per_class_analysis_whealthy/coco_gt_val.json
GT (test): base_per_class_analysis_whealthy/coco_gt_test.json

[test] Split = test
[simple_eval_limits] dets/img: 100 -> 5, score: 0.05 -> 0.0, min_size: [800] -> [512], max_size:

Inference detections 1/1:   0%|          | 0/772 [00:00<?, ?it/s]

dt saved
[simple_eval_limits] restored.
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Metrics saved at base_per_class_analysis_whealthy/test/test
[test] Finished metrics for test → written to base_per_class_analysis_whealthy/test/test/


#### Inference - Test Set

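#### Metrics & Visualization

The cells below build and visualise the detection error report for the validation split of the `base_cosine_light_downsampled_outcome_label` run.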

In [3]:
# Write the detection error report (CSV) from the COCO ground-truth JSON and the bbox
# detections JSON of the validation split, using iou_thresh=0.50.
# `save_detection_error_report` is imported in the notebook's first (imports) cell.
# The call is the cell's last expression, so its return value is displayed
# (the output CSV path in the recorded run).
# NOTE(review): these files live under "base_cosine_light_downsampled_outcome_label",
# not under the `output_dir` passed to main() above - confirm this is the intended run.
save_detection_error_report(
    gt_json="./base_cosine_light_downsampled_outcome_label/coco_gt_val.json",
    dt_json="./base_cosine_light_downsampled_outcome_label/coco_dt_bbox_val.json",
    out_csv="./base_cosine_light_downsampled_outcome_label/val_errors_0p50.csv",
    iou_thresh=0.50,
)

'./base_cosine_light_downsampled_outcome_label/val_errors_0p50.csv'

In [4]:
# Render example images for the entries of the validation error CSV, reading the
# images from `db_path` and writing into the `val_error_viz` folder.
# `visualize_errors_from_csv` is imported in the notebook's first (imports) cell.
# In the recorded run the returned value is a dict of counts keyed by 'TP' / 'FP' / 'FN'.
counts = visualize_errors_from_csv(
    csv_path="./base_cosine_light_downsampled_outcome_label/val_errors_0p50.csv",
    images_root=db_path,
    out_dir="./base_cosine_light_downsampled_outcome_label/val_error_viz",
    max_images_per_type=20,
    resize_max=1600,
    style_scale=1.2,
    line_thickness=10,
    font_size=20,
    # NOTE(review): absolute system font path; presumably only present on Linux hosts
    # with the DejaVu fonts installed - confirm before running elsewhere.
    font_path="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
)
print("Saved:", counts)

Saved: {'TP': 6, 'FP': 20, 'FN': 20}
