In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **MODEL**

## **UTILS**

### **UTILS**

In [None]:
import numpy as np
from PIL import Image


def cvtColor(image):
    """Return ``image`` as-is when it is already 3-channel, otherwise convert it to RGB.

    Args:
        image: Any object ``np.shape`` can inspect. When its shape is not
            ``(_, _, 3)`` it must provide ``convert('RGB')``
            (presumably a ``PIL.Image`` -- confirm against callers).

    Returns:
        The very same object for three-channel input, else the result of
        ``image.convert('RGB')``.
    """
    dims = np.shape(image)
    has_three_channels = len(dims) == 3 and dims[2] == 3
    return image if has_three_channels else image.convert('RGB')


def resize_image(image, size, letterbox_image):
    """Resize ``image`` to ``size``, optionally keeping the aspect ratio with grey padding.

    Args:
        image: Source image exposing ``.size`` as ``(width, height)`` and
            ``.resize(...)`` (a ``PIL.Image``).
        size: Target ``(width, height)``.
        letterbox_image: When truthy, scale the image uniformly so it fits
            inside ``size`` and paste it centred on a (128, 128, 128) RGB
            canvas; otherwise stretch it directly to ``size``.

    Returns:
        A new image of dimensions ``size``.
    """
    src_w, src_h = image.size
    dst_w, dst_h = size

    if not letterbox_image:
        # Plain stretch: aspect ratio is not preserved.
        return image.resize((dst_w, dst_h), Image.BICUBIC)

    # Uniform scale factor that makes the image fit inside the target box.
    ratio = min(dst_w / src_w, dst_h / src_h)
    fit_w = int(src_w * ratio)
    fit_h = int(src_h * ratio)

    shrunk = image.resize((fit_w, fit_h), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128, 128, 128))
    # Centre the resized image; the remaining border stays grey.
    canvas.paste(shrunk, ((dst_w - fit_w) // 2, (dst_h - fit_h) // 2))
    return canvas

def get_classes(classes_path):
    """Read class names (one per line) from a UTF-8 text file.

    Args:
        classes_path: Path of the text file to read.

    Returns:
        ``(class_names, count)`` where ``class_names`` holds every line with
        surrounding whitespace stripped (blank lines are kept as ``''``) and
        ``count`` is ``len(class_names)``.
    """
    with open(classes_path, encoding='utf-8') as handle:
        names = [entry.strip() for entry in handle]
    return names, len(names)

def preprocess_input(image):
    """Scale pixel values to [0, 1] and normalise each channel by a fixed mean/std.

    The previous implementation divided ``image`` in place, which raised for
    integer arrays (e.g. ``uint8`` pixels) and silently overwrote the
    caller's array. This version always works on a floating-point copy.

    Args:
        image: Array-like whose last axis has three channels (it is
            broadcast against the 3-element mean/std vectors). Values are
            divided by 255, so 0-255 pixel data is assumed -- confirm
            against callers.

    Returns:
        A new ``numpy.ndarray``. Floating-point inputs keep their dtype;
        every other dtype is promoted to ``float32``. The input is left
        untouched.
    """
    source = np.asarray(image)
    if np.issubdtype(source.dtype, np.floating):
        target_dtype = source.dtype
    else:
        # Integer/bool pixels cannot be true-divided in place; promote them.
        target_dtype = np.float32

    # np.array(...) copies, so the in-place arithmetic below never touches
    # the caller's data while still keeping the result in ``target_dtype``.
    normalized = np.array(source, dtype=target_dtype)
    normalized /= 255.0
    normalized -= np.array([0.485, 0.456, 0.406])
    normalized /= np.array([0.229, 0.224, 0.225])
    return normalized


def get_lr(optimizer):
    """Return the ``'lr'`` entry of the optimizer's first parameter group.

    Args:
        optimizer: Object exposing an iterable ``param_groups`` of mappings
            that contain an ``'lr'`` key.

    Returns:
        The first group's learning rate, or ``None`` when there are no
        parameter groups.
    """
    nothing = object()
    first_group = next(iter(optimizer.param_groups), nothing)
    if first_group is nothing:
        return None
    return first_group['lr']

def show_config(**kwargs):
    """Print the given keyword arguments as a two-column ``keys | values`` table.

    Args:
        **kwargs: Configuration entries; both names and values are rendered
            with ``str()`` and right-aligned in 25- and 40-character columns.
    """
    separator = '-' * 70
    row_format = '|%25s | %40s|'

    table = ['Configurations:', separator, row_format % ('keys', 'values'), separator]
    table += [row_format % (str(name), str(setting)) for name, setting in kwargs.items()]
    table.append(separator)

    for row in table:
        print(row)

### **UTILS_BBOX**

In [None]:
import numpy as np
import torch
from torchvision.ops import nms, boxes

def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image):
    """Map normalised centre/size boxes to corner boxes in image coordinates.

    Args:
        box_xy: Array ``(..., 2)`` of box centres as ``(x, y)``, expressed as
            fractions of the network input.
        box_wh: Array ``(..., 2)`` of box sizes as ``(w, h)`` in the same
            units. NOTE: when ``letterbox_image`` is truthy this array is
            rescaled in place through a reversed view.
        input_shape: Network input size; combined element-wise with the
            ``(y, x)``-ordered boxes, so ``(height, width)`` is expected.
        image_shape: Original image size in the same ``(height, width)`` order.
        letterbox_image: Truthy when the image was letterboxed (uniformly
            scaled and padded) before inference, in which case the padding
            offset and scale are undone.

    Returns:
        Array ``(..., 4)`` ordered ``(y_min, x_min, y_max, x_max)`` and
        multiplied by ``image_shape``.
    """
    # Work in (y, x) / (h, w) order so the boxes line up with the shapes.
    centers_yx = box_xy[..., ::-1]
    sizes_hw = box_wh[..., ::-1]
    net_hw = np.array(input_shape)
    img_hw = np.array(image_shape)

    if letterbox_image:
        # Size the image occupied inside the padded network input.
        resized_hw = np.round(img_hw * np.min(net_hw / img_hw))
        pad_fraction = (net_hw - resized_hw) / 2. / net_hw
        stretch = net_hw / resized_hw

        centers_yx = (centers_yx - pad_fraction) * stretch
        # In-place on a view: this also rescales the caller's ``box_wh``.
        sizes_hw *= stretch

    half_hw = sizes_hw / 2.
    top_left = centers_yx - half_hw
    bottom_right = centers_yx + half_hw

    corners = np.concatenate([top_left[..., :2], bottom_right[..., :2]], axis=-1)
    corners *= np.concatenate([img_hw, img_hw], axis=-1)
    return corners

def decode_outputs(outputs, input_shape):
    """Decode raw per-level head outputs into normalised centre/size boxes.

    Args:
        outputs: Sequence of tensors shaped ``(batch, channels, h, w)``, one
            per feature level. Channels 0-1 are centre offsets relative to
            the grid cell, 2-3 are log-sizes, 4 and above are logits that
            get a sigmoid.
        input_shape: Network input size as ``(height, width)``. The stride
            of each level is computed as ``input_shape[0] / h``.

    Returns:
        Tensor ``(batch, sum(h * w), channels)`` where channels 0-3 hold
        ``(cx, cy, w, h)`` divided by the input width/height and channels
        4+ hold sigmoid scores.
    """
    feature_sizes = [level.shape[-2:] for level in outputs]

    # (batch, channels, h, w) per level -> (batch, sum(h * w), channels)
    outputs = torch.cat([level.flatten(start_dim=2) for level in outputs], dim=2).permute(0, 2, 1)
    outputs[:, :, 4:] = torch.sigmoid(outputs[:, :, 4:])

    grids = []
    strides = []
    for h, w in feature_sizes:
        # Row-major cell coordinates. Built with repeat/repeat_interleave
        # instead of torch.meshgrid so the result does not depend on
        # meshgrid's ``indexing`` default (and its deprecation warning).
        grid_x = torch.arange(w).repeat(h)
        grid_y = torch.arange(h).repeat_interleave(w)
        grids.append(torch.stack((grid_x, grid_y), dim=1).unsqueeze(0))
        strides.append(torch.full((1, h * w, 1), input_shape[0] / h))

    # Match the dtype/device of the predictions.
    grids = torch.cat(grids, dim=1).type(outputs.type())
    strides = torch.cat(strides, dim=1).type(outputs.type())

    # Cell offset + cell index -> input pixels; exp(log-size) -> input pixels.
    outputs[..., :2] = (outputs[..., :2] + grids) * strides
    outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides

    # Normalise x/w by the input width and y/h by the input height.
    outputs[..., [0, 2]] = outputs[..., [0, 2]] / input_shape[1]
    outputs[..., [1, 3]] = outputs[..., [1, 3]] / input_shape[0]
    return outputs


def non_max_suppression(prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4):
    """Filter decoded predictions by confidence, run class-wise NMS and map boxes to the image.

    Args:
        prediction: Tensor ``(batch, candidates, 5 + num_classes)`` whose
            last axis is ``(cx, cy, w, h, objectness, class scores...)``.
            NOTE: its first four channels are overwritten in place with
            corner coordinates ``(x1, y1, x2, y2)``.
        num_classes: Number of class-score channels following index 5.
        input_shape: Network input size, forwarded to ``yolo_correct_boxes``.
        image_shape: Original image size, forwarded to ``yolo_correct_boxes``.
        letterbox_image: Whether letterboxing must be undone, forwarded to
            ``yolo_correct_boxes``.
        conf_thres: Minimum ``objectness * class confidence`` to keep a candidate.
        nms_thres: IoU threshold passed to ``batched_nms``.

    Returns:
        A list with one entry per batch item: ``None`` when the item has no
        candidate rows at all, otherwise a numpy array ``(kept, 7)`` with
        columns ``(y_min, x_min, y_max, x_max, objectness, class confidence,
        class index)``; the array has zero rows when nothing passes the
        threshold.
    """
    # Centre/size -> corner form, written back into ``prediction``.
    half_wh = prediction[:, :, 2:4] / 2
    top_left = prediction[:, :, 0:2] - half_wh
    bottom_right = prediction[:, :, 0:2] + half_wh
    prediction[:, :, 0:2] = top_left
    prediction[:, :, 2:4] = bottom_right

    output = [None for _ in range(len(prediction))]

    for i, image_pred in enumerate(prediction):
        # Nothing to score for this image; leave its slot as None.
        if not image_pred.size(0):
            continue

        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

        # The product is already 1-D (one entry per candidate). It must NOT
        # be squeezed: with a single candidate, squeeze() yields a 0-dim
        # mask and boolean indexing would then add a dimension instead of
        # selecting rows.
        conf_mask = image_pred[:, 4] * class_conf[:, 0] >= conf_thres

        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
        detections = detections[conf_mask]

        # NMS is applied independently per predicted class (column 6).
        nms_out_index = boxes.batched_nms(
            detections[:, :4],
            detections[:, 4] * detections[:, 5],
            detections[:, 6],
            nms_thres,
        )

        kept = detections[nms_out_index].cpu().numpy()
        # Back to centre/size so the boxes can be mapped onto the image.
        box_xy = (kept[:, 0:2] + kept[:, 2:4]) / 2
        box_wh = kept[:, 2:4] - kept[:, 0:2]
        kept[:, :4] = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
        output[i] = kept
    return output


### **UTILS_MAP**

In [None]:
import glob
import json
import math
import operator
import os
import shutil
import sys
from google.colab.patches import cv2_imshow

try:
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval
except:
    pass
import cv2
import matplotlib

matplotlib.use('Agg')
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

'''
    0,0 ------> x (width)
     |
     |  (Left,Top)
     |      *_________
     |      |         |
            |         |
     y      |_________|
  (height)            *
                (Right,Bottom)
'''


def log_average_miss_rate(precision, fp_cumsum, num_images):
    """
        log-average miss rate:
            Geometric mean of the miss rates sampled at 9 FPPI reference
            points evenly spaced in log-space between 1e-2 and 1e0.

        input:
                precision | array whose complement (1 - precision) is used as
                            the miss rate; despite the name, get_map in this
                            file passes the cumulative recall array here
                fp_cumsum | cumulative false-positive counts, same length
                num_images | number of images used to turn counts into FPPI

        output:
                lamr | log-average miss rate
                mr | miss rate
                fppi | false positives per image

        An empty `precision` yields the constants (0, 1, 0).

        references:
            [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the
               State of the Art." Pattern Analysis and Machine Intelligence, IEEE
               Transactions on 34.4 (2012): 743 - 761.
    """
    if precision.size == 0:
        return 0, 1, 0

    fppi = fp_cumsum / float(num_images)
    mr = (1 - precision)

    # Sentinel entry (fppi = -1, mr = 1) guarantees every reference point
    # has at least one sample at or below it.
    padded_fppi = np.insert(fppi, 0, -1.0)
    padded_mr = np.insert(mr, 0, 1.0)

    # For each reference FPPI take the miss rate of the last sample whose
    # FPPI does not exceed it.
    sampled_mr = np.array(
        [
            padded_mr[np.flatnonzero(padded_fppi <= point)[-1]]
            for point in np.logspace(-2.0, 0.0, num=9)
        ],
        dtype=np.float64,
    )

    # Clamp before the log so a zero miss rate does not produce -inf.
    lamr = math.exp(np.mean(np.log(np.maximum(1e-10, sampled_mr))))

    return lamr, mr, fppi


"""
 throw error and exit
"""


def error(msg):
    """Print ``msg`` and abort by raising ``SystemExit`` with a failure status.

    Args:
        msg: Text describing the fatal condition.

    Raises:
        SystemExit: Always, with exit code 1 so that shells and callers can
            tell the abort apart from a successful run (the previous code
            exited with status 0).
    """
    print(msg)
    sys.exit(1)


"""
 check if the number is a float between 0.0 and 1.0
"""


# def is_float_between_0_and_1(value):
#     try:
#         val = float(value)
#         if val > 0.0 and val < 1.0:
#             return True
#         else:
#             return False
#     except ValueError:
#         return False


"""
 Calculate the AP given the recall and precision array
    1st) We compute a version of the measured precision/recall curve with
         precision monotonically decreasing
    2nd) We compute the AP as the area under this curve by numerical integration.
"""


def voc_ap(rec, prec):
    """Compute VOC-style average precision from recall/precision sequences.

    Port of the official VOC2012 MATLAB code:

        mrec=[0 ; rec ; 1];
        mpre=[0 ; prec ; 0];
        for i=numel(mpre)-1:-1:1
                mpre(i)=max(mpre(i),mpre(i+1));
        end
        i=find(mrec(2:end)~=mrec(1:end-1))+1;
        ap=sum((mrec(i)-mrec(i-1)).*mpre(i));

    Unlike the previous implementation, the inputs are not modified: the
    padded curves are built as new lists, so callers no longer need to pass
    defensive copies and any iterable of numbers is accepted.

    Args:
        rec: Recall values, ordered like ``prec``.
        prec: Precision values, one per recall value.

    Returns:
        ``(ap, mrec, mpre)`` where ``mrec`` is ``[0.0, *rec, 1.0]``, ``mpre``
        is ``[0.0, *prec, 0.0]`` made monotonically non-increasing from left
        to right, and ``ap`` is the area under that stepped curve.
    """
    mrec = [0.0, *rec, 1.0]
    mpre = [0.0, *prec, 0.0]

    # Precision envelope: sweep from the end so every entry becomes the
    # maximum of itself and everything to its right.
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])

    # Sum rectangle areas wherever the recall changes (numerical integration).
    ap = 0.0
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            ap += ((mrec[i] - mrec[i - 1]) * mpre[i])
    return ap, mrec, mpre


"""
 Convert the lines of a file to a list
"""


def file_lines_to_list(path):
    """Read a text file and return its lines with surrounding whitespace stripped.

    Args:
        path: Path of the file to read (opened with the platform default
            encoding).

    Returns:
        A list with one stripped string per line, blank lines included.
    """
    with open(path) as handle:
        return [raw_line.strip() for raw_line in handle]


"""
 Draws text in image
"""


def draw_text_in_image(img, text, pos, color, line_width):
    """Draw ``text`` on ``img`` and return the advanced horizontal offset.

    Args:
        img: Image passed to ``cv2.putText`` (drawn on in place).
        text: String to render.
        pos: Bottom-left corner of the text, as given to ``cv2.putText``.
        color: Text colour, as given to ``cv2.putText``.
        line_width: Horizontal offset accumulated so far.

    Returns:
        ``(img, line_width + text_width)`` where ``text_width`` is the
        rendered width reported by ``cv2.getTextSize``.
    """
    font_face = cv2.FONT_HERSHEY_PLAIN
    font_scale = 1
    # Passed in the slot right after the colour / font scale, i.e. the
    # stroke thickness of putText/getTextSize.
    stroke = 1

    cv2.putText(img, text, pos, font_face, font_scale, color, stroke)
    text_size = cv2.getTextSize(text, font_face, font_scale, stroke)[0]
    text_width, _ = text_size
    return img, (line_width + text_width)


"""
 Plot - adjust axes
"""


def adjust_axes(r, t, fig, axes):
    """Widen the x-axis upper limit so a text label fits inside the figure.

    Args:
        r: Renderer handed to ``t.get_window_extent``.
        t: Text object whose rendered width is measured.
        fig: Figure providing ``dpi`` and ``get_figwidth()``.
        axes: Axes whose x-limits are rescaled via ``set_xlim``.
    """
    # Rendered text width, converted from pixels to inches.
    label_inches = t.get_window_extent(renderer=r).width / fig.dpi

    # Relative growth the figure would need to also host the label.
    fig_inches = fig.get_figwidth()
    growth = (fig_inches + label_inches) / fig_inches

    # Stretch only the upper x-limit by that factor.
    lower, upper = axes.get_xlim()
    axes.set_xlim([lower, upper * growth])


"""
 Draw plot using Matplotlib
"""


def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color,
                   true_p_bar):
    """Draw a horizontal bar chart of ``dictionary`` with pyplot and save it to ``output_path``.

    Args:
        dictionary: Mapping of label -> numeric value; bars are ordered by value.
        n_classes: Number of bars/ticks (used as ``range(n_classes)``);
            presumably ``len(dictionary)`` -- confirm against callers.
        window_title: Text passed to ``fig.suptitle``.
        plot_title: Text passed to ``plt.title``.
        x_label: Label of the x axis.
        output_path: Destination handed to ``fig.savefig``.
        to_show: When truthy, ``plt.show()`` is called after saving.
        plot_color: Bar colour for the plain (non-stacked) chart.
        true_p_bar: ``""`` for a plain chart; otherwise a mapping indexed by
            the same keys as ``dictionary`` giving the true-positive part of
            each value, which switches to a stacked FP/TP chart.

    Side effects:
        Updates the pyplot ``font`` rc settings, draws on the current pyplot
        figure and closes it before returning.
    """
    # sort the dictionary by increasing value (sorted() default), into a list of (key, value) tuples

    plt.rc('font', family='DejaVu Sans', size=15)
    sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
    # unpacking the list of tuples into two lists
    sorted_keys, sorted_values = zip(*sorted_dic_by_value)
    # stacked FP/TP chart when true-positive counts are supplied, plain chart otherwise
    if true_p_bar != "":
        """
         Special case to draw in:
            - green -> TP: True Positives (object detected and matches ground-truth)
            - red -> FP: False Positives (object detected but does not match ground-truth)
            - orange -> FN: False Negatives (object not detected but present in the ground-truth)
        """
        fp_sorted = []
        tp_sorted = []
        for key in sorted_keys:
            # the false-positive part is whatever remains of the total after the true positives
            fp_sorted.append(dictionary[key] - true_p_bar[key])
            tp_sorted.append(true_p_bar[key])
        plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive')
        # the TP bars start where the FP bars end (left=fp_sorted)
        plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive',
                 left=fp_sorted)
        # add legend
        plt.legend(loc='lower right')
        """
         Write number on side of bar
        """
        fig = plt.gcf()  # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            fp_val = fp_sorted[i]
            tp_val = tp_sorted[i]
            fp_str_val = " " + str(fp_val)
            tp_str_val = fp_str_val + " " + str(tp_val)
            # trick to paint multicolor with offset:
            # first paint everything and then repaint the first number
            t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
            plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
            if i == (len(sorted_values) - 1):  # last entry of the ascending sort, i.e. the largest bar
                adjust_axes(r, t, fig, axes)
    else:
        plt.barh(range(n_classes), sorted_values, color=plot_color)
        """
         Write number on side of bar
        """
        fig = plt.gcf()  # gcf - get current figure
        axes = plt.gca()
        # fixed x range for the plain chart, with a little headroom above 1.0
        axes.set_xlim([0.0, 1.02])
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            str_val = " " + str(val)  # add a space before
            # values below 1.0 are shown with three decimals
            if val < 1.0:
                str_val = " {0:.3f}".format(val)
            t = plt.text(val, i, str_val, color="black", va='center')
            # re-set axes to show number inside the figure
            if i == (len(sorted_values) - 1):  # last entry of the ascending sort, i.e. the largest bar
                adjust_axes(r, t, fig, axes)
    # use window_title as the figure-level title (suptitle)
    fig.suptitle(window_title)
    # fig.canvas.set_window_title(window_title)
    # write classes in y axis
    tick_font_size = 12
    plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
    """
     Re-scale height accordingly
    """
    init_height = fig.get_figheight()
    # compute the height needed for all tick labels, in points and then inches
    dpi = fig.dpi
    height_pt = n_classes * (tick_font_size * 1.4)  # 1.4 (some spacing)
    height_in = height_pt / dpi
    # compute the required figure height
    top_margin = 0.15  # in percentage of the figure height
    bottom_margin = 0.05  # in percentage of the figure height
    figure_height = height_in / (1 - top_margin - bottom_margin)
    # set new height (the figure is only ever enlarged, never shrunk)
    if figure_height > init_height:
        fig.set_figheight(figure_height)

    # set plot title
    plt.title(plot_title, fontsize=14)
    # set axis titles
    # plt.xlabel('classes')
    plt.xlabel(x_label)
    # adjust size of window
    fig.tight_layout()
    # save the plot
    fig.savefig(output_path)
    # show image
    if to_show:
        plt.show()
    # close the plot
    plt.close()

def get_map(MINOVERLAP, draw_plot, score_threhold=0.5, path='./map_out'):
    GT_PATH = os.path.join(path, 'ground-truth')
    CSV_PATH = os.path.join(path, str(MINOVERLAP))
    DR_PATH = os.path.join(path, 'detection-results')
    IMG_PATH = os.path.join(path, 'images-optional')
    TEMP_FILES_PATH = os.path.join(path, '.temp_files')
    RESULTS_FILES_PATH = os.path.join(path, 'results_%0.1f' % MINOVERLAP)
    df_dic = {}
    show_animation = True
    if os.path.exists(IMG_PATH):
        for dirpath, dirnames, files in os.walk(IMG_PATH):
            if not files:
                show_animation = False
    else:
        show_animation = False

    if not os.path.exists(TEMP_FILES_PATH):
        os.makedirs(TEMP_FILES_PATH)

    if not os.path.exists(CSV_PATH):
        os.makedirs(CSV_PATH)
    if os.path.exists(RESULTS_FILES_PATH):
        shutil.rmtree(RESULTS_FILES_PATH)
    else:
        os.makedirs(RESULTS_FILES_PATH)
    if draw_plot:
        try:
            matplotlib.use('TkAgg')
        except:
            pass
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "AP"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "F1"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "Recall"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "Precision"))
    if show_animation:
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "images", "detections_one_by_one"))

    ground_truth_files_list = glob.glob(GT_PATH + '/*.txt')
    if len(ground_truth_files_list) == 0:
        error("Error: No ground-truth files found!")
    ground_truth_files_list.sort()
    gt_counter_per_class = {}
    counter_images_per_class = {}

    for txt_file in ground_truth_files_list:
        file_id = txt_file.split(".txt", 1)[0]
        file_id = os.path.basename(os.path.normpath(file_id))
        temp_path = os.path.join(DR_PATH, (file_id + ".txt"))
        if not os.path.exists(temp_path):
            error_msg = "Error. File not found: {}\n".format(temp_path)
            error(error_msg)
        lines_list = file_lines_to_list(txt_file)
        bounding_boxes = []
        is_difficult = False
        already_seen_classes = []
        for line in lines_list:
            try:
                if "difficult" in line:
                    class_name, left, top, right, bottom, _difficult = line.split()
                    is_difficult = True
                else:
                    class_name, left, top, right, bottom = line.split()
            except:
                if "difficult" in line:
                    line_split = line.split()
                    _difficult = line_split[-1]
                    bottom = line_split[-2]
                    right = line_split[-3]
                    top = line_split[-4]
                    left = line_split[-5]
                    class_name = ""
                    for name in line_split[:-5]:
                        class_name += name + " "
                    class_name = class_name[:-1]
                    is_difficult = True
                else:
                    line_split = line.split()
                    bottom = line_split[-1]
                    right = line_split[-2]
                    top = line_split[-3]
                    left = line_split[-4]
                    class_name = ""
                    for name in line_split[:-4]:
                        class_name += name + " "
                    class_name = class_name[:-1]

            bbox = left + " " + top + " " + right + " " + bottom
            if is_difficult:
                bounding_boxes.append({"class_name": class_name, "bbox": bbox, "used": False, "difficult": True})
                is_difficult = False
            else:
                bounding_boxes.append({"class_name": class_name, "bbox": bbox, "used": False})
                if class_name in gt_counter_per_class:
                    gt_counter_per_class[class_name] += 1
                else:
                    gt_counter_per_class[class_name] = 1

                if class_name not in already_seen_classes:
                    if class_name in counter_images_per_class:
                        counter_images_per_class[class_name] += 1
                    else:
                        counter_images_per_class[class_name] = 1
                    already_seen_classes.append(class_name)

        with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile:
            json.dump(bounding_boxes, outfile)

    gt_classes = list(gt_counter_per_class.keys())
    gt_classes = sorted(gt_classes)
    n_classes = len(gt_classes)

    dr_files_list = glob.glob(DR_PATH + '/*.txt')
    dr_files_list.sort()
    for class_index, class_name in enumerate(gt_classes):
        bounding_boxes = []
        for txt_file in dr_files_list:
            file_id = txt_file.split(".txt", 1)[0]
            file_id = os.path.basename(os.path.normpath(file_id))
            temp_path = os.path.join(GT_PATH, (file_id + ".txt"))
            if class_index == 0:
                if not os.path.exists(temp_path):
                    error_msg = "Error. File not found: {}\n".format(temp_path)
                    error(error_msg)
            lines = file_lines_to_list(txt_file)
            for line in lines:
                try:
                    tmp_class_name, confidence, left, top, right, bottom = line.split()
                except:
                    line_split = line.split()
                    bottom = line_split[-1]
                    right = line_split[-2]
                    top = line_split[-3]
                    left = line_split[-4]
                    confidence = line_split[-5]
                    tmp_class_name = ""
                    for name in line_split[:-5]:
                        tmp_class_name += name + " "
                    tmp_class_name = tmp_class_name[:-1]

                if tmp_class_name == class_name:
                    bbox = left + " " + top + " " + right + " " + bottom
                    bounding_boxes.append({"confidence": confidence, "file_id": file_id, "bbox": bbox})

        bounding_boxes.sort(key=lambda x: float(x['confidence']), reverse=True)
        with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile:
            json.dump(bounding_boxes, outfile)

    sum_AP = 0.0
    ap_dictionary = {}
    lamr_dictionary = {}
    with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file:
        results_file.write("# AP and precision/recall per class\n")
        count_true_positives = {}

        for class_index, class_name in enumerate(gt_classes):
            count_true_positives[class_name] = 0
            dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json"
            dr_data = json.load(open(dr_file))

            nd = len(dr_data)
            tp = [0] * nd
            fp = [0] * nd
            score = [0] * nd
            score_threhold_idx = 0
            for idx, detection in enumerate(dr_data):
                file_id = detection["file_id"]
                score[idx] = float(detection["confidence"])
                if score[idx] >= score_threhold:
                    score_threhold_idx = idx

                # Memuat gambar untuk setiap deteksi
                ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*")
                if len(ground_truth_img) == 0:
                    error("Error. Image not found with id: " + file_id)
                else:
                    img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0])

                    # Inisialisasi img_cumulative
                    img_cumulative_path = os.path.join(RESULTS_FILES_PATH, "images", ground_truth_img[0])
                    if os.path.isfile(img_cumulative_path):
                        img_cumulative = cv2.imread(img_cumulative_path)
                    else:
                        img_cumulative = img.copy()

                    # Menambahkan border di bagian bawah gambar
                    bottom_border = 60
                    BLACK = [0, 0, 0]
                    img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK)

                    # Gambar kotak pembatas dan teks
                    bb = [float(x) for x in detection["bbox"].split()]
                    bb = [int(i) for i in bb]
                    cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
                    cv2.rectangle(img_cumulative, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)

                    # Menambahkan teks kelas dan confidence score
                    confidence_score = float(detection["confidence"]) * 100
                    text = f"{class_name} {confidence_score:.2f}%"
                    cv2.putText(img, text, (bb[0], bb[1] - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1, cv2.LINE_AA)
                    cv2.putText(img_cumulative, text, (bb[0], bb[1] - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1, cv2.LINE_AA)

                    # Simpan atau tampilkan gambar
                    output_img_path = os.path.join(RESULTS_FILES_PATH, "images", f"{file_id}_detection.jpg")
                    cv2.imwrite(output_img_path, img)
                    cv2_imshow(img)  # Ganti cv2.imshow dengan cv2_imshow

                gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json"
                ground_truth_data = json.load(open(gt_file))
                ovmax = -1
                gt_match = -1
                bb = [float(x) for x in detection["bbox"].split()]
                for obj in ground_truth_data:
                    if obj["class_name"] == class_name:
                        bbgt = [float(x) for x in obj["bbox"].split()]
                        bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), min(bb[3], bbgt[3])]
                        iw = bi[2] - bi[0] + 1
                        ih = bi[3] - bi[1] + 1
                        if iw > 0 and ih > 0:
                            ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0]
                                                                              + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
                            ov = iw * ih / ua
                            if ov > ovmax:
                                ovmax = ov
                                gt_match = obj

                if show_animation:
                    status = "NO MATCH FOUND!"

                min_overlap = MINOVERLAP
                if ovmax >= min_overlap:
                    if "difficult" not in gt_match:
                        if not bool(gt_match["used"]):
                            tp[idx] = 1
                            gt_match["used"] = True
                            count_true_positives[class_name] += 1
                            with open(gt_file, 'w') as f:
                                f.write(json.dumps(ground_truth_data))
                            if show_animation:
                                status = "MATCH!"
                        else:
                            fp[idx] = 1
                            if show_animation:
                                status = "REPEATED MATCH!"
                else:
                    fp[idx] = 1
                    if ovmax > 0:
                        status = "INSUFFICIENT OVERLAP"

                """
                Draw image to show animation
                """
                if show_animation:
                    height, widht = img.shape[:2]
                    white = (255, 255, 255)
                    light_blue = (255, 200, 100)
                    green = (0, 255, 0)
                    light_red = (30, 30, 255)
                    margin = 10
                    # 1nd line
                    v_pos = int(height - margin - (bottom_border / 2.0))
                    text = "Image: " + ground_truth_img[0] + " "
                    img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
                    text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " "
                    img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue,
                                                         line_width)
                    if ovmax != -1:
                        color = light_red
                        if status == "INSUFFICIENT OVERLAP":
                            text = "IoU: {0:.2f}% ".format(ovmax * 100) + "< {0:.2f}% ".format(min_overlap * 100)
                        else:
                            text = "IoU: {0:.2f}% ".format(ovmax * 100) + ">= {0:.2f}% ".format(min_overlap * 100)
                            color = green
                        img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)
                    # 2nd line
                    v_pos += int(bottom_border / 2.0)
                    rank_pos = str(idx + 1)
                    text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(
                        float(detection["confidence"]) * 100)
                    img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
                    color = light_red
                    if status == "MATCH!":
                        color = green
                    text = "Result: " + status + " "
                    img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)

                    font = cv2.FONT_HERSHEY_SIMPLEX
                    if ovmax > 0:
                        bbgt = [int(round(float(x))) for x in gt_match["bbox"].split()]
                        cv2.rectangle(img, (bbgt[0], bbgt[1]), (bbgt[2], bbgt[3]), light_blue, 2)
                        cv2.rectangle(img_cumulative, (bbgt[0], bbgt[1]), (bbgt[2], bbgt[3]), light_blue, 2)
                        cv2.putText(img_cumulative, class_name, (bbgt[0], bbgt[1] - 5), font, 0.6, light_blue, 1,
                                    cv2.LINE_AA)

                    # Menggambar bounding box
                    bb = [int(i) for i in bb]
                    cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), color, 2)
                    cv2.rectangle(img_cumulative, (bb[0], bb[1]), (bb[2], bb[3]), color, 2)

                    # Menambahkan teks kelas dan confidence score
                    confidence_score = float(detection["confidence"]) * 100  # Mengonversi ke persentase
                    text = f"{class_name} {confidence_score:.2f}%"
                    cv2.putText(img_cumulative, text, (bb[0], bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA)
                    # cv2_imshow(img)
                    # cv2.waitKey(1)
                    output_img_path = RESULTS_FILES_PATH + "/images/detections_one_by_one/" + class_name + "_detection" + str(
                        idx) + ".jpg"
                    cv2.imwrite(output_img_path, img)
                    cv2.imwrite(img_cumulative_path, img_cumulative)

            cumsum = 0
            for idx, val in enumerate(fp):
                fp[idx] += cumsum
                cumsum += val

            cumsum = 0
            for idx, val in enumerate(tp):
                tp[idx] += cumsum
                cumsum += val

            rec = tp[:]
            for idx, val in enumerate(tp):
                rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1)

            prec = tp[:]
            for idx, val in enumerate(tp):
                prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1)

            ap, mrec, mprec = voc_ap(rec[:], prec[:])
            F1 = np.array(rec) * np.array(prec) * 2 / np.where((np.array(prec) + np.array(rec)) == 0, 1,
                                                               (np.array(prec) + np.array(rec)))

            sum_AP += ap
            text = "{0:.2f}%".format(
                ap * 100) + " = " + class_name + " AP "  # class_name + " AP = {0:.2f}%".format(ap*100)

            if len(prec) > 0:
                F1_text = "{0:.2f}".format(F1[score_threhold_idx]) + " = " + class_name + " F1 "
                Recall_text = "{0:.2f}%".format(rec[score_threhold_idx] * 100) + " = " + class_name + " Recall "
                Precision_text = "{0:.2f}%".format(prec[score_threhold_idx] * 100) + " = " + class_name + " Precision "
            else:
                F1_text = "0.00" + " = " + class_name + " F1 "
                Recall_text = "0.00%" + " = " + class_name + " Recall "
                Precision_text = "0.00%" + " = " + class_name + " Precision "

            rounded_prec = ['%.2f' % elem for elem in prec]
            rounded_rec = ['%.2f' % elem for elem in rec]
            results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n")

            if len(prec) > 0:
                print(text + "\t||\tscore_threhold=" + str(score_threhold) + " : " + "F1=" + "{0:.2f}".format(
                    F1[score_threhold_idx]) \
                      + " ; Recall=" + "{0:.2f}%".format(
                    rec[score_threhold_idx] * 100) + " ; Precision=" + "{0:.2f}%".format(
                    prec[score_threhold_idx] * 100))
            else:
                print(text + "\t||\tscore_threhold=" + str(
                    score_threhold) + " : " + "F1=0.00% ; Recall=0.00% ; Precision=0.00%")
            ap_dictionary[class_name] = ap

            n_images = counter_images_per_class[class_name]
            lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images)
            lamr_dictionary[class_name] = lamr

            if draw_plot:
                plt.plot(rec, prec, '-o')
                area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]]
                area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]]
                plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r')

                fig = plt.gcf()
                plt.suptitle('AP ' + class_name)

                plt.title('class: ' + text)
                plt.xlabel('Recall')
                plt.ylabel('Precision')
                axes = plt.gca()
                axes.set_xlim([0.0, 1.0])
                axes.set_ylim([0.0, 1.05])
                fig.savefig(RESULTS_FILES_PATH + "/AP/" + class_name + ".png")
                plt.cla()

                plt.plot(score, F1, "-", color='orangered')
                plt.title('class: ' + F1_text + "\nscore_threhold=" + str(score_threhold))
                plt.xlabel('Score_Threhold')
                plt.ylabel('F1')
                axes = plt.gca()
                axes.set_xlim([0.0, 1.0])
                axes.set_ylim([0.0, 1.05])
                fig.savefig(RESULTS_FILES_PATH + "/F1/" + class_name + ".png")
                plt.cla()

                plt.plot(score, rec, "-H", color='gold')
                plt.title('class: ' + Recall_text + "\nscore_threhold=" + str(score_threhold))
                plt.xlabel('Score_Threhold')
                plt.ylabel('Recall')
                axes = plt.gca()
                axes.set_xlim([0.0, 1.0])
                axes.set_ylim([0.0, 1.05])
                fig.savefig(RESULTS_FILES_PATH + "/Recall/" + class_name + ".png")
                plt.cla()

                plt.plot(score, prec, "-s", color='palevioletred')
                plt.title('class: ' + Precision_text + "\nscore_threhold=" + str(score_threhold))
                plt.xlabel('Score_Threhold')
                plt.ylabel('Precision')
                axes = plt.gca()
                axes.set_xlim([0.0, 1.0])
                axes.set_ylim([0.0, 1.05])
                fig.savefig(RESULTS_FILES_PATH + "/Precision/" + class_name + ".png")
                plt.cla()

                df = pd.DataFrame()
                df["Confidence"] = score
                df["Recall"] = rec
                df["Precision"] = prec
                df["F1"] = F1
                df_dic[class_name] = df
                df.to_csv(os.path.join(CSV_PATH, class_name + ".csv"), encoding="utf-8", index=False,
                          sep=",")

        if show_animation:
            cv2.destroyAllWindows()
        if n_classes == 0:
            print("未检测到任何种类，请检查标签信息与get_map.py中的classes_path是否修改。")
            return 0
        results_file.write("\n# mAP of all classes\n")
        mAP = sum_AP / n_classes
        text = "mAP = {0:.2f}%".format(mAP * 100)
        results_file.write(text + "\n")
        print(text)

    shutil.rmtree(TEMP_FILES_PATH)

    """
    Count total of detection-results
    """
    det_counter_per_class = {}
    for txt_file in dr_files_list:
        lines_list = file_lines_to_list(txt_file)
        for line in lines_list:
            class_name = line.split()[0]
            if class_name in det_counter_per_class:
                det_counter_per_class[class_name] += 1
            else:
                det_counter_per_class[class_name] = 1
    dr_classes = list(det_counter_per_class.keys())

    """
    Write number of ground-truth objects per class to results.txt
    """
    with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file:
        results_file.write("\n# Number of ground-truth objects per class\n")
        for class_name in sorted(gt_counter_per_class):
            results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n")

    """
    Finish counting true positives
    """
    for class_name in dr_classes:
        if class_name not in gt_classes:
            count_true_positives[class_name] = 0

    """
    Write number of detected objects per class to results.txt
    """
    with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file:
        results_file.write("\n# Number of detected objects per class\n")
        for class_name in sorted(dr_classes):
            n_det = det_counter_per_class[class_name]
            text = class_name + ": " + str(n_det)
            text += " (tp:" + str(count_true_positives[class_name]) + ""
            text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n"
            results_file.write(text)

    """
    Plot the total number of occurences of each class in the ground-truth
    """
    if draw_plot:
        window_title = "ground-truth-info"
        plot_title = "ground-truth\n"
        plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)"
        x_label = "Number of objects per class"
        output_path = RESULTS_FILES_PATH + "/ground-truth-info.png"
        to_show = False
        plot_color = 'forestgreen'
        draw_plot_func(
            gt_counter_per_class,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            '',
        )

    # """
    # Plot the total number of occurences of each class in the "detection-results" folder
    # """
    # if draw_plot:
    #     window_title = "detection-results-info"
    #     # Plot title
    #     plot_title = "detection-results\n"
    #     plot_title += "(" + str(len(dr_files_list)) + " files and "
    #     count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values()))
    #     plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)"
    #     # end Plot title
    #     x_label = "Number of objects per class"
    #     output_path = RESULTS_FILES_PATH + "/detection-results-info.png"
    #     to_show = False
    #     plot_color = 'forestgreen'
    #     true_p_bar = count_true_positives
    #     draw_plot_func(
    #         det_counter_per_class,
    #         len(det_counter_per_class),
    #         window_title,
    #         plot_title,
    #         x_label,
    #         output_path,
    #         to_show,
    #         plot_color,
    #         true_p_bar
    #         )

    """
    Draw log-average miss rate plot (Show lamr of all classes in decreasing order)
    """
    if draw_plot:
        window_title = "lamr"
        plot_title = "log-average miss rate"
        x_label = "log-average miss rate"
        output_path = RESULTS_FILES_PATH + "/lamr.png"
        to_show = False
        plot_color = 'royalblue'
        draw_plot_func(
            lamr_dictionary,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            ""
        )

    """
    Draw mAP plot (Show AP's of all classes in decreasing order)
    """
    if draw_plot:
        window_title = "mAP"
        plot_title = "mAP = {0:.3f}".format(mAP)
        x_label = "AP"
        output_path = RESULTS_FILES_PATH + "/mAP.pdf"
        to_show = True
        plot_color = 'royalblue'
        fig = plt.figure(figsize=(7, 6), dpi=150)
        draw_plot_func(
            ap_dictionary,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            ""
        )
    return mAP, df_dic


################################ PREPROCESSING GT #############################
def preprocess_gt(gt_path, class_names):
    """Convert per-image ground-truth text files into a COCO-style dict.

    Every file in ``gt_path`` is read with ``file_lines_to_list``. Each line is
    ``<class name> <left> <top> <right> <bottom>`` with an optional trailing
    ``difficult`` token; the class name may contain spaces.

    Args:
        gt_path: Directory holding one ground-truth text file per image.
        class_names: List of known class names; lines whose class is not in
            this list are skipped. Category ids are ``index + 1``.

    Returns:
        A dict with the keys ``images``, ``categories`` and ``annotations``.
        Boxes are stored as ``[x, y, width, height]`` and lines flagged as
        difficult are exported with ``iscrowd = 1``.
    """
    images = []
    annotations = []
    for file_name in os.listdir(gt_path):
        lines_list = file_lines_to_list(os.path.join(gt_path, file_name))
        image_id = os.path.splitext(file_name)[0]

        for line in lines_list:
            fields = line.split()
            if "difficult" in line:
                # Trailing token is the "difficult" marker; everything before
                # the four coordinates belongs to the class name.
                *name_parts, left, top, right, bottom, _difficult = fields
                difficult = 1
            else:
                *name_parts, left, top, right, bottom = fields
                difficult = 0
            class_name = " ".join(name_parts)

            left, top, right, bottom = float(left), float(top), float(right), float(bottom)
            if class_name not in class_names:
                continue

            annotations.append({
                # Area is the box area with a fixed offset of 10.0 subtracted.
                'area': (right - left) * (bottom - top) - 10.0,
                'category_id': class_names.index(class_name) + 1,
                'image_id': str(image_id),
                'iscrowd': difficult,
                'bbox': [left, top, right - left, bottom - top],
                # Annotation ids are the running index over all kept boxes.
                'id': len(annotations),
            })

        # The real image size is not known here; width/height are placeholders.
        images.append({
            'file_name': image_id + '.jpg',
            'width': 1,
            'height': 1,
            'id': str(image_id),
        })

    categories = [
        {'supercategory': cls, 'name': cls, 'id': position + 1}
        for position, cls in enumerate(class_names)
    ]

    return {'images': images, 'categories': categories, 'annotations': annotations}

################################ PREPROCESSING DR #############################
def preprocess_dr(dr_path, class_names):
    """Convert per-image detection text files into a COCO-style result list.

    Every file in ``dr_path`` is read with ``file_lines_to_list``. Each line is
    ``<class name> <confidence> <left> <top> <right> <bottom>``; the class name
    may contain spaces.

    Args:
        dr_path: Directory holding one detection-result text file per image.
        class_names: List of known class names; detections of other classes
            are dropped. Category ids are ``index + 1``.

    Returns:
        A list of dicts with the keys ``image_id``, ``category_id``, ``bbox``
        (``[x, y, width, height]``) and ``score``.
    """
    detections = []
    for file_name in os.listdir(dr_path):
        lines_list = file_lines_to_list(os.path.join(dr_path, file_name))
        image_id = os.path.splitext(file_name)[0]
        for line in lines_list:
            *name_parts, confidence, left, top, right, bottom = line.split()
            class_name = " ".join(name_parts)

            # Negative coordinates are clamped to zero before the box is built.
            left, top, right, bottom = (
                max(0, float(value)) for value in (left, top, right, bottom)
            )
            if class_name not in class_names:
                continue

            detections.append({
                "image_id": str(image_id),
                "category_id": class_names.index(class_name) + 1,
                "bbox": [left, top, right - left, bottom - top],
                "score": float(confidence),
            })
    return detections


# def get_coco_map(class_names, path):
#     GT_PATH = os.path.join(path, 'ground-truth')
#     DR_PATH = os.path.join(path, 'detection-results')
#     COCO_PATH = os.path.join(path, 'coco_eval')

#     if not os.path.exists(COCO_PATH):
#         os.makedirs(COCO_PATH)

#     GT_JSON_PATH = os.path.join(COCO_PATH, 'instances_gt.json')
#     DR_JSON_PATH = os.path.join(COCO_PATH, 'instances_dr.json')

#     with open(GT_JSON_PATH, "w") as f:
#         results_gt = preprocess_gt(GT_PATH, class_names)
#         json.dump(results_gt, f, indent=4)

#     with open(DR_JSON_PATH, "w") as f:
#         results_dr = preprocess_dr(DR_PATH, class_names)
#         json.dump(results_dr, f, indent=4)
#         if len(results_dr) == 0:
#             return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

#     cocoGt = COCO(GT_JSON_PATH)
#     cocoDt = cocoGt.loadRes(DR_JSON_PATH)
#     cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
#     cocoEval.evaluate()
#     cocoEval.accumulate()
#     cocoEval.summarize()

#     # Menampilkan AP untuk setiap kelas dalam bentuk persen
#     print("\nAverage Precision (AP) for each class (in %):")
#     for i, class_name in enumerate(class_names):
#         ap = cocoEval.stats[1 + i] * 100  # AP untuk IoU=0.50 dalam persen
#         print(f"AP for {class_name}: {ap:.2f}%")

#     # Menampilkan mAP dalam bentuk persen
#     mAP = cocoEval.stats[0] * 100  # mAP untuk IoU=0.50:0.95 dalam persen
#     print(f"\nMean Average Precision (mAP): {mAP:.2f}%")

#     return cocoEval.stats

def get_coco_map(class_names, path):
    """Evaluate detections under ``path`` with the COCO bbox metrics.

    Reads ``<path>/ground-truth`` and ``<path>/detection-results``, writes the
    COCO-format JSON files into ``<path>/coco_eval`` and runs ``COCOeval``.

    Args:
        class_names: List of class names used to build category ids.
        path: Working directory containing the two input sub-directories.

    Returns:
        A dict ``{"AP50": ..., "mAP": ...}`` with both values in percent
        (``stats[1] * 100`` and ``stats[0] * 100``). When there are no
        detections at all, both values are ``0`` and COCOeval is not run.
    """
    gt_dir = os.path.join(path, 'ground-truth')
    dr_dir = os.path.join(path, 'detection-results')
    coco_dir = os.path.join(path, 'coco_eval')

    if not os.path.exists(coco_dir):
        os.makedirs(coco_dir)

    gt_json_path = os.path.join(coco_dir, 'instances_gt.json')
    dr_json_path = os.path.join(coco_dir, 'instances_dr.json')

    with open(gt_json_path, "w") as gt_file:
        ground_truth = preprocess_gt(gt_dir, class_names)
        json.dump(ground_truth, gt_file, indent=4)

    with open(dr_json_path, "w") as dr_file:
        detections = preprocess_dr(dr_dir, class_names)
        json.dump(detections, dr_file, indent=4)

    # Nothing was detected: report zeros instead of running the evaluator.
    if len(detections) == 0:
        return {"AP50": 0, "mAP": 0}

    coco_gt = COCO(gt_json_path)
    coco_dt = coco_gt.loadRes(dr_json_path)
    evaluator = COCOeval(coco_gt, coco_dt, 'bbox')
    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()

    # stats[1] is the AP at IoU=0.50, stats[0] the AP at IoU=0.50:0.95;
    # both are converted to percent.
    ap50_percent = evaluator.stats[1] * 100
    map_percent = evaluator.stats[0] * 100

    print(f"\nmAP untuk IoU=0.50: {ap50_percent:.2f}%")
    print(f"mAP untuk IoU=0.50:0.95: {map_percent:.2f}%")

    return {"AP50": ap50_percent, "mAP": map_percent}


### **UTILS_FIT**

In [None]:
import os

import torch
from tqdm import tqdm

# from utils.utils import get_lr


def fit_one_epoch(model_train, model, ema, yolo_loss, loss_history, eval_callback, optimizer, epoch, epoch_step,
                  epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir, local_rank=0):
    """Run one training pass and one validation pass, then log and checkpoint on rank 0.

    Args:
        model_train: Module used for the forward passes; switched to train mode
            here and, when no EMA is used, to eval mode for validation.
        model: Module whose ``state_dict()`` is saved when ``ema`` is falsy.
        ema: Optional EMA helper. When truthy, ``ema.update(model_train)`` is
            called after every optimizer step and ``ema.ema`` is used for
            validation, evaluation and checkpoints.
        yolo_loss: Callable ``(outputs, targets) -> loss`` returning a value
            that supports ``backward()`` and ``item()``.
        loss_history: Object providing ``append_loss(epoch, loss, val_loss)``
            and a ``val_loss`` sequence.
        eval_callback: Object providing ``on_epoch_end(epoch, model)``.
        optimizer: Optimizer stepped once per training batch.
        epoch: Index of the current epoch; ``epoch + 1`` is what gets displayed
            and logged.
        epoch_step: Maximum number of training batches consumed from ``gen``.
        epoch_step_val: Maximum number of batches consumed from ``gen_val``.
        gen: Iterable of training batches; ``batch[0]`` are images and
            ``batch[1]`` an iterable of per-image targets.
        gen_val: Iterable of validation batches with the same layout.
        Epoch: Total number of epochs (used for display and the final save).
        cuda: When truthy, images and targets are moved to CUDA device
            ``local_rank``.
        fp16: When truthy, the forward pass runs under ``autocast`` and the
            step goes through ``scaler``.
        scaler: Gradient scaler used only on the fp16 path (presumably a
            ``torch.cuda.amp.GradScaler`` -- confirm at the call site).
        save_period: A numbered checkpoint is written every ``save_period``
            epochs and on the last epoch.
        save_dir: Directory the ``.pth`` files are written to.
        local_rank: Process rank; progress bars, logging, evaluation and all
            file output happen only when this is 0.

    Returns:
        None. Results are reported through ``loss_history``, ``eval_callback``
        and the checkpoint files.
    """
    # Running sums of the per-batch loss values for this epoch.
    loss = 0
    val_loss = 0

    if local_rank == 0:
        print('Start Train')
        pbar = tqdm(total=epoch_step, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3)
    model_train.train()
    for iteration, batch in enumerate(gen):
        # Cap the epoch at epoch_step batches even if the loader yields more.
        if iteration >= epoch_step:
            break

        images, targets = batch[0], batch[1]
        with torch.no_grad():
            if cuda:
                images = images.cuda(local_rank)
                targets = [ann.cuda(local_rank) for ann in targets]

        optimizer.zero_grad()
        if not fp16:
            # Full-precision path: plain forward / backward / step.

            outputs = model_train(images)

            loss_value = yolo_loss(outputs, targets)

            loss_value.backward()
            optimizer.step()
        else:
            # Mixed-precision path: forward under autocast, scaled backward,
            # then step and update through the scaler.
            from torch.cuda.amp import autocast
            with autocast():
                outputs = model_train(images)

                loss_value = yolo_loss(outputs, targets)

            scaler.scale(loss_value).backward()
            scaler.step(optimizer)
            scaler.update()
        if ema:
            ema.update(model_train)

        loss += loss_value.item()

        if local_rank == 0:
            # Show the running mean loss and the current learning rate.
            pbar.set_postfix(**{'loss': loss / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    if local_rank == 0:
        pbar.close()
        print('Finish Train')
        print('Start Validation')
        pbar = tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3)

    # Validate with the EMA weights when available, otherwise with the
    # training module switched to eval mode.
    if ema:
        model_train_eval = ema.ema
    else:
        model_train_eval = model_train.eval()

    for iteration, batch in enumerate(gen_val):
        if iteration >= epoch_step_val:
            break
        images, targets = batch[0], batch[1]
        # The whole validation step runs without gradient tracking.
        with torch.no_grad():
            if cuda:
                images = images.cuda(local_rank)
                targets = [ann.cuda(local_rank) for ann in targets]

            optimizer.zero_grad()

            outputs = model_train_eval(images)

            loss_value = yolo_loss(outputs, targets)

        val_loss += loss_value.item()
        if local_rank == 0:
            pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)})
            pbar.update(1)

    if local_rank == 0:
        pbar.close()
        print('Finish Validation')
        # Epoch means are computed by dividing by the configured step counts,
        # not by the number of batches actually seen.
        loss_history.append_loss(epoch + 1, loss / epoch_step, val_loss / epoch_step_val)
        eval_callback.on_epoch_end(epoch + 1, model_train_eval)
        print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
        print('Total Loss: %.3f || Val Loss: %.3f ' % (loss / epoch_step, val_loss / epoch_step_val))

        if ema:
            save_state_dict = ema.ema.state_dict()
        else:
            save_state_dict = model.state_dict()

        # Numbered checkpoint every save_period epochs and on the final epoch.
        if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch:
            torch.save(save_state_dict, os.path.join(save_dir, "ep%03d-loss%.3f-val_loss%.3f.pth" % (
            epoch + 1, loss / epoch_step, val_loss / epoch_step_val)))

        # append_loss was already called above; with the LossHistory class in
        # this notebook the current value is therefore part of val_loss, so the
        # comparison holds when this epoch ties or beats the best seen so far.
        if len(loss_history.val_loss) <= 1 or (val_loss / epoch_step_val) <= min(loss_history.val_loss):
            print('Save best model to best_epoch_weights.pth')
            torch.save(save_state_dict, os.path.join(save_dir, "best_epoch_weights.pth"))

        # Always overwrite the rolling "last" checkpoint.
        torch.save(save_state_dict, os.path.join(save_dir, "last_epoch_weights.pth"))




### **CALLBACK**

In [None]:
import os

import torch
import matplotlib
matplotlib.use('Agg')
import scipy.signal
from matplotlib import pyplot as plt
from torch.utils.tensorboard import SummaryWriter

import shutil
import numpy as np

from PIL import Image
from tqdm import tqdm
# from .utils import cvtColor, preprocess_input, resize_image
# from .utils_bbox import decode_outputs, non_max_suppression
# from .utils_map import get_coco_map, get_map


class LossHistory():
    """Record per-epoch train/val losses to text files, TensorBoard and a PNG plot.

    Attributes:
        log_dir: Directory that receives ``epoch_loss.txt``,
            ``epoch_val_loss.txt``, ``epoch_loss.png`` and the TensorBoard
            event files.
        losses: List of training losses, one entry per ``append_loss`` call.
        val_loss: List of validation losses, one entry per ``append_loss`` call.
        writer: ``SummaryWriter`` bound to ``log_dir``.
    """

    def __init__(self, log_dir, model, input_shape):
        """Create the log directory and the TensorBoard writer.

        Args:
            log_dir: Output directory. It is created when missing; an existing
                directory is reused so re-running a notebook cell with the same
                path does not fail.
            model: Model whose graph is exported to TensorBoard (best effort).
            input_shape: Sequence whose first two items are used as the spatial
                size of the dummy input for the graph export.
        """
        self.log_dir    = log_dir
        self.losses     = []
        self.val_loss   = []

        os.makedirs(self.log_dir, exist_ok=True)
        self.writer     = SummaryWriter(self.log_dir)
        try:
            dummy_input     = torch.randn(2, 3, input_shape[0], input_shape[1])
            self.writer.add_graph(model, dummy_input)
        except Exception:
            # Graph export is optional; a model that cannot be traced must not
            # prevent training. KeyboardInterrupt/SystemExit still propagate.
            pass

    def append_loss(self, epoch, loss, val_loss):
        """Store one epoch's losses and refresh every output.

        Args:
            epoch: Epoch number used as the TensorBoard step.
            loss: Training loss of the epoch.
            val_loss: Validation loss of the epoch.
        """
        os.makedirs(self.log_dir, exist_ok=True)

        self.losses.append(loss)
        self.val_loss.append(val_loss)

        with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f:
            f.write(str(loss))
            f.write("\n")
        with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f:
            f.write(str(val_loss))
            f.write("\n")

        self.writer.add_scalar('loss', loss, epoch)
        self.writer.add_scalar('val_loss', val_loss, epoch)
        self.loss_plot()

    def loss_plot(self):
        """Redraw ``epoch_loss.png`` with the raw and, when possible, smoothed curves."""
        iters = range(len(self.losses))

        plt.figure()
        plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
        plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')

        # Savitzky-Golay window: short while the history is short.
        num = 5 if len(self.losses) < 25 else 15
        # The filter needs at least `num` samples; with fewer, only the raw
        # curves are drawn.
        if len(self.losses) >= num:
            try:
                plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
                plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
            except Exception:
                # Smoothing is cosmetic; keep the plot if the filter fails.
                pass

        plt.grid(True)
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend(loc="upper right")

        plt.savefig(os.path.join(self.log_dir, "epoch_loss.png"))

        plt.cla()
        plt.close("all")

class EvalCallback():
    """Compute a validation mAP every ``period`` epochs and log it.

    For each evaluation the callback writes ground-truth and detection text
    files under ``map_out_path``, scores them (COCO evaluation first, the
    ``get_map`` routine as a fallback), appends the score to
    ``<log_dir>/epoch_map.txt``, redraws ``<log_dir>/epoch_map.png`` and removes
    ``map_out_path`` again.
    """

    def __init__(self, net, input_shape, class_names, num_classes, val_lines, log_dir, cuda, \
            map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, MINOVERLAP=0.5, eval_flag=True, period=1):
        """Store the evaluation settings and start the mAP log with a 0 entry.

        Args:
            net: Network used for inference (replaced on every evaluation).
            input_shape: ``(height, width)`` of the network input.
            class_names: List of class names indexed by class id.
            num_classes: Number of classes passed to non-max suppression.
            val_lines: Annotation lines, each ``<image path> <box> <box> ...``
                with boxes written as ``left,top,right,bottom,class_id``.
            log_dir: Existing directory for ``epoch_map.txt`` / ``epoch_map.png``.
            cuda: Move the input tensor to CUDA when truthy.
            map_out_path: Temporary working directory for the text files.
            max_boxes: Keep at most this many highest-scoring detections.
            confidence: Confidence threshold for non-max suppression.
            nms_iou: IoU threshold for non-max suppression.
            letterbox_image: Resize with letterboxing when truthy.
            MINOVERLAP: IoU threshold handed to the fallback ``get_map``.
            eval_flag: Disable evaluation entirely when falsy.
            period: Evaluate when ``epoch % period == 0``.
        """
        super(EvalCallback, self).__init__()

        self.net                = net
        self.input_shape        = input_shape
        self.class_names        = class_names
        self.num_classes        = num_classes
        self.val_lines          = val_lines
        self.log_dir            = log_dir
        self.cuda               = cuda
        self.map_out_path       = map_out_path
        self.max_boxes          = max_boxes
        self.confidence         = confidence
        self.nms_iou            = nms_iou
        self.letterbox_image    = letterbox_image
        self.MINOVERLAP         = MINOVERLAP
        self.eval_flag          = eval_flag
        self.period             = period

        self.maps       = [0]
        self.epoches    = [0]
        if self.eval_flag:
            with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f:
                f.write(str(0))
                f.write("\n")

    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Run detection on one image and write ``detection-results/<image_id>.txt``.

        Each output line is ``<class> <score> <left> <top> <right> <bottom>``.
        The file is created (empty) even when nothing is detected.

        Args:
            image_id: Base name of the output file.
            image: Input image; its first two shape dimensions are taken as
                the original image size.
            class_names: Only detections whose class is in this list are written.
            map_out_path: Directory containing the ``detection-results`` folder.
        """
        result_path = os.path.join(map_out_path, "detection-results/"+image_id+".txt")
        # The context manager closes the file on every exit path, including
        # the early return taken when there are no detections.
        with open(result_path, "w") as f:
            image_shape = np.array(np.shape(image)[0:2])

            image       = cvtColor(image)
            image_data  = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image)
            image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

            with torch.no_grad():
                images = torch.from_numpy(image_data)
                if self.cuda:
                    images = images.cuda()

                outputs = self.net(images)
                outputs = decode_outputs(outputs, self.input_shape)

                results = non_max_suppression(outputs, self.num_classes, self.input_shape,
                            image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)

            if results[0] is None:
                return

            top_label   = np.array(results[0][:, 6], dtype = 'int32')
            # Final score is the product of columns 4 and 5 of the detections.
            top_conf    = results[0][:, 4] * results[0][:, 5]
            top_boxes   = results[0][:, :4]

            # Keep only the max_boxes highest-scoring detections.
            top_100     = np.argsort(top_conf)[::-1][:self.max_boxes]
            top_boxes   = top_boxes[top_100]
            top_conf    = top_conf[top_100]
            top_label   = top_label[top_100]

            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                box             = top_boxes[i]
                score           = str(top_conf[i])

                # Boxes come back as (top, left, bottom, right).
                top, left, bottom, right = box
                if predicted_class not in class_names:
                    continue

                f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
        return

    def _compute_map(self):
        """Return one scalar mAP for the files currently under ``map_out_path``.

        ``get_coco_map`` may return a dict with an ``"AP50"`` entry or an
        indexable stats sequence whose element 1 is the AP at IoU=0.50; the
        fallback ``get_map`` may return a bare number or a tuple whose first
        item is the mAP. All of these are reduced to a single number.
        """
        try:
            coco_stats = get_coco_map(class_names = self.class_names, path = self.map_out_path)
            if isinstance(coco_stats, dict):
                return coco_stats["AP50"]
            return coco_stats[1]
        except Exception as error:
            # Deliberate best effort: fall back to get_map when the COCO
            # evaluation is unavailable or fails, but say so.
            print("COCO evaluation failed (%s); falling back to get_map." % error)
            voc_result = get_map(self.MINOVERLAP, True, path = self.map_out_path)
            if isinstance(voc_result, (tuple, list)):
                return voc_result[0]
            return voc_result

    def on_epoch_end(self, epoch, model_eval):
        """Evaluate ``model_eval`` on the validation lines and log the mAP.

        Does nothing unless ``eval_flag`` is set and ``epoch`` is a multiple of
        ``period``.

        Args:
            epoch: Epoch number recorded next to the score.
            model_eval: Model used for inference during this evaluation.
        """
        if epoch % self.period == 0 and self.eval_flag:
            self.net = model_eval
            os.makedirs(os.path.join(self.map_out_path, "ground-truth"), exist_ok=True)
            os.makedirs(os.path.join(self.map_out_path, "detection-results"), exist_ok=True)
            print("Get map.")
            for annotation_line in tqdm(self.val_lines):
                line        = annotation_line.split()
                # splitext keeps dots inside the file name, so "a.1.jpg" and
                # "a.2.jpg" no longer map to the same id.
                image_id    = os.path.splitext(os.path.basename(line[0]))[0]

                gt_boxes    = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])

                # Close the image file as soon as the detections are written.
                with Image.open(line[0]) as image:
                    self.get_map_txt(image_id, image, self.class_names, self.map_out_path)

                with open(os.path.join(self.map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
                    for box in gt_boxes:
                        left, top, right, bottom, obj = box
                        obj_name = self.class_names[obj]
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))

            print("Calculate Map.")
            temp_map = self._compute_map()
            self.maps.append(temp_map)
            self.epoches.append(epoch)

            with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f:
                f.write(str(temp_map))
                f.write("\n")

            plt.figure()
            plt.plot(self.epoches, self.maps, 'red', linewidth = 2, label='train map')

            plt.grid(True)
            plt.xlabel('Epoch')
            plt.ylabel('Map %s'%str(self.MINOVERLAP))
            plt.title('A Map Curve')
            plt.legend(loc="upper right")

            plt.savefig(os.path.join(self.log_dir, "epoch_map.png"))
            plt.cla()
            plt.close("all")

            print("Get map done.")
            shutil.rmtree(self.map_out_path)

### **DATALOADER**

In [None]:
from random import sample, shuffle

import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data.dataset import Dataset

# from utils.utils import cvtColor, preprocess_input


class YoloDataset(Dataset):
    def __init__(self, annotation_lines, input_shape, num_classes, epoch_length, \
                        mosaic, mixup, mosaic_prob, mixup_prob, train, special_aug_ratio = 0.7):
        super(YoloDataset, self).__init__()
        self.annotation_lines   = annotation_lines
        self.input_shape        = input_shape
        self.num_classes        = num_classes
        self.epoch_length       = epoch_length
        self.mosaic             = mosaic
        self.mosaic_prob        = mosaic_prob
        self.mixup              = mixup
        self.mixup_prob         = mixup_prob
        self.train              = train
        self.special_aug_ratio  = special_aug_ratio

        self.epoch_now          = -1
        self.length             = len(self.annotation_lines)

    def __len__(self):
        """Return the number of annotation lines stored at construction time."""
        return self.length

    def __getitem__(self, index):
        index = index % self.length

        if self.mosaic and self.rand() < self.mosaic_prob and self.epoch_now < self.epoch_length * self.special_aug_ratio:
            lines = sample(self.annotation_lines, 3)
            lines.append(self.annotation_lines[index])
            # shuffle_data = shuffle(lines)
            # shuffle_flag = shuffle(lines)
            shuffle(lines)
            image, box  = self.get_random_data_with_Mosaic(lines, self.input_shape)

            if self.mixup and self.rand() < self.mixup_prob:
                lines           = sample(self.annotation_lines, 1)
                image_2, box_2  = self.get_random_data(lines[0], self.input_shape, random = self.train)
                image, box      = self.get_random_data_with_MixUp(image, box, image_2, box_2)
        else:
            image, box      = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)

        image       = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        box         = np.array(box, dtype=np.float32)
        if len(box) != 0:
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return image, box

    def rand(self, a=0, b=1):
        """Draw one sample from ``np.random.rand()`` scaled to the range ``a``..``b``."""
        span = b - a
        return a + span * np.random.rand()

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
        """Load one image with its boxes and fit both to ``input_shape``.

        Args:
            annotation_line: ``<image path> <box> <box> ...`` where every box is
                a comma-separated list of integers. The first four values are
                used as ``x1, y1, x2, y2``; the remaining values are carried
                along unchanged (presumably the class id -- confirm against the
                annotation writer).
            input_shape: ``(height, width)`` of the output image.
            jitter: Strength of the random aspect-ratio distortion.
            hue: Maximum relative change of the hue gain.
            sat: Maximum relative change of the saturation gain.
            val: Maximum relative change of the value gain.
            random: When falsy, only a deterministic letterbox resize is applied
                (the box order is still shuffled).

        Returns:
            ``(image_data, box)``. ``image_data`` is a float32 array on the
            non-random path and a uint8 array on the augmented path; ``box``
            holds the transformed boxes with those of width or height <= 1
            removed.
        """
        line    = annotation_line.split()

        image   = Image.open(line[0])
        image   = cvtColor(image)

        iw, ih  = image.size
        h, w    = input_shape

        # One row per box, parsed as integers. Because the array is built from
        # ints, the rescaled coordinates written back below are stored as ints.
        box     = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])

        if not random:
            # Letterbox: scale to fit, then center on a gray (128) canvas.
            scale = min(w/iw, h/ih)
            nw = int(iw*scale)
            nh = int(ih*scale)
            dx = (w-nw)//2
            dy = (h-nh)//2

            image       = image.resize((nw,nh), Image.BICUBIC)
            new_image   = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data  = np.array(new_image, np.float32)

            if len(box)>0:
                np.random.shuffle(box)
                # Apply the same scale and offset to the box corners.
                box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
                box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
                # Clip to the canvas.
                box[:, 0:2][box[:, 0:2]<0] = 0
                box[:, 2][box[:, 2]>w] = w
                box[:, 3][box[:, 3]>h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w>1, box_h>1)] # discard boxes that are 1 pixel or less in width or height

            return image_data, box

        # Random aspect ratio (jittered around the original) and random scale.
        new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw,nh), Image.BICUBIC)


        # Paste at a random offset on a gray canvas; the offset range is
        # negative when the resized image is larger than the canvas.
        dx = int(self.rand(0, w-nw))
        dy = int(self.rand(0, h-nh))
        new_image = Image.new('RGB', (w,h), (128,128,128))
        new_image.paste(image, (dx, dy))
        image = new_image


        # Horizontal flip with probability 0.5.
        flip = self.rand()<.5
        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

        image_data      = np.array(image, np.uint8)

        # Random gains for hue, saturation and value, each centered on 1.
        r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1

        # NOTE: from here on hue/sat/val are the image channels, not the
        # jitter strengths passed in as arguments.
        hue, sat, val   = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype           = image_data.dtype

        # Lookup tables that apply the gains; hue wraps at 180, the other two
        # channels are clipped to 0..255.
        x       = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)


        if len(box)>0:
            np.random.shuffle(box)
            # Same resize/offset as the image, then mirror x when flipped.
            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
            if flip: box[:, [0,2]] = w - box[:, [2,0]]
            # Clip to the canvas and drop boxes that became degenerate.
            box[:, 0:2][box[:, 0:2]<0] = 0
            box[:, 2][box[:, 2]>w] = w
            box[:, 3][box[:, 3]>h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w>1, box_h>1)]

        return image_data, box

    def merge_bboxes(self, bboxes, cutx, cuty):
        """Clip the boxes of each mosaic tile to its side of the cut lines.

        Args:
            bboxes: Sequence of box lists, one list per tile. Tile 0 lies
                above/left of the cut, tile 1 below/left, tile 2 below/right
                and tile 3 above/right. Boxes of any further tile are copied
                without clipping.
            cutx: x coordinate of the vertical cut line.
            cuty: y coordinate of the horizontal cut line.

        Returns:
            A flat list of ``[x1, y1, x2, y2, label]`` lists, where ``label`` is
            the last element of the input box. Boxes lying entirely on the
            wrong side of a cut line are dropped.
        """
        # tile index -> (tile is above the y cut, tile is left of the x cut)
        tile_sides = {0: (True, True), 1: (False, True), 2: (False, False), 3: (True, False)}

        merged = []
        for tile, tile_boxes in enumerate(bboxes):
            sides = tile_sides.get(tile)
            for box in tile_boxes:
                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

                if sides is not None:
                    above, left = sides

                    # Drop boxes that start beyond (or end before) the cut.
                    beyond_y = (y1 > cuty) if above else (y2 < cuty)
                    if beyond_y:
                        continue
                    beyond_x = (x1 > cutx) if left else (x2 < cutx)
                    if beyond_x:
                        continue

                    # Boxes straddling a cut line are trimmed back to it.
                    if y2 >= cuty and y1 <= cuty:
                        if above:
                            y2 = cuty
                        else:
                            y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        if left:
                            x2 = cutx
                        else:
                            x1 = cutx

                merged.append([x1, y1, x2, y2, box[-1]])
        return merged

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
        """Build one mosaic sample by tiling several annotated images around a random seam.

        Args:
            annotation_line: Iterable of annotation strings. Each string is split on
                whitespace: the first token is an image path, every following token is a
                comma-separated list of integers describing one box (five values per box,
                the first four being x1, y1, x2, y2). The placement and composition code
                below handles tile indices 0-3, i.e. four entries are expected.
            input_shape: ``(h, w)`` of the output canvas.
            jitter: Half-width of the random aspect-ratio distortion.
            hue, sat, val: Strengths of the random HSV gain applied to the merged image.

        Returns:
            ``(new_image, new_boxes)``: the merged ``uint8`` RGB array of shape
            ``(h, w, 3)`` and the list produced by ``self.merge_bboxes``.
        """
        h, w = input_shape
        # Relative position of the seam where the four tiles meet.
        # NOTE(review): self.rand is defined outside this view; presumably uniform in [a, b) - confirm.
        min_offset_x = self.rand(0.3, 0.7)
        min_offset_y = self.rand(0.3, 0.7)

        image_datas = []
        box_datas   = []
        index       = 0
        for line in annotation_line:

            # First token is the image path, the rest are boxes.
            line_content = line.split()

            image = Image.open(line_content[0])
            image = cvtColor(image)

            # Original image size (PIL reports width first).
            iw, ih = image.size

            # One integer row per box.
            box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])

            # Random horizontal flip; note it is only applied when the image has boxes.
            flip = self.rand()<.5
            if flip and len(box)>0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0,2]] = iw - box[:, [2,0]]

            # Randomly distort the aspect ratio and pick a scale relative to the canvas.
            new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
            scale = self.rand(.4, 1)
            if new_ar < 1:
                nh = int(scale*h)
                nw = int(nh*new_ar)
            else:
                nw = int(scale*w)
                nh = int(nw/new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # Paste offset per tile so that one corner of the resized image touches the seam:
            # 0 = top-left, 1 = bottom-left, 2 = bottom-right, 3 = top-right.
            if index == 0:
                dx = int(w*min_offset_x) - nw
                dy = int(h*min_offset_y) - nh
            elif index == 1:
                dx = int(w*min_offset_x) - nw
                dy = int(h*min_offset_y)
            elif index == 2:
                dx = int(w*min_offset_x)
                dy = int(h*min_offset_y)
            elif index == 3:
                dx = int(w*min_offset_x)
                dy = int(h*min_offset_y) - nh

            # Full-size grey canvas holding just this tile.
            new_image = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []

            if len(box)>0:
                # Map boxes into canvas coordinates, clamp them to the canvas and
                # drop boxes whose width or height is not larger than 1 pixel.
                np.random.shuffle(box)
                box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
                box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
                box[:, 0:2][box[:, 0:2]<0] = 0
                box[:, 2][box[:, 2]>w] = w
                box[:, 3][box[:, 3]>h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w>1, box_h>1)]
                box_data = np.zeros((len(box),5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # Absolute seam position in pixels.
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)

        # Copy each tile's quadrant from its own canvas into the merged image.
        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        new_image       = np.array(new_image, np.uint8)

        # Random per-channel gains in [1 - strength, 1 + strength] for hue, saturation, value.
        r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1

        # From here on hue/sat/val are rebound to the image's HSV channels.
        hue, sat, val   = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
        dtype           = new_image.dtype

        # Apply the gains through 256-entry lookup tables (hue wraps at 180, sat/val clip to 0..255).
        x       = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

        # Clip every tile's boxes to its quadrant and flatten them into one list.
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

        return new_image, new_boxes

    def get_random_data_with_MixUp(self, image_1, box_1, image_2, box_2):
        """Blend two samples 50/50 and pool their boxes.

        Args:
            image_1, image_2: Array-likes that can be added element-wise once converted
                to ``float32``.
            box_1, box_2: Box collections; an empty one (``len == 0``) is ignored.

        Returns:
            ``(blended_image, boxes)`` where the image is a ``float32`` array and the
            boxes are ``box_2`` if ``box_1`` is empty, ``box_1`` if ``box_2`` is empty,
            otherwise both concatenated along axis 0.
        """
        first_half = np.array(image_1, np.float32) * 0.5
        second_half = np.array(image_2, np.float32) * 0.5
        blended = first_half + second_half

        if len(box_1) == 0:
            return blended, box_2
        if len(box_2) == 0:
            return blended, box_1
        return blended, np.concatenate([box_1, box_2], axis=0)

# collate_fn to be passed to the DataLoader
def yolo_dataset_collate(batch):
    """Turn a list of ``(image, boxes)`` pairs into batch tensors.

    Images are stacked into a single float tensor; boxes stay a list with one float
    tensor per sample.
    """
    image_arrays = [image for image, _ in batch]
    box_arrays = [boxes for _, boxes in batch]

    stacked_images = torch.from_numpy(np.array(image_arrays)).type(torch.FloatTensor)
    box_tensors = []
    for boxes in box_arrays:
        box_tensors.append(torch.from_numpy(boxes).type(torch.FloatTensor))
    return stacked_images, box_tensors


## **NETS**

In [None]:
import sys
# Add the project folder on the mounted Google Drive to the module search path
# so that modules stored there can be imported by later cells.
sys.path.append('/content/drive/MyDrive/skripsiAini/')

### **darknet**

In [None]:
import torch
from torch import nn

# SiLU (Sigmoid Linear Unit) activation function
class SiLU(nn.Module):
    """Parameter-free activation that computes ``x * sigmoid(x)`` element-wise."""

    @staticmethod
    def forward(x):
        """Return ``x`` multiplied by its own sigmoid."""
        gate = torch.sigmoid(x)
        return x * gate

# Factory that maps an activation name to a module instance
def get_activation(name="silu", inplace=True):
    """Create the activation module selected by ``name``.

    Args:
        name: ``"silu"``, ``"relu"`` or ``"lrelu"`` (LeakyReLU with slope 0.1).
        inplace: Forwarded to ``nn.ReLU`` / ``nn.LeakyReLU``; not used for SiLU.

    Raises:
        AttributeError: If ``name`` is not one of the supported names.
    """
    if name == "silu":
        return SiLU()
    if name == "relu":
        return nn.ReLU(inplace=inplace)
    if name == "lrelu":
        return nn.LeakyReLU(0.1, inplace=inplace)
    raise AttributeError("Unsupported act type: {}".format(name))

# Focus layer: rearranges 2x2 pixel neighbourhoods into channels before convolving
class Focus(nn.Module):
    """Space-to-depth stem followed by a ``BaseConv``.

    The input is sub-sampled with stride 2 at the four possible (row, column) offsets
    and the four results are stacked along the channel axis, which halves the height
    and width and multiplies the channel count by four.
    """

    def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"):
        super().__init__()
        stacked_channels = in_channels * 4
        self.conv = BaseConv(stacked_channels, out_channels, ksize, stride, act=act)

    def forward(self, x):
        # (row offset, column offset) in stacking order:
        # top-left, bottom-left, top-right, bottom-right
        offsets = ((0, 0), (1, 0), (0, 1), (1, 1))
        patches = [x[..., row::2, col::2] for row, col in offsets]
        return self.conv(torch.cat(patches, dim=1))

# Basic convolution block: Conv2d -> BatchNorm2d -> activation
class BaseConv(nn.Module):
    """Convolution followed by batch normalisation and an activation.

    Args:
        in_channels, out_channels: Channel counts of the convolution.
        ksize: Square kernel size; padding is ``(ksize - 1) // 2``.
        stride: Convolution stride.
        groups: Number of convolution groups.
        bias: Whether the convolution has a bias term.
        act: Activation name understood by ``get_activation``.
    """

    def __init__(self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu"):
        super().__init__()
        # With an odd kernel and stride 1 this padding keeps the spatial size.
        same_pad = (ksize - 1) // 2
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=ksize,
            stride=stride,
            padding=same_pad,
            groups=groups,
            bias=bias,
        )
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.03)
        self.act = get_activation(act, inplace=True)

    def forward(self, x):
        """Apply convolution, batch norm and activation in that order."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def fuseforward(self, x):
        """Apply convolution and activation only, skipping batch norm."""
        convolved = self.conv(x)
        return self.act(convolved)

# Depthwise-separable convolution, used to reduce the parameter count
class DWConv(nn.Module):
    """Depthwise ``BaseConv`` (one group per input channel) followed by a 1x1 ``BaseConv``."""

    def __init__(self, in_channels, out_channels, ksize, stride=1, act="silu"):
        super().__init__()
        # Depthwise stage: spatial filtering, channel count unchanged.
        self.dconv = BaseConv(
            in_channels,
            in_channels,
            ksize=ksize,
            stride=stride,
            groups=in_channels,
            act=act,
        )
        # Pointwise stage: 1x1 convolution that changes the channel count.
        self.pconv = BaseConv(
            in_channels,
            out_channels,
            ksize=1,
            stride=1,
            groups=1,
            act=act,
        )

    def forward(self, x):
        depthwise_out = self.dconv(x)
        return self.pconv(depthwise_out)

# Spatial Pyramid Pooling (SPP) block for multi-scale features
class SPPBottleneck(nn.Module):
    """1x1 conv, parallel stride-1 max-pools of several kernel sizes, then 1x1 conv.

    The reduced feature map and every pooled copy of it are concatenated along the
    channel axis before the second convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu"):
        super().__init__()
        hidden_channels = in_channels // 2
        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation)

        # Stride 1 with padding ks // 2 keeps the spatial size for odd kernel sizes.
        pools = []
        for ks in kernel_sizes:
            pools.append(nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2))
        self.m = nn.ModuleList(pools)

        # One un-pooled branch plus one branch per pooling kernel.
        branch_count = len(kernel_sizes) + 1
        conv2_channels = hidden_channels * branch_count
        self.conv2 = BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation)

    def forward(self, x):
        reduced = self.conv1(x)
        branches = [reduced]
        for pool in self.m:
            branches.append(pool(reduced))
        return self.conv2(torch.cat(branches, dim=1))

# Standard bottleneck, the building block stacked inside CSPLayer
class Bottleneck(nn.Module):
    """1x1 conv followed by a 3x3 conv, with an optional residual connection.

    The residual is added only when ``shortcut`` is truthy and the input and output
    channel counts are equal.
    """

    def __init__(self, in_channels, out_channels, shortcut=True, expansion=0.5, depthwise=False, act="silu",):
        super().__init__()
        hidden_channels = int(out_channels * expansion)
        # The 3x3 stage is depthwise-separable when requested.
        second_conv_cls = DWConv if depthwise else BaseConv

        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
        self.conv2 = second_conv_cls(hidden_channels, out_channels, 3, stride=1, act=act)
        self.use_add = shortcut and in_channels == out_channels

    def forward(self, x):
        out = self.conv2(self.conv1(x))
        return out + x if self.use_add else out

# CSPLayer (Cross Stage Partial layer): two parallel 1x1 branches, one of them deepened
class CSPLayer(nn.Module):
    """Cross Stage Partial block.

    The input goes through two independent 1x1 convolutions. The first branch is
    additionally passed through ``n`` bottlenecks; both branches are then concatenated
    on the channel axis and fused by a third 1x1 convolution.

    Args:
        in_channels, out_channels: Channel counts of the whole block.
        n: Number of stacked ``Bottleneck`` modules.
        shortcut: Passed to each ``Bottleneck``.
        expansion: Ratio of the hidden width to ``out_channels``.
        depthwise: Passed to each ``Bottleneck``.
        act: Activation name.
    """

    def __init__(self, in_channels, out_channels, n=1, shortcut=True, expansion=0.5, depthwise=False, act="silu",):
        super().__init__()
        hidden_channels = int(out_channels * expansion)

        # Entry convolutions of the deep branch and of the bypass branch.
        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
        self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
        # Fuses the concatenated branches back to out_channels.
        self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act)

        # Bottlenecks keep the hidden width (expansion 1.0).
        blocks = []
        for _ in range(n):
            blocks.append(Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act))
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        deep_branch = self.conv1(x)
        bypass_branch = self.conv2(x)
        deep_branch = self.m(deep_branch)
        merged = torch.cat((deep_branch, bypass_branch), dim=1)
        return self.conv3(merged)

# CSPDarknet backbone definition
class CSPDarknet(nn.Module):
    """Backbone made of a Focus stem followed by four stride-2 stages (dark2..dark5).

    Args:
        dep_mul: Depth multiplier; the base number of bottlenecks is
            ``max(round(dep_mul * 3), 1)``.
        wid_mul: Width multiplier; the base channel count is ``int(wid_mul * 64)``.
        out_features: Names of the stages whose outputs ``forward`` returns.
        depthwise: Use ``DWConv`` instead of ``BaseConv`` for the stride-2 convolutions
            (also forwarded to the CSP layers).
        act: Activation name.
    """

    def __init__(self, dep_mul, wid_mul, out_features=("dark3", "dark4", "dark5"), depthwise=False, act="silu",):
        super().__init__()
        assert out_features, "please provide output features of Darknet"
        self.out_features = out_features
        Conv = DWConv if depthwise else BaseConv

        base_channels = int(wid_mul * 64)
        base_depth = max(round(dep_mul * 3), 1)

        # Channel widths of the four stages and the depth of the two middle stages.
        c2, c4, c8, c16 = (base_channels * factor for factor in (2, 4, 8, 16))
        deep_depth = base_depth * 3

        # Stem: space-to-depth plus a 3x3 convolution.
        self.stem = Focus(3, base_channels, ksize=3, act=act)

        # dark2: 3x3 stride-2 convolution, then a CSP layer.
        self.dark2 = nn.Sequential(
            Conv(base_channels, c2, 3, 2, act=act),
            CSPLayer(c2, c2, n=base_depth, depthwise=depthwise, act=act),
        )

        # dark3
        self.dark3 = nn.Sequential(
            Conv(c2, c4, 3, 2, act=act),
            CSPLayer(c4, c4, n=deep_depth, depthwise=depthwise, act=act),
        )

        # dark4
        self.dark4 = nn.Sequential(
            Conv(c4, c8, 3, 2, act=act),
            CSPLayer(c8, c8, n=deep_depth, depthwise=depthwise, act=act),
        )

        # dark5: additionally contains the SPP block; its CSP layer has no shortcuts.
        self.dark5 = nn.Sequential(
            Conv(c8, c16, 3, 2, act=act),
            SPPBottleneck(c16, c16, activation=act),
            CSPLayer(c16, c16, n=base_depth, shortcut=False, depthwise=depthwise, act=act),
        )

    def forward(self, x):
        """Run all stages in order and return ``{stage name: output}`` for the requested stages."""
        selected = {}
        for stage_name in ("stem", "dark2", "dark3", "dark4", "dark5"):
            x = getattr(self, stage_name)(x)
            if stage_name in self.out_features:
                selected[stage_name] = x
        return selected

# When this code runs as the main module, print the CSPDarknet architecture
# built with dep_mul=1 and wid_mul=1.
if __name__ == '__main__':
    print(CSPDarknet(1, 1))

CSPDarknet(
  (stem): Focus(
    (conv): BaseConv(
      (conv): Conv2d(12, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
  )
  (dark2): Sequential(
    (0): BaseConv(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (1): CSPLayer(
      (conv1): BaseConv(
        (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU()
      )
      (conv2): BaseConv(
        (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU()
      )
  

### **yolo**

In [None]:
import torch
import torch.nn as nn

# from darknet import BaseConv, CSPDarknet, CSPLayer, DWConv

# YOLOXHead: the detection head of the YOLOX model
class YOLOXHead(nn.Module):
    """Decoupled detection head with one set of branches per feature level.

    Each level has a 1x1 stem, a two-convolution classification tower and a
    two-convolution regression tower. The class scores are predicted from the
    classification tower; the 4 box values and the objectness score are both
    predicted from the regression tower.

    Args:
        num_classes: Number of class channels to predict.
        width: Width multiplier applied to ``in_channels`` and to the 256-channel towers.
        in_channels: Unscaled channel count of every input level.
        act: Activation name.
        depthwise: Use ``DWConv`` instead of ``BaseConv`` inside the towers.
    """

    def __init__(self, num_classes, width = 1.0, in_channels = [256, 512, 1024], act = "silu", depthwise = False,):
        super().__init__()
        Conv = DWConv if depthwise else BaseConv

        # Per-level containers for the towers, the prediction layers and the stems.
        self.cls_convs  = nn.ModuleList()
        self.reg_convs  = nn.ModuleList()
        self.cls_preds  = nn.ModuleList()
        self.reg_preds  = nn.ModuleList()
        self.obj_preds  = nn.ModuleList()
        self.stems      = nn.ModuleList()

        # Common width of the stem output and of both towers.
        tower_channels = int(256 * width)

        for level_channels in in_channels:
            # 1x1 stem that brings the level to the tower width.
            self.stems.append(
                BaseConv(in_channels=int(level_channels * width), out_channels=tower_channels, ksize=1, stride=1, act=act)
            )

            # Classification tower and class prediction.
            self.cls_convs.append(nn.Sequential(
                Conv(in_channels=tower_channels, out_channels=tower_channels, ksize=3, stride=1, act=act),
                Conv(in_channels=tower_channels, out_channels=tower_channels, ksize=3, stride=1, act=act),
            ))
            self.cls_preds.append(
                nn.Conv2d(in_channels=tower_channels, out_channels=num_classes, kernel_size=1, stride=1, padding=0)
            )

            # Regression tower, box prediction (4 channels) and objectness prediction (1 channel).
            self.reg_convs.append(nn.Sequential(
                Conv(in_channels=tower_channels, out_channels=tower_channels, ksize=3, stride=1, act=act),
                Conv(in_channels=tower_channels, out_channels=tower_channels, ksize=3, stride=1, act=act),
            ))
            self.reg_preds.append(
                nn.Conv2d(in_channels=tower_channels, out_channels=4, kernel_size=1, stride=1, padding=0)
            )
            self.obj_preds.append(
                nn.Conv2d(in_channels=tower_channels, out_channels=1, kernel_size=1, stride=1, padding=0)
            )

    def forward(self, inputs):
        """Return one tensor per level with channels ordered ``[box(4), objectness(1), classes]``."""
        outputs = []
        for level, feature in enumerate(inputs):
            stem_out = self.stems[level](feature)

            # Classification branch.
            cls_output = self.cls_preds[level](self.cls_convs[level](stem_out))

            # Regression branch; objectness shares the regression features.
            reg_feat = self.reg_convs[level](stem_out)
            reg_output = self.reg_preds[level](reg_feat)
            obj_output = self.obj_preds[level](reg_feat)

            outputs.append(torch.cat([reg_output, obj_output, cls_output], 1))
        return outputs

# YOLOPAFPN: CSPDarknet backbone plus a PAFPN (Path Aggregation FPN) feature pyramid
class YOLOPAFPN(nn.Module):
    """CSPDarknet backbone followed by a top-down and a bottom-up feature fusion path.

    Args:
        depth: Depth multiplier passed to the backbone; every fusion CSP layer
            stacks ``round(3 * depth)`` bottlenecks.
        width: Width multiplier applied to ``in_channels``.
        in_features: Names of the three backbone outputs to fuse, ordered from the
            highest to the lowest resolution.
        in_channels: Unscaled channel counts of those three outputs.
        depthwise: Use ``DWConv`` instead of ``BaseConv`` for the stride-2 convolutions.
        act: Activation name.

    The shape comments below are written as ``height, width, channels`` and assume a
    640x640 input with ``width=1.0``.
    """

    def __init__(self, depth = 1.0, width = 1.0, in_features = ("dark3", "dark4", "dark5"), in_channels = [256, 512, 1024], depthwise = False, act = "silu"):
        super().__init__()

        # Convolution type used for the two down-sampling layers.
        Conv                = DWConv if depthwise else BaseConv

        self.backbone       = CSPDarknet(depth, width, depthwise = depthwise, act = act)
        self.in_features    = in_features

        # Nearest-neighbour x2 up-sampling shared by both top-down steps.
        self.upsample       = nn.Upsample(scale_factor=2, mode="nearest")

        #-------------------------------------------#
        #   20, 20, 1024 -> 20, 20, 512
        #-------------------------------------------#
        self.lateral_conv0  = BaseConv(int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act)

        #-------------------------------------------#
        #   40, 40, 1024 -> 40, 40, 512
        #-------------------------------------------#
        self.C3_p4 = CSPLayer(
            int(2 * in_channels[1] * width),
            int(in_channels[1] * width),
            round(3 * depth),
            False,
            depthwise = depthwise,
            act = act,
        )

        #-------------------------------------------#
        #   40, 40, 512 -> 40, 40, 256
        #-------------------------------------------#
        self.reduce_conv1   = BaseConv(int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act)
        #-------------------------------------------#
        #   80, 80, 512 -> 80, 80, 256
        #-------------------------------------------#
        self.C3_p3 = CSPLayer(
            int(2 * in_channels[0] * width),
            int(in_channels[0] * width),
            round(3 * depth),
            False,
            depthwise = depthwise,
            act = act,
        )

        #-------------------------------------------#
        #   80, 80, 256 -> 40, 40, 256
        #-------------------------------------------#
        self.bu_conv2       = Conv(int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act)
        #-------------------------------------------#
        #   40, 40, 512 -> 40, 40, 512
        #-------------------------------------------#
        self.C3_n3 = CSPLayer(
            int(2 * in_channels[0] * width),
            int(in_channels[1] * width),
            round(3 * depth),
            False,
            depthwise = depthwise,
            act = act,
        )

        #-------------------------------------------#
        #   40, 40, 512 -> 20, 20, 512
        #-------------------------------------------#
        self.bu_conv1       = Conv(int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act)
        #-------------------------------------------#
        #   20, 20, 1024 -> 20, 20, 1024
        #-------------------------------------------#
        self.C3_n4 = CSPLayer(
            int(2 * in_channels[1] * width),
            int(in_channels[2] * width),
            round(3 * depth),
            False,
            depthwise = depthwise,
            act = act,
        )

    def forward(self, input):
        """Return the fused feature maps ``(P3_out, P4_out, P5_out)``, highest resolution first."""
        # Call the module itself rather than .forward() so that hooks registered on
        # the backbone are executed.
        out_features            = self.backbone(input)
        [feat1, feat2, feat3]   = [out_features[f] for f in self.in_features]

        # ---------------- top-down path ---------------- #

        #-------------------------------------------#
        #   20, 20, 1024 -> 20, 20, 512
        #-------------------------------------------#
        P5          = self.lateral_conv0(feat3)
        #-------------------------------------------#
        #  20, 20, 512 -> 40, 40, 512
        #-------------------------------------------#
        P5_upsample = self.upsample(P5)
        #-------------------------------------------#
        #  40, 40, 512 + 40, 40, 512 -> 40, 40, 1024
        #-------------------------------------------#
        P5_upsample = torch.cat([P5_upsample, feat2], 1)
        #-------------------------------------------#
        #   40, 40, 1024 -> 40, 40, 512
        #-------------------------------------------#
        P5_upsample = self.C3_p4(P5_upsample)

        #-------------------------------------------#
        #   40, 40, 512 -> 40, 40, 256
        #-------------------------------------------#
        P4          = self.reduce_conv1(P5_upsample)
        #-------------------------------------------#
        #   40, 40, 256 -> 80, 80, 256
        #-------------------------------------------#
        P4_upsample = self.upsample(P4)
        #-------------------------------------------#
        #   80, 80, 256 + 80, 80, 256 -> 80, 80, 512
        #-------------------------------------------#
        P4_upsample = torch.cat([P4_upsample, feat1], 1)
        #-------------------------------------------#
        #   80, 80, 512 -> 80, 80, 256
        #-------------------------------------------#
        P3_out      = self.C3_p3(P4_upsample)

        # ---------------- bottom-up path ---------------- #

        #-------------------------------------------#
        #   80, 80, 256 -> 40, 40, 256
        #-------------------------------------------#
        P3_downsample   = self.bu_conv2(P3_out)
        #-------------------------------------------#
        #   40, 40, 256 + 40, 40, 256 -> 40, 40, 512
        #-------------------------------------------#
        P3_downsample   = torch.cat([P3_downsample, P4], 1)
        #-------------------------------------------#
        #   40, 40, 512 -> 40, 40, 512
        #-------------------------------------------#
        P4_out          = self.C3_n3(P3_downsample)

        #-------------------------------------------#
        #   40, 40, 512 -> 20, 20, 512
        #-------------------------------------------#
        P4_downsample   = self.bu_conv1(P4_out)
        #-------------------------------------------#
        #   20, 20, 512 + 20, 20, 512 -> 20, 20, 1024
        #-------------------------------------------#
        P4_downsample   = torch.cat([P4_downsample, P5], 1)
        #-------------------------------------------#
        #   20, 20, 1024 -> 20, 20, 1024
        #-------------------------------------------#
        P5_out          = self.C3_n4(P4_downsample)

        return (P3_out, P4_out, P5_out)

# YoloBody: the top-level model that joins the PAFPN backbone and the detection head
class YoloBody(nn.Module):
    """Complete YOLOX network: ``YOLOPAFPN`` followed by ``YOLOXHead``.

    Args:
        num_classes: Number of classes predicted by the head.
        phi: Model size key, one of ``'nano'``, ``'tiny'``, ``'s'``, ``'m'``, ``'l'``,
            ``'x'``. It selects the depth and width multipliers; ``'nano'`` additionally
            enables depthwise convolutions.

    Raises:
        KeyError: If ``phi`` is not one of the keys above.
    """

    def __init__(self, num_classes, phi):
        super().__init__()

        # Depth / width multipliers per model size.
        depth_dict = {'nano': 0.33, 'tiny': 0.33, 's' : 0.33, 'm' : 0.67, 'l' : 1.00, 'x' : 1.33,}
        width_dict = {'nano': 0.25, 'tiny': 0.375, 's' : 0.50, 'm' : 0.75, 'l' : 1.00, 'x' : 1.25,}

        depth, width    = depth_dict[phi], width_dict[phi]
        depthwise       = phi == 'nano'

        self.backbone   = YOLOPAFPN(depth, width, depthwise=depthwise)
        self.head       = YOLOXHead(num_classes, width, depthwise=depthwise)

    def forward(self, x):
        """Return the head outputs computed from the backbone's feature pyramid."""
        # Invoke the sub-modules through __call__ (not .forward) so that any hooks
        # registered on them are run.
        fpn_outs    = self.backbone(x)
        outputs     = self.head(fpn_outs)
        return outputs

### **yolo_training**

In [None]:
import math
from copy import deepcopy
from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F

# IoU (Intersection over Union) loss for boxes given as (center x, center y, width, height)
class IOUloss(nn.Module):
    """IoU-based regression loss.

    Args:
        reduction: ``"mean"`` or ``"sum"`` reduce the per-box loss to a scalar; any
            other value returns the per-box loss unchanged.
        loss_type: ``"iou"`` gives ``1 - iou ** 2``; ``"giou"`` gives ``1 - giou``
            with ``giou`` clamped to ``[-1, 1]``.
    """

    def __init__(self, reduction="none", loss_type="iou"):
        super(IOUloss, self).__init__()
        self.reduction = reduction
        self.loss_type = loss_type

    def forward(self, pred, target):
        """Compute the loss between matching rows of ``pred`` and ``target``.

        Both inputs are flattened to ``[N, 4]`` rows of ``(cx, cy, w, h)``.

        Raises:
            ValueError: If ``self.loss_type`` is neither ``"iou"`` nor ``"giou"``.
        """
        assert pred.shape[0] == target.shape[0]

        # reshape (rather than view) also accepts non-contiguous inputs.
        pred = pred.reshape(-1, 4)
        target = target.reshape(-1, 4)

        # Top-left and bottom-right corners of the intersection rectangle.
        tl = torch.max(
            (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2)
        )
        br = torch.min(
            (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2)
        )

        # Areas of the predicted and the target boxes.
        area_p = torch.prod(pred[:, 2:], 1)
        area_g = torch.prod(target[:, 2:], 1)

        # `en` is 1 only where the boxes overlap on both axes, 0 otherwise.
        en = (tl < br).type(tl.type()).prod(dim=1)
        area_i = torch.prod(br - tl, 1) * en
        area_u = area_p + area_g - area_i
        iou = (area_i) / (area_u + 1e-16)

        if self.loss_type == "iou":
            loss = 1 - iou ** 2
        elif self.loss_type == "giou":
            # Smallest rectangle enclosing both boxes.
            c_tl = torch.min((pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2))
            c_br = torch.max((pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2))
            area_c = torch.prod(c_br - c_tl, 1)
            giou = iou - (area_c - area_u) / area_c.clamp(1e-16)
            loss = 1 - giou.clamp(min=-1.0, max=1.0)
        else:
            # Previously this fell through and failed later with an UnboundLocalError.
            raise ValueError("Unsupported loss_type: {}".format(self.loss_type))

        if self.reduction == "mean":
            loss = loss.mean()
        elif self.reduction == "sum":
            loss = loss.sum()

        return loss

################################# YOLO LOSS ################################

# Kelas utama YOLO untuk perhitungan loss dalam object detection
class YOLOLoss(nn.Module):
    """Detection loss: IoU regression + BCE objectness + BCE classification.

    The raw head output of every FPN level is decoded to boxes at image scale,
    each ground-truth box is matched to a dynamic number of candidate anchor
    points (``get_assignments`` / ``dynamic_k_matching``), and the three loss
    terms are summed and divided by the number of matched (foreground) anchors.
    """

    def __init__(self, num_classes, fp16, strides=[8, 16, 32]):
        """
        Args:
            num_classes: number of object classes.
            fp16: when True, the class-cost computation in ``get_assignments``
                runs inside a CUDA autocast-disabled region.
            strides: stride of each FPN level, in the same order as the tensors
                later passed to ``forward`` (they are zipped together).
        """
        super().__init__()
        self.num_classes        = num_classes
        self.strides            = strides

        # Element-wise BCE for objectness/classification and IoU loss for boxes.
        self.bcewithlog_loss    = nn.BCEWithLogitsLoss(reduction="none")
        self.iou_loss           = IOUloss(reduction="none")
        # Per-level grid cache; the dummy tensors force a rebuild on first use.
        self.grids              = [torch.zeros(1)] * len(strides)
        self.fp16               = fp16

    def forward(self, inputs, labels=None):
        """Decode every FPN level and return the scalar training loss.

        Args:
            inputs: list of head outputs, one per stride, each of shape
                ``[batch, num_classes + 5, H_k, W_k]``.
            labels: per-image ground truth, indexed by batch position. Each
                entry is a ``[num_gt, 5]`` tensor whose columns are read as
                ``(cx, cy, w, h, class_id)``. Despite the ``None`` default it
                must be provided, because ``get_losses`` indexes it.

        Returns:
            Scalar loss tensor.
        """
        outputs             = []  # decoded predictions, [batch, H_k*W_k, num_classes + 5] per level
        x_shifts            = []  # grid x index of every anchor, [1, H_k*W_k] per level
        y_shifts            = []  # grid y index of every anchor, [1, H_k*W_k] per level
        expanded_strides    = []  # stride of every anchor, [1, H_k*W_k] per level

        for k, (stride, output) in enumerate(zip(self.strides, inputs)):
            output, grid = self.get_output_and_grid(output, k, stride)
            x_shifts.append(grid[:, :, 0])
            y_shifts.append(grid[:, :, 1])
            expanded_strides.append(torch.ones_like(grid[:, :, 0]) * stride)
            outputs.append(output)
        # Concatenate all levels along the anchor dimension before computing the loss.
        return self.get_losses(x_shifts, y_shifts, expanded_strides, labels, torch.cat(outputs, 1))

    def get_output_and_grid(self, output, k, stride):
        """Decode one FPN level and return it with its anchor grid.

        Args:
            output: raw head output ``[batch, channels, H, W]``.
            k: index of the FPN level (selects the cached grid).
            stride: stride of this level.

        Returns:
            ``(output, grid)`` where ``output`` is ``[batch, H*W, channels]``
            with channels 0-1 turned into centre coordinates
            ``(raw + grid) * stride`` and channels 2-3 into sizes
            ``exp(raw) * stride``, and ``grid`` is ``[1, H*W, 2]`` holding the
            ``(x, y)`` cell index of every anchor.

        Note:
            The decoded values are written in place into the flattened/permuted
            tensor, which may alias the storage of the ``output`` argument.
        """
        grid            = self.grids[k]
        hsize, wsize    = output.shape[-2:]

        # The cached grid is stored as [1, H, W, 2], so its spatial size lives in
        # dims 1:3. (Comparing dims 2:4 -- i.e. (W, 2) -- against the output's
        # (H, W) never matched and rebuilt the grid on every call.)
        if grid.shape[1:3] != output.shape[2:4]:
            yv, xv          = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)])
            grid            = torch.stack((xv, yv), 2).view(1, hsize, wsize, 2).type(output.type())
            self.grids[k]   = grid
        # One (x, y) pair per anchor.
        grid                = grid.view(1, -1, 2)

        output              = output.flatten(start_dim=2).permute(0, 2, 1)  # [batch, anchors, channels]
        output[..., :2]     = (output[..., :2] + grid.type_as(output)) * stride  # centre x, y at image scale
        output[..., 2:4]    = torch.exp(output[..., 2:4]) * stride  # width, height at image scale
        return output, grid

    def get_losses(self, x_shifts, y_shifts, expanded_strides, labels, outputs):
        """Assign targets per image and combine the three loss terms.

        Args:
            x_shifts, y_shifts, expanded_strides: lists of ``[1, n_k]`` tensors,
                one per FPN level.
            labels: per-image ground truth (see ``forward``).
            outputs: decoded predictions ``[batch, n_anchors_all, 5 + num_classes]``.

        Returns:
            ``(5.0 * loss_iou + loss_obj + loss_cls) / max(num_fg, 1)`` where
            ``num_fg`` is the number of foreground anchors over the batch.
        """
        # Split the decoded predictions.
        bbox_preds  = outputs[:, :, :4]   # [batch, n_anchors_all, 4]
        obj_preds   = outputs[:, :, 4:5]  # [batch, n_anchors_all, 1]
        cls_preds   = outputs[:, :, 5:]   # [batch, n_anchors_all, num_classes]

        total_num_anchors   = outputs.shape[1]

        # Merge the per-level anchor descriptors: each becomes [1, n_anchors_all].
        x_shifts            = torch.cat(x_shifts, 1).type_as(outputs)
        y_shifts            = torch.cat(y_shifts, 1).type_as(outputs)
        expanded_strides    = torch.cat(expanded_strides, 1).type_as(outputs)

        cls_targets = []
        reg_targets = []
        obj_targets = []
        fg_masks    = []

        num_fg  = 0.0  # running count of foreground anchors over the batch

        for batch_idx in range(outputs.shape[0]):
            num_gt          = len(labels[batch_idx])

            if num_gt == 0:
                # Image without objects: every anchor is background.
                cls_target  = outputs.new_zeros((0, self.num_classes))
                reg_target  = outputs.new_zeros((0, 4))
                obj_target  = outputs.new_zeros((total_num_anchors, 1))
                fg_mask     = outputs.new_zeros(total_num_anchors).bool()
            else:
                gt_bboxes_per_image     = labels[batch_idx][..., :4].type_as(outputs)  # [num_gt, 4]
                gt_classes              = labels[batch_idx][..., 4].type_as(outputs)   # [num_gt]
                bboxes_preds_per_image  = bbox_preds[batch_idx]  # [n_anchors_all, 4]
                cls_preds_per_image     = cls_preds[batch_idx]   # [n_anchors_all, num_classes]
                obj_preds_per_image     = obj_preds[batch_idx]   # [n_anchors_all, 1]

                # Match anchors to ground-truth boxes.
                gt_matched_classes, fg_mask, pred_ious_this_matching, matched_gt_inds, num_fg_img = self.get_assignments(
                    num_gt, total_num_anchors, gt_bboxes_per_image, gt_classes, bboxes_preds_per_image, cls_preds_per_image, obj_preds_per_image,
                    expanded_strides, x_shifts, y_shifts,
                )
                torch.cuda.empty_cache()

                num_fg      += num_fg_img
                # Class target is the one-hot class scaled by the matched IoU.
                cls_target  = F.one_hot(gt_matched_classes.to(torch.int64), self.num_classes).float() * pred_ious_this_matching.unsqueeze(-1)
                obj_target  = fg_mask.unsqueeze(-1)
                reg_target  = gt_bboxes_per_image[matched_gt_inds]

            cls_targets.append(cls_target)
            reg_targets.append(reg_target)
            obj_targets.append(obj_target.type(cls_target.type()))
            fg_masks.append(fg_mask)

        # Stack the targets of all images.
        cls_targets = torch.cat(cls_targets, 0)
        reg_targets = torch.cat(reg_targets, 0)
        obj_targets = torch.cat(obj_targets, 0)
        fg_masks    = torch.cat(fg_masks, 0)

        # Avoid dividing by zero when no anchor was matched.
        num_fg      = max(num_fg, 1)

        loss_iou    = (self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets)).sum()
        loss_obj    = (self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)).sum()
        loss_cls    = (self.bcewithlog_loss(cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets)).sum()

        reg_weight  = 5.0  # weight of the box-regression term
        loss = reg_weight * loss_iou + loss_obj + loss_cls

        return loss / num_fg

    @torch.no_grad()
    def get_assignments(self, num_gt, total_num_anchors, gt_bboxes_per_image, gt_classes, bboxes_preds_per_image, cls_preds_per_image, obj_preds_per_image, expanded_strides, x_shifts, y_shifts):
        """Match the anchors of one image to its ground-truth boxes (no gradients).

        Returns:
            ``(gt_matched_classes, fg_mask, pred_ious_this_matching,
            matched_gt_inds, num_fg)``: the class and ground-truth index of each
            matched anchor, the boolean foreground mask over all anchors
            ``[n_anchors_all]``, the IoU of each match, and the match count.
        """
        # fg_mask                 [n_anchors_all]  candidate anchors
        # is_in_boxes_and_center  [num_gt, n_candidates]
        fg_mask, is_in_boxes_and_center = self.get_in_boxes_info(gt_bboxes_per_image, expanded_strides, x_shifts, y_shifts, total_num_anchors, num_gt)

        # Keep only the predictions of candidate anchors.
        bboxes_preds_per_image  = bboxes_preds_per_image[fg_mask]  # [n_candidates, 4]
        cls_preds_              = cls_preds_per_image[fg_mask]     # [n_candidates, num_classes]
        obj_preds_              = obj_preds_per_image[fg_mask]     # [n_candidates, 1]
        num_in_boxes_anchor     = bboxes_preds_per_image.shape[0]

        # Pairwise IoU between ground truth and candidates: [num_gt, n_candidates].
        pair_wise_ious      = self.bboxes_iou(gt_bboxes_per_image, bboxes_preds_per_image, False)
        pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)

        # Pairwise class cost: BCE between sqrt(sigmoid(cls) * sigmoid(obj)) and
        # the one-hot ground-truth class, summed over classes.
        #   cls_preds_        [num_gt, n_candidates, num_classes]
        #   gt_cls_per_image  [num_gt, n_candidates, num_classes]
        if self.fp16:
            with torch.cuda.amp.autocast(enabled=False):
                cls_preds_          = cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() * obj_preds_.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
                gt_cls_per_image    = F.one_hot(gt_classes.to(torch.int64), self.num_classes).float().unsqueeze(1).repeat(1, num_in_boxes_anchor, 1)
                pair_wise_cls_loss  = F.binary_cross_entropy(cls_preds_.sqrt_(), gt_cls_per_image, reduction="none").sum(-1)
        else:
            cls_preds_          = cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() * obj_preds_.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
            gt_cls_per_image    = F.one_hot(gt_classes.to(torch.int64), self.num_classes).float().unsqueeze(1).repeat(1, num_in_boxes_anchor, 1)
            pair_wise_cls_loss  = F.binary_cross_entropy(cls_preds_.sqrt_(), gt_cls_per_image, reduction="none").sum(-1)
            del cls_preds_

        # Total matching cost: class cost + 3 * IoU cost, plus a large penalty for
        # candidates that are not inside both the box and its centre region.
        cost = pair_wise_cls_loss + 3.0 * pair_wise_ious_loss + 100000.0 * (~is_in_boxes_and_center).float()

        num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds = self.dynamic_k_matching(cost, pair_wise_ious, gt_classes, num_gt, fg_mask)
        del pair_wise_cls_loss, cost, pair_wise_ious, pair_wise_ious_loss
        return gt_matched_classes, fg_mask, pred_ious_this_matching, matched_gt_inds, num_fg

    def bboxes_iou(self, bboxes_a, bboxes_b, xyxy=True):
        """Pairwise IoU between two box sets.

        Args:
            bboxes_a: ``[A, 4]`` boxes.
            bboxes_b: ``[B, 4]`` boxes.
            xyxy: True for ``(x1, y1, x2, y2)`` boxes, False for
                ``(cx, cy, w, h)`` boxes.

        Returns:
            ``[A, B]`` tensor of IoU values.

        Raises:
            IndexError: if either input does not have 4 columns.
        """
        if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
            raise IndexError

        if xyxy:
            tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])  # intersection top-left
            br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])  # intersection bottom-right
            area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
            area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
        else:
            tl = torch.max(
                (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
                (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2),
            )
            br = torch.min(
                (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
                (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2),
            )

            area_a = torch.prod(bboxes_a[:, 2:], 1)
            area_b = torch.prod(bboxes_b[:, 2:], 1)
        en = (tl < br).type(tl.type()).prod(dim=2)  # 1 where the pair overlaps on both axes
        area_i = torch.prod(br - tl, 2) * en
        return area_i / (area_a[:, None] + area_b - area_i)

    def get_in_boxes_info(self, gt_bboxes_per_image, expanded_strides, x_shifts, y_shifts, total_num_anchors, num_gt, center_radius = 2.5):
        """Select candidate anchors whose centre lies in a box or its centre region.

        Args:
            gt_bboxes_per_image: ``[num_gt, 4]`` boxes as ``(cx, cy, w, h)``.
            expanded_strides, x_shifts, y_shifts: ``[1, n_anchors_all]``.
            total_num_anchors: ``n_anchors_all``.
            num_gt: number of ground-truth boxes.
            center_radius: half-size of the centre region, in units of the
                anchor's stride.

        Returns:
            ``(is_in_boxes_anchor, is_in_boxes_and_center)``: a
            ``[n_anchors_all]`` mask of anchors inside any box or any centre
            region, and a ``[num_gt, n_candidates]`` mask (restricted to those
            anchors) that is True where the anchor is inside both the box and
            the centre region of that ground truth.
        """
        # Anchor centres at image scale.
        #   expanded_strides_per_image  [n_anchors_all]
        #   x_centers_per_image         [num_gt, n_anchors_all]
        #   y_centers_per_image         [num_gt, n_anchors_all]
        expanded_strides_per_image  = expanded_strides[0]
        x_centers_per_image         = ((x_shifts[0] + 0.5) * expanded_strides_per_image).unsqueeze(0).repeat(num_gt, 1)
        y_centers_per_image         = ((y_shifts[0] + 0.5) * expanded_strides_per_image).unsqueeze(0).repeat(num_gt, 1)

        # Left/right/top/bottom edges of every ground-truth box: [num_gt, n_anchors_all].
        gt_bboxes_per_image_l = (gt_bboxes_per_image[:, 0] - 0.5 * gt_bboxes_per_image[:, 2]).unsqueeze(1).repeat(1, total_num_anchors)
        gt_bboxes_per_image_r = (gt_bboxes_per_image[:, 0] + 0.5 * gt_bboxes_per_image[:, 2]).unsqueeze(1).repeat(1, total_num_anchors)
        gt_bboxes_per_image_t = (gt_bboxes_per_image[:, 1] - 0.5 * gt_bboxes_per_image[:, 3]).unsqueeze(1).repeat(1, total_num_anchors)
        gt_bboxes_per_image_b = (gt_bboxes_per_image[:, 1] + 0.5 * gt_bboxes_per_image[:, 3]).unsqueeze(1).repeat(1, total_num_anchors)

        # Signed distances from each anchor centre to the four box edges.
        #   bbox_deltas     [num_gt, n_anchors_all, 4]
        b_l = x_centers_per_image - gt_bboxes_per_image_l
        b_r = gt_bboxes_per_image_r - x_centers_per_image
        b_t = y_centers_per_image - gt_bboxes_per_image_t
        b_b = gt_bboxes_per_image_b - y_centers_per_image
        bbox_deltas = torch.stack([b_l, b_t, b_r, b_b], 2)

        # An anchor is inside a box when all four distances are positive.
        #   is_in_boxes     [num_gt, n_anchors_all]
        #   is_in_boxes_all [n_anchors_all]
        is_in_boxes     = bbox_deltas.min(dim=-1).values > 0.0
        is_in_boxes_all = is_in_boxes.sum(dim=0) > 0

        # Centre region: a square of half-size center_radius * stride around the box centre.
        gt_bboxes_per_image_l = (gt_bboxes_per_image[:, 0]).unsqueeze(1).repeat(1, total_num_anchors) - center_radius * expanded_strides_per_image.unsqueeze(0)
        gt_bboxes_per_image_r = (gt_bboxes_per_image[:, 0]).unsqueeze(1).repeat(1, total_num_anchors) + center_radius * expanded_strides_per_image.unsqueeze(0)
        gt_bboxes_per_image_t = (gt_bboxes_per_image[:, 1]).unsqueeze(1).repeat(1, total_num_anchors) - center_radius * expanded_strides_per_image.unsqueeze(0)
        gt_bboxes_per_image_b = (gt_bboxes_per_image[:, 1]).unsqueeze(1).repeat(1, total_num_anchors) + center_radius * expanded_strides_per_image.unsqueeze(0)

        # Signed distances from each anchor centre to the centre-region edges.
        #   center_deltas   [num_gt, n_anchors_all, 4]
        c_l = x_centers_per_image - gt_bboxes_per_image_l
        c_r = gt_bboxes_per_image_r - x_centers_per_image
        c_t = y_centers_per_image - gt_bboxes_per_image_t
        c_b = gt_bboxes_per_image_b - y_centers_per_image
        center_deltas       = torch.stack([c_l, c_t, c_r, c_b], 2)

        #   is_in_centers       [num_gt, n_anchors_all]
        #   is_in_centers_all   [n_anchors_all]
        is_in_centers       = center_deltas.min(dim=-1).values > 0.0
        is_in_centers_all   = is_in_centers.sum(dim=0) > 0

        # Candidates are anchors in any box OR any centre region; the pairwise
        # mask additionally requires box AND centre region for the same ground truth.
        #   is_in_boxes_anchor      [n_anchors_all]
        #   is_in_boxes_and_center  [num_gt, n_candidates]
        is_in_boxes_anchor      = is_in_boxes_all | is_in_centers_all
        is_in_boxes_and_center  = is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor]
        return is_in_boxes_anchor, is_in_boxes_and_center

    def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask):
        """Pick, for every ground truth, its ``k`` lowest-cost candidates.

        ``k`` is per ground truth: the integer part of the sum of its top
        (at most 10) IoUs, clamped to at least 1.

        Args:
            cost: ``[num_gt, n_candidates]`` matching cost.
            pair_wise_ious: ``[num_gt, n_candidates]`` IoU values.
            gt_classes: ``[num_gt]`` class ids.
            num_gt: number of ground-truth boxes.
            fg_mask: ``[n_anchors_all]`` candidate mask; updated IN PLACE so
                that only the finally matched anchors remain True.

        Returns:
            ``(num_fg, gt_matched_classes, pred_ious_this_matching,
            matched_gt_inds)`` for the matched anchors.
        """
        matching_matrix         = torch.zeros_like(cost)  # [num_gt, n_candidates]

        # Number of anchors to assign to each ground truth.
        n_candidate_k           = min(10, pair_wise_ious.size(1))
        topk_ious, _            = torch.topk(pair_wise_ious, n_candidate_k, dim=1)
        dynamic_ks              = torch.clamp(topk_ious.sum(1).int(), min=1)

        # Assign the k cheapest candidates to every ground truth.
        for gt_idx in range(num_gt):
            _, pos_idx = torch.topk(cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False)
            matching_matrix[gt_idx][pos_idx] = 1.0
        del topk_ious, dynamic_ks, pos_idx

        # An anchor claimed by several ground truths keeps only the cheapest one.
        anchor_matching_gt = matching_matrix.sum(0)
        if (anchor_matching_gt > 1).sum() > 0:

            _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0)
            matching_matrix[:, anchor_matching_gt > 1] *= 0.0  # clear the conflicting matches
            matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0

        # Candidates that ended up matched to some ground truth.
        fg_mask_inboxes = matching_matrix.sum(0) > 0.0
        num_fg          = fg_mask_inboxes.sum().item()

        # Narrow the caller's candidate mask down to the matched anchors.
        fg_mask[fg_mask.clone()] = fg_mask_inboxes

        # Ground-truth index, class and IoU of every matched anchor.
        matched_gt_inds     = matching_matrix[:, fg_mask_inboxes].argmax(0)
        gt_matched_classes  = gt_classes[matched_gt_inds]

        pred_ious_this_matching = (matching_matrix * pair_wise_ious).sum(0)[fg_mask_inboxes]
        return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds

def is_parallel(model):
    """Tell whether ``model`` is a multi-GPU wrapper.

    Only the exact types ``DataParallel`` and ``DistributedDataParallel``
    qualify; the check compares ``type(model)``, so subclasses do not count.
    """
    parallel_wrappers = (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
    model_type = type(model)
    return any(model_type is wrapper for wrapper in parallel_wrappers)

def de_parallel(model):
    """Return the bare model.

    A DataParallel / DistributedDataParallel wrapper (as detected by
    ``is_parallel``) is unwrapped through its ``module`` attribute; any other
    object is returned unchanged.
    """
    if is_parallel(model):
        return model.module
    return model

def copy_attr(a, b, include=(), exclude=()):
    """Copy instance attributes from ``b`` onto ``a``.

    Args:
        a: destination object.
        b: source object; the entries of its ``__dict__`` are considered.
        include: when non-empty, only names listed here are copied.
        exclude: names that are never copied.

    Names starting with ``'_'`` are always skipped.
    """
    only_listed = len(include)
    for name, value in b.__dict__.items():
        skip = (only_listed and name not in include) or name.startswith('_') or name in exclude
        if not skip:
            setattr(a, name, value)


########################################## CLASS EMA ###################################################

class ModelEMA:
    """Exponential moving average (EMA) of a model's state_dict.

    Adapted from https://github.com/rwightman/pytorch-image-models.
    Every floating-point entry of the state_dict (parameters and buffers) is
    averaged. For background on EMA see
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    """

    def __init__(self, model, decay=0.9999, tau=2000, updates=0):
        """
        Args:
            model: model to track; a DP/DDP wrapper is unwrapped first.
            decay: asymptotic decay factor.
            tau: time constant of the decay ramp-up.
            updates: number of EMA updates already performed.
        """
        shadow = deepcopy(de_parallel(model))
        self.ema = shadow.eval()  # independent copy, kept in eval mode
        self.updates = updates
        # The effective decay ramps from 0 towards `decay` as updates accumulate,
        # so early updates follow the live weights closely.
        self.decay = lambda x: decay * (1 - math.exp(-x / tau))
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        """Blend the current weights of ``model`` into the EMA copy, in place."""
        with torch.no_grad():
            self.updates += 1
            keep = self.decay(self.updates)

            live_state = de_parallel(model).state_dict()
            for key, averaged in self.ema.state_dict().items():
                # Non-floating entries are left untouched.
                if not averaged.dtype.is_floating_point:
                    continue
                averaged.mul_(keep)
                averaged.add_((1 - keep) * live_state[key].detach())

    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
        """Copy plain attributes of ``model`` onto the EMA copy via ``copy_attr``."""
        copy_attr(self.ema, model, include, exclude)

def weights_init(net, init_type='normal', init_gain = 0.02):
    """Re-initialise the convolution and BatchNorm2d layers of ``net`` in place.

    Args:
        net: module whose sub-modules are visited with ``net.apply``.
        init_type: scheme for modules whose class name contains ``'Conv'`` and
            that have a ``weight`` attribute: ``'normal'``, ``'xavier'``,
            ``'kaiming'`` or ``'orthogonal'``.
        init_gain: standard deviation (``'normal'``) or gain (``'xavier'``,
            ``'orthogonal'``); unused by ``'kaiming'``.

    Raises:
        NotImplementedError: when such a module is met and ``init_type`` is not
            one of the supported schemes.
    """
    # One initialiser per supported scheme, applied to the weight tensor.
    conv_initializers = {
        'normal':     lambda weight: torch.nn.init.normal_(weight, 0.0, init_gain),
        'xavier':     lambda weight: torch.nn.init.xavier_normal_(weight, gain=init_gain),
        'kaiming':    lambda weight: torch.nn.init.kaiming_normal_(weight, a=0, mode='fan_in'),
        'orthogonal': lambda weight: torch.nn.init.orthogonal_(weight, gain=init_gain),
    }

    def init_func(m):
        layer_name = m.__class__.__name__
        if hasattr(m, 'weight') and 'Conv' in layer_name:
            initializer = conv_initializers.get(init_type)
            if initializer is None:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
            initializer(m.weight.data)
        elif 'BatchNorm2d' in layer_name:
            # Scale drawn around 1, shift reset to 0.
            torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
            torch.nn.init.constant_(m.bias.data, 0.0)

    print('initialize network with %s type' % init_type)
    net.apply(init_func)

# Scheduler learning rate dengan dua metode: Cosine Decay dan Step Decay
def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
    """Build a learning-rate schedule function.

    Args:
        lr_decay_type: ``"cos"`` for warm-up + cosine decay; any other value
            selects step decay.
        lr: peak (initial) learning rate.
        min_lr: final learning rate.
        total_iters: total number of iterations the schedule spans.
        warmup_iters_ratio: warm-up length as a fraction of ``total_iters``
            (the result is clamped to the range [1, 3]).
        warmup_lr_ratio: warm-up starting LR as a fraction of ``lr``
            (at least 1e-6).
        no_aug_iter_ratio: length of the final constant-``min_lr`` phase as a
            fraction of ``total_iters`` (the result is clamped to [1, 15]).
        step_num: number of steps used by step decay.

    Returns:
        A ``functools.partial`` taking the current iteration and returning the
        learning rate for it.
    """

    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
        # Warm-up phase: quadratic ramp from warmup_lr_start up to lr.
        if iters <= warmup_total_iters:
            progress = iters / float(warmup_total_iters)
            return (lr - warmup_lr_start) * pow(progress, 2) + warmup_lr_start
        # Final phase: hold the minimum learning rate.
        if iters >= total_iters - no_aug_iter:
            return min_lr
        # In between: cosine decay from lr down to min_lr.
        cosine_span = total_iters - warmup_total_iters - no_aug_iter
        return min_lr + 0.5 * (lr - min_lr) * (
            1.0 + math.cos(math.pi * (iters - warmup_total_iters) / cosine_span)
        )

    def step_lr(lr, decay_rate, step_size, iters):
        if step_size < 1:
            raise ValueError("step_size must above 1.")
        # Multiply by decay_rate once per completed step.
        steps_done = iters // step_size
        return lr * decay_rate ** steps_done

    if lr_decay_type != "cos":
        # Step decay: choose the rate so that the last step lands on min_lr.
        decay_rate  = (min_lr / lr) ** (1 / (step_num - 1))
        step_size   = total_iters / step_num
        return partial(step_lr, lr, decay_rate, step_size)

    warmup_total_iters  = min(max(warmup_iters_ratio * total_iters, 1), 3)
    warmup_lr_start     = max(warmup_lr_ratio * lr, 1e-6)
    no_aug_iter         = min(max(no_aug_iter_ratio * total_iters, 1), 15)
    return partial(yolox_warm_cos_lr, lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter)

# Fungsi untuk mengatur LR optimizer pada setiap epoch
def set_optimizer_lr(optimizer, lr_scheduler_func, epoch):
    """Apply the scheduled learning rate to every parameter group.

    Args:
        optimizer: object exposing ``param_groups`` (e.g. a PyTorch optimizer).
        lr_scheduler_func: callable mapping ``epoch`` to a learning rate.
        epoch: value handed to ``lr_scheduler_func``.
    """
    scheduled_lr = lr_scheduler_func(epoch)
    # The schedule is evaluated once; all groups share the same value.
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr

# **TRAINING**

In [None]:
import os
# Switch the working directory to the project folder on the mounted Google
# Drive; relative paths used afterwards are resolved against this folder.
os.chdir('/content/drive/MyDrive/210411100054-SitiNurAini')

In [None]:
!ls

 COBA.ipynb		    dataset		       DatasetProcessing.ipynb
'conf Training_8:2.ipynb'   DATASET-CXR-AL3	       _logs_9:1
'conf Training_9:1.ipynb'   DatasetPreparation.ipynb   model_data


In [None]:
pip install pycocotools



In [None]:
import datetime
import os

from random import shuffle
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

if __name__ == "__main__":
    # Training entry point: builds the model, loss, optimizer and data loaders
    # from the configuration below, then calls fit_one_epoch once per epoch.

    # ------------------------------------------------------------------ #
    #   Runtime configuration
    # ------------------------------------------------------------------ #
    # When True the model is moved to the GPU with .cuda() further down.
    Cuda = True

    # When True, torch.distributed is initialised with the NCCL backend and
    # LOCAL_RANK / RANK are read from the environment.
    distributed = False

    # SyncBatchNorm conversion; only applied when distributed on more than one GPU.
    sync_bn = False

    # Mixed precision: creates a GradScaler and is forwarded to YOLOLoss / fit_one_epoch.
    fp16 = True

    # Text file with one class name per line (read by get_classes).
    classes_path = '/content/drive/MyDrive/210411100054-SitiNurAini/model_data/cxr_classes.txt'

    # Checkpoint to initialise from; the empty string skips weight loading.
    model_path = ''

    # Input size handed to the datasets, LossHistory and EvalCallback.
    input_shape = [512, 512]

    # Model variant passed to YoloBody.
    phi = 's'

    # Augmentation settings forwarded to the training YoloDataset.
    mosaic = True
    mosaic_prob = 0.5
    mixup = True
    mixup_prob = 0.5
    special_aug_ratio = 0.7

    # Epoch schedule: the training loop runs over range(Init_Epoch, UnFreeze_Epoch).
    Init_Epoch = 0
    # NOTE: False acts as 0 for these two; they only influence training when
    # Freeze_Train is True.
    Freeze_Epoch = False
    Freeze_batch_size = False
    UnFreeze_Epoch = 50
    Unfreeze_batch_size = 16

    # When True the backbone parameters are frozen until epoch >= Freeze_Epoch.
    Freeze_Train = False

    # Base learning rates; both are rescaled by batch size and clamped below.
    # With Min_lr == Init_lr, SGD and batch size 16 the effective range is
    # 5e-4 (Init_lr_fit) down to 2.5e-4 (Min_lr_fit).
    Init_lr = 1e-3
    Min_lr = Init_lr

    optimizer_type = "sgd"
    momentum = 0.937
    weight_decay = 5e-4

    # Forwarded to get_lr_scheduler.
    lr_decay_type = "cos"

    # Forwarded to fit_one_epoch.
    save_period = 1

    # Relative directory: resolved against the current working directory.
    save_dir = 'logs_9:1'

    # Forwarded to EvalCallback.
    eval_flag = True
    eval_period = 5

    # Worker processes per DataLoader.
    num_workers = 8

    # Path to the training annotations
    train_annotation_path = '/content/drive/MyDrive/210411100054-SitiNurAini/DATASET-CXR-AL3/ImageSets/Main/2024_train_9:1.txt'
    # Path to the validation annotations
    val_annotation_path = '/content/drive/MyDrive/210411100054-SitiNurAini/DATASET-CXR-AL3/ImageSets/Main/2024_val_9:1.txt'

    # ------------------------------------------------------------------ #
    #   Device / process-group setup
    # ------------------------------------------------------------------ #
    ngpus_per_node = torch.cuda.device_count()
    if distributed:
        dist.init_process_group(backend="nccl")
        local_rank = int(os.environ["LOCAL_RANK"])
        rank = int(os.environ["RANK"])
        device = torch.device("cuda", local_rank)
        if local_rank == 0:
            print(f"[{os.getpid()}] (rank = {rank}, local_rank = {local_rank}) training...")
            print("Gpu Device Count : ", ngpus_per_node)
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        local_rank = 0
        rank = 0

    # ------------------------------------------------------------------ #
    #   Model, optional pretrained weights, loss
    # ------------------------------------------------------------------ #
    class_names, num_classes = get_classes(classes_path)

    model = YoloBody(num_classes, phi)
    weights_init(model)
    if model_path != '':
        if local_rank == 0:
            print('Load weights {}.'.format(model_path))

        # Copy over only the checkpoint tensors whose key exists in the model
        # and whose shape matches; everything else is recorded as skipped.
        model_dict = model.state_dict()
        pretrained_dict = torch.load(model_path, map_location=device)
        load_key, no_load_key, temp_dict = [], [], {}
        for k, v in pretrained_dict.items():
            if k in model_dict.keys() and np.shape(model_dict[k]) == np.shape(v):
                temp_dict[k] = v
                load_key.append(k)
            else:
                no_load_key.append(k)
        model_dict.update(temp_dict)
        model.load_state_dict(model_dict)

        if local_rank == 0:
            # Report the outcome so key/shape mismatches do not go unnoticed.
            print('Loaded {} checkpoint tensors; skipped {} (not in model or shape mismatch).'.format(
                len(load_key), len(no_load_key)))
            if no_load_key:
                print('Skipped keys (first 10):', no_load_key[:10])

    yolo_loss = YOLOLoss(num_classes, fp16)

    # Only rank 0 writes the loss history / logs.
    if local_rank == 0:
        time_str = datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d_%H_%M_%S')
        log_dir = os.path.join(save_dir, "loss_" + str(time_str))
        loss_history = LossHistory(log_dir, model, input_shape=input_shape)
    else:
        loss_history = None

    if fp16:
        from torch.cuda.amp import GradScaler

        scaler = GradScaler()
    else:
        scaler = None

    model_train = model.train()

    if sync_bn and ngpus_per_node > 1 and distributed:
        model_train = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model_train)
    elif sync_bn:
        print("Sync_bn is not support in one gpu or not distributed.")

    if Cuda:
        if distributed:
            model_train = model_train.cuda(local_rank)
            model_train = torch.nn.parallel.DistributedDataParallel(model_train, device_ids=[local_rank],
                                                                    find_unused_parameters=True)
        else:
            model_train = torch.nn.DataParallel(model)
            cudnn.benchmark = True
            model_train = model_train.cuda()

    ema = ModelEMA(model_train)

    # ------------------------------------------------------------------ #
    #   Annotation lists
    # ------------------------------------------------------------------ #
    with open(train_annotation_path, encoding='utf-8') as f:
        train_lines = f.readlines()
    with open(val_annotation_path, encoding='utf-8') as f:
        val_lines = f.readlines()
    num_train = len(train_lines)
    num_val = len(val_lines)

    if local_rank == 0:
        show_config(
            classes_path=classes_path, model_path=model_path, input_shape=input_shape,
            Init_Epoch=Init_Epoch, Freeze_Epoch=Freeze_Epoch, UnFreeze_Epoch=UnFreeze_Epoch,
            Freeze_batch_size=Freeze_batch_size, Unfreeze_batch_size=Unfreeze_batch_size, Freeze_Train=Freeze_Train,
            Init_lr=Init_lr, Min_lr=Min_lr, optimizer_type=optimizer_type, momentum=momentum,
            lr_decay_type=lr_decay_type,
            save_period=save_period, save_dir=save_dir, num_workers=num_workers, num_train=num_train, num_val=num_val
        )
        # Sanity check on dataset size. wanted_step looks like a recommended
        # minimum number of optimizer steps - TODO confirm against upstream.
        wanted_step = 5e4 if optimizer_type == "sgd" else 1.5e4
        total_step = num_train // Unfreeze_batch_size * UnFreeze_Epoch
        if total_step <= wanted_step:
            if num_train // Unfreeze_batch_size == 0:
                raise ValueError("The dataset is too small to continue training. Please expand the dataset.")

    if True:
        UnFreeze_flag = False

        if Freeze_Train:
            for param in model.backbone.parameters():
                param.requires_grad = False

        batch_size = Freeze_batch_size if Freeze_Train else Unfreeze_batch_size

        # Scale the learning rates linearly with batch size relative to a
        # nominal batch size of 64, then clamp to optimizer-specific limits.
        nbs = 64
        lr_limit_max = 1e-3 if optimizer_type == 'adam' else 5e-2
        lr_limit_min = 3e-4 if optimizer_type == 'adam' else 5e-4
        Init_lr_fit = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
        Min_lr_fit = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)

        # Parameter groups:
        #   pg0 - BatchNorm weights (optimizer default weight decay)
        #   pg1 - all other weights (explicit weight_decay)
        #   pg2 - biases (optimizer default weight decay)
        pg0, pg1, pg2 = [], [], []
        for k, v in model.named_modules():
            if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
                pg2.append(v.bias)
            if isinstance(v, nn.BatchNorm2d) or "bn" in k:
                pg0.append(v.weight)
            elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
                pg1.append(v.weight)
        optimizer = {
            'adam': optim.Adam(pg0, Init_lr_fit, betas=(momentum, 0.999)),
            'sgd': optim.SGD(pg0, Init_lr_fit, momentum=momentum, nesterov=True)
        }[optimizer_type]
        optimizer.add_param_group({"params": pg1, "weight_decay": weight_decay})
        optimizer.add_param_group({"params": pg2})

        lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)

        # Batches per epoch; incomplete trailing batches are dropped (drop_last=True below).
        epoch_step = num_train // batch_size
        epoch_step_val = num_val // batch_size

        if epoch_step == 0 or epoch_step_val == 0:
            raise ValueError("The dataset is too small to continue training. Please expand the dataset.")

        if ema:
            ema.updates = epoch_step * Init_Epoch

        # ---------------------------------------#
        #   Build the dataset loaders.
        # ---------------------------------------#
        train_dataset = YoloDataset(train_lines, input_shape, num_classes, epoch_length=UnFreeze_Epoch,
                                    mosaic=mosaic, mixup=mixup, mosaic_prob=mosaic_prob, mixup_prob=mixup_prob,
                                    train=True, special_aug_ratio=special_aug_ratio)
        val_dataset = YoloDataset(val_lines, input_shape, num_classes, epoch_length=UnFreeze_Epoch,
                                  mosaic=False, mixup=False, mosaic_prob=0, mixup_prob=0, train=False,
                                  special_aug_ratio=0)

        # DataLoader's shuffle must be a real boolean. A dedicated name is used
        # so the flag can never fall back to the random.shuffle function that
        # this notebook imports under the name `shuffle`.
        if distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, shuffle=True, )
            val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False, )
            batch_size = batch_size // ngpus_per_node
            # A sampler is supplied, so DataLoader-level shuffling must stay off.
            my_shuffle = False
        else:
            train_sampler = None
            val_sampler = None
            my_shuffle = True

        gen = DataLoader(train_dataset, shuffle=my_shuffle, batch_size=batch_size, num_workers=num_workers,
                         pin_memory=True,
                         drop_last=True, collate_fn=yolo_dataset_collate, sampler=train_sampler)
        gen_val = DataLoader(val_dataset, shuffle=my_shuffle, batch_size=batch_size, num_workers=num_workers,
                             pin_memory=True,
                             drop_last=True, collate_fn=yolo_dataset_collate, sampler=val_sampler)

        # Only rank 0 runs the evaluation callback (it needs log_dir, which is
        # created on rank 0 only).
        if local_rank == 0:
            eval_callback = EvalCallback(model, input_shape, class_names, num_classes, val_lines, log_dir, Cuda,
                                         eval_flag=eval_flag, period=eval_period)
        else:
            eval_callback = None

        for epoch in range(Init_Epoch, UnFreeze_Epoch):

            # One-time switch from the frozen-backbone phase to full training:
            # recompute batch size, learning rates, step counts and loaders.
            if epoch >= Freeze_Epoch and not UnFreeze_flag and Freeze_Train:
                batch_size = Unfreeze_batch_size

                nbs = 64
                lr_limit_max = 1e-3 if optimizer_type == 'adam' else 5e-2
                lr_limit_min = 3e-4 if optimizer_type == 'adam' else 5e-4
                Init_lr_fit = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
                Min_lr_fit = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)

                lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)

                for param in model.backbone.parameters():
                    param.requires_grad = True

                epoch_step = num_train // batch_size
                epoch_step_val = num_val // batch_size

                if epoch_step == 0 or epoch_step_val == 0:
                    raise ValueError("The dataset is too small to continue training. Please expand the dataset.")

                if distributed:
                    batch_size = batch_size // ngpus_per_node

                if ema:
                    ema.updates = epoch_step * epoch

                gen = DataLoader(train_dataset, shuffle=my_shuffle, batch_size=batch_size, num_workers=num_workers,
                                 pin_memory=True,
                                 drop_last=True, collate_fn=yolo_dataset_collate, sampler=train_sampler)
                gen_val = DataLoader(val_dataset, shuffle=my_shuffle, batch_size=batch_size, num_workers=num_workers,
                                     pin_memory=True,
                                     drop_last=True, collate_fn=yolo_dataset_collate, sampler=val_sampler)

                UnFreeze_flag = True

            # Tell the datasets which epoch is running.
            gen.dataset.epoch_now = epoch
            gen_val.dataset.epoch_now = epoch

            if distributed:
                train_sampler.set_epoch(epoch)

            set_optimizer_lr(optimizer, lr_scheduler_func, epoch)

            fit_one_epoch(model_train, model, ema, yolo_loss, loss_history, eval_callback, optimizer, epoch, epoch_step,
                          epoch_step_val, gen, gen_val, UnFreeze_Epoch, Cuda, fp16, scaler, save_period, save_dir,
                          local_rank)

            if distributed:
                dist.barrier()

        if local_rank == 0:
            loss_history.writer.close()

initialize network with normal type


  module._c._create_method_from_trace(
  scaler = GradScaler()


Configurations:
----------------------------------------------------------------------
|                     keys |                                   values|
----------------------------------------------------------------------
|             classes_path | /content/drive/MyDrive/210411100054-SitiNurAini/model_data/cxr_classes.txt|
|               model_path |                                         |
|              input_shape |                               [512, 512]|
|               Init_Epoch |                                        0|
|             Freeze_Epoch |                                    False|
|           UnFreeze_Epoch |                                       50|
|        Freeze_batch_size |                                    False|
|      Unfreeze_batch_size |                                       16|
|             Freeze_Train |                                    False|
|                  Init_lr |                                    0.001|
|                   Min_lr 

  with autocast():
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  with torch.cuda.amp.autocast(enabled=False):
Epoch 1/50: 100%|██████████| 57/57 [02:40<00:00,  2.82s/it, loss=472, lr=5e-5]


Finish Train
Start Validation


Epoch 1/50: 100%|██████████| 6/6 [00:21<00:00,  3.52s/it, val_loss=63.8]


Finish Validation
Epoch:1/50
Total Loss: 472.299 || Val Loss: 63.834 
Save best model to best_epoch_weights.pth
Start Train


Epoch 2/50: 100%|██████████| 57/57 [00:12<00:00,  4.43it/s, loss=14.4, lr=0.000122]


Finish Train
Start Validation


Epoch 2/50: 100%|██████████| 6/6 [00:03<00:00,  1.71it/s, val_loss=14.6]


Finish Validation
Epoch:2/50
Total Loss: 14.440 || Val Loss: 14.639 
Save best model to best_epoch_weights.pth
Start Train


Epoch 3/50: 100%|██████████| 57/57 [00:12<00:00,  4.41it/s, loss=13.2, lr=0.000338]


Finish Train
Start Validation


Epoch 3/50: 100%|██████████| 6/6 [00:01<00:00,  4.48it/s, val_loss=14.2]


Finish Validation
Epoch:3/50
Total Loss: 13.229 || Val Loss: 14.231 
Save best model to best_epoch_weights.pth
Start Train


Epoch 4/50: 100%|██████████| 57/57 [00:13<00:00,  4.33it/s, loss=12.3, lr=0.0005]


Finish Train
Start Validation


Epoch 4/50: 100%|██████████| 6/6 [00:01<00:00,  4.53it/s, val_loss=12]


Finish Validation
Epoch:4/50
Total Loss: 12.305 || Val Loss: 12.008 
Save best model to best_epoch_weights.pth
Start Train


Epoch 5/50: 100%|██████████| 57/57 [00:12<00:00,  4.46it/s, loss=11.5, lr=0.000499]


Finish Train
Start Validation


Epoch 5/50: 100%|██████████| 6/6 [00:01<00:00,  4.58it/s, val_loss=11.1]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 22.79it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.02s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 6/50: 100%|██████████| 57/57 [00:12<00:00,  4.50it/s, loss=11.1, lr=0.000498]


Finish Train
Start Validation


Epoch 6/50: 100%|██████████| 6/6 [00:01<00:00,  4.25it/s, val_loss=10.9]


Finish Validation
Epoch:6/50
Total Loss: 11.141 || Val Loss: 10.894 
Save best model to best_epoch_weights.pth
Start Train


Epoch 7/50: 100%|██████████| 57/57 [00:12<00:00,  4.57it/s, loss=10.9, lr=0.000496]


Finish Train
Start Validation


Epoch 7/50: 100%|██████████| 6/6 [00:01<00:00,  4.36it/s, val_loss=10.8]


Finish Validation
Epoch:7/50
Total Loss: 10.930 || Val Loss: 10.847 
Save best model to best_epoch_weights.pth
Start Train


Epoch 8/50: 100%|██████████| 57/57 [00:12<00:00,  4.55it/s, loss=10.7, lr=0.000494]


Finish Train
Start Validation


Epoch 8/50: 100%|██████████| 6/6 [00:01<00:00,  4.42it/s, val_loss=10.4]


Finish Validation
Epoch:8/50
Total Loss: 10.685 || Val Loss: 10.384 
Save best model to best_epoch_weights.pth
Start Train


Epoch 9/50: 100%|██████████| 57/57 [00:12<00:00,  4.40it/s, loss=10.4, lr=0.000491]


Finish Train
Start Validation


Epoch 9/50: 100%|██████████| 6/6 [00:01<00:00,  4.31it/s, val_loss=10.7]


Finish Validation
Epoch:9/50
Total Loss: 10.440 || Val Loss: 10.662 
Start Train


Epoch 10/50: 100%|██████████| 57/57 [00:13<00:00,  4.38it/s, loss=10.2, lr=0.000487]


Finish Train
Start Validation


Epoch 10/50: 100%|██████████| 6/6 [00:01<00:00,  4.61it/s, val_loss=10.3]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 23.41it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.03s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.018
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.002
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.009
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.020
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.030
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 11/50: 100%|██████████| 57/57 [00:13<00:00,  4.31it/s, loss=10.2, lr=0.000483]


Finish Train
Start Validation


Epoch 11/50: 100%|██████████| 6/6 [00:01<00:00,  4.17it/s, val_loss=10.1]


Finish Validation
Epoch:11/50
Total Loss: 10.153 || Val Loss: 10.069 
Save best model to best_epoch_weights.pth
Start Train


Epoch 12/50: 100%|██████████| 57/57 [00:12<00:00,  4.49it/s, loss=9.8, lr=0.000479]


Finish Train
Start Validation


Epoch 12/50: 100%|██████████| 6/6 [00:01<00:00,  4.68it/s, val_loss=9.74]


Finish Validation
Epoch:12/50
Total Loss: 9.798 || Val Loss: 9.739 
Save best model to best_epoch_weights.pth
Start Train


Epoch 13/50: 100%|██████████| 57/57 [00:12<00:00,  4.43it/s, loss=9.65, lr=0.000474]


Finish Train
Start Validation


Epoch 13/50: 100%|██████████| 6/6 [00:01<00:00,  4.07it/s, val_loss=9.95]


Finish Validation
Epoch:13/50
Total Loss: 9.654 || Val Loss: 9.951 
Start Train


Epoch 14/50: 100%|██████████| 57/57 [00:12<00:00,  4.53it/s, loss=9.6, lr=0.000468]


Finish Train
Start Validation


Epoch 14/50: 100%|██████████| 6/6 [00:01<00:00,  4.58it/s, val_loss=9.77]


Finish Validation
Epoch:14/50
Total Loss: 9.599 || Val Loss: 9.765 
Start Train


Epoch 15/50: 100%|██████████| 57/57 [00:12<00:00,  4.43it/s, loss=9.51, lr=0.000462]


Finish Train
Start Validation


Epoch 15/50: 100%|██████████| 6/6 [00:01<00:00,  4.19it/s, val_loss=9.8]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 23.59it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.03s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.019
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.051
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.009
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.054
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.019
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.058
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.088
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 16/50: 100%|██████████| 57/57 [00:13<00:00,  4.37it/s, loss=9.35, lr=0.000455]


Finish Train
Start Validation


Epoch 16/50: 100%|██████████| 6/6 [00:01<00:00,  4.53it/s, val_loss=9.17]


Finish Validation
Epoch:16/50
Total Loss: 9.348 || Val Loss: 9.170 
Save best model to best_epoch_weights.pth
Start Train


Epoch 17/50: 100%|██████████| 57/57 [00:12<00:00,  4.56it/s, loss=9.23, lr=0.000448]


Finish Train
Start Validation


Epoch 17/50: 100%|██████████| 6/6 [00:01<00:00,  4.23it/s, val_loss=8.97]


Finish Validation
Epoch:17/50
Total Loss: 9.232 || Val Loss: 8.973 
Save best model to best_epoch_weights.pth
Start Train


Epoch 18/50: 100%|██████████| 57/57 [00:12<00:00,  4.40it/s, loss=9.15, lr=0.000441]


Finish Train
Start Validation


Epoch 18/50: 100%|██████████| 6/6 [00:01<00:00,  4.20it/s, val_loss=9.24]


Finish Validation
Epoch:18/50
Total Loss: 9.145 || Val Loss: 9.238 
Start Train


Epoch 19/50: 100%|██████████| 57/57 [00:12<00:00,  4.51it/s, loss=9.01, lr=0.000434]


Finish Train
Start Validation


Epoch 19/50: 100%|██████████| 6/6 [00:01<00:00,  4.53it/s, val_loss=9.26]


Finish Validation
Epoch:19/50
Total Loss: 9.013 || Val Loss: 9.261 
Start Train


Epoch 20/50: 100%|██████████| 57/57 [00:12<00:00,  4.47it/s, loss=9.06, lr=0.000426]


Finish Train
Start Validation


Epoch 20/50: 100%|██████████| 6/6 [00:01<00:00,  4.67it/s, val_loss=8.72]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 22.70it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.03s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.043
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.134
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.010
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.075
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.042
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.083
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.103
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 21/50: 100%|██████████| 57/57 [00:12<00:00,  4.50it/s, loss=8.99, lr=0.000418]


Finish Train
Start Validation


Epoch 21/50: 100%|██████████| 6/6 [00:01<00:00,  4.19it/s, val_loss=8.96]


Finish Validation
Epoch:21/50
Total Loss: 8.986 || Val Loss: 8.956 
Start Train


Epoch 22/50: 100%|██████████| 57/57 [00:13<00:00,  4.36it/s, loss=8.89, lr=0.000409]


Finish Train
Start Validation


Epoch 22/50: 100%|██████████| 6/6 [00:01<00:00,  4.69it/s, val_loss=8.88]


Finish Validation
Epoch:22/50
Total Loss: 8.894 || Val Loss: 8.878 
Start Train


Epoch 23/50: 100%|██████████| 57/57 [00:12<00:00,  4.48it/s, loss=8.72, lr=0.000401]


Finish Train
Start Validation


Epoch 23/50: 100%|██████████| 6/6 [00:01<00:00,  4.12it/s, val_loss=8.84]


Finish Validation
Epoch:23/50
Total Loss: 8.723 || Val Loss: 8.837 
Start Train


Epoch 24/50: 100%|██████████| 57/57 [00:12<00:00,  4.51it/s, loss=8.78, lr=0.000392]


Finish Train
Start Validation


Epoch 24/50: 100%|██████████| 6/6 [00:01<00:00,  4.26it/s, val_loss=8.72]


Finish Validation
Epoch:24/50
Total Loss: 8.777 || Val Loss: 8.715 
Save best model to best_epoch_weights.pth
Start Train


Epoch 25/50: 100%|██████████| 57/57 [00:12<00:00,  4.47it/s, loss=8.79, lr=0.000384]


Finish Train
Start Validation


Epoch 25/50: 100%|██████████| 6/6 [00:01<00:00,  4.41it/s, val_loss=8.73]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 22.06it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.05s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.051
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.129
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.045
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.063
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.049
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.113
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.129
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 26/50: 100%|██████████| 57/57 [00:12<00:00,  4.47it/s, loss=8.62, lr=0.000375]


Finish Train
Start Validation


Epoch 26/50: 100%|██████████| 6/6 [00:01<00:00,  4.39it/s, val_loss=8.6]


Finish Validation
Epoch:26/50
Total Loss: 8.618 || Val Loss: 8.604 
Save best model to best_epoch_weights.pth
Start Train


Epoch 27/50: 100%|██████████| 57/57 [00:12<00:00,  4.49it/s, loss=8.56, lr=0.000366]


Finish Train
Start Validation


Epoch 27/50: 100%|██████████| 6/6 [00:01<00:00,  4.27it/s, val_loss=8.46]


Finish Validation
Epoch:27/50
Total Loss: 8.558 || Val Loss: 8.456 
Save best model to best_epoch_weights.pth
Start Train


Epoch 28/50: 100%|██████████| 57/57 [00:12<00:00,  4.48it/s, loss=8.74, lr=0.000358]


Finish Train
Start Validation


Epoch 28/50: 100%|██████████| 6/6 [00:01<00:00,  4.12it/s, val_loss=8.23]


Finish Validation
Epoch:28/50
Total Loss: 8.745 || Val Loss: 8.229 
Save best model to best_epoch_weights.pth
Start Train


Epoch 29/50: 100%|██████████| 57/57 [00:12<00:00,  4.54it/s, loss=8.59, lr=0.000349]


Finish Train
Start Validation


Epoch 29/50: 100%|██████████| 6/6 [00:01<00:00,  4.47it/s, val_loss=8.45]


Finish Validation
Epoch:29/50
Total Loss: 8.591 || Val Loss: 8.455 
Start Train


Epoch 30/50: 100%|██████████| 57/57 [00:12<00:00,  4.61it/s, loss=8.42, lr=0.000341]


Finish Train
Start Validation


Epoch 30/50: 100%|██████████| 6/6 [00:01<00:00,  4.24it/s, val_loss=8.39]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 21.77it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.04s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.075
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.174
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.044
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.082
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.078
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.145
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.150
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 31/50: 100%|██████████| 57/57 [00:12<00:00,  4.51it/s, loss=8.42, lr=0.000332]


Finish Train
Start Validation


Epoch 31/50: 100%|██████████| 6/6 [00:01<00:00,  4.48it/s, val_loss=8.49]


Finish Validation
Epoch:31/50
Total Loss: 8.421 || Val Loss: 8.495 
Start Train


Epoch 32/50: 100%|██████████| 57/57 [00:12<00:00,  4.52it/s, loss=8.28, lr=0.000324]


Finish Train
Start Validation


Epoch 32/50: 100%|██████████| 6/6 [00:01<00:00,  4.33it/s, val_loss=8.21]


Finish Validation
Epoch:32/50
Total Loss: 8.279 || Val Loss: 8.211 
Save best model to best_epoch_weights.pth
Start Train


Epoch 33/50: 100%|██████████| 57/57 [00:12<00:00,  4.42it/s, loss=8.2, lr=0.000316]


Finish Train
Start Validation


Epoch 33/50: 100%|██████████| 6/6 [00:01<00:00,  4.34it/s, val_loss=8.36]


Finish Validation
Epoch:33/50
Total Loss: 8.196 || Val Loss: 8.364 
Start Train


Epoch 34/50: 100%|██████████| 57/57 [00:12<00:00,  4.55it/s, loss=8.27, lr=0.000309]


Finish Train
Start Validation


Epoch 34/50: 100%|██████████| 6/6 [00:01<00:00,  4.59it/s, val_loss=8.37]


Finish Validation
Epoch:34/50
Total Loss: 8.274 || Val Loss: 8.374 
Start Train


Epoch 35/50: 100%|██████████| 57/57 [00:13<00:00,  4.38it/s, loss=8.28, lr=0.000302]


Finish Train
Start Validation


Epoch 35/50: 100%|██████████| 6/6 [00:01<00:00,  4.52it/s, val_loss=8.39]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 21.66it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.05s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.054
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.176
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.022
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.048
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.061
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.130
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.146
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 36/50: 100%|██████████| 57/57 [00:10<00:00,  5.24it/s, loss=8.49, lr=0.000295]


Finish Train
Start Validation


Epoch 36/50: 100%|██████████| 6/6 [00:01<00:00,  4.38it/s, val_loss=8.55]


Finish Validation
Epoch:36/50
Total Loss: 8.488 || Val Loss: 8.548 
Start Train


Epoch 37/50: 100%|██████████| 57/57 [00:10<00:00,  5.28it/s, loss=8.34, lr=0.000288]


Finish Train
Start Validation


Epoch 37/50: 100%|██████████| 6/6 [00:01<00:00,  4.26it/s, val_loss=8.7]


Finish Validation
Epoch:37/50
Total Loss: 8.341 || Val Loss: 8.701 
Start Train


Epoch 38/50: 100%|██████████| 57/57 [00:10<00:00,  5.26it/s, loss=8.25, lr=0.000282]


Finish Train
Start Validation


Epoch 38/50: 100%|██████████| 6/6 [00:01<00:00,  4.31it/s, val_loss=8.16]


Finish Validation
Epoch:38/50
Total Loss: 8.245 || Val Loss: 8.157 
Save best model to best_epoch_weights.pth
Start Train


Epoch 39/50: 100%|██████████| 57/57 [00:10<00:00,  5.33it/s, loss=8.22, lr=0.000276]


Finish Train
Start Validation


Epoch 39/50: 100%|██████████| 6/6 [00:01<00:00,  4.55it/s, val_loss=8.13]


Finish Validation
Epoch:39/50
Total Loss: 8.223 || Val Loss: 8.133 
Save best model to best_epoch_weights.pth
Start Train


Epoch 40/50: 100%|██████████| 57/57 [00:10<00:00,  5.44it/s, loss=8.17, lr=0.000271]


Finish Train
Start Validation


Epoch 40/50: 100%|██████████| 6/6 [00:01<00:00,  4.27it/s, val_loss=8.36]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 22.10it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.04s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.065
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.212
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.010
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.003
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.047
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.068
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.124
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.140
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 41/50: 100%|██████████| 57/57 [00:10<00:00,  5.25it/s, loss=8.07, lr=0.000267]


Finish Train
Start Validation


Epoch 41/50: 100%|██████████| 6/6 [00:01<00:00,  4.51it/s, val_loss=8.22]


Finish Validation
Epoch:41/50
Total Loss: 8.072 || Val Loss: 8.224 
Start Train


Epoch 42/50: 100%|██████████| 57/57 [00:10<00:00,  5.35it/s, loss=8.1, lr=0.000263]


Finish Train
Start Validation


Epoch 42/50: 100%|██████████| 6/6 [00:01<00:00,  4.38it/s, val_loss=7.95]


Finish Validation
Epoch:42/50
Total Loss: 8.103 || Val Loss: 7.949 
Save best model to best_epoch_weights.pth
Start Train


Epoch 43/50: 100%|██████████| 57/57 [00:10<00:00,  5.39it/s, loss=7.93, lr=0.000259]


Finish Train
Start Validation


Epoch 43/50: 100%|██████████| 6/6 [00:01<00:00,  4.57it/s, val_loss=8.06]


Finish Validation
Epoch:43/50
Total Loss: 7.926 || Val Loss: 8.057 
Start Train


Epoch 44/50: 100%|██████████| 57/57 [00:10<00:00,  5.31it/s, loss=7.93, lr=0.000256]


Finish Train
Start Validation


Epoch 44/50: 100%|██████████| 6/6 [00:01<00:00,  4.56it/s, val_loss=8.16]


Finish Validation
Epoch:44/50
Total Loss: 7.926 || Val Loss: 8.156 
Start Train


Epoch 45/50: 100%|██████████| 57/57 [00:10<00:00,  5.46it/s, loss=7.9, lr=0.000254]


Finish Train
Start Validation


Epoch 45/50: 100%|██████████| 6/6 [00:01<00:00,  4.30it/s, val_loss=8.04]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 22.80it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.30s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.04s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.091
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.237
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.012
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.123
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.074
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.164
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.168
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

Epoch 46/50: 100%|██████████| 57/57 [00:10<00:00,  5.31it/s, loss=7.73, lr=0.000252]


Finish Train
Start Validation


Epoch 46/50: 100%|██████████| 6/6 [00:01<00:00,  4.27it/s, val_loss=8]


Finish Validation
Epoch:46/50
Total Loss: 7.729 || Val Loss: 8.000 
Start Train


Epoch 47/50: 100%|██████████| 57/57 [00:10<00:00,  5.24it/s, loss=7.82, lr=0.000251]


Finish Train
Start Validation


Epoch 47/50: 100%|██████████| 6/6 [00:01<00:00,  4.56it/s, val_loss=8.23]


Finish Validation
Epoch:47/50
Total Loss: 7.821 || Val Loss: 8.225 
Start Train


Epoch 48/50: 100%|██████████| 57/57 [00:10<00:00,  5.37it/s, loss=7.65, lr=0.00025]


Finish Train
Start Validation


Epoch 48/50: 100%|██████████| 6/6 [00:01<00:00,  3.97it/s, val_loss=8.06]


Finish Validation
Epoch:48/50
Total Loss: 7.649 || Val Loss: 8.063 
Start Train


Epoch 49/50: 100%|██████████| 57/57 [00:10<00:00,  5.41it/s, loss=7.72, lr=0.00025]


Finish Train
Start Validation


Epoch 49/50: 100%|██████████| 6/6 [00:01<00:00,  4.25it/s, val_loss=7.61]


Finish Validation
Epoch:49/50
Total Loss: 7.721 || Val Loss: 7.608 
Save best model to best_epoch_weights.pth
Start Train


Epoch 50/50: 100%|██████████| 57/57 [00:10<00:00,  5.26it/s, loss=7.65, lr=0.00025]


Finish Train
Start Validation


Epoch 50/50: 100%|██████████| 6/6 [00:01<00:00,  4.44it/s, val_loss=7.95]


Finish Validation
Get map.


100%|██████████| 103/103 [00:04<00:00, 23.05it/s]


Calculate Map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.04s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.100
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.245
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.055
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.013
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.101
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.088
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.182
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.199
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= 

# **PREDIKSI HASIL TRAINING**

In [None]:
import colorsys
import os
import time

import numpy as np
import torch
import torch.nn as nn
from PIL import ImageDraw, ImageFont


class YOLO(object):
    """Inference wrapper around the YoloBody network trained earlier in this notebook.

    The class loads the weights, runs single-image detection, renders the
    detections, writes an objectness/class heatmap and exports detections in
    the text/XML layout consumed by the mAP evaluation cells.

    It relies on helpers defined elsewhere in this notebook: get_classes,
    show_config, cvtColor, resize_image, preprocess_input, YoloBody,
    decode_outputs and non_max_suppression.
    """

    # Class-wide default configuration. Instances copy it and never modify it.
    _defaults = {
        # Weights file loaded by generate().
        "model_path": '/content/drive/MyDrive/210411100054-SitiNurAini/logs_9:1/best_epoch_weights.pth',
        # Text file with one class name per line (read through get_classes).
        "classes_path": '/content/drive/MyDrive/210411100054-SitiNurAini/model_data/cxr_classes.txt',
        # Network input size; used as (height, width) when resizing.
        "input_shape": [512, 512],
        # Model variant forwarded to YoloBody.
        "phi": 's',
        # Score threshold forwarded to non_max_suppression (conf_thres).
        "confidence": 0.01,
        # IoU threshold forwarded to non_max_suppression (nms_thres).
        "nms_iou": 0.5,
        # Forwarded to resize_image: pad to keep the aspect ratio instead of stretching.
        "letterbox_image": True,
        # Move the network and the inputs to the GPU.
        "cuda": False,
    }

    # TrueType font used to render the detection labels.
    _FONT_PATH = '/content/drive/MyDrive/210411100054-SitiNurAini/model_data/arial.ttf'

    @classmethod
    def get_defaults(cls, n):
        """Return the class-level default for option ``n``.

        Unknown names do not raise; a message string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Build a detector from the class defaults overridden by ``kwargs``.

        The effective configuration is stored per instance (``self._defaults``
        shadows the class attribute), so overrides given to one instance no
        longer leak into instances created afterwards.
        """
        self._defaults = {**type(self)._defaults, **kwargs}
        self.__dict__.update(self._defaults)

        self.class_names, self.num_classes = get_classes(self.classes_path)

        # One colour per class, evenly spaced on the hue circle.
        hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]
        self.generate()

        show_config(**self._defaults)

    def generate(self, onnx=False):
        """Instantiate the network, load its weights and switch it to eval mode.

        Args:
            onnx: when True the network is left on the CPU and is not wrapped
                in DataParallel, even if ``self.cuda`` is set.
        """
        self.net = YoloBody(self.num_classes, self.phi)
        # Only map the checkpoint to the GPU when the GPU is actually going to be used.
        device = torch.device('cuda' if self.cuda and torch.cuda.is_available() else 'cpu')
        # weights_only=True restricts unpickling to tensors/containers, which is all a
        # state dict needs, and silences the torch.load FutureWarning.
        state_dict = torch.load(self.model_path, map_location=device, weights_only=True)
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()
        print('{} model, and classes loaded.'.format(self.model_path))
        if not onnx and self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

    def _to_network_input(self, image):
        """Resize an RGB PIL image and return it as a normalised (1, C, H, W) float32 array."""
        resized = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        pixels = preprocess_input(np.array(resized, dtype='float32'))
        return np.expand_dims(np.transpose(pixels, (2, 0, 1)), 0)

    def _detect(self, image):
        """Run the full detection pipeline on one image.

        Returns:
            ``(rgb_image, detections)`` where ``rgb_image`` is the result of
            cvtColor and ``detections`` is ``None`` when NMS kept nothing,
            otherwise ``(labels, scores, boxes)``. Each box is unpacked by the
            callers as (top, left, bottom, right).
        """
        # Original (height, width), taken before any conversion or resizing.
        image_shape = np.array(np.shape(image)[0:2])
        image = cvtColor(image)
        image_data = self._to_network_input(image)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            outputs = self.net(images)
            outputs = decode_outputs(outputs, self.input_shape)
            results = non_max_suppression(outputs, self.num_classes, self.input_shape,
                                          image_shape, self.letterbox_image, conf_thres=self.confidence,
                                          nms_thres=self.nms_iou)

        if results[0] is None:
            return image, None

        # Columns 0-3 hold the box, 4 * 5 is used as the score and 6 is the class index.
        top_label = np.array(results[0][:, 6], dtype='int32')
        top_conf = results[0][:, 4] * results[0][:, 5]
        top_boxes = results[0][:, :4]
        return image, (top_label, top_conf, top_boxes)

    def detect_image(self, image, crop=False, count=False):
        """Detect objects and draw the best-scoring box of every predicted class.

        Args:
            image: PIL image to run detection on.
            crop: accepted for call compatibility; not used by this implementation.
            count: accepted for call compatibility; not used by this implementation.

        Returns:
            The RGB image with boxes and labels drawn on it. If nothing is
            detected the image is returned without annotations.
        """
        image, detections = self._detect(image)
        if detections is None:
            return image
        top_label, top_conf, top_boxes = detections

        # Keep only the highest-scoring detection of each class.
        selected_indices = []
        for label in np.unique(top_label):
            label_indices = np.where(top_label == label)[0]
            selected_indices.append(label_indices[np.argmax(top_conf[label_indices])])
        if not selected_indices:
            return image
        selected_indices = np.array(selected_indices, dtype=int)
        top_label = top_label[selected_indices]
        top_conf = top_conf[selected_indices]
        top_boxes = top_boxes[selected_indices]

        # Font size and outline thickness scale with the image size.
        font = ImageFont.truetype(font=self._FONT_PATH,
                                  size=int(np.floor(3e-2 * image.size[1] + 0.5)))
        thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
        draw = ImageDraw.Draw(image)

        for class_id, score, box in zip(top_label, top_conf, top_boxes):
            class_id = int(class_id)
            predicted_class = self.class_names[class_id]
            color = self.colors[class_id]

            # Clamp the box to the image.
            top, left, bottom, right = box
            top = max(0, int(np.floor(top)))
            left = max(0, int(np.floor(left)))
            bottom = min(image.size[1], int(np.floor(bottom)))
            right = min(image.size[0], int(np.floor(right)))

            label = '{} {:.2f}'.format(predicted_class, score)
            print(label.encode('utf-8'), top, left, bottom, right)

            # textbbox returns (left, top, right, bottom) relative to the drawing origin.
            # The label needs `text_bottom` pixels of room above the box; the top offset
            # (index 1) is not the text height.
            _, _, text_right, text_bottom = draw.textbbox((0, 0), label, font)
            if top - text_bottom >= 0:
                text_origin = (left, top - text_bottom)
            else:
                text_origin = (left, top + 1)

            # Thick outline drawn as nested 1-pixel rectangles; stop before the
            # rectangle inverts on very small boxes.
            for inset in range(thickness):
                x0, y0, x1, y1 = left + inset, top + inset, right - inset, bottom - inset
                if x1 < x0 or y1 < y0:
                    break
                draw.rectangle([x0, y0, x1, y1], outline=color)

            # Filled background behind the label, then the label itself.
            right_bottom = (text_origin[0] + text_right, text_origin[1] + text_bottom)
            draw.rectangle([text_origin, right_bottom], fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)

        del draw
        return image

    def detect_heatmap(self, image, heatmap_save_path):
        """Overlay the network's per-cell confidence on the image and save the figure.

        Args:
            image: PIL image to analyse.
            heatmap_save_path: file path handed to ``plt.savefig``.

        NOTE: ``matplotlib.use('Agg')`` switches the matplotlib backend for the
        whole process, not only for this call.
        """
        import cv2
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt

        def sigmoid(x):
            return 1.0 / (1.0 + np.exp(-x))

        image = cvtColor(image)
        image_data = self._to_network_input(image)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        outputs = [output.cpu().numpy() for output in outputs]
        plt.imshow(image, alpha=1)
        plt.axis('off')
        mask = np.zeros((image.size[1], image.size[0]))
        for sub_output in outputs:
            # Channels last, first batch element only.
            sub_output = np.transpose(sub_output, [0, 2, 3, 1])[0]
            # Best class probability (channels 5+) times objectness (channel 4).
            score = np.max(sigmoid(sub_output[..., 5:]), -1) * sigmoid(sub_output[..., 4])
            # NOTE(review): the map is stretched to the full image size; with
            # letterbox_image=True the padding is not removed first - confirm alignment.
            score = cv2.resize(score, (image.size[0], image.size[1]))
            normed_score = (score * 255).astype('uint8')
            mask = np.maximum(mask, normed_score)

        plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")

        plt.axis('off')
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.savefig(heatmap_save_path, dpi=200)
        print("Save to the " + heatmap_save_path)
        plt.cla()

    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write the detections of one image for the mAP evaluation.

        ``<map_out_path>/detection-results/<image_id>.txt`` receives one line
        per detection: ``class score left top right bottom``. The same
        detections are also written to ``<map_out_path>/xmls/<image_id>.xml``.
        When nothing is detected the text file is left empty and no XML file
        is written.

        Args:
            image_id: file stem used for both output files.
            image: PIL image to run detection on.
            class_names: classes to export; detections of other classes are skipped.
            map_out_path: output directory that already contains the
                ``detection-results`` and ``xmls`` sub-directories.
        """
        result_path = os.path.join(map_out_path, "detection-results/" + image_id + ".txt")
        objects = []
        # The context manager closes the file on every path, including the early return.
        with open(result_path, "w") as f:
            _, detections = self._detect(image)
            if detections is None:
                return
            top_label, top_conf, top_boxes = detections

            for class_id, confidence, box in zip(top_label, top_conf, top_boxes):
                predicted_class = self.class_names[int(class_id)]
                if predicted_class not in class_names:
                    continue
                top, left, bottom, right = box
                row = [predicted_class, str(confidence)[:6],
                       str(int(left)), str(int(top)), str(int(right)), str(int(bottom))]
                f.write("%s %s %s %s %s %s\n" % tuple(row))
                objects.append(row)

        save_path = os.path.join(map_out_path, "xmls/" + image_id + ".xml")
        self.create_xml(objects, save_path)
        return

    def create_xml(self, objects, save_path):
        """Write ``objects`` to ``save_path`` as a VOC-style annotation file.

        Args:
            objects: iterable of ``[name, score, xmin, ymin, xmax, ymax]``.
                The score is stored in the ``<truncated>`` element; the
                coordinates are passed through ``int()``.
            save_path: destination file, written as UTF-8.
        """
        lines = [
            '<annotation>\n',
            '   <folder>folder</folder>\n',
            '   <filename></filename>\n',
            '   <source>\n',
            '   <database>XinQiao</database>\n',
            '   </source>\n',
            '   <size>\n',
            '     <width>0</width>\n',
            '     <height>0</height>\n',
            '     <depth>3</depth>\n',
            '   </size>\n',
            '   <segmented>0</segmented>\n',
        ]
        for ob in objects:
            lines.extend([
                '   <object>\n',
                f'      <name>{ob[0]}</name>\n',
                '      <pose>Unspecified</pose>\n',
                f'      <truncated>{ob[1]}</truncated>\n',
                '      <difficult>0</difficult>\n',
                '      <bndbox>\n',
                f'         <xmin>{int(ob[2])}</xmin>\n',
                f'         <ymin>{int(ob[3])}</ymin>\n',
                f'         <xmax>{int(ob[4])}</xmax>\n',
                f'         <ymax>{int(ob[5])}</ymax>\n',
                '      </bndbox>\n',
                '   </object>\n',
            ])
        lines.append('</annotation>')

        with open(save_path, 'w', encoding="utf-8") as xml_file:
            xml_file.writelines(lines)

## predict

In [None]:
import time
from PIL import Image

# from yolo import YOLO

if __name__ == "__main__":
    # Single-image prediction: load the detector with its default configuration,
    # ask for one image path, run detection once and show the annotated result.
    yolo = YOLO()

    # Flags forwarded to detect_image; both are left disabled here.
    crop  = False
    count = False

    # Ask for the image to process.
    img = input('Input image filename: ')
    try:
        image = Image.open(img)
    except OSError as err:
        # OSError covers a missing or unreadable file as well as PIL's
        # UnidentifiedImageError; anything else is a real bug and should surface.
        print('Open Error! Try again!')
        print('Reason:', err)
    else:
        # Run detection; the program stops after this first image.
        r_image = yolo.detect_image(image, crop=crop, count=count)
        r_image.show()


  self.net.load_state_dict(torch.load(self.model_path, map_location=device))


/content/drive/MyDrive/210411100054-SitiNurAini/logs_9:1/best_epoch_weights.pth model, and classes loaded.
Configurations:
----------------------------------------------------------------------
|                     keys |                                   values|
----------------------------------------------------------------------
|               model_path | /content/drive/MyDrive/210411100054-SitiNurAini/logs_9:1/best_epoch_weights.pth|
|             classes_path | /content/drive/MyDrive/210411100054-SitiNurAini/model_data/cxr_classes.txt|
|              input_shape |                               [512, 512]|
|                      phi |                                        s|
|               confidence |                                     0.01|
|                  nms_iou |                                      0.5|
|          letterbox_image |                                     True|
|                     cuda |                                    False|
-----------------------

# **EVALUASI**

In [None]:
import os
import xml.etree.ElementTree as ET
from collections import defaultdict

# Count how many annotated objects of each class the test split contains.

# Step 1: read the image ids of the test split (one id per line).
test_file_path = '/content/drive/MyDrive/210411100054-SitiNurAini/DATASET-CXR-AL3/ImageSets/Main/test_9:1.txt'
with open(test_file_path, 'r') as file:
    test_ids = file.read().splitlines()

# Step 2: directory that holds one XML annotation file per image id.
xml_dir_path = '/content/drive/MyDrive/210411100054-SitiNurAini/DATASET-CXR-AL3/annotations/'

# Maps class name -> number of annotated objects.
class_count = defaultdict(int)

# Step 3: tally the objects of every annotation file listed in the split.
for image_id in test_ids:
    xml_file_name = f"{image_id}.xml"
    xml_file_path = os.path.join(xml_dir_path, xml_file_name)

    # Ids without an annotation file are skipped.
    if not os.path.exists(xml_file_path):
        continue

    root = ET.parse(xml_file_path).getroot()

    # Count every <object>, not just the first one; a file without objects
    # simply contributes nothing instead of raising AttributeError.
    for name_node in root.findall('object/name'):
        if name_node.text:
            class_count[name_node.text] += 1

# Print the number of objects per class.
print("\nJumlah setiap kelas:")
for class_name, count in class_count.items():
    print(f"{class_name}: {count}")


Jumlah setiap kelas:
Cardiomegaly: 43
Nodule_Mass: 39
Pneumothorax: 33


## **MODE 0 = MODE 1 + MODE 2**

In [None]:
import datetime
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm


if __name__ == "__main__":
    # Stage selector for the mAP evaluation:
    #   0 -> stages 1 and 2, 1 -> write predictions, 2 -> write ground truth,
    #   3 -> get_map, 4 -> get_coco_map.
    map_mode = 0

    classes_path = '/content/drive/MyDrive/210411100054-SitiNurAini/model_data/cxr_classes.txt'

    # Passed to get_map in stage 3 and embedded in the output folder name.
    MINOVERLAP = 0.5

    # Detector thresholds forwarded to YOLO(...).
    confidence = 0.01
    nms_iou = 0.5

    # When True, every evaluated image is also copied into images-optional/.
    map_vis = True

    base_output_dir = '/content/drive/MyDrive/210411100054-SitiNurAini/'

    VOCdevkit_path = '/content/drive/MyDrive/210411100054-SitiNurAini/DATASET-CXR-AL3'
    txtNmae = "test_9:1"

    map_out_path = os.path.join(base_output_dir, f'runs_9:1/2024_{txtNmae}_iou={MINOVERLAP}')

    # Image ids of the evaluated split; the file is closed as soon as it is read.
    with open(os.path.join(VOCdevkit_path, "ImageSets/Main/%s.txt" % txtNmae)) as ids_file:
        image_ids = ids_file.read().strip().split()

    # exist_ok makes the cell re-runnable; creating a sub-directory also creates map_out_path.
    for sub_dir in ('ground-truth', 'detection-results', 'images-optional', 'xmls'):
        os.makedirs(os.path.join(map_out_path, sub_dir), exist_ok=True)
    print("Folder runs_9:1 telah dibuat di:", map_out_path)

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        yolo = YOLO(confidence=confidence, nms_iou=nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path = os.path.join(VOCdevkit_path, "JPGImages/" + image_id + ".jpg")
            # Close each image file once its predictions are written.
            with Image.open(image_path) as image:
                if map_vis:
                    image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
                yolo.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result | MODE 1 done.")

    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/" + image_id + ".txt"), "w") as new_f:
                root = ET.parse(
                    os.path.join(VOCdevkit_path, "annotations/" + image_id + ".xml")).getroot()
                for obj in root.findall('object'):
                    # Compare against None explicitly: an Element without children is falsy.
                    difficult_node = obj.find('difficult')
                    difficult_flag = difficult_node is not None and int(difficult_node.text) == 1

                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
                    bndbox = obj.find('bndbox')
                    left = bndbox.find('xmin').text
                    top = bndbox.find('ymin').text
                    right = bndbox.find('xmax').text
                    bottom = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result | MODE 2 done.")

    if map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, False, path=map_out_path)
        print("Get map done | MODE 3.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names=class_names, path=map_out_path)
        print("Get map | MODE 4 done.")

Folder runs_9:1 telah dibuat di: /content/drive/MyDrive/210411100054-SitiNurAini/runs_9:1/2024_test_9:1_iou=0.5
Load model.


  self.net.load_state_dict(torch.load(self.model_path, map_location=device))


/content/drive/MyDrive/210411100054-SitiNurAini/logs_9:1/best_epoch_weights.pth model, and classes loaded.
Configurations:
----------------------------------------------------------------------
|                     keys |                                   values|
----------------------------------------------------------------------
|               model_path | /content/drive/MyDrive/210411100054-SitiNurAini/logs_9:1/best_epoch_weights.pth|
|             classes_path | /content/drive/MyDrive/210411100054-SitiNurAini/model_data/cxr_classes.txt|
|              input_shape |                               [512, 512]|
|                      phi |                                        s|
|               confidence |                                     0.01|
|                  nms_iou |                                      0.5|
|          letterbox_image |                                     True|
|                     cuda |                                    False|
-----------------------

100%|██████████| 115/115 [00:23<00:00,  4.83it/s]


Get predict result | MODE 1 done.
Get ground truth result.


100%|██████████| 115/115 [00:01<00:00, 94.33it/s]

Get ground truth result | MODE 2 done.





1. hijau = TP
2. merah = FP
3. biru = GT

In [None]:
import warnings
# Ignore UserWarning messages whose originating module name matches "matplotlib".
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")


## **MODE 3**

In [None]:
import datetime
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm


if __name__ == "__main__":
    # Stage selector for the mAP evaluation:
    #   0 -> stages 1 and 2, 1 -> write predictions, 2 -> write ground truth,
    #   3 -> get_map, 4 -> get_coco_map.
    map_mode = 3

    classes_path = '/content/drive/MyDrive/210411100054-SitiNurAini/model_data/cxr_classes.txt'

    # Passed to get_map in stage 3 and embedded in the output folder name.
    MINOVERLAP = 0.5

    # Detector thresholds forwarded to YOLO(...).
    confidence = 0.01
    nms_iou = 0.5

    # When True, every evaluated image is also copied into images-optional/.
    map_vis = True

    base_output_dir = '/content/drive/MyDrive/210411100054-SitiNurAini/'

    VOCdevkit_path = '/content/drive/MyDrive/210411100054-SitiNurAini/DATASET-CXR-AL3'
    txtNmae = "test_9:1"

    map_out_path = os.path.join(base_output_dir, f'runs_9:1/2024_{txtNmae}_iou={MINOVERLAP}')

    # Image ids of the evaluated split; the file is closed as soon as it is read.
    with open(os.path.join(VOCdevkit_path, "ImageSets/Main/%s.txt" % txtNmae)) as ids_file:
        image_ids = ids_file.read().strip().split()

    # exist_ok makes the cell re-runnable; creating a sub-directory also creates map_out_path.
    for sub_dir in ('ground-truth', 'detection-results', 'images-optional', 'xmls'):
        os.makedirs(os.path.join(map_out_path, sub_dir), exist_ok=True)
    print("Folder runs_9:1 telah dibuat di:", map_out_path)

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        yolo = YOLO(confidence=confidence, nms_iou=nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path = os.path.join(VOCdevkit_path, "JPGImages/" + image_id + ".jpg")
            # Close each image file once its predictions are written.
            with Image.open(image_path) as image:
                if map_vis:
                    image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
                yolo.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result | MODE 1 done.")

    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/" + image_id + ".txt"), "w") as new_f:
                root = ET.parse(
                    os.path.join(VOCdevkit_path, "annotations/" + image_id + ".xml")).getroot()
                for obj in root.findall('object'):
                    # Compare against None explicitly: an Element without children is falsy.
                    difficult_node = obj.find('difficult')
                    difficult_flag = difficult_node is not None and int(difficult_node.text) == 1

                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
                    bndbox = obj.find('bndbox')
                    left = bndbox.find('xmin').text
                    top = bndbox.find('ymin').text
                    right = bndbox.find('xmax').text
                    bottom = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result | MODE 2 done.")

    if map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, False, path=map_out_path)
        print("Get map done | MODE 3.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names=class_names, path=map_out_path)
        print("Get map | MODE 4 done.")

Output hidden; open in https://colab.research.google.com to view.

## **MODE 4**

In [None]:
import datetime
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm


if __name__ == "__main__":
    # Evaluation driver for this cell. `map_mode` selects which stages run:
    #   0 -> write detection results AND ground-truth files
    #   1 -> write detection results only
    #   2 -> write ground-truth files only
    #   3 -> call get_map(...) on the files already present in map_out_path
    #   4 -> call get_coco_map(...) on the files already present in map_out_path
    map_mode = 4

    # Text file listing the class names, parsed by get_classes().
    classes_path = '/content/drive/MyDrive/210411100054-SitiNurAini/model_data/cxr_classes.txt'

    # Passed to get_map() in mode 3; also embedded in the output folder name.
    MINOVERLAP = 0.5

    # Forwarded to the YOLO constructor in modes 0/1.
    confidence = 0.01

    # Forwarded to the YOLO constructor in modes 0/1.
    nms_iou = 0.5

    # When True, every evaluated image is also saved under images-optional/.
    map_vis = True

    base_output_dir = '/content/drive/MyDrive/210411100054-SitiNurAini/'

    VOCdevkit_path = '/content/drive/MyDrive/210411100054-SitiNurAini/DATASET-CXR-AL3'
    txtNmae = "test_9:1"

    map_out_path = os.path.join(base_output_dir, f'runs_9:1/2024_{txtNmae}_iou={MINOVERLAP}')

    # Format only the file-name template (not the joined path), so a '%' in
    # the dataset directory cannot break the substitution, and close the
    # handle deterministically instead of leaking it.
    image_ids_path = os.path.join(VOCdevkit_path, "ImageSets/Main/%s.txt" % txtNmae)
    with open(image_ids_path) as ids_file:
        image_ids = ids_file.read().strip().split()

    # exist_ok=True replaces the check-then-create pattern.
    os.makedirs(map_out_path, exist_ok=True)
    print("Folder runs_9:1 telah dibuat di:", map_out_path)

    for sub_dir in ('ground-truth', 'detection-results', 'images-optional', 'xmls'):
        os.makedirs(os.path.join(map_out_path, sub_dir), exist_ok=True)

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        yolo = YOLO(confidence=confidence, nms_iou=nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path = os.path.join(VOCdevkit_path, "JPGImages/" + image_id + ".jpg")
            # Context manager releases the underlying file once this image
            # has been processed, rather than keeping one handle per image.
            with Image.open(image_path) as image:
                if map_vis:
                    image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
                yolo.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result | MODE 1 done.")

    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/" + image_id + ".txt"), "w") as new_f:
                root = ET.parse(
                    os.path.join(VOCdevkit_path, "annotations/" + image_id + ".xml")).getroot()
                for obj in root.findall('object'):
                    # Look the element up once and compare by identity:
                    # an Element without children is falsy, so only
                    # `is not None` reliably tells "present" from "absent".
                    difficult_node = obj.find('difficult')
                    difficult_flag = (difficult_node is not None
                                      and int(difficult_node.text) == 1)

                    obj_name = obj.find('name').text
                    # Objects whose class is not in class_names are skipped.
                    if obj_name not in class_names:
                        continue

                    bndbox = obj.find('bndbox')
                    left = bndbox.find('xmin').text
                    top = bndbox.find('ymin').text
                    right = bndbox.find('xmax').text
                    bottom = bndbox.find('ymax').text

                    # One line per object: "<name> <xmin> <ymin> <xmax> <ymax>",
                    # with a trailing " difficult" marker when flagged.
                    difficult_suffix = " difficult" if difficult_flag else ""
                    new_f.write("%s %s %s %s %s%s\n"
                                % (obj_name, left, top, right, bottom, difficult_suffix))
        print("Get ground truth result | MODE 2 done.")

    if map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, False, path=map_out_path)
        print("Get map done | MODE 3.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names=class_names, path=map_out_path)
        print("Get map | MODE 4 done.")

Folder runs_9:1 telah dibuat di: /content/drive/MyDrive/210411100054-SitiNurAini/runs_9:1/2024_test_9:1_iou=0.5
Get map.
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.10s).
Accumulating evaluation results...
DONE (t=0.05s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.094
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.064
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.136
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.099
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.164
 Average Recall     (AR)