In [1]:
# CÀI ĐẶT & CẤU HÌNH (SETUP & CONFIGURATION)

# 0.1. Cài đặt các thư viện cần thiết
!pip install -q fpdf2 noisereduce librosa tensorflow scikit-learn matplotlib seaborn pytz PyDrive2

# 0.2. Import thư viện
import os
import glob
import random
import datetime
import pytz
import shutil
import joblib
import zipfile
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from fpdf import FPDF
from tqdm import tqdm
import librosa
import noisereduce as nr
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D, AveragePooling2D
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.metrics import classification_report, confusion_matrix
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from kaggle_secrets import UserSecretsClient
from oauth2client.service_account import ServiceAccountCredentials
from tensorflow.keras.regularizers import l2
from kaggle_datasets import KaggleDatasets
try:
    # Cố gắng kết nối với TPU bằng cách chỉ định tpu='local'
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')
    print('Đã tìm thấy TPU Resolver.')
    
    # Kết nối và khởi tạo hệ thống TPU
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    print('Đã khởi tạo hệ thống TPU.')

    # Tạo strategy
    strategy = tf.distribute.TPUStrategy(tpu)
    print('THÀNH CÔNG: Đã tạo TPUStrategy!')
    print(f'Số lượng nhân (replicas): {strategy.num_replicas_in_sync}')
    
except (ValueError, RuntimeError) as e:
    # Nếu vẫn không tìm thấy TPU, tự động chuyển về chiến lược mặc định
    print(f'Lỗi kết nối TPU: {e}')
    print('Không tìm thấy TPU. Sử dụng chiến lược mặc định cho GPU/CPU.')
    strategy = tf.distribute.get_strategy()

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


  from .autonotebook import tqdm as notebook_tqdm
E0000 00:00:1756270405.633710      10 common_lib.cc:612] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="local"
=== Source Location Trace: === 
learning/45eac/tfrc/runtime/common_lib.cc:230


Đã tìm thấy TPU Resolver.
INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.
INFO:tensorflow:Initializing the TPU system: local


I0000 00:00:1756270423.501540      10 service.cc:148] XLA service 0x5a7224f9d550 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1756270423.501616      10 service.cc:156]   StreamExecutor device (0): TPU, 2a886c8
I0000 00:00:1756270423.501622      10 service.cc:156]   StreamExecutor device (1): TPU, 2a886c8
I0000 00:00:1756270423.501625      10 service.cc:156]   StreamExecutor device (2): TPU, 2a886c8
I0000 00:00:1756270423.501628      10 service.cc:156]   StreamExecutor device (3): TPU, 2a886c8
I0000 00:00:1756270423.501630      10 service.cc:156]   StreamExecutor device (4): TPU, 2a886c8
I0000 00:00:1756270423.501633      10 service.cc:156]   StreamExecutor device (5): TPU, 2a886c8
I0000 00:00:1756270423.501636      10 service.cc:156]   StreamExecutor device (6): TPU, 2a886c8
I0000 00:00:1756270423.501639      10 service.cc:156]   StreamExecutor device (7): TPU, 2a886c8


INFO:tensorflow:Finished initializing TPU system.
Đã khởi tạo hệ thống TPU.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:2, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:3, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:4, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/

In [2]:
# THIẾT LẬP CẤU HÌNH 

# --- Các cấu hình cơ bản ---
SEED = 42
def set_seed(seed_value):
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    #os.environ['TF_DETERMINISTIC_OPS'] = '1'
    random.seed(seed_value)
    np.random.seed(seed_value)
    tf.random.set_seed(seed_value)
set_seed(SEED)

KAGGLE_PROCESSED_DATA_PATH = "/kaggle/input/ngt-spectrogram-id/"
KAGGLE_OUTPUT_PATH = "/kaggle/working/output_results"
os.makedirs(KAGGLE_OUTPUT_PATH, exist_ok=True)

CLASSES_TO_TRAIN = ['covid', 'asthma', 'healthy', 'tuberculosis']
ALL_CLASSES = ['healthy', 'asthma', 'covid', 'tuberculosis']
N_SPLITS = 5
TEST_SPLIT_RATIO = 0.15
USE_DATA_AUGMENTATION = False # Bật/tắt augmentation ở đây
USE_FOCAL_LOSS = True

MODEL_ID = f'ResNet50V2_CV_TPU'
EPOCHS = 50
EARLY_STOPPING_PATIENCE = 7
MIN_DELTA = 1e-4
SHUFFLE_BUFFER_SIZE = 512 

# --- ĐỊNH NGHĨA BATCH SIZE ---
# BATCH_SIZE này là batch size cho mỗi nhân TPU (per-replica)
BATCH_SIZE = 16
# Tính toán GLOBAL_BATCH_SIZE để dùng trong pipeline
# Biến 'strategy' được lấy từ ô code đầu tiên
GLOBAL_BATCH_SIZE = BATCH_SIZE * strategy.num_replicas_in_sync
print(f"Batch size mỗi nhân: {BATCH_SIZE}")
print(f"Global batch size (tổng cộng): {GLOBAL_BATCH_SIZE}")

LEARNING_RATE = 3e-5
# INPUT_SHAPE sẽ được cập nhật lại ở ô chuẩn bị dữ liệu
INPUT_SHAPE = (256, 126, 3)

Batch size mỗi nhân: 16
Global batch size (tổng cộng): 128


In [3]:
# KHỞI TẠO CÁC HÀM CẦN THIẾT (PHIÊN BẢN ĐÚNG)

def get_patient_id(filepath, class_name):
    filename = os.path.basename(filepath)
    if class_name.lower() in ['asthma', 'covid', 'healthy']:
        return filename.split('_')[0]
    elif class_name.lower() == 'tuberculosis':
        return '_'.join(filename.split('_')[:-1]).replace('.npy', '')
    else:
        return filename.split('_')[0]

def parse_tfrecord_fn(example):
    """Hàm đọc và xử lý một mẫu từ file TFRecord."""
    feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }
    example = tf.io.parse_single_example(example, feature_description)
    image = tf.io.parse_tensor(example['image'], out_type=tf.float32)
    image = tf.reshape(image, (256, 126))
    image = tf.stack([image, image, image], axis=-1)
    image.set_shape(INPUT_SHAPE)
    image_shape = tf.shape(image)
    image_flat = tf.reshape(image, (1, -1))
    def _scale(data):
        scaled_data = scaler.transform(data)
        return np.nan_to_num(scaled_data).astype(np.float32)
    scaled_flat = tf.numpy_function(_scale, [image_flat], tf.float32)
    image_scaled = tf.reshape(scaled_flat, image_shape)
    label_encoded = tf.cast(example['label'], tf.int32)
    label_onehot = tf.one_hot(label_encoded, depth=len(ALL_CLASSES))
    return image_scaled, label_onehot

def augment(spectrogram, label):
    spectrogram = spec_augment(spectrogram)
    return spectrogram, label

def focal_loss_from_logits_optimized(active_indices, gamma=2.0, alpha=0.25):
    """Hàm Focal Loss phiên bản tối ưu."""
    def focal_loss_fixed(y_true, y_pred):
        y_true = tf.cast(y_true, 'float32')
        y_true_filtered = tf.gather(y_true, active_indices, axis=-1)
        y_pred_filtered = tf.gather(y_pred, active_indices, axis=-1)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_true_filtered, logits=y_pred_filtered)
        probs = tf.nn.softmax(y_pred_filtered)
        pt = tf.reduce_sum(y_true_filtered * probs, axis=-1)
        focal_term = (1.0 - pt) ** gamma
        loss = alpha * focal_term * cross_entropy
        return loss
    return focal_loss_fixed

def spec_augment(spectrogram, time_masking_para=40, frequency_masking_para=30,
                 num_time_masks=1, num_freq_masks=1):
    """
    Hàm SpecAugment đã được sửa lỗi để làm việc với tensor 4D (batch, freq, time, channels).
    """
    spectrogram_aug = spectrogram
    
    # Lấy ra các chiều để làm việc từ shape 4D
    # tf.shape(spectrogram) = [batch, freq, time, channels]
    freq_bins = tf.shape(spectrogram)[1]
    time_steps = tf.shape(spectrogram)[2]

    # --- 1. Frequency Masking ---
    for _ in range(num_freq_masks):
        f = tf.random.uniform(shape=(), minval=0, maxval=frequency_masking_para, dtype=tf.int32)
        f0 = tf.random.uniform(shape=(), minval=0, maxval=freq_bins - f, dtype=tf.int32)

        # Tạo mặt nạ 1D cho chiều tần số
        freq_mask_1d = tf.concat([
            tf.ones(shape=(f0,), dtype=spectrogram.dtype),
            tf.zeros(shape=(f,), dtype=spectrogram.dtype),
            tf.ones(shape=(freq_bins - f0 - f,), dtype=spectrogram.dtype)
        ], axis=0)
        
        # Reshape mặt nạ để broadcast qua cả 4 chiều
        # Shape sẽ là (1, freq, 1, 1) để broadcast qua batch, time, và channels
        freq_mask_4d = tf.reshape(freq_mask_1d, (1, freq_bins, 1, 1))
        spectrogram_aug = spectrogram_aug * freq_mask_4d

    # --- 2. Time Masking ---
    for _ in range(num_time_masks):
        t = tf.random.uniform(shape=(), minval=0, maxval=time_masking_para, dtype=tf.int32)
        t0 = tf.random.uniform(shape=(), minval=0, maxval=time_steps - t, dtype=tf.int32)

        # Tạo mặt nạ 1D cho chiều thời gian
        time_mask_1d = tf.concat([
            tf.ones(shape=(t0,), dtype=spectrogram.dtype),
            tf.zeros(shape=(t,), dtype=spectrogram.dtype),
            tf.ones(shape=(time_steps - t0 - t,), dtype=spectrogram.dtype)
        ], axis=0)

        # Reshape mặt nạ để broadcast qua cả 4 chiều
        # Shape sẽ là (1, 1, time, 1) để broadcast qua batch, freq, và channels
        time_mask_4d = tf.reshape(time_mask_1d, (1, 1, time_steps, 1))
        spectrogram_aug = spectrogram_aug * time_mask_4d
        
    return spectrogram_aug


def create_model(input_shape, num_classes):
    # 1. Khởi tạo mô hình gốc
    base_model = ResNet50V2(weights='imagenet', include_top=False, input_shape=input_shape)
    
    # 2. Đóng băng các lớp ban đầu
    for layer in base_model.layers[:100]:
        layer.trainable = False
    base_model.trainable = True 
    
    # 3. Xây dựng phần đầu ra
    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=True) 
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='linear', kernel_regularizer=l2(0.001))(x) 
    
    return Model(inputs, outputs)

def load_data_from_df(df):
    X, y = [], []
    for _, row in df.iterrows():
        X.append(np.load(row['filepath']))
        y.append(row['label'])
    return np.array(X), np.array(y)

def get_grad_cam(model, img_array, last_conv_layer_name, pred_index=None):
    grad_model = Model([model.inputs], [model.get_layer(last_conv_layer_name).output, model.output])
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(tf.cast(img_array, tf.float32))
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
    return heatmap.numpy()

def overlay_grad_cam(spec, heatmap, alpha=0.6):
    heatmap_resized = tf.image.resize(heatmap[..., np.newaxis], (spec.shape[0], spec.shape[1]))
    heatmap_resized = np.uint8(255 * heatmap_resized)
    jet = plt.cm.get_cmap("jet")
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap_resized.squeeze()]
    spec_display = np.stack([spec]*3, axis=-1)
    spec_display = (spec_display - spec_display.min()) / (spec_display.max() - spec_display.min())
    superimposed_img = jet_heatmap * alpha + spec_display
    superimposed_img = np.clip(superimposed_img, 0, 1)
    return superimposed_img

class PDFReport(FPDF):
    def header(self):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, 'BAO CAO KET QUA HUAN LUYEN MO HINH AI', 0, 1, 'C')
        self.ln(10)
    def footer(self):
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Trang {self.page_no()}', 0, 0, 'C')
    def chapter_title(self, title):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(5)
    def chapter_body(self, content):
        self.set_font('Arial', '', 10)
        safe_content = content.encode('latin-1', 'replace').decode('latin-1')
        self.multi_cell(0, 5, safe_content)
        self.ln()
    def add_image_section(self, title, img_path):
        self.chapter_title(title)
        if os.path.exists(img_path):
            self.image(img_path, x=None, y=None, w=180)
            self.ln(5)
        else:
            self.chapter_body(f"Khong tim thay hinh anh: {img_path}")

def authenticate_gdrive():
    user_secrets = UserSecretsClient()
    secret_value = user_secrets.get_secret("google_service_account_key")
    with open("service_account.json", "w") as f:
        f.write(secret_value)
    scope = ["https://www.googleapis.com/auth/drive"]
    gauth = GoogleAuth()
    gauth.credentials = ServiceAccountCredentials.from_json_keyfile_name("service_account.json", scope)
    drive = GoogleDrive(gauth)
    return drive

def upload_folder_to_drive(drive, folder_path, parent_folder_id):
    folder_name = os.path.basename(folder_path)
    print(f"Đang tạo thư mục '{folder_name}' trên Google Drive...")
    folder_metadata = {'title': folder_name, 'mimeType': 'application/vnd.google-apps.folder', 'parents': [{'id': parent_folder_id}]}
    folder = drive.CreateFile(folder_metadata)
    folder.Upload()
    
    print(f"Bắt đầu tải nội dung của '{folder_name}'...")
    for item in tqdm(os.listdir(folder_path), desc=f"Uploading {folder_name}"):
        item_path = os.path.join(folder_path, item)
        if os.path.isfile(item_path):
            gfile = drive.CreateFile({'title': item, 'parents': [{'id': folder['id']}]})
            gfile.SetContentFile(item_path)
            gfile.Upload(param={'supportsTeamDrives': True})
        elif os.path.isdir(item_path):
            upload_folder_to_drive(drive, item_path, folder['id'])

def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def serialize_example(image, label):
    """Creates a tf.train.Example message ready to be written to a file."""
    feature = {
        'image': _bytes_feature(tf.io.serialize_tensor(image)),
        'label': _int64_feature(label)
    }
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()

In [4]:
# CHUẨN BỊ DỮ LIỆU VÀ TẠO TFRECORD

# --- BƯỚC 1: TẢI VÀ PHÂN CHIA DỮ LIỆU BAN ĐẦU ---
print("Bắt đầu chuẩn bị và phân chia dữ liệu...")
all_files_to_split = []
for class_name in ALL_CLASSES:
    source_dir = os.path.join(KAGGLE_PROCESSED_DATA_PATH, class_name)
    if os.path.exists(source_dir):
        files = glob.glob(os.path.join(source_dir, '*.npy'))
        for f in files:
            all_files_to_split.append({'filepath': f, 'label': class_name})

all_data_df = pd.DataFrame(all_files_to_split)
all_data_df['patient_id'] = all_data_df.apply(lambda row: get_patient_id(row['filepath'], row['label']), axis=1)

print("Tách tập Test cuối cùng (Hold-out set)...")
patient_ids = all_data_df['patient_id'].unique()
np.random.shuffle(patient_ids)
test_patient_count = int(len(patient_ids) * TEST_SPLIT_RATIO)
test_patients = patient_ids[:test_patient_count]
train_val_patients = patient_ids[test_patient_count:]

test_df = all_data_df[all_data_df['patient_id'].isin(test_patients)].reset_index(drop=True)
train_val_df = all_data_df[all_data_df['patient_id'].isin(train_val_patients)].reset_index(drop=True)

print(f"Đã tách: {len(train_val_df)} mẫu cho Train/Validation (CV) và {len(test_df)} mẫu cho Test cuối cùng.")

# --- BƯỚC 2: KHỞI TẠO LABEL ENCODER VÀ STANDARD SCALER ---
le = LabelEncoder().fit(ALL_CLASSES)
# Cập nhật INPUT_SHAPE từ một file mẫu
sample_spec = np.load(train_val_df['filepath'][0])
INPUT_SHAPE = (sample_spec.shape[0], sample_spec.shape[1], 3)
print(f"Kích thước input được cập nhật: {INPUT_SHAPE}")

print("Đang fit StandardScaler...")
scaler_fit_sample_df = train_val_df.sample(n=min(len(train_val_df), 500), random_state=SEED)
scaler_fit_data = []
for filepath in scaler_fit_sample_df['filepath']:
    spec = np.load(filepath)
    # QUAN TRỌNG: Stack 3 kênh TRƯỚC KHI flatten, để khớp với pipeline
    spec_3_channels = np.stack([spec, spec, spec], axis=-1)
    scaler_fit_data.append(spec_3_channels.flatten())
scaler = StandardScaler().fit(scaler_fit_data)
print("Fit StandardScaler hoàn tất.")

# --- BƯỚC 3: CHUYỂN ĐỔI DỮ LIỆU SANG ĐỊNH DẠNG TFRECORD ---
TFRECORD_OUTPUT_PATH = "/kaggle/working/tfrecords"
os.makedirs(TFRECORD_OUTPUT_PATH, exist_ok=True)
print(f"Bắt đầu chuyển đổi dữ liệu sang TFRecord tại: {TFRECORD_OUTPUT_PATH}")

all_dfs = {'train_val': train_val_df, 'test': test_df}

for df_name, df in all_dfs.items():
    print(f"--- Đang xử lý tập {df_name} ---")
    tfrecord_path = os.path.join(TFRECORD_OUTPUT_PATH, f"{df_name}.tfrec")
    
    with tf.io.TFRecordWriter(tfrecord_path) as writer:
        # Sử dụng tqdm tiêu chuẩn để tránh lỗi ImportError
        for _, row in tqdm(df.iterrows(), total=len(df), desc=f"Creating {df_name}.tfrec"):
            spectrogram = np.load(row['filepath']).astype(np.float32)
            label_encoded = le.transform([row['label']])[0]
            
            example = serialize_example(spectrogram, label_encoded)
            writer.write(example)
            
print("\nChuyển đổi sang TFRecord hoàn tất!")

Bắt đầu chuẩn bị và phân chia dữ liệu...
Tách tập Test cuối cùng (Hold-out set)...
Đã tách: 28054 mẫu cho Train/Validation (CV) và 5030 mẫu cho Test cuối cùng.
Kích thước input được cập nhật: (256, 126, 3)
Đang fit StandardScaler...
Fit StandardScaler hoàn tất.
Bắt đầu chuyển đổi dữ liệu sang TFRecord tại: /kaggle/working/tfrecords
--- Đang xử lý tập train_val ---


Creating train_val.tfrec: 100%|██████████| 28054/28054 [02:51<00:00, 163.55it/s]


--- Đang xử lý tập test ---


Creating test.tfrec: 100%|██████████| 5030/5030 [00:30<00:00, 162.54it/s]



Chuyển đổi sang TFRecord hoàn tất!


In [None]:
# VÒNG LẶP HUẤN LUYỆN 
#tf.debugging.enable_check_numerics()
AUTOTUNE = tf.data.AUTOTUNE
    
active_indices = [le.transform([c])[0] for c in CLASSES_TO_TRAIN]

# --- LẤY ĐƯỜNG DẪN LOCAL TỚI DỮ LIỆU TFRECORD ---
# Trỏ trực tiếp đến thư mục output đã được tạo bởi ô code trước đó
LOCAL_TFRECORD_PATH = TFRECORD_OUTPUT_PATH
TRAIN_VAL_TFREC = os.path.join(LOCAL_TFRECORD_PATH, 'train_val.tfrec')
print(f"Đang đọc dữ liệu TFRecord từ đường dẫn local: {TRAIN_VAL_TFREC}")

# Tính toán các chỉ số cần thiết MỘT LẦN DUY NHẤT
active_indices = [le.transform([c])[0] for c in CLASSES_TO_TRAIN]
skf = StratifiedGroupKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
cv_data_to_split = train_val_df[train_val_df['label'].isin(CLASSES_TO_TRAIN)]
X_cv_paths = cv_data_to_split['filepath'].values
y_cv_labels = le.transform(cv_data_to_split['label'])
groups_cv = cv_data_to_split['patient_id'].values
fold_accuracies, fold_losses = [], []


# --- Bắt đầu Cross-Validation ---
for fold, (train_indices, val_indices) in enumerate(skf.split(X_cv_paths, y_cv_labels, groups_cv)):
    fold_number = fold + 1
    print("-" * 50 + f"\nBắt đầu Fold {fold_number}/{N_SPLITS}\n" + "-" * 50)
    
    # --- TẠO PIPELINE DỮ LIỆU TỪ TFRECORD ---
    train_indices_tf = tf.constant(train_indices, dtype=tf.int64)
    val_indices_tf = tf.constant(val_indices, dtype=tf.int64)

    full_ds = tf.data.TFRecordDataset(TRAIN_VAL_TFREC).enumerate()
    train_ds = full_ds.filter(lambda i, data: tf.reduce_any(i == train_indices_tf)).map(lambda i, data: data)
    val_ds = full_ds.filter(lambda i, data: tf.reduce_any(i == val_indices_tf)).map(lambda i, data: data)
    
    train_ds = train_ds.map(parse_tfrecord_fn, num_parallel_calls=AUTOTUNE)
    val_ds = val_ds.map(parse_tfrecord_fn, num_parallel_calls=AUTOTUNE)
    
    train_ds = train_ds.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
    if USE_DATA_AUGMENTATION:
        train_ds = train_ds.map(augment, num_parallel_calls=AUTOTUNE)
    val_ds = val_ds.cache().batch(GLOBAL_BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

    # --- TẠO VÀ BIÊN DỊCH MODEL TRONG STRATEGY.SCOPE ---
    with strategy.scope():
        model = create_model(INPUT_SHAPE, len(ALL_CLASSES))
        optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE, clipvalue=1.0)
        loss_function = focal_loss_from_logits_optimized(active_indices=active_indices) if USE_FOCAL_LOSS else tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        model.compile(optimizer=optimizer, loss=loss_function, metrics=['accuracy'])

    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-8, verbose=1)
    history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, 
                        callbacks=[EarlyStopping(monitor='val_loss', patience=EARLY_STOPPING_PATIENCE, min_delta=MIN_DELTA, restore_best_weights=True), reduce_lr],
                        verbose=1)
    
    # Phần code vẽ biểu đồ và đánh giá
    plt.figure(figsize=(15, 6))
    plt.suptitle(f'Training Metrics for Fold {fold_number}', fontsize=16)
    
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Accuracy vs. Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.grid(True)
    
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Loss vs. Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='upper right')
    plt.grid(True)
    
    plot_filename = f'fold_{fold_number}_metrics.png'
    plot_filepath = os.path.join(KAGGLE_OUTPUT_PATH, plot_filename)
    plt.savefig(plot_filepath)
    plt.close()
    
    print(f"Đã lưu biểu đồ cho Fold {fold_number} tại: {plot_filepath}")
    
    loss, accuracy = model.evaluate(val_ds, verbose=0)
    print(f"Fold {fold_number} - Validation Loss: {loss:.4f}, Validation Accuracy: {accuracy:.4f}")
    fold_losses.append(loss)
    fold_accuracies.append(accuracy)

print("=" * 50 + "\nKết quả Cross-Validation:\n" + f"Validation Accuracy trung bình: {np.mean(fold_accuracies):.4f} +/- {np.std(fold_accuracies):.4f}\n" + f"Validation Loss trung bình: {np.mean(fold_losses):.4f} +/- {np.std(fold_losses):.4f}\n" + "=" * 50)

Đang đọc dữ liệu TFRecord từ đường dẫn local: /kaggle/working/tfrecords/train_val.tfrec
--------------------------------------------------
Bắt đầu Fold 1/5
--------------------------------------------------
Cause: could not parse the source code of <function <lambda> at 0x7a9468427d00>: found multiple definitions with identical signatures at the location. This error may be avoided by defining each lambda on a single line and with unique argument names. The matching definitions were:
Match 0:
lambda i, data: data

Match 1:
lambda i, data: tf.reduce_any(i == train_indices_tf)

Cause: could not parse the source code of <function <lambda> at 0x7a9468427d00>: found multiple definitions with identical signatures at the location. This error may be avoided by defining each lambda on a single line and with unique argument names. The matching definitions were:
Match 0:
lambda i, data: data

Match 1:
lambda i, data: tf.reduce_any(i == train_indices_tf)

Cause: could not parse the source code of <

I0000 00:00:1756270639.955740      10 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94668760/94668760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/50


I0000 00:00:1756270668.739944      10 encapsulate_tpu_computations_pass.cc:266] Subgraph fingerprint:12919805009671706515
I0000 00:00:1756270672.551973     966 tpu_compilation_cache_interface.cc:442] TPU host compilation cache miss: cache_key(10086872593214157428), session_name()
I0000 00:00:1756270689.405870     966 tpu_compile_op_common.cc:245] Compilation of 10086872593214157428 with session name  took 16.853834308s and succeeded
I0000 00:00:1756270689.456917     966 tpu_compilation_cache_interface.cc:476] TPU host compilation cache: compilation complete for cache_key(10086872593214157428), session_name(), subgraph_key(std::string(property.function_name) = "cluster_one_step_on_data_12919805009671706515", property.function_library_fingerprint = 8059175274092058519, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap

    175/Unknown [1m62s[0m 189ms/step - accuracy: 0.7766 - loss: 0.1153

I0000 00:00:1756270722.328629     949 tpu_compilation_cache_interface.cc:442] TPU host compilation cache miss: cache_key(1474413540789118273), session_name()
I0000 00:00:1756270737.200290     949 tpu_compile_op_common.cc:245] Compilation of 1474413540789118273 with session name  took 14.871617711s and succeeded
I0000 00:00:1756270737.248308     949 tpu_compilation_cache_interface.cc:476] TPU host compilation cache: compilation complete for cache_key(1474413540789118273), session_name(), subgraph_key(std::string(property.function_name) = "cluster_one_step_on_data_12919805009671706515", property.function_library_fingerprint = 8059175274092058519, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "8,256,126,3,;8,4,;", property.guaranteed_constants_size = 0, embedding_p

    176/Unknown [1m77s[0m 273ms/step - accuracy: 0.7765 - loss: 0.1153

I0000 00:00:1756270742.028668      10 encapsulate_tpu_computations_pass.cc:266] Subgraph fingerprint:10395212107791225878
I0000 00:00:1756270743.073092     936 tpu_compilation_cache_interface.cc:442] TPU host compilation cache miss: cache_key(17292937946307723510), session_name()
I0000 00:00:1756270747.394427     936 tpu_compile_op_common.cc:245] Compilation of 17292937946307723510 with session name  took 4.321277754s and succeeded
I0000 00:00:1756270747.406013     936 tpu_compilation_cache_interface.cc:476] TPU host compilation cache: compilation complete for cache_key(17292937946307723510), session_name(), subgraph_key(std::string(property.function_name) = "cluster_one_step_on_data_10395212107791225878", property.function_library_fingerprint = 12061867487176611346, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap

[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 411ms/step - accuracy: 0.7764 - loss: 0.1154 - val_accuracy: 0.4489 - val_loss: 0.5913 - learning_rate: 3.0000e-05
Epoch 2/50
[1m175/176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 194ms/step - accuracy: 0.4490 - loss: 0.2174



[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 211ms/step - accuracy: 0.4510 - loss: 0.2166 - val_accuracy: 0.3092 - val_loss: 0.2066 - learning_rate: 3.0000e-05
Epoch 3/50
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step - accuracy: 0.6558 - loss: 0.1377



[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 211ms/step - accuracy: 0.6566 - loss: 0.1374 - val_accuracy: 0.4605 - val_loss: 0.6883 - learning_rate: 3.0000e-05
Epoch 4/50
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step - accuracy: 0.6105 - loss: 0.2073



[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 212ms/step - accuracy: 0.6114 - loss: 0.2068 - val_accuracy: 0.4626 - val_loss: 0.4577 - learning_rate: 3.0000e-05
Epoch 5/50
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step - accuracy: 0.6904 - loss: 0.1306



[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 212ms/step - accuracy: 0.6911 - loss: 0.1304 - val_accuracy: 0.4558 - val_loss: 0.2634 - learning_rate: 3.0000e-05
Epoch 6/50
[1m175/176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 198ms/step - accuracy: 0.7242 - loss: 0.1315



[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 213ms/step - accuracy: 0.7254 - loss: 0.1309 - val_accuracy: 0.4682 - val_loss: 0.3151 - learning_rate: 3.0000e-05
Epoch 7/50
[1m117/176[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m11s[0m 199ms/step - accuracy: 0.6259 - loss: 0.1532

In [None]:
# HUẤN LUYỆN MÔ HÌNH CUỐI CÙNG
print("Bắt đầu huấn luyện lại mô hình cuối cùng trên toàn bộ dữ liệu Train+Validation...")
final_train_df = train_val_df[train_val_df['label'].isin(CLASSES_TO_TRAIN)]

# Tạo pipeline tf.data cho việc huấn luyện cuối cùng
final_train_paths = final_train_df['filepath'].values
final_train_labels = le.transform(final_train_df['label'])
final_train_labels_onehot = tf.keras.utils.to_categorical(final_train_labels, num_classes=len(ALL_CLASSES))

final_train_ds = tf.data.Dataset.from_tensor_slices((final_train_paths, final_train_labels_onehot))
final_train_ds = final_train_ds.map(parse_and_process, num_parallel_calls=AUTOTUNE)
final_train_ds = final_train_ds.shuffle(buffer_size=len(final_train_paths))
final_train_ds = final_train_ds.batch(BATCH_SIZE)
if USE_DATA_AUGMENTATION:
    final_train_ds = final_train_ds.map(augment, num_parallel_calls=AUTOTUNE)
final_train_ds = final_train_ds.prefetch(buffer_size=AUTOTUNE)


final_model = create_model(INPUT_SHAPE, len(ALL_CLASSES))

# Optimizer sạch, không có mixed precision
final_optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE, clipnorm=1.0)

final_model.compile(optimizer=final_optimizer, 
                    loss='categorical_crossentropy' if not USE_FOCAL_LOSS else focal_loss(), 
                    metrics=['accuracy'], 
                    jit_compile=True)

run_timestamp = datetime.datetime.now(pytz.timezone('Asia/Ho_Chi_Minh')).strftime("%Y-%m-%d_%H-%M-%S")
model_checkpoint_path = os.path.join(KAGGLE_OUTPUT_PATH, f'{MODEL_ID}_final_model_{run_timestamp}.h5')
final_history = final_model.fit(final_train_ds, epochs=EPOCHS, 
                                callbacks=[EarlyStopping(monitor='loss', patience=EARLY_STOPPING_PATIENCE), 
                                           ModelCheckpoint(filepath=model_checkpoint_path, save_best_only=True, monitor='loss')], 
                                verbose=1)
print("Huấn luyện mô hình cuối cùng hoàn tất.")

In [None]:
# ĐÁNH GIÁ MÔ HÌNH VÀ VẼ CÁC SƠ ĐỒ
print("\nĐang đánh giá mô hình cuối cùng trên tập Test (Hold-out)...")
final_model.load_weights(model_checkpoint_path)
final_test_df = test_df[test_df['label'].isin(CLASSES_TO_TRAIN)]



X_test, y_test_labels = load_data_from_df(final_test_df)
y_test_encoded = le.transform(y_test_labels)
y_test_onehot = tf.keras.utils.to_categorical(y_test_encoded, num_classes=len(ALL_CLASSES))
X_test = np.stack([X_test]*3, axis=-1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)
X_test_scaled = scaler.transform(X_test_flat)
X_test = np.nan_to_num(X_test_scaled).reshape(X_test.shape)
print("Tải dữ liệu test hoàn tất!")

loss, accuracy = final_model.evaluate(X_test, y_test_onehot, verbose=0)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

y_pred_probs = final_model.predict(X_test)
y_pred_encoded = np.argmax(y_pred_probs, axis=1)

trained_class_indices = np.unique(y_test_encoded)
target_names_trained = le.inverse_transform(trained_class_indices)

report = classification_report(y_test_encoded, y_pred_encoded, target_names=target_names_trained, labels=trained_class_indices)
print("\nClassification Report:\n", report)

report_figs_path = os.path.join(KAGGLE_OUTPUT_PATH, "report_figures")
os.makedirs(report_figs_path, exist_ok=True)

plt.figure(figsize=(8, 6))
plt.plot(final_history.history['accuracy'], label='Training Accuracy')
plt.title('Biểu đồ Accuracy của mô hình cuối cùng')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
accuracy_plot_path = os.path.join(report_figs_path, f'final_accuracy_plot_{run_timestamp}.png')
plt.savefig(accuracy_plot_path)
plt.close()

plt.figure(figsize=(8, 6))
plt.plot(final_history.history['loss'], label='Training Loss')
plt.title('Biểu đồ Loss của mô hình cuối cùng')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
loss_plot_path = os.path.join(report_figs_path, f'final_loss_plot_{run_timestamp}.png')
plt.savefig(loss_plot_path)
plt.close()

cm = confusion_matrix(y_test_encoded, y_pred_encoded, labels=trained_class_indices)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=target_names_trained, yticklabels=target_names_trained)
plt.title('Ma trận nhầm lẫn trên tập Test cuối cùng')
plt.ylabel('Nhãn thật')
plt.xlabel('Nhãn dự đoán')
cm_plot_path = os.path.join(report_figs_path, f'confusion_matrix_{run_timestamp}.png')
plt.savefig(cm_plot_path)
plt.close()

In [None]:
# VẼ GRAD-CAM
last_conv_layer_name = None
for layer in reversed(final_model.layers):
    if isinstance(layer, tf.keras.layers.GlobalAveragePooling2D):
        pooling_index = final_model.layers.index(layer)
        last_conv_layer_name = final_model.layers[pooling_index - 1].name
        break
if last_conv_layer_name is None:
    raise ValueError("Không thể tự động tìm thấy lớp phù hợp cho Grad-CAM.")
print(f"Đã tự động xác định lớp Grad-CAM: {last_conv_layer_name}")

gradcam_path = os.path.join(report_figs_path, "grad_cam")
os.makedirs(gradcam_path, exist_ok=True)
print("Tạo hình ảnh Grad-CAM...")
results_list = []
for i in range(len(y_test_encoded)):
    true_label_encoded = y_test_encoded[i]
    pred_label_encoded = y_pred_encoded[i]
    confidence = y_pred_probs[i][pred_label_encoded]
    is_correct = (true_label_encoded == pred_label_encoded)
    results_list.append({'index': i, 'true_label': true_label_encoded, 'pred_label': pred_label_encoded, 'confidence': confidence, 'is_correct': is_correct})
results_df = pd.DataFrame(results_list)

for class_index, class_name in zip(trained_class_indices, target_names_trained):
    correct_samples = results_df[(results_df['is_correct'] == True) & (results_df['true_label'] == class_index)].nlargest(3, 'confidence')
    incorrect_samples = results_df[(results_df['is_correct'] == False) & (results_df['true_label'] == class_index)].nlargest(3, 'confidence')
    for _, row in correct_samples.iterrows():
        idx = int(row['index'])
        img_array, spec = X_test[idx][np.newaxis, ...], X_test[idx, :, :, 0]
        heatmap = get_grad_cam(final_model, img_array, last_conv_layer_name, pred_index=class_index)
        overlay = overlay_grad_cam(spec, heatmap)
        plt.imshow(overlay)
        plt.title(f"Đúng: {class_name}, Tin cậy: {row['confidence']:.2f}")
        plt.axis('off')
        plt.savefig(os.path.join(gradcam_path, f"correct_{class_name}_{idx}_{run_timestamp}.png"))
        plt.close()
    for _, row in incorrect_samples.iterrows():
        idx = int(row['index'])
        pred_class_name = le.inverse_transform([int(row['pred_label'])])[0]
        img_array, spec = X_test[idx][np.newaxis, ...], X_test[idx, :, :, 0]
        heatmap = get_grad_cam(final_model, img_array, last_conv_layer_name, pred_index=class_index)
        overlay = overlay_grad_cam(spec, heatmap)
        plt.imshow(overlay)
        plt.title(f"Thật: {class_name}, Sai -> {pred_class_name}, Tin cậy: {row['confidence']:.2f}")
        plt.axis('off')
        plt.savefig(os.path.join(gradcam_path, f"incorrect_{class_name}_as_{pred_class_name}_{idx}_{run_timestamp}.png"))
        plt.close()

correct_heatmaps = {label: [] for label in target_names_trained}
incorrect_heatmaps = {label: [] for label in target_names_trained}
for i, row in tqdm(results_df.iterrows(), total=len(results_df), desc="Calculating Avg Grad-CAMs"):
    idx = int(row['index'])
    true_label_index = int(row['true_label'])
    class_name = le.inverse_transform([true_label_index])[0]
    img_array = X_test[idx][np.newaxis, ...]
    heatmap = get_grad_cam(final_model, img_array, last_conv_layer_name, pred_index=true_label_index)
    if row['is_correct']:
        if class_name in correct_heatmaps: correct_heatmaps[class_name].append(heatmap)
    else:
        if class_name in incorrect_heatmaps: incorrect_heatmaps[class_name].append(heatmap)

for class_name in target_names_trained:
    if correct_heatmaps.get(class_name):
        avg_heatmap_correct = np.mean(correct_heatmaps[class_name], axis=0)
        overlay = overlay_grad_cam(np.zeros(INPUT_SHAPE[:2]), avg_heatmap_correct)
        plt.imshow(overlay)
        plt.title(f"Grad-CAM TB - Đúng cho lớp {class_name}")
        plt.axis('off')
        plt.savefig(os.path.join(gradcam_path, f"avg_correct_{class_name}_{run_timestamp}.png"))
        plt.close()
    if incorrect_heatmaps.get(class_name):
        avg_heatmap_incorrect = np.mean(incorrect_heatmaps[class_name], axis=0)
        overlay = overlay_grad_cam(np.zeros(INPUT_SHAPE[:2]), avg_heatmap_incorrect)
        plt.imshow(overlay)
        plt.title(f"Grad-CAM TB - Sai cho lớp {class_name}")
        plt.axis('off')
        plt.savefig(os.path.join(gradcam_path, f"avg_incorrect_{class_name}_{run_timestamp}.png"))
        plt.close()

In [None]:
# TẠO BÁO CÁO PDF
print("Tạo báo cáo PDF...")
pdf = PDFReport()
pdf.add_page()
pdf.chapter_title("1. Tom tat cau hinh va Ket qua")
config_summary = f"""
- Model ID: {MODEL_ID}
- Thoi gian chay: {datetime.datetime.now(pytz.timezone('Asia/Ho_Chi_Minh')).strftime("%Y-%m-%d %H:%M:%S")}
- Cac lop huan luyen: {', '.join(CLASSES_TO_TRAIN)}
- K-Fold Cross-Validation: {N_SPLITS} folds

--- KET QUA CROSS-VALIDATION ---
- Validation Accuracy trung binh: {np.mean(fold_accuracies):.4f} +/- {np.std(fold_accuracies):.4f}
- Validation Loss trung binh: {np.mean(fold_losses):.4f} +/- {np.std(fold_losses):.4f}

--- KET QUA TREN TAP TEST CUOI CUNG ---
- Test Loss: {loss:.4f}
- Test Accuracy: {accuracy:.4f}

--- CAU HINH CHI TIET ---
- SEED: {SEED}
- Epochs: {EPOCHS} (Patience: {EARLY_STOPPING_PATIENCE})
- Batch Size: {BATCH_SIZE}
- Learning Rate: {LEARNING_RATE}
- Ham Loss: {'Focal Loss' if USE_FOCAL_LOSS else 'Categorical Crossentropy'}
- Tang cuong du lieu: {'Co (SpecAugment)' if USE_DATA_AUGMENTATION else 'Khong'}
- Kich thuoc Input: {INPUT_SHAPE}
"""
pdf.chapter_body(config_summary)
pdf.add_image_section("2. Bieu do Huan luyen cua Mo hinh Cuoi cung", accuracy_plot_path)
pdf.add_image_section("", loss_plot_path)
pdf.chapter_title("3. Danh gia chi tiet tren tap Test")
pdf.chapter_body("Bao cao phan loai chi tiet:")
pdf.set_font('Courier', '', 8)
pdf.chapter_body(report)
pdf.add_image_section("Ma tran nham lan:", cm_plot_path)

pdf.add_page()
pdf.chapter_title("4. Phan tich Grad-CAM")
for class_name in target_names_trained:
    pdf.chapter_body(f"Lop: {class_name}")
    correct_imgs = sorted(glob.glob(os.path.join(gradcam_path, f"correct_{class_name}_*_{run_timestamp}.png")))
    incorrect_imgs = sorted(glob.glob(os.path.join(gradcam_path, f"incorrect_{class_name}_*_{run_timestamp}.png")))
    
    x_pos, y_pos = pdf.get_x(), pdf.get_y()
    for i, img_path in enumerate(correct_imgs[:3]):
        if os.path.exists(img_path): pdf.image(img_path, x=x_pos + i * 60, y=y_pos, w=55)
    if correct_imgs: y_pos += 45
    for i, img_path in enumerate(incorrect_imgs[:3]):
        if os.path.exists(img_path): pdf.image(img_path, x=x_pos + i * 60, y=y_pos, w=55)
    if incorrect_imgs: y_pos += 45
    pdf.set_y(y_pos)
    
    avg_correct_path = os.path.join(gradcam_path, f"avg_correct_{class_name}_{run_timestamp}.png")
    avg_incorrect_path = os.path.join(gradcam_path, f"avg_incorrect_{class_name}_{run_timestamp}.png")
    if os.path.exists(avg_correct_path):
        pdf.image(avg_correct_path, w=80)
    if os.path.exists(avg_incorrect_path):
        pdf.image(avg_incorrect_path, w=80)
    pdf.ln(10)

report_filename = f"report_{MODEL_ID}_{run_timestamp}.pdf"
report_filepath = os.path.join(KAGGLE_OUTPUT_PATH, report_filename)
pdf.output(report_filepath)
print(f"Đã tạo báo cáo PDF tại: {report_filepath}")

print("\nBắt đầu quá trình tải kết quả lên Google Drive...")
drive = authenticate_gdrive()
upload_folder_to_drive(drive, KAGGLE_OUTPUT_PATH, DRIVE_RESULTS_FOLDER_ID)
print("Hoàn tất! Toàn bộ kết quả đã được lưu về Google Drive.")