In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from autogluon.multimodal import MultiModalPredictor
import os
from sklearn.metrics import accuracy_score, log_loss, f1_score, roc_auc_score

print("Bắt đầu quá trình phân tích và huấn luyện...")

# --- Step 2: Load and prepare the data ---

# Locations of the image folder and the label table inside the dataset root.
base_path = '/kaggle/input/dog-breed-identification/'
train_path = os.path.join(base_path, 'train')
labels_file = os.path.join(base_path, 'labels.csv')

# Load labels.csv (the code below reads its 'id' and 'breed' columns).
print(f"Đọc dữ liệu từ: {labels_file}")
df_labels = pd.read_csv(labels_file)

# Derive every image's path from its id: <train_path>/<id>.jpg
df_labels['image'] = [
    os.path.join(train_path, f'{image_id}.jpg')
    for image_id in df_labels['id']
]

print("\n5 dòng dữ liệu đầu tiên sau khi xử lý:")
print(df_labels.head())

# Keep only the image path and the label, as an independent copy so later
# operations on `df` never touch `df_labels`.
df = df_labels.loc[:, ['image', 'breed']].copy()


# --- Step 3: Split the data (80% train, 20% test/validation) ---

print("\nChia dữ liệu thành 80% huấn luyện và 20% kiểm tra...")

# Stratify on the breed column and pin the seed so the split is repeatable.
split_options = {
    'test_size': 0.2,
    'stratify': df['breed'],
    'random_state': 42,
}
train_df, test_df = train_test_split(df, **split_options)

print(f"Số lượng mẫu trong tập huấn luyện: {len(train_df)}")
print(f"Số lượng mẫu trong tập kiểm tra: {len(test_df)}")


# --- Step 4: Train with AutoGluon ---

# The predictor learns to predict the 'breed' column and stores its
# artifacts under the 'autogluon_dog_breed' directory.
predictor = MultiModalPredictor(
    path='autogluon_dog_breed',
    problem_type='classification',
    label='breed',
)

print("\nBắt đầu huấn luyện mô hình với AutoGluon...")

# Training budget, preset and environment override passed to fit().
# NOTE(review): time_limit is presumably in seconds (30 minutes) - confirm
# against the AutoGluon documentation.
fit_options = {
    'time_limit': 1800,
    'presets': 'medium_quality',
    'hyperparameters': {'env': {'num_gpus': 1}},
}
predictor.fit(train_data=train_df, **fit_options)

# --- Step 5: Detailed evaluation on the held-out test split (20%) ---

print("\nHoàn tất huấn luyện. Bắt đầu đánh giá chi tiết trên tập kiểm tra...")

y_true = test_df['breed']

print("Thực hiện dự đoán trên tập kiểm tra...")
y_pred = predictor.predict(test_df, as_pandas=True)
y_pred_proba = predictor.predict_proba(test_df, as_pandas=True)

# scikit-learn interprets the columns of a probability matrix in *sorted*
# label order: log_loss sorts `labels` internally, and roc_auc_score rejects
# an unsorted `labels` argument. Do not rely on the predictor happening to
# return its classes already sorted: sort the labels explicitly and reorder
# the probability columns by name to match.
# NOTE(review): this assumes the predict_proba frame's columns are named by
# class label (a KeyError here means they are not) - confirm against the
# AutoGluon documentation.
ordered_labels = sorted(predictor.class_labels)
proba_sorted = y_pred_proba.loc[:, ordered_labels]

# Compute the requested metrics.
print("Tính toán các chỉ số đánh giá...")
accuracy = accuracy_score(y_true, y_pred)
loss = log_loss(y_true, proba_sorted, labels=ordered_labels)
f1_macro = f1_score(y_true, y_pred, average='macro')
f1_weighted = f1_score(y_true, y_pred, average='weighted')
roc_auc = roc_auc_score(y_true, proba_sorted, multi_class='ovr', labels=ordered_labels)


print("\n--- BÁO CÁO KẾT QUẢ CHI TIẾT ---")
print("Các chỉ số đánh giá trên tập kiểm tra (20% dữ liệu):")
print(f"  - Accuracy:    {accuracy:.4f}")
print(f"  - Log Loss:    {loss:.4f}")
print(f"  - F1 (Macro):  {f1_macro:.4f}")
print(f"  - F1 (Weighted): {f1_weighted:.4f}")
print(f"  - ROC AUC (OvR): {roc_auc:.4f}")

print("\n--- QUÁ TRÌNH HOÀN TẤT ---")
print(f"Mô hình tốt nhất được lưu tại thư mục: {predictor.path}")

AutoGluon Version:  1.3.1
Python Version:     3.11.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Sun Nov 10 10:07:59 UTC 2024
CPU Count:          4
Pytorch Version:    2.6.0+cu124
CUDA Version:       12.4
Memory Avail:       29.24 GB / 31.35 GB (93.3%)
Disk Space Avail:   19.50 GB / 19.52 GB (99.9%)
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	First 10 (of 120) unique label values:  ['gordon_setter', 'vizsla', 'australian_terrier', 'norwich_terrier', 'bernese_mountain_dog', 'chow', 'english_foxhound', 'bloodhound', 'sussex_spaniel', 'ibizan_hound']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quantile'])


Bắt đầu quá trình phân tích và huấn luyện...
Đọc dữ liệu từ: /kaggle/input/dog-breed-identification/labels.csv

5 dòng dữ liệu đầu tiên sau khi xử lý:
                                 id             breed  \
0  000bec180eb18c7604dcecc8fe0dba07       boston_bull   
1  001513dfcb2ffafc82cccf4d8bbaba97             dingo   
2  001cdf01b096e06d78e9e5112d419397          pekinese   
3  00214f311d5d2247d5dfe4fe24b2303d          bluetick   
4  0021f9ceb3235effd7fcde7f7538ed62  golden_retriever   

                                               image  
0  /kaggle/input/dog-breed-identification/train/0...  
1  /kaggle/input/dog-breed-identification/train/0...  
2  /kaggle/input/dog-breed-identification/train/0...  
3  /kaggle/input/dog-breed-identification/train/0...  
4  /kaggle/input/dog-breed-identification/train/0...  

Chia dữ liệu thành 80% huấn luyện và 20% kiểm tra...
Số lượng mẫu trong tập huấn luyện: 8177
Số lượng mẫu trong tập kiểm tra: 2045

Bắt đầu huấn luyện mô hình với AutoGluon...


AutoMM starts to create your model. ✨✨✨

To track the learning progress, you can open a terminal and launch Tensorboard:
    ```shell
    # Assume you have installed tensorboard
    tensorboard --logdir /kaggle/working/autogluon_dog_breed
    ```

INFO: Seed set to 0


model.safetensors:   0%|          | 0.00/22.1M [00:00<?, ?B/s]

GPU Count: 1
GPU Count to be Used: 1

INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name              | Type                            | Params | Mode 
------------------------------------------------------------------------------
0 | model             | TimmAutoModelForImagePrediction | 4.4 M  | train
1 | validation_metric | MulticlassAccuracy              | 0      | train
2 | loss_func         | CrossEntropyLoss                | 0      | train
------------------------------------------------------------------------------
4.4 M     Trainable params
0         Non-trainable params
4.4 M     Total params
17.423    Total estimated model params size (MB)
286       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 28: 'val_accuracy' reached 0.03178 (best 0.03178), saving model to '/kaggle/working/autogluon_dog_breed/epoch=0-step=28.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 58: 'val_accuracy' reached 0.20905 (best 0.20905), saving model to '/kaggle/working/autogluon_dog_breed/epoch=0-step=58.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 1, global step 86: 'val_accuracy' reached 0.44377 (best 0.44377), saving model to '/kaggle/working/autogluon_dog_breed/epoch=1-step=86.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 1, global step 116: 'val_accuracy' reached 0.61736 (best 0.61736), saving model to '/kaggle/working/autogluon_dog_breed/epoch=1-step=116.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 2, global step 144: 'val_accuracy' reached 0.67359 (best 0.67359), saving model to '/kaggle/working/autogluon_dog_breed/epoch=2-step=144.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 2, global step 174: 'val_accuracy' reached 0.71149 (best 0.71149), saving model to '/kaggle/working/autogluon_dog_breed/epoch=2-step=174.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 3, global step 202: 'val_accuracy' reached 0.72738 (best 0.72738), saving model to '/kaggle/working/autogluon_dog_breed/epoch=3-step=202.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 3, global step 232: 'val_accuracy' reached 0.73594 (best 0.73594), saving model to '/kaggle/working/autogluon_dog_breed/epoch=3-step=232.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 4, global step 260: 'val_accuracy' reached 0.74817 (best 0.74817), saving model to '/kaggle/working/autogluon_dog_breed/epoch=4-step=260.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 4, global step 290: 'val_accuracy' reached 0.75917 (best 0.75917), saving model to '/kaggle/working/autogluon_dog_breed/epoch=4-step=290.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 5, global step 318: 'val_accuracy' reached 0.76284 (best 0.76284), saving model to '/kaggle/working/autogluon_dog_breed/epoch=5-step=318.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 5, global step 348: 'val_accuracy' reached 0.75428 (best 0.76284), saving model to '/kaggle/working/autogluon_dog_breed/epoch=5-step=348.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 6, global step 376: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 6, global step 406: 'val_accuracy' reached 0.76039 (best 0.76284), saving model to '/kaggle/working/autogluon_dog_breed/epoch=6-step=406.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 7, global step 434: 'val_accuracy' reached 0.77262 (best 0.77262), saving model to '/kaggle/working/autogluon_dog_breed/epoch=7-step=434.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 7, global step 464: 'val_accuracy' reached 0.78117 (best 0.78117), saving model to '/kaggle/working/autogluon_dog_breed/epoch=7-step=464.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 8, global step 492: 'val_accuracy' reached 0.77628 (best 0.78117), saving model to '/kaggle/working/autogluon_dog_breed/epoch=8-step=492.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 8, global step 522: 'val_accuracy' reached 0.77995 (best 0.78117), saving model to '/kaggle/working/autogluon_dog_breed/epoch=8-step=522.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 9, global step 550: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 9, global step 580: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 10, global step 608: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 10, global step 638: 'val_accuracy' reached 0.78117 (best 0.78117), saving model to '/kaggle/working/autogluon_dog_breed/epoch=10-step=638.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 11, global step 666: 'val_accuracy' reached 0.78117 (best 0.78117), saving model to '/kaggle/working/autogluon_dog_breed/epoch=11-step=666.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 11, global step 696: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 12, global step 724: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 12, global step 754: 'val_accuracy' reached 0.79218 (best 0.79218), saving model to '/kaggle/working/autogluon_dog_breed/epoch=12-step=754.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 13, global step 782: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 13, global step 812: 'val_accuracy' reached 0.78484 (best 0.79218), saving model to '/kaggle/working/autogluon_dog_breed/epoch=13-step=812.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 14, global step 840: 'val_accuracy' reached 0.78362 (best 0.79218), saving model to '/kaggle/working/autogluon_dog_breed/epoch=14-step=840.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 14, global step 870: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 15, global step 898: 'val_accuracy' reached 0.78606 (best 0.79218), saving model to '/kaggle/working/autogluon_dog_breed/epoch=15-step=898.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 15, global step 928: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 16, global step 956: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 16, global step 986: 'val_accuracy' was not in top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 17, global step 1014: 'val_accuracy' reached 0.79095 (best 0.79218), saving model to '/kaggle/working/autogluon_dog_breed/epoch=17-step=1014.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 17, global step 1044: 'val_accuracy' was not in top 3
Start to fuse 3 checkpoints via the greedy soup algorithm.
INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


Predicting: |          | 0/? [00:00<?, ?it/s]

INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


Predicting: |          | 0/? [00:00<?, ?it/s]

INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


Predicting: |          | 0/? [00:00<?, ?it/s]

AutoMM has created your model. 🎉🎉🎉

To load the model, use the code below:
    ```python
    from autogluon.multimodal import MultiModalPredictor
    predictor = MultiModalPredictor.load("/kaggle/working/autogluon_dog_breed")
    ```

If you are not satisfied with the model, try to increase the training time, 
adjust the hyperparameters (https://auto.gluon.ai/stable/tutorials/multimodal/advanced_topics/customization.html),
or post issues on GitHub (https://github.com/autogluon/autogluon/issues).





Hoàn tất huấn luyện. Bắt đầu đánh giá chi tiết trên tập kiểm tra...
Thực hiện dự đoán trên tập kiểm tra...


INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


Predicting: |          | 0/? [00:00<?, ?it/s]

INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


Predicting: |          | 0/? [00:00<?, ?it/s]

Tính toán các chỉ số đánh giá...

--- BÁO CÁO KẾT QUẢ CHI TIẾT ---
Các chỉ số đánh giá trên tập kiểm tra (20% dữ liệu):
  - Accuracy:    0.7609
  - Log Loss:    0.8899
  - F1 (Macro):  0.7534
  - F1 (Weighted): 0.7588
  - ROC AUC (OvR): 0.9958

--- QUÁ TRÌNH HOÀN TẤT ---
Mô hình tốt nhất được lưu tại thư mục: /kaggle/working/autogluon_dog_breed
