In [1]:
from autogluon.multimodal import MultiModalPredictor
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split

In [3]:
# 설정
train_dir = 'open/train'
test_csv_path = 'open/test.csv'
output_dir = 'autogluon_output_best'

# 1. 학습 데이터 구성
all_img_paths = list(Path(train_dir).rglob("*/*.jpg"))
df = pd.DataFrame({'img_path': [str(p) for p in all_img_paths]})
df['label'] = df['img_path'].apply(lambda x: Path(x).parent.name)
df = df.rename(columns={'img_path': 'image'})  # 컬럼명 변경

# 2. train/val 분리 (train 데이터 5만개, val 데이터 1만5천개로 설정)
train_df, val_df = train_test_split(df, test_size=0.3, stratify=df['label'], random_state=41)
train_df_small = train_df.sample(n=50000, random_state=41)  # 5만개 데이터 샘플링
val_df_small = val_df.sample(n=15000, random_state=41)  # 1만5천개 데이터 샘플링

# 3. predictor 설정 및 학습 (하이퍼파라미터 튜닝 끄기)
predictor = MultiModalPredictor(
    label='label',
    problem_type='classification',
    path=output_dir
)


In [5]:
predictor.fit(
    train_data=train_df_small,
    tuning_data=val_df_small,
    time_limit=10800,  # 최대 3시간
    hyperparameter_tune_kwargs=None,  # 하이퍼파라미터 튜닝 끄기
    presets='high_quality',  # 성능과 속도 균형 (빠르고 좋음)
)

# 4. 테스트 데이터 준비
test_df = pd.read_csv(test_csv_path)
test_df['image'] = test_df['img_path']

# 5. 예측
preds = predictor.predict(test_df)

AutoGluon Version:  1.3.0
Python Version:     3.11.1
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          6
Pytorch Version:    2.6.0+cpu
CUDA Version:       CUDA is not available
Memory Avail:       18.58 GB / 23.91 GB (77.7%)
Disk Space Avail:   61.14 GB / 222.28 GB (27.5%)

AutoMM starts to create your model. ✨✨✨

To track the learning progress, you can open a terminal and launch Tensorboard:
    ```shell
    # Assume you have installed tensorboard
    tensorboard --logdir C:\Users\FOR\Deep Learning\autogluon_output_best
    ```

Seed set to 0


model.safetensors:   0%|          | 0.00/395M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
GPU Count: 0
GPU Count to be Used: 0

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name              | Type                            | Params | Mode 
------------------------------------------------------------------------------
0 | model             | TimmAutoModelForImagePrediction | 95.7 M | train
1 | validation_metric | MulticlassAccuracy              | 0      | train
2 | loss_func         | CrossEntropyLoss                | 0      | train
------------------------------------------------------------------------------
95.7 M    Trainable params
0         Non-trainable params
95.7 M    Total params
382.808   Total estimated model params size (MB)
863      

Sanity Checking: |                                                                               | 0/? [00:00<…

Training: |                                                                                      | 0/? [00:00<…

Time limit reached. Elapsed time is 3:00:01. Signaling Trainer to stop.


Validation: |                                                                                    | 0/? [00:00<…

Epoch 0, global step 128: 'val_accuracy' reached 0.74240 (best 0.74240), saving model to 'C:\\Users\\FOR\\Deep Learning\\autogluon_output_best\\epoch=0-step=128.ckpt' as top 3
AutoMM has created your model. 🎉🎉🎉

To load the model, use the code below:
    ```python
    from autogluon.multimodal import MultiModalPredictor
    predictor = MultiModalPredictor.load("C:\Users\FOR\Deep Learning\autogluon_output_best")
    ```

If you are not satisfied with the model, try to increase the training time, 
adjust the hyperparameters (https://auto.gluon.ai/stable/tutorials/multimodal/advanced_topics/customization.html),
or post issues on GitHub (https://github.com/autogluon/autogluon/issues).


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.


Predicting: |                                                                                    | 0/? [00:00<…

In [7]:
# 6. 제출 파일 생성
submission = pd.read_csv('open/sample_submission.csv')
submission['rock_type'] = preds
submission.to_csv('submission.csv', index=False)


# 

#### Macro f1 측정

In [None]:
from sklearn.metrics import f1_score

# 1. validation 데이터에 대한 예측 수행
val_preds = predictor.predict(val_df)

# 2. 실제 라벨
true_labels = val_df['label']

# 3. macro f1 계산
macro_f1 = f1_score(true_labels, val_preds, average='macro')
print(f"Macro F1 Score on Validation Set: {macro_f1:.4f}")
