## AutoGluon 5번째 시도

In [2]:
from autogluon.multimodal import MultiModalPredictor
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
import torch
import os

# Configuration: input locations and the AutoGluon working directory.
train_dir = "open/train"
test_csv_path = "open/test.csv"
output_dir = "autogluon_output_best5"

# When True, a later cell tries to reload a predictor from `output_dir`
# so that an interrupted run can be continued instead of restarted.
resume = True

In [3]:
# 1. Build the training table: one row per .jpg file, labelled with the name
#    of the folder that directly contains it.
all_img_paths = list(Path(train_dir).rglob("*/*.jpg"))
if not all_img_paths:
    # Fail early with a clear message instead of an opaque KeyError in the split below.
    raise FileNotFoundError(f"No .jpg images found under '{train_dir}'")

# The column is named 'image' because that is the column passed to
# `column_types` when fitting the predictor.
df = pd.DataFrame({'image': [str(p) for p in all_img_paths]})
df['label'] = df['image'].apply(lambda x: Path(x).parent.name)

# 2. Randomly draw up to `samples_per_class` images from every class.
#    The draw is capped at the class size so a class with fewer images
#    contributes everything it has instead of raising ValueError.
samples_per_class = 7000
sampled_parts = []
for label in df['label'].unique():
    label_df = df[df['label'] == label]
    sampled_parts.append(
        label_df.sample(n=min(samples_per_class, len(label_df)), random_state=41)
    )
# Concatenate once rather than growing a DataFrame inside the loop.
df_balanced = pd.concat(sampled_parts, axis=0)

# 3. Stratified 70/30 train/validation split, then cap the subset sizes.
#    When the cap exceeds the frame length, `sample` just shuffles the rows.
train_df, val_df = train_test_split(
    df_balanced,
    test_size=0.3,
    stratify=df_balanced['label'],
    random_state=41,
)
train_df_small = train_df.sample(n=min(49000, len(train_df)), random_state=41)
val_df_small = val_df.sample(n=min(15000, len(val_df)), random_state=41)

# # GPU 설정
# num_gpus = 1 if torch.cuda.is_available() else 0


In [4]:
# 4. Load an existing predictor or create a new one.
#
# The guard previously looked for 'predictor.pkl', which is the artifact written
# by TabularPredictor; a MultiModalPredictor directory does not contain it, so
# the resume branch could never run.
# NOTE(review): 'last.ckpt' (training in progress / interrupted) and 'model.ckpt'
# (training finished) are the AutoMM checkpoint names as of AutoGluon 1.x —
# confirm against the installed version.
last_ckpt_path = os.path.join(output_dir, 'last.ckpt')
final_ckpt_path = os.path.join(output_dir, 'model.ckpt')

if resume and os.path.exists(last_ckpt_path):
    print("🔁 이전 학습 결과를 불러옵니다.")
    # resume=True restores the trainer state from last.ckpt so that the next
    # fit() call continues the interrupted run.
    predictor = MultiModalPredictor.load(output_dir, resume=True)
elif resume and os.path.exists(final_ckpt_path):
    print("🔁 이전 학습 결과를 불러옵니다.")
    # A finished model: load it as-is (usable for prediction or further fitting).
    predictor = MultiModalPredictor.load(output_dir)
else:
    print("🆕 새로운 predictor를 생성합니다.")
    predictor = MultiModalPredictor(
        label='label',
        problem_type='classification',
        eval_metric='accuracy',
        path=output_dir
    )


🆕 새로운 predictor를 생성합니다.


In [5]:
# Optional AutoMM hyperparameter overrides (a dict). None leaves the preset's
# settings untouched. It is now actually passed to fit(); before, it was
# declared but never used.
hyperparameters = None

# Wall-clock training budget in seconds (30600 s = 8.5 h).
time_limit_sec = 30600

# 5. Train on the sampled subsets, validating on the held-out split.
# NOTE(review): in the recorded run val_accuracy stayed at 0.14286 (= 1/7, i.e.
# chance level for the 7 balanced classes) — the model was not learning before
# the run was interrupted; worth investigating before the next long run.
predictor.fit(
    train_data=train_df_small,
    tuning_data=val_df_small,
    presets="high_quality",
    hyperparameters=hyperparameters,
    time_limit=time_limit_sec,
    column_types={"image": "image"},
)

# 6. Save the model to the configured output directory (same value as the
#    previously hard-coded 'autogluon_output_best5').
predictor.save(output_dir)

AutoGluon Version:  1.3.0
Python Version:     3.11.1
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          6
Pytorch Version:    2.6.0+cu126
CUDA Version:       12.6
Memory Avail:       18.21 GB / 23.91 GB (76.2%)
Disk Space Avail:   33.76 GB / 222.28 GB (15.2%)
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	7 unique label values:  ['Andesite', 'Basalt', 'Mud_Sandstone', 'Etc', 'Gneiss', 'Weathered_Rock', 'Granite']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quantile'])

AutoMM starts to create your model. ✨✨✨

To track the learning progress, you can open a terminal and launch Tensorboard:
    ```shell
    # Assume you have installed tensorboard
    tensorboard --logdir C:\Users\FOR\Deep Learning\autogluon_output_best5
    

Sanity Checking: |                                                                               | 0/? [00:00<…

Training: |                                                                                      | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Epoch 0, global step 134: 'val_accuracy' reached 0.14286 (best 0.14286), saving model to 'C:\\Users\\FOR\\Deep Learning\\autogluon_output_best5\\epoch=0-step=134.ckpt' as top 3


Validation: |                                                                                    | 0/? [00:00<…

Epoch 0, global step 268: 'val_accuracy' reached 0.14286 (best 0.14286), saving model to 'C:\\Users\\FOR\\Deep Learning\\autogluon_output_best5\\epoch=0-step=268.ckpt' as top 3


Validation: |                                                                                    | 0/? [00:00<…

Epoch 1, global step 402: 'val_accuracy' reached 0.14286 (best 0.14286), saving model to 'C:\\Users\\FOR\\Deep Learning\\autogluon_output_best5\\epoch=1-step=402.ckpt' as top 3


Validation: |                                                                                    | 0/? [00:00<…

Epoch 1, global step 536: 'val_accuracy' was not in top 3

Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
# 7. Prepare the test data.
test_df = pd.read_csv(test_csv_path)

# Paths in the CSV are resolved against the folder that holds the CSV itself.
data_root = os.path.dirname(test_csv_path)

# str.strip('./') removes any run of '.' and '/' characters from BOTH ends of
# the string; it is not a prefix removal. removeprefix drops exactly one
# leading './' and leaves the rest of the path untouched.
test_df['image'] = test_df['img_path'].apply(
    lambda x: os.path.join(data_root, x.removeprefix('./'))
)

# 8. Predict a label for every test image.
preds = predictor.predict(test_df)

In [None]:
# 9. Write the submission file: take the sample submission as a template and
#    fill its 'rock_type' column with the predictions.
submission = pd.read_csv('open/sample_submission.csv').assign(rock_type=preds)
submission.to_csv('submission5.csv', index=False)

# 