# AutoGluon Train

## Imports

In [None]:
import os
import pandas as pd
from autogluon.multimodal import MultiModalPredictor


## Global Variables

In [None]:
# Global configuration: every path below is derived from DATA_DIR.

# Root data folder, resolved as `../data` relative to the current working
# directory. It holds `train.csv` and the folder of training images.
DATA_DIR = os.path.abspath(os.path.join(os.pardir, "data"))

# CSV file describing the training samples.
TRAIN_DATA_PATH = os.path.join(DATA_DIR, "train.csv")

# Folder that holds the training images.
TRAIN_IMAGE_DIR = os.path.join(DATA_DIR, "train")

# Extension appended to the image names from `train.csv` to form file names.
TRAIN_IMG_EXT = ".jpeg"

# Folder where the trained models are saved.
TRAIN_SAVE_PATH = os.path.join(DATA_DIR, "models")


## Load & Create Image Dataset

In [None]:
# Load the training table from `train.csv` into a DataFrame.
df_train = pd.read_csv(TRAIN_DATA_PATH)
# Preview the first rows (shown as the cell output in the notebook).
df_train.head()


In [None]:
# Add an `image` column holding the full file path of each training image.
def _to_image_path(image_id):
    """Return the path of the training image named by *image_id*."""
    return os.path.join(TRAIN_IMAGE_DIR, image_id + TRAIN_IMG_EXT)


df_train["image"] = df_train["image_id"].apply(_to_image_path)
df_train.head()


In [None]:
# Drop the identifier/metadata columns (image_id, patient_id, center_id and
# image_num) as they are not needed for features. The result is an
# independent copy, so `df_train` itself is left untouched.
columns_to_drop = ["image_id", "patient_id", "center_id", "image_num"]
df_train_final = df_train.drop(columns=columns_to_drop).copy(deep=True)
df_train_final.head()


## MultiModalPredictor for Training

- Inspiration: https://github.com/awslabs/autogluon/tree/master/examples/automm/kaggle_pawpularity

### Build the Predictor

In [None]:
# Configure the predictor; training only starts when `fit` is called.
predictor = MultiModalPredictor(
    label="label",  # Name of the target column to predict.
    # Type of problem. AutoGluon documents these values in lowercase
    # ("binary", "multiclass", "regression"); the capitalised spelling
    # "Binary" is not a documented value and may be rejected depending on
    # the installed version, so the documented form is used here.
    problem_type="binary",
    path=TRAIN_SAVE_PATH,  # Path to save the trained model.
    verbosity=4,  # Verbosity levels range from 0 to 4 and control how much information is printed.
)


### Train the Predictor

In [None]:
# Overrides for AutoGluon's default training configuration, keyed by their
# dotted config names (model / optimization / env / data groups).
fit_hyperparameters = {
    # Model selection: a single timm image backbone.
    "model.names": "['timm_image']",
    "model.timm_image.checkpoint_name": "swin_large_patch4_window7_224",
    # Optimisation settings.
    "optimization.learning_rate": "2e-5",
    "optimization.optim_type": "adamw",
    "optimization.max_epochs": 20,
    # Training environment.
    "env.precision": "32",
    # Data settings: positive class label and mixup/cutmix augmentation.
    "data.pos_label": "LAA",
    "data.mixup.turn_on": True,
    "data.mixup.cutmix_alpha": 0.8,
    "data.mixup.prob": 0.5,
    "data.mixup.switch_prob": 0.7,
    "data.mixup.turn_off_epoch": 7,
}

# Explicit column types for the training DataFrame.
fit_column_types = {
    # TODO: Use these columns to evaluate any potential improvements.
    # "center_id": "categorical",
    # "image_num": "numerical",
    "image": "image_path",
}

# Train the predictor on the prepared DataFrame.
predictor.fit(
    train_data=df_train_final,
    presets="best_quality",
    save_path=TRAIN_SAVE_PATH,
    hyperparameters=fit_hyperparameters,
    column_types=fit_column_types,
    # time_limit= 30, # Time limit in seconds.
    seed=123,
)



- `train_data` is the data used for training.
- `tuning_data` is the data used for validation. If it is not provided, the tuning data is split from the training data automatically.
- `save_path` indicates the specific path for model saving in a fit process.
- `hyperparameters` is a dict that overrides the default training configs. The configs are grouped into several types, including:
    - `model` contains the parameters which control the models used in the predictor. You can select the model you need and adjust the details. By default, the models are selected automatically based on the dataset.
    - `data` contains the configs of transforms for different types of data.
    - `env` contains the configs of the training environment. 
    - `optimization` contains the configs in the optimization process, including but not limited to max training epochs, learning rate and warm-up.
- `seed` determines the random seed.

[Source](https://github.com/awslabs/autogluon/tree/master/examples/automm/kaggle_pawpularity) 


### Save Model

> In MultiModalPredictor, some pre-trained models will be downloaded during training. These models also need to be saved for use in predicting after submission. You can specify the predictor to save a “standalone” model that can be loaded without internet access.

[Source](https://github.com/awslabs/autogluon/tree/master/examples/automm/kaggle_pawpularity#13-save-standalone-model)

In [None]:
# Save the trained predictor to TRAIN_SAVE_PATH. `standalone=True` requests a
# self-contained save so the model can later be loaded without internet access.
predictor.save(path=TRAIN_SAVE_PATH, standalone=True)
