# Install AutoGluon

In [1]:
!pip3 install autogluon



# Setup

In [2]:
%matplotlib inline
import autogluon.core as ag
from autogluon.vision import ImageDataset, ImagePredictor
import pandas as pd


# Download Dataset

In [3]:
# Fetch the cats-vs-dogs archive and extract it in one expression; `pets` ends up
# holding whatever `ag.utils.unzip` returns for the downloaded file.
pets = ag.utils.unzip(
    ag.utils.download('https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip')
)


# Create Dataset

In [4]:
# Build the train and test tables from the extracted folder tree: the 'train'
# sub-folder feeds train_data and the 'validation' sub-folder feeds test_data.
# The middle element of the returned triple is discarded
# (presumably the validation split, which is not requested here -- TODO confirm).
train_data, _, test_data = ImageDataset.from_folders(
    'cats_and_dogs_filtered',
    train='train',
    test='validation',
)

# Report the size of each split, then preview the first rows of the training table.
print('train #', len(train_data), 'test #', len(test_data))
train_data.head()


train # 2000 test # 1000


Unnamed: 0,image,label
0,/content/cats_and_dogs_filtered/train/cats/cat...,0
1,/content/cats_and_dogs_filtered/train/cats/cat...,0
2,/content/cats_and_dogs_filtered/train/cats/cat...,0
3,/content/cats_and_dogs_filtered/train/cats/cat...,0
4,/content/cats_and_dogs_filtered/train/cats/cat...,0


# Check Image

In [5]:
# Display images from the training table as a quick visual check of the loaded data.
train_data.show_images()


Output hidden; open in https://colab.research.google.com to view.

# Training

In [6]:
# Every setting is left at its default except the epoch count, which is cut to 2
# purely to keep the notebook build time short (the defaults can be trusted).
fit_hyperparameters = {'epochs': 2}

# The dataset ships without a validation split, so `fit` carves one out of
# train_data at random (90/10).
predictor = ImagePredictor()
predictor.fit(train_data, hyperparameters=fit_hyperparameters)

# Pull the train/validation accuracies out of the fit summary and report them.
fit_result = predictor.fit_summary()
accuracies = (fit_result['train_acc'], fit_result['valid_acc'])
print('Top-1 train acc: %.3f, val acc: %.3f' % accuracies)


`time_limit=auto` set to `time_limit=7200`.
Reset labels to [0, 1]
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Randomly split train_data into train[1800]/validation[200] splits.
The number of requested GPUs is greater than the number of available GPUs.Reduce the number to 1
Starting fit without HPO
modified configs(<old> != <new>): {
root.train.batch_size 32 != 16
root.train.early_stop_patience -1 != 10
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_max_value 1.0 != inf
root.train.epochs    200 != 2
root.img_cls.model   resnet101 != resnet50
root.misc.seed       42 != 666
root.misc.num_workers 4 != 2
}
Saved config to /content/82d541ce/.trial_0/config.yaml
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnet50_a1_0-14fe96d1.pth" to /root/.cache/torch/hub/checkpoints/resnet50_a1_0-14fe96d1.pth
Model resnet50 created, param count:                                         23512130
AMP not enabled. Train

Top-1 train acc: 0.862, val acc: 0.965


# Predict

In [7]:
# Run the fitted predictor over the whole test table and print the resulting
# table of per-class probabilities (one row per test image, one column per class).
bulk_result = predictor.predict_proba(test_data)
print(bulk_result)

            0         1
0    0.833163  0.166837
1    0.798232  0.201768
2    0.635602  0.364397
3    0.815183  0.184817
4    0.901609  0.098391
..        ...       ...
995  0.247491  0.752509
996  0.099352  0.900648
997  0.252392  0.747608
998  0.067125  0.932875
999  0.168133  0.831867

[1000 rows x 2 columns]


# Save Model

In [8]:
# Persist the fitted predictor to disk under the path 'predictor.ag';
# the load step later in the notebook reads from this same path.
filename = 'predictor.ag'
predictor.save(filename)


# Load Model

In [9]:
# Restore the predictor from the file written by the save step. Reusing `filename`
# (instead of repeating the path literal) keeps the save and load paths from
# drifting apart if the name is ever changed.
new_predictor = ImagePredictor.load(filename)

# Predict again with the restored predictor so the printed table can be compared
# against the one produced by the original predictor.
bulk_result = new_predictor.predict_proba(test_data)
print(bulk_result)

            0         1
0    0.833163  0.166837
1    0.798232  0.201768
2    0.635602  0.364397
3    0.815183  0.184817
4    0.901609  0.098391
..        ...       ...
995  0.247491  0.752509
996  0.099352  0.900648
997  0.252392  0.747608
998  0.067125  0.932875
999  0.168133  0.831867

[1000 rows x 2 columns]
