In [None]:
from fastai.vision.all import *

In [None]:
# Training metadata for the competition; the `id` and `breed` columns are used below.
labels = pd.read_csv(filepath_or_buffer="../input/dog-breed-identification/labels.csv")
labels

In [None]:
# Each breed contributes one value (its number of rows in `labels`), so the
# histogram shows how many breeds fall into each class-size bucket.
ax = labels["breed"].value_counts().plot(kind="hist")
# Label the axes explicitly: a histogram of counts is ambiguous without them.
ax.set(xlabel="Images per breed", ylabel="Number of breeds",
       title="Class-size distribution");

# Data Preparation

Split the dataset into training and validation sets in a stratified manner, so that every breed keeps the same proportion in both sets.

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Single stratified 80/20 split on the breed column; the fixed seed keeps it reproducible.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_ids, valid_ids = next(split.split(labels, labels["breed"]))

# `valid_ids` contains positional row numbers, so the mask is built over
# 0..len(labels)-1 rather than over the frame's own index. `isin` is vectorised;
# testing `i in valid_ids` per row would rescan the whole array for every row.
labels["is_valid"] = pd.RangeIndex(len(labels)).isin(valid_ids)

# Turn ids into file names. Only append the extension where it is missing so
# that re-running this cell does not produce "<id>.jpg.jpg".
has_ext = labels["id"].str.endswith(".jpg")
labels["id"] = labels["id"].where(has_ext, labels["id"] + ".jpg")

preparing dataloaders

In [None]:
path = "../input/dog-breed-identification/train"

# File names and labels are read from `labels` (fastai's `from_df` defaults to the
# first column for the file name and the second for the label); the train/validation
# split comes from the boolean `is_valid` column.
dls = ImageDataLoaders.from_df(
    labels,
    path=path,
    valid_col="is_valid",
    bs=64,
    # Per-item step: bring every image to 460px by squeezing (no cropping).
    item_tfms=Resize(460, method="squeeze"),
    # Per-batch steps: standard augmentations down to 300px, extra colour
    # jitter, then normalisation with the ImageNet statistics.
    batch_tfms=[
        *aug_transforms(size=300, mult=1.5),
        Contrast(),
        Saturation(),
        Normalize.from_stats(*imagenet_stats),
    ],
)

In [None]:
# Visual sanity check: display a sample batch produced by the dataloaders
# (presumably with the augmentations applied - confirm against the rendered output).
dls.show_batch()

# Training our model

Compute a weight for each class to deal with class imbalance.

In [None]:
# Class weights inversely proportional to class frequency:
#     weight_c = n_samples / (n_classes * count_c)
# so a breed with the average number of rows gets a weight of 1.
label_count = labels["breed"].value_counts()
n_samples = labels.shape[0]
n_classes = len(dls.vocab)
# Iterate in `dls.vocab` order so that position i holds the weight of vocab entry i.
weights = [n_samples / (n_classes * label_count[label]) for label in dls.vocab]
# Place the weights on the dataloaders' device instead of hard-coding "cuda",
# so the cell also runs on a CPU-only kernel.
weights = tensor(weights, device=dls.device)

train the model

In [None]:
# Optimise a class-weighted cross-entropy so that rare breeds are not drowned out;
# the weights were previously computed but never handed to the loss.
# `F.cross_entropy` is kept as a metric to report the plain (unweighted) loss
# alongside accuracy. `nn.CrossEntropyLoss` is kept (rather than a fastai loss
# wrapper) so that predictions stay raw logits for the explicit softmax later on.
learn = cnn_learner(dls, resnet152,
                    loss_func=nn.CrossEntropyLoss(weight=weights),
                    metrics=[accuracy, F.cross_entropy], path=".").to_fp16()

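Train only the head first, keeping the pretrained convnet part frozen (`cnn_learner` creates the model with the pretrained body already frozen).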

In [None]:
# Run the learning-rate finder to help choose a learning rate for the first training stage.
learn.lr_find()

In [None]:
# First stage: 3 epochs of one-cycle training with a peak learning rate of 1e-3.
learn.fit_one_cycle(n_epoch=3, lr_max=1e-3)

In [None]:
# Checkpoint the learner after the first training stage under the name "freezed".
learn.save("freezed")
# To resume from this checkpoint instead of retraining, uncomment:
#learn = learn.load("freezed").to_fp16()

In [None]:
# Make all layers trainable, then re-run the learning-rate finder because the
# suitable range for the full network may differ from the first stage.
learn.unfreeze()
learn.lr_find()

In [None]:
# Second stage: 5 epochs on the unfrozen model. Passing a slice gives fastai a
# range of peak learning rates (1e-6 .. 1e-4) to spread across the layer groups.
learn.fit_one_cycle(n_epoch=5, lr_max=slice(1e-6, 1e-4))

# Get predictions of test data

In [None]:
# Collect the test images and wrap them in a dataloader derived from `dls`,
# using a small batch size of 8.
test_files = get_image_files(path="../input/dog-breed-identification/test")
test_dl = dls.test_dl(test_items=test_files, bs=8)

In [None]:
# Predict on the test set with test-time augmentation.
# NOTE(review): `test_dl` was built without labels, so `targs` is presumably None - confirm.
preds, targs = learn.tta(dl=test_dl)

In [None]:
# Convert the raw model outputs to per-class probabilities. The result is stored
# under a new name so that re-running this cell does not apply softmax twice.
probs = torch.softmax(preds, dim=1)

# Build the submission in one shot: one probability column per breed (in vocab
# order), then the image id (file name without extension) as the first column.
sub = pd.DataFrame(probs.detach().cpu().numpy(), columns=list(dls.vocab))
sub.insert(0, "id", [f.stem for f in test_files])
sub.to_csv("submission.csv", index=False)