<a href="https://colab.research.google.com/github/rmartimarly/teaching_misc/blob/main/AML_fastai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip3 install fastai --upgrade

from fastai.vision.all import *
from sklearn.metrics import accuracy_score
import numpy as np

Collecting fastai
  Downloading fastai-2.5.3-py3-none-any.whl (189 kB)
[K     |████████████████████████████████| 189 kB 5.4 MB/s 
Collecting fastcore<1.4,>=1.3.22
  Downloading fastcore-1.3.29-py3-none-any.whl (55 kB)
[K     |████████████████████████████████| 55 kB 2.4 MB/s 
Collecting fastdownload<2,>=0.0.5
  Downloading fastdownload-0.0.5-py3-none-any.whl (13 kB)
Installing collected packages: fastcore, fastdownload, fastai
  Attempting uninstall: fastai
    Found existing installation: fastai 1.0.61
    Uninstalling fastai-1.0.61:
      Successfully uninstalled fastai-1.0.61
Successfully installed fastai-2.5.3 fastcore-1.3.29 fastdownload-0.0.5


In [None]:
# dataset from Drive
from google.colab import drive
drive.mount('/content/drive')
!mkdir /content/dataset/
!unzip "drive/MyDrive/AML/AML_dataset.zip" -d "/content/dataset/"

In [None]:
# Loading train and validation datasets
img_path = 'dataset/'
img_size = 32
batch_size = 128

# Use the GPU when one is attached, otherwise fall back to the CPU so the
# notebook still runs on a runtime without an accelerator.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Images are collected recursively from `img_path`; the grandparent folder
# ('train' / 'validation') decides the split and the parent folder is the label.
dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                   get_items=get_image_files,
                   splitter=GrandparentSplitter(train_name='train',
                                                valid_name='validation'
                                                ),
                   get_y=parent_label,
                   item_tfms=[Resize(img_size)],   # per-item resize before batching
                   batch_tfms=aug_transforms()     # default augmentations, applied per batch
                   )
dls = dblock.dataloaders(img_path,
                         bs=batch_size,
                         verbose=True,
                         device=device
                         )
print(dls.vocab)
dls.show_batch()

In [None]:
# Model: ResNet-34 learner built on the `dls` DataLoaders.
# Options are gathered in one dict so they can be read (and tweaked) at a glance.
learner_kwargs = dict(
    splitter=default_split,
    metrics=[accuracy, error_rate],
    cbs=[ShowGraphCallback()],
    model_dir='drive/MyDrive/AML/models/',
)
learn = cnn_learner(dls, resnet34, **learner_kwargs)
learn.summary()

In [None]:
# automatic learning rate finder
# Runs fastai's learning-rate finder on `learn`. The call is the last expression
# of the cell, so whatever it returns is shown as the cell output; the result is
# not stored in a variable here.
# NOTE(review): presumably used to sanity-check the learning rates chosen in the
# training cells — confirm.
learn.lr_find()

In [None]:
# Transfer learning (warm up): train with the pretrained layers frozen.
epochs = 10
lr = 1e-3

# Keep the checkpoint with the best validation accuracy and stop once accuracy
# has not improved by at least `min_delta` for `patience` consecutive epochs.
callbacks = [SaveModelCallback(monitor='accuracy',
                               comp=np.greater,
                               fname='best_model'
                               ),
             EarlyStoppingCallback(monitor='accuracy',
                                   min_delta=1e-4,
                                   patience=3
                                   )
             ]

# Freeze explicitly so the cell gives the same result when re-run after the
# fine-tuning cell has called learn.unfreeze().
learn.freeze()
# Pass `lr` explicitly; previously it was defined but never used, so editing it
# had no effect on training.
learn.fit_one_cycle(epochs, lr_max=lr, cbs=callbacks)

In [None]:
# Fine tuning: unfreeze the whole network and train with discriminative
# learning rates.
epochs = 50
lr = 1e-4        # lower bound of the learning-rate slice
lr_head = 1e-3   # upper bound of the learning-rate slice

# Keep the checkpoint with the best validation accuracy and stop once accuracy
# has not improved by at least `min_delta` for `patience` consecutive epochs.
callbacks = [SaveModelCallback(monitor='accuracy',
                               comp=np.greater,
                               fname='best_model'
                               ),
             EarlyStoppingCallback(monitor='accuracy',
                                   min_delta=1e-4,
                                   patience=5
                                   )
             ]

learn.unfreeze()
# The slice is built from the named values above (previously `lr` was unused and
# the same numbers were repeated as literals in the call).
learn.fit_one_cycle(epochs,
                    lr_max=slice(lr, lr_head),
                    cbs=callbacks
                    )
# Persist the weights (under the learner's model_dir) and the full exported
# learner for later inference.
learn.save('model')
learn.export('drive/MyDrive/AML/model.pkl')

In [None]:
# Confusion matrix
# Build the interpretation object from the current learner, then draw the
# confusion matrix on a 10x10 figure.
cm_figsize = (10, 10)
interp = ClassificationInterpretation.from_learner(learn)
# Optional: interp.plot_top_losses(9, nrows=3)

interp.plot_confusion_matrix(figsize=cm_figsize)

In [None]:
# Loading model
# Alternative (kept for reference): rebuild the learner and load the weights
# written by learn.save('model') instead of using the exported learner:
#   learn = cnn_learner(dls,
#                       resnet34,
#                       splitter=default_split,
#                       metrics=[accuracy, error_rate],
#                       cbs=[ShowGraphCallback()],
#                       model_dir='drive/MyDrive/AML/models/'
#                       )
#   learn.load('model')
#
# NOTE(review): load_learner unpickles the file, which can execute arbitrary
# code — only load exports that you created or otherwise trust.
learn = load_learner('drive/MyDrive/AML/model.pkl')

In [None]:
# Predictions on a labelled test dataset
labelled_dir = 'dataset/test_labelled/'
test_files = get_image_files(labelled_dir)

# Build a test DataLoader that also yields targets, then collect predictions
# and targets from the learner.
test_dl = learn.dls.test_dl(test_files, with_labels=True)
preds, targs = learn.get_preds(dl=test_dl)

# Predicted class = index of the highest score along dim 1.
preds_class = preds.argmax(dim=1)
test_acc = accuracy_score(targs, preds_class)
print('Test accuracy: ', test_acc)

In [None]:
# Predictions on an unlabelled test dataset
# Files are sorted so that row i of the output corresponds to the i-th file in
# sorted order.
test_files = get_image_files('dataset/test_unlabelled/')
test_files = test_files.sorted()
test_dl = learn.dls.test_dl(test_files, with_labels=False)
preds, _ = learn.get_preds(dl=test_dl)

# Predicted class = index of the highest score along dim 1.
preds_class = preds.argmax(dim=1)
# Class indices are integers: write them with fmt='%d' (the default format would
# emit them as floats in scientific notation, e.g. 3.000000000000000000e+00).
np.savetxt('drive/MyDrive/AML/predictions.csv',
           preds_class.cpu().numpy(),
           fmt='%d',
           delimiter=','
           )