In [None]:
# Run the `nvidia-smi` command-line tool in a shell to print the GPU status.
!nvidia-smi

In [None]:
import gc
import os
import subprocess
import time
from datetime import datetime
from os.path import join as pjoin

In [None]:
from fastai.vision import ImageDataBunch,ImageList,get_transforms,models,cnn_learner,accuracy
import torch

## Prepare for logging

In [None]:
import json
import os.path
import re
import ipykernel
import requests


from requests.compat import urljoin

from notebook.notebookapp import list_running_servers

def get_notebook_name():
    """
    Return the path of the running jupyter notebook.

    The kernel id is parsed from this kernel's connection file name and
    matched against the sessions reported by each running notebook server.

    Returns:
        The ``notebook.path`` value of the matching session as reported by
        the server's ``api/sessions`` endpoint (presumably relative to the
        server's root directory - the original code called it
        ``relative_path``), or ``None`` when the kernel id cannot be parsed
        or no reachable server lists a session for this kernel.
    """
    # Raw string with an escaped dot so only a literal ".json" suffix matches.
    match = re.search(r'kernel-(.*)\.json',
                      ipykernel.connect.get_connection_file())
    if match is None:
        return None
    kernel_id = match.group(1)

    for ss in list_running_servers():
        try:
            response = requests.get(urljoin(ss['url'], 'api/sessions'),
                                    params={'token': ss.get('token', '')})
        except requests.RequestException:
            # A listed server may no longer be reachable; try the next one.
            continue
        if not response.ok:
            # Error responses (e.g. failed authentication) carry an error
            # object rather than a list of sessions, so skip them.
            continue
        for nn in json.loads(response.text):
            if nn['kernel']['id'] == kernel_id:
                return nn['notebook']['path']
    return None


from IPython.display import Javascript

# JavaScript executed by the notebook front-end: asks Jupyter to save a checkpoint.
script = '''
require(["base/js/namespace"],function(Jupyter) {
    Jupyter.notebook.save_checkpoint();
});
'''

def notebook_save():
    """
    Ask the notebook front-end to save a checkpoint of this notebook.

    A ``Javascript`` object only runs when it is rendered as output, so it is
    passed to ``display`` explicitly; merely constructing it inside a function
    has no effect. The script runs in the browser, so the save may still be in
    progress when this function returns - callers should wait briefly before
    reading the notebook file from disk.
    """
    # Local import keeps this helper usable even if `display` is not in scope.
    from IPython.display import display

    display(Javascript(script))
    print('This notebook has been saved')



In [None]:
# vars for models logging
#
# Layout: TRAIN_LOG_DIR/MODEL_NAME/<version>.<date>/ holds the checkpoints of one run.

TRAIN_LOG_DIR = 'train_log'
# Everything before the first '.' of the notebook path is used as the model name.
MODEL_NAME = get_notebook_name().split('.')[0]

model_log_dir = pjoin(TRAIN_LOG_DIR, MODEL_NAME)
os.makedirs(model_log_dir, exist_ok=True)


def find_version(x):
    """Return the integer prefix of a '<version>.<rest>' entry name."""
    return int(x.split('.')[0])


def _existing_versions(log_dir):
    """Return the sorted version numbers found in ``log_dir``.

    Entries whose prefix is not an integer (for example hidden files such as
    '.ipynb_checkpoints' or '.DS_Store') are skipped instead of raising.
    """
    versions = []
    for entry in os.listdir(log_dir):
        try:
            versions.append(find_version(entry))
        except ValueError:
            continue
    return sorted(versions)


list_versions = _existing_versions(model_log_dir)
# Next version is one past the highest existing one; the first run is version 0.
version = list_versions[-1] + 1 if list_versions else 0

date = datetime.today().strftime('%d-%m-%Y-%H-%M')
# Zero-padded to three digits so folder names sort by version.
save_folder = f"{version:0>3d}" + '.' + date
model_save_dir = pjoin(model_log_dir, save_folder)
os.makedirs(model_save_dir, exist_ok=True)
model_save_dir


In [None]:
# Display the save directory of this run.
model_save_dir

In [None]:
# Header of a free-form note for this run: names the version and the model.
note_header = f"""
Note for version {version} model {MODEL_NAME}:

"""
print(note_header)

### Init data loader

In [None]:
# Root folder of the dataset; its 'test' sub-folder is named explicitly below.
data_path = '/home/qnkhuat/data/emotion_compilation_split'

# Augmentations: horizontal flips only, max_rotate=10, max_zoom=1.1.
tfms = get_transforms(do_flip=True, flip_vert=False, max_rotate=10, max_zoom=1.1)

# NOTE (original author): running this raised an error in 'THcroe'
# (presumably THCore) - TODO confirm whether it still occurs.
bunch = ImageDataBunch.from_folder(data_path, test='test', size=48, ds_tfms=tfms, bs=256)
data = bunch.normalize()

In [None]:
# Preview samples from `data` (rows=3) as a sanity check of the loading step.
data.show_batch(rows=3)

## Init model

In [None]:
# Architecture handed to cnn_learner: resnet18 from fastai's `models`.
model = models.resnet18

In [None]:
# Build the learner from the data bunch and the chosen architecture.
learn = cnn_learner(data, model)

In [None]:
### THE DIRECTORY TO SAVE CHECKPOINTS
# Use the absolute path of this run's save folder as the learner's model directory.
learn.model_dir = os.path.abspath(model_save_dir)

In [None]:
# Run the learning-rate finder, then plot what the recorder captured to help pick `lr`.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Learning rate for the first training stage (passed to fit_one_cycle as slice(lr)).
lr=5e-2

In [None]:
# Use accuracy as the learner's only metric.
learn.metrics = [accuracy]

## Start training

In [None]:
# Stage 1: 12 epochs of one-cycle training with slice(lr) as the learning rate.
learn.fit_one_cycle(12,slice(lr))

In [None]:
#learn.fit(12,lr)

In [None]:
# Checkpoint the learner under the name 'stage-1'.
learn.save('stage-1')

#### Unfreeze 

In [None]:
# Unfreeze the learner before the next training stage.
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder on the unfrozen learner and plot the result.
learn.lr_find()
learn.recorder.plot()

In [None]:
lr=1e-5

In [None]:
learn.fit(15)

In [None]:
# Checkpoint the learner under the name 'stage-2'.
learn.save('stage-2')

#### Refreeze

In [None]:
# Freeze the learner again before the next training stage.
learn.freeze()

In [None]:
# 9 epochs of one-cycle training; no learning rate is passed, so the default is used.
learn.fit_one_cycle(9)

In [None]:
# Checkpoint the learner under the name 'stage-4'
# (no 'stage-3' checkpoint is saved in the visible cells).
learn.save('stage-4')

In [None]:
# Run the learning-rate finder once more and plot the result.
learn.lr_find()
learn.recorder.plot()

In [None]:
# 10 more epochs with an explicit learning rate of 1e-6.
learn.fit(10,lr=1e-6)

In [None]:
# Checkpoint the learner under the name 'stage-5'.
learn.save('stage-5')

In [None]:
# Accuracy, scaled to percent, computed from the outputs of learn.TTA().
tta_outputs = learn.TTA()
tta = accuracy(*tta_outputs).item() * 100
print(tta)

# Remember to save the checkpoint before running this

In [None]:
# Save the notebook, then export it to HTML next to this run's checkpoints,
# with the TTA accuracy embedded in the file name.
notebook_save()
time.sleep(3) # to save the checkpoint
nb_name = model_save_dir+'_acc_'+str(tta)+'.html'

notebook_path = get_notebook_name()
if notebook_path is None:
    print('Could not determine the notebook path; skipping the HTML export')
else:
    # Arguments are passed as a list (no shell), so notebook or output paths
    # containing spaces or shell metacharacters are handled correctly.
    # The format is stated explicitly because some nbconvert versions
    # require `--to` instead of defaulting to HTML.
    export = subprocess.run(['jupyter', 'nbconvert', '--to', 'html',
                             notebook_path, '--output', nb_name])
    if export.returncode != 0:
        print(f'jupyter nbconvert failed with exit code {export.returncode}')
nb_name