### Add Callbacks + Metrics
Adding more setup, by-epoch, teardown methods to learner

Using the new callback `TestSetRecorder`, we run validation on a third dataset. (I still need to figure out whether tfms are being applied to this third dataset; they should be if `test_accuracy` matches `my_test_metrics` on an accurate model.)

Using `subcat_color_acc`, `subcat_piece_acc` we add metrics to be monitored at each epoch, for correctly predicting a piece's player-color and piece-class individually. What could this show?

In [1]:
from fastai2.vision.all import *
import sys
sys.path.append('..')

%load_ext autoreload
%autoreload 2

from modules.trainutils import (build_dl, 
                                piece_class_parse,
                                my_metrics, 
                                my_test_metrics,
                                stratify_sample)

from modules.trainutils import (subcat_color_acc,
                                subcat_piece_acc)

# from modules.learnutils import TestSetRecorder

# Image folders, relative to this notebook: `train_path` feeds the
# train/valid loaders, `test_path` feeds the separate test loader.
train_path, test_path = (
    Path('../../../rf-chess-data/cropped_v1/'),
    Path('../../../other-chess-data/regulation-test-all/'),
)

In [2]:
# Build the loaders for the hold-out test images with the project helper.
# NOTE(review): n=None presumably means "use every image" — confirm in build_dl.
test_dl = build_dl(test_path, n=None)
# Number of items in the `train` split of the test loaders (cell output).
len(test_dl.train.items)

228

In [3]:
# `n` is handed to stratify_sample; `seed` drives both the sampling and the
# train/valid split.
# NOTE(review): exact meaning of `n` (total vs. per-class) lives in stratify_sample — confirm.
n = 200
seed = 42

train_dl = ImageDataLoaders.from_name_func(
    train_path,
    # A stratified subset is used here instead of get_image_files(train_path).
    stratify_sample(train_path, n=n, np_seed=seed),
    valid_pct=0.2,
    # Reuse `seed` rather than a second hard-coded 42, so changing the value
    # above moves the sample and the split together.
    seed=seed,
    label_func=piece_class_parse,
    item_tfms=RandomResizedCrop(128, min_scale=0.5),
    batch_tfms=aug_transforms(),
)

##### Setup Custom Learner
 - add new metrics
 - add third 'dl' to learner

In [4]:
# resnet18 learner on the train/valid loaders.  Besides overall accuracy, two
# sub-category metrics (piece colour and piece class) are reported each epoch.
learn = cnn_learner(train_dl, resnet18, 
                    metrics=[accuracy, 
                             subcat_color_acc, 
                             subcat_piece_acc,
                            ]
                   )

In [5]:
# Rebuild the learner's DataLoaders with a third entry: the existing train and
# valid loaders plus the test loader, which is then reachable as learn.dls[2].
new_dl = DataLoaders(learn.dls[0], learn.dls[1], test_dl.train)
learn.dls = new_dl

In [6]:
# Item counts of the three loaders, in order: train, valid, test.
tuple(len(learn.dls[idx].items) for idx in range(3))

(154, 38, 228)

##### Build callback

In [7]:
class TestSetRecorder(Callback):
    """Run validation on an extra dataloader after every epoch and keep the results.

    After each epoch the learner's ``_do_epoch_validate`` is called on the
    loader at index ``ds_idx``.  Whatever that pass appends to
    ``recorder.log`` is stored as one entry of ``self.values``, and the log is
    then put back to its previous content so the extra entries do not remain
    in the main recorder's log.

    Args:
        ds_idx: Index into ``learn.dls`` of the loader to validate on
            (default 2, i.e. the third loader).
        **kwargs: Accepted for backward compatibility only; unused.  A
            warning is emitted when any are passed, because a misspelt
            ``ds_idx`` would otherwise be ignored without notice.
    """

    def __init__(self, ds_idx=2, **kwargs):
        if kwargs:
            # Local import keeps this notebook cell self-contained.
            import warnings
            warnings.warn(
                "TestSetRecorder ignoring unexpected keyword arguments: "
                f"{sorted(kwargs)}",
                stacklevel=2,
            )
        self.values = []
        self.ds_idx = ds_idx

    def after_epoch(self):
        """Validate on the extra loader and record the newly logged values."""
        # Snapshot the log so the extra validation pass can be undone below.
        log_before = self.recorder.log.copy()
        self.learn._do_epoch_validate(ds_idx=self.ds_idx, dl=None)
        # Only the entries added by the pass above belong to this callback.
        self.values.append(self.recorder.log[len(log_before):])
        self.recorder.log = log_before

In [8]:
# learn.recorder.train_metrics = True
# Attach the callback to the third (test) loader.  The keyword must be spelt
# `ds_idx`: the constructor also takes **kwargs, so a misspelling such as
# `ds_idsx` is accepted without being used and only the default applies.
learn.add_cb(TestSetRecorder(ds_idx=2))

<fastai2.learner.Learner at 0x7f7c68005bd0>

##### Fit

In [9]:
# Train for 6 epochs; the table below is the standard recorder's per-epoch log.
learn.fit_one_cycle(6)

epoch,train_loss,valid_loss,accuracy,subcat_color_acc,subcat_piece_acc,time
0,3.952985,9.220811,0.078947,0.447368,0.105263,00:07
1,3.596785,4.932112,0.078947,0.394737,0.078947,00:08
2,3.243423,2.496461,0.263158,0.578947,0.421053,00:08
3,2.932016,1.539852,0.342105,0.789474,0.421053,00:08
4,2.621784,1.154464,0.552632,0.921053,0.578947,00:08
5,2.421329,1.041189,0.578947,0.973684,0.605263,00:08


##### Examine + Format values within callbacks

In [10]:
def get_cb_index(learn, cb_name):
    """Return the index in ``learn.cbs`` of the first callback whose class is named ``cb_name``.

    Args:
        learn: Object exposing an iterable ``cbs`` of callback instances.
        cb_name: Class name to look for, e.g. ``'Recorder'``.

    Returns:
        Zero-based position of the first matching callback.

    Raises:
        IndexError: If no callback has that class name.  The message lists
            the class names that are present.
    """
    # Stop at the first match instead of building the full list of matches.
    for idx, cb in enumerate(learn.cbs):
        if cb.__class__.__name__ == cb_name:
            return idx
    available = [cb.__class__.__name__ for cb in learn.cbs]
    raise IndexError(
        f"no callback named {cb_name!r} in learn.cbs; available: {available}"
    )

def get_cb(learn, cb_name):
    """Return the callback on ``learn`` whose class is named ``cb_name``."""
    position = get_cb_index(learn, cb_name)
    return learn.cbs[position]

# Handles to the two callbacks whose recorded values are compared below: the
# learner's standard Recorder and the TestSetRecorder attached earlier.
first_recorder = get_cb(learn, 'Recorder')
second_recorder = get_cb(learn, 'TestSetRecorder')

In [11]:
# Show the callbacks currently attached to the learner.
learn.cbs

(#4) [TrainEvalCallback,Recorder,ProgressCallback,TestSetRecorder]

In [12]:
# Per-epoch values from the standard Recorder.  metric_names[1:-1] drops the
# first and last names, which the fit table shows as 'epoch' and 'time'.
df_recorder = pd.DataFrame(
    first_recorder.values,
    columns=first_recorder.metric_names[1:-1],
)

# Column labels for the test-set values: a loss followed by every metric,
# each prefixed with 'test_'.
met_names = [metric.name for metric in first_recorder.metrics]
cols = ['test_' + name for name in ['loss'] + met_names]

df_second_recorder = pd.DataFrame(second_recorder.values, columns=cols)

In [13]:
# Train/valid values per epoch; should mirror the fit table above.
df_recorder

Unnamed: 0,train_loss,valid_loss,accuracy,subcat_color_acc,subcat_piece_acc
0,3.952985,9.220811,0.078947,0.447368,0.105263
1,3.596785,4.932112,0.078947,0.394737,0.078947
2,3.243423,2.496461,0.263158,0.578947,0.421053
3,2.932016,1.539852,0.342105,0.789474,0.421053
4,2.621784,1.154464,0.552632,0.921053,0.578947
5,2.421329,1.041189,0.578947,0.973684,0.605263


In [14]:
# Test-set loss and metrics per epoch, as collected by TestSetRecorder.
df_second_recorder

Unnamed: 0,test_loss,test_accuracy,test_subcat_color_acc,test_subcat_piece_acc
0,4.132629,0.100877,0.464912,0.223684
1,2.947631,0.109649,0.464912,0.232456
2,2.667915,0.144737,0.52193,0.267544
3,2.776058,0.105263,0.557018,0.223684
4,2.969409,0.114035,0.54386,0.223684
5,3.178069,0.131579,0.530702,0.22807


##### Verify this works
Does the accuracy calculated in the `TestSetRecorder` callback match the value from another method?

In [15]:
# Test accuracy after the last epoch.  The final row is taken by position
# instead of the hard-coded label 5, so this still works if the number of
# epochs passed to fit_one_cycle changes.
test_acc_final = df_second_recorder['test_accuracy'].iloc[-1]
test_acc_final

0.1315789520740509

In [17]:
# Independent check: recompute the test-set metrics with the project helper.
# NOTE(review): index 1 is presumably accuracy (index 0 the loss) — confirm in my_test_metrics.
test_metrics = my_test_metrics(learn, test_path)
test_acc_final_2 = test_metrics[1]
test_acc_final_2

0.09649122807017543

OK, so they are slightly different (about 0.132 vs. 0.096), but the callback's value is still clearly computed on the test set, because accuracy on the validation split of the training data is about 58% by the final epoch.