In [None]:
# hide
%load_ext autoreload
%autoreload 2

In [None]:
# default_exp validations

In [None]:
# export
from unittest.mock import Mock

import logging
from random import randint,random
from itertools import product

from job_offer_classifier.pipeline_classifier import Pipeline
import tensorflow as tf

In [None]:
# hide
# Logging config: INFO-level messages prefixed with a time stamp and the level name.
# `%I` is the 12-hour clock hour, so the time stamps carry no AM/PM marker.
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO, datefmt='%I:%M:%S')
# Restrict TensorFlow's own (compat.v1) logger to ERROR verbosity.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Algorithm Validations
> Validations for the Sentiment Classifier. 

To assess the performance of the model, k-fold validation is used. After all folds have been run, the scores are averaged over the folds.

## Custom Log Status

In [None]:
# export
def log_status(k, evaluation, *args):
    '''Log the test-set accuracy and F1 score of a finished fold.

    Parameters
    ----------
    k : fold identifier; only used inside the log messages.
    evaluation : mapping providing `evaluation['test']['accuracy']` and
        `evaluation['test']['f1_score']`.
    *args : optional extra items, logged together in a single record, one
        item per line. Items that are not strings are converted with `str`.

    Raises
    ------
    KeyError : if `'test'` or one of the two scores is missing.
    '''
    test_scores = evaluation['test']
    for key in ('accuracy', 'f1_score'):
        # Lazy %-style arguments: the record is only formatted when it is emitted.
        # The five trailing blanks belong to the original message and are kept.
        logging.info(
            'The %s score for the test set in fold %s is %s' + 5 * ' ',
            key, k, test_scores[key]
        )
    if args:
        # str() each item so that numbers or other objects can be passed as extra info.
        logging.info('\n'.join(map(str, args)))
    logging.info(f'fold {k} has finished...')

An example of the custom logging

In [None]:
# Fold 0 with made-up scores; the trailing string is passed through *args as extra info.
log_status(0,{'test':{'accuracy': 1/2.,'f1_score':1/3}},'this is extra info')

07:46:34 INFO: The accuracy score for the test set in fold 0 is 0.5     
07:46:34 INFO: The f1_score score for the test set in fold 0 is 0.3333333333333333     
07:46:34 INFO: this is extra info
07:46:34 INFO: fold 0 has finished...


## Average over Scores

In [None]:
# export
def average_scores(**scores):
    ''' For each dataset and score, calculates the average over folds

    Parameters
    ----------
    **scores : one keyword per fold, each a nested mapping
        `{dataset: {score_name: value}}`.

    Returns
    -------
    dict `{dataset: {score_name: mean over folds}}`. The datasets and score
    names are taken from the first fold; an empty call returns `{}`.

    Raises
    ------
    KeyError : if a later fold lacks a dataset or score present in the first fold.
    '''
    if not scores:
        return {}
    first_fold = next(iter(scores.values()))
    n_folds = len(scores)
    return {
        dataset: {
            # Each dataset is averaged over its own score names, so datasets
            # may report different metrics.
            name: sum(fold[dataset][name] for fold in scores.values()) / n_folds
            for name in dataset_scores
        }
        for dataset, dataset_scores in first_fold.items()
    }

Consider the case of two folds in a train set with 'acc' and 'f1' scores

In [None]:
# Four random scores: two metrics ('acc', 'f1') for each of two folds.
rnd1, rnd2, rnd3, rnd4 = [random() for _ in range(4)]
fold1 = {'train': {'acc': rnd1, 'f1': rnd2}}
fold2 = {'train': {'acc': rnd3, 'f1': rnd4}}
# Expected per-metric averages over the two folds, computed by hand.
avg_acc, avg_f1 = (rnd1 + rnd3) / 2, (rnd2 + rnd4) / 2

train_score_avgs = {'train': {'acc': avg_acc, 'f1': avg_f1}}

# average_scores must reproduce the hand-computed averages.
assert average_scores(fold1=fold1, fold2=fold2) == train_score_avgs

In [None]:
# export
class KFoldPipe(Pipeline):
    ''' Inherits from [`Pipeline`](job_offer_classifier/pipeline_classifier)
    and incorporates the k fold validation
    '''
    def __init__(self, n_splits, **args):
        ''' Set up the bookkeeping for `n_splits` folds.

        `n_splits` is the number of folds; every other keyword argument is
        forwarded unchanged to `Pipeline.__init__`.
        '''
        super().__init__(**args)
        self.n_splits = n_splits
        # Best test F1 score seen so far; -1.0 lies below any score in [0, 1].
        self.best_score = -1.0
        self.best_seed = None
        # Initialised like best_seed so it can be read before any fold has run.
        self.best_fold = None
        self.history = {}
        self.averages = {}
        # 1 - 1/n_splits; presumably the training share used by
        # `Pipeline.split_dataset` -- TODO confirm against Pipeline.
        self.frac = 1 - (1 / n_splits)
        self.get_seeds()

    def get_seeds(self):
        ''' Dictionary of random seeds, one per fold

        Keys are the fold labels '1', ..., str(n_splits); values are integers
        drawn with `randint(0, 2**32 - 1)`.
        '''
        self.seeds = {
            str(k + 1): randint(0, 2**32 - 1)
            for k in range(self.n_splits)
        }

    def update_best_score(self):
        '''Update the fold and seed that correspond to
           the best score

        The current fold replaces the stored best only when its test F1
        score is strictly greater than `best_score`.
        '''
        f1_score = self.evaluation['test']['f1_score']
        if self.best_score < f1_score:
            self.best_score = f1_score
            self.best_fold = self.current_fold
            self.best_seed = self.random_state

    def update_history(self, fold):
        ''' Update history records

        Logs the current evaluation and stores it in `history` under `fold`.
        '''
        log_status(fold, self.evaluation)
        # NOTE(review): stores a reference, not a copy -- assumes the pipeline
        # assigns a fresh `evaluation` object on every run; confirm.
        self.history[fold] = self.evaluation

    def average_evaluations(self):
        '''Average over evaluation results

        Stores in `averages` the per-dataset, per-score mean over all folds
        recorded in `history`.
        '''
        self.averages = average_scores(**self.history)

    def k_fold_validation(self):
        ''' Runs the pipeline over all the seeds.
        Stores the best seed, keeps evaluations history and averages over the scores.

        On each iteration:

           -splits the data over the seed of the current iteration

           -run the pipeline

           -update history and best scores

        '''

        for k, seed in self.seeds.items():
            # The seed is set before splitting so that the split uses it.
            self.random_state = seed
            self.current_fold = k
            self.split_dataset()
            self.pipeline()
            self.update_history(fold=k)
            self.update_best_score()
        self.average_evaluations()

In [None]:
# Import only the helper used in this cell rather than star-importing nbdev.showdoc.
from nbdev.showdoc import show_doc
show_doc(KFoldPipe.get_seeds)
show_doc(KFoldPipe.average_evaluations)
show_doc(KFoldPipe.update_best_score)
show_doc(KFoldPipe.update_history)
show_doc(KFoldPipe.k_fold_validation)


<h4 id="KFoldPipe.get_seeds" class="doc_header"><code>KFoldPipe.get_seeds</code><a href="__main__.py#L15" class="source_link" style="float:right">[source]</a></h4>

> <code>KFoldPipe.get_seeds</code>()

Array of random seeds
        

<h4 id="KFoldPipe.average_evaluations" class="doc_header"><code>KFoldPipe.average_evaluations</code><a href="__main__.py#L39" class="source_link" style="float:right">[source]</a></h4>

> <code>KFoldPipe.average_evaluations</code>()

Average over evaluation results
        

<h4 id="KFoldPipe.update_best_score" class="doc_header"><code>KFoldPipe.update_best_score</code><a href="__main__.py#L23" class="source_link" style="float:right">[source]</a></h4>

> <code>KFoldPipe.update_best_score</code>()

Update the folds and seeds that correspond to
the best score

<h4 id="KFoldPipe.update_history" class="doc_header"><code>KFoldPipe.update_history</code><a href="__main__.py#L33" class="source_link" style="float:right">[source]</a></h4>

> <code>KFoldPipe.update_history</code>(**`fold`**)

Update history records
        

<h4 id="KFoldPipe.k_fold_validation" class="doc_header"><code>KFoldPipe.k_fold_validation</code><a href="__main__.py#L44" class="source_link" style="float:right">[source]</a></h4>

> <code>KFoldPipe.k_fold_validation</code>()

Runs the pipeline over all the seeds.
Stores the best seed, keeps evaluations history and averages over the scores. 

On each iteration: 

   -splits the data over the seed of the current iteration 

   -run the pipeline 

   -update history and best scores 

Check functions in KFoldPipe

In [None]:
# Build a pipe without running it and set by hand the attributes that
# `update_best_score` and `update_history` read.
kfp = KFoldPipe(n_splits=4, src_file='../data/interim/payloads.csv')
kfp.evaluation = {'test': {'f1_score': 0.5,'accuracy':0.55}}
kfp.random_state = 45
# Fold labels are strings in `seeds` and `history`, so use a string here too;
# otherwise `kfp.history[kfp.best_fold]` would not find the entry.
kfp.current_fold = '1'
kfp.update_best_score()
kfp.update_history(fold='1')

assert kfp.best_score == 0.5 and kfp.best_seed == 45
assert kfp.best_fold == '1'
assert kfp.history == {'1': kfp.evaluation}

# A second fold, then the averages over both recorded folds.
kfp.evaluation = {'test': {'f1_score': 0.6,'accuracy':0.7}}
kfp.update_history(fold='2')
kfp.average_evaluations()

assert kfp.averages  == {'test': {'f1_score': (0.6 + 0.5)/2,'accuracy':(0.55+0.7)/2}}

07:46:35 INFO: The accuracy score for the test set in fold 1 is 0.55     
07:46:35 INFO: The f1_score score for the test set in fold 1 is 0.5     
07:46:35 INFO: fold 1 has finished...
07:46:35 INFO: The accuracy score for the test set in fold 2 is 0.7     
07:46:35 INFO: The f1_score score for the test set in fold 2 is 0.6     
07:46:35 INFO: fold 2 has finished...


In [None]:
# Full run over three folds, each with its own random seed.
# In the recorded run each fold took roughly 35-40 s (see the log time stamps).
kfp = KFoldPipe(n_splits=3,src_file='../data/interim/payloads.csv')
kfp.k_fold_validation()

07:46:35 INFO: Using /tmp/tfhub_modules to cache modules.
07:47:15 INFO: The accuracy score for the test set in fold 1 is 0.96694213     
07:47:15 INFO: The f1_score score for the test set in fold 1 is 0.9791666439139396     
07:47:15 INFO: fold 1 has finished...
07:47:49 INFO: The accuracy score for the test set in fold 2 is 0.9586777     
07:47:49 INFO: The f1_score score for the test set in fold 2 is 0.974093273148837     
07:47:49 INFO: fold 2 has finished...
07:48:25 INFO: The accuracy score for the test set in fold 3 is 0.94214875     
07:48:25 INFO: The f1_score score for the test set in fold 3 is 0.9637305687064565     
07:48:25 INFO: fold 3 has finished...


The highest of the F1 scores in the *history* attribute coincides with *best_score*

In [None]:
# `best_score` must be the maximum test F1 over all folds (fold labels are '1'..str(n_splits)).
assert max(
    kfp.history[str(k + 1)]['test']['f1_score'] for k in range(kfp.n_splits)
) == kfp.best_score

# `best_fold` must point at the history entry that holds that score.
assert kfp.history[kfp.best_fold]['test']['f1_score'] == kfp.best_score

Create a new instance of the KFoldPipe and run the pipeline with the best seed

In [None]:
# Same number of splits as the validated pipe; `random_state` is forwarded to
# `Pipeline.__init__` through **args. `pipeline()` is called directly, so
# `k_fold_validation` and its per-fold seeds are not involved here.
pl = KFoldPipe(n_splits=3,src_file='../data/interim/payloads.csv',random_state =kfp.best_seed)
pl.pipeline()

The evaluation in this pipeline should match the evaluation of the best-scoring fold. In practice the two agree only up to a tolerance, which is estimated below.

In [None]:
def min_factors(x, y):
    ''' Find the first pair (n, m) that satisfies
     *abs(x-y) < m x 10 ^ (n)*

    Candidates are scanned in increasing order: n = -4, -3, -2, -1 and,
    for each n, m = 1, ..., 9. The first pair that satisfies the inequality
    is returned as the tuple (n, m); None is returned when no candidate does.
    '''
    gap = abs(x - y)
    for exponent in range(-4, 0):
        for mantissa in range(1, 10):
            if gap < mantissa * 10**exponent:
                return exponent, mantissa
    return None


# For every dataset/score pair, bound the difference between the re-run pipeline
# and the best fold. Tuples sort lexicographically, so the last element is the
# loosest of those bounds. Score names are taken from the 'train' entry for
# every dataset.
# NOTE(review): `min_factors` returns None when no bound fits, which would break
# the sort/unpacking here.
n, m = sorted(
    min_factors(pl.evaluation[dat][sc], kfp.history[kfp.best_fold][dat][sc])
    for dat in pl.evaluation for sc in pl.evaluation['train']
)[-1]

print(f'The evaluations are equal up to a factor of {m}x10^({n})')

The evaluations are equal up to a factor of 2x10^(-1)


In [None]:
# Every score of the re-run must agree with the best fold within m x 10^n.
# NOTE(review): n and m were derived from these same differences, so this
# presumably holds by construction -- confirm it is meant as a sanity check only.
assert all(
    abs(pl.evaluation[dat][sc] - kfp.history[kfp.best_fold][dat][sc]) < m *
    10**(n) for dat in pl.evaluation for sc in pl.evaluation['train']
)

In [None]:
# Test-set scores averaged over the folds of the k-fold run.
kfp.averages['test']

{'accuracy': 0.9559228618939718,
 'accuracy_baseline': 0.8016529083251953,
 'auc': 0.9919100801150004,
 'auc_precision_recall': 0.9979395270347595,
 'average_loss': 0.10010440647602081,
 'label/mean': 0.8016529083251953,
 'loss': 0.10010440647602081,
 'precision': 0.9791301290194193,
 'prediction/mean': 0.7820513447125753,
 'recall': 0.9656357367833456,
 'global_step': 5000.0,
 'f1_score': 0.9723301619230776}

In [None]:
# hide
# Convert the project's notebooks with nbdev; the output lists each converted notebook.
from nbdev.export import notebook2script
notebook2script()

Converted 00_merge_datasets.ipynb.
Converted 01_pipeline_classifier.ipynb.
Converted 02_validations.ipynb.
Converted index.ipynb.
