In [None]:
import os
import numpy as np
import tensorflow as tf
import math
import json
import time
from datetime import timedelta

from temporalcontext import settings
from temporalcontext.functions import read_selmap, read_folds_info, \
    get_lstm_model_filename, gen_list_of_section_files, \
    LSTMData, chunk_up, lstm_predict


# Identifiers of the hybrid model variants. The evaluation loop loads one
# saved model per identifier and pairs them, in this order, with their inputs.
hybrid_types = [
    't1',
    't2',
    't3',
]

# Template for the per-experiment scores file name; it is filled with the
# experiment's time_steps and pp values (both formatted as integers).
output_filename_fmt = 'scores_TS{:d}_PP{:d}.json'


In [None]:
# For every fold and every configured LSTM experiment: collect the stored CNN
# scores and run each available hybrid model over the fold's test files, then
# write all per-file scores and the observed score ranges to one JSON file.
selmap = read_selmap(os.path.join(settings.raw_data_root, 'selmap.csv'))
fold_file_idxs = read_folds_info(os.path.join(settings.raw_data_root, 'folds_info.txt'))

for fold_idx, fold_info in enumerate(fold_file_idxs):

    for lstm_exp in settings.lstm_experiments:
        print('---------- Fold {:02d}, segment_advance={:.2f}, PP={:d} ----------'.format(
            fold_idx + 1, lstm_exp['segment_advance'], lstm_exp['pp']))

        # Directory layout: <project>/<folds>/fNN/seg_adv_X.XX/{lstm data, models, scores}
        fold_seg_root = os.path.join(settings.project_root, settings.folds_dir,
                                     'f{:02d}'.format(fold_idx + 1),
                                     'seg_adv_{:.2f}'.format(lstm_exp['segment_advance']))
        input_root = os.path.join(fold_seg_root, settings.lstm_data_dir)
        model_dir = os.path.join(fold_seg_root, settings.models_dir)
        scores_root = os.path.join(fold_seg_root, settings.scores_dir)

        # Reset Keras' global state before loading this experiment's models
        tf.keras.backend.clear_session()

        # Load hybrid models. A model whose file does not exist is represented
        # by None and is skipped (empty score list) when scoring below.
        lstm_models = list()
        for m_type in hybrid_types:
            model_filepath = \
                os.path.join(model_dir,
                             get_lstm_model_filename(
                                 m_type, lstm_exp['time_steps'], lstm_exp['pp']))
            lstm_models.append(None if not os.path.exists(model_filepath)
                               else tf.keras.models.load_model(model_filepath))

        # Placeholders for collecting detection scores.
        # NOTE: a model type that never produces a score keeps [inf, -inf];
        # json.dump (default allow_nan=True) writes these as Infinity/-Infinity,
        # which Python's json module reads back but strict JSON parsers reject.
        per_file_scores = dict()
        score_ranges = {m_type: [math.inf, -math.inf]
                        for m_type in ['cnn'] + hybrid_types}
        num_samples = 0
        process_time = 0.0

        # Process each valid test file
        for sec_file in gen_list_of_section_files(input_root,
                                                  [selmap[f_idx][0] for f_idx in fold_info['test']],
                                                  settings.section_suffixes):

            # Load data
            _, _, t1_input, t2_input, _, _ = LSTMData.read(sec_file,
                                                           lstm_exp['time_steps'],
                                                           lstm_exp['pp'])

            # Skip files with fewer rows than the experiment's time_steps
            if t1_input.shape[0] < lstm_exp['time_steps']:
                continue

            num_samples += t1_input.shape[0]

            # Gather scores
            per_model_scores = dict()

            # CNN scores. Model was already run; just collect stored scores
            # (presumably column 0 of t1_input holds the CNN score - TODO confirm)
            m_type = 'cnn'
            _, scores = chunk_up(t1_input, t1_input[:, 0],
                                 lstm_exp['time_steps'], lstm_exp['pp'])
            per_model_scores[m_type] = scores.tolist()
            score_ranges[m_type][0] = min(score_ranges[m_type][0], float(scores.min()))
            score_ranges[m_type][1] = max(score_ranges[m_type][1], float(scores.max()))

            # Hybrid models' scores. Run each detector. Only this section is
            # timed, including the construction of the concatenated t3 input.
            start_time = time.time()
            for m_type, lstm_model, lstm_data in zip(hybrid_types,
                                                      lstm_models,
                                                      [t1_input, t2_input, np.concatenate([t1_input, t2_input], axis=1)]):

                if lstm_model is None:
                    per_model_scores[m_type] = []
                    continue

                scores = lstm_predict(lstm_model, lstm_data, lstm_exp['time_steps'], lstm_exp['pp'])

                per_model_scores[m_type] = scores.tolist()
                score_ranges[m_type][0] = min(score_ranges[m_type][0], float(scores.min()))
                score_ranges[m_type][1] = max(score_ranges[m_type][1], float(scores.max()))
            end_time = time.time()

            # Gather file-level results, keyed by the file path with the first
            # len(input_root) + 1 characters dropped (assumes sec_file starts
            # with input_root followed by a single path separator).
            per_file_scores[sec_file[len(input_root) + 1:]] = per_model_scores
            del per_model_scores
            process_time += (end_time - start_time)

        del lstm_models

        # Save results. The context manager guarantees the file is flushed and
        # closed before the next experiment starts, even if serialization fails.
        os.makedirs(scores_root, exist_ok=True)
        scores_filepath = os.path.join(
            scores_root,
            output_filename_fmt.format(lstm_exp['time_steps'], lstm_exp['pp']))
        with open(scores_filepath, 'w') as scores_file:
            json.dump(
                dict(scores=per_file_scores,
                     score_ranges=score_ranges),
                scores_file
            )

        del per_file_scores

        # Summary for this fold/experiment
        print('Num samples  : {:d}'.format(num_samples))
        print('Score ranges - ')
        for m_type in ['cnn'] + hybrid_types:
            print('{:5s}: [{:.4f}. {:.4f}]'.format(m_type,
                                                   score_ranges[m_type][0],
                                                   score_ranges[m_type][1]))
        print('Processed in {}'.format(timedelta(seconds=process_time)))
        print()