## Load dependencies

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import glob
import pickle
import json
import matplotlib.pyplot as plt
import sys
sys.path.append('../../')
from src.models.attention import LSTMWithAttention
from lstm import LSTM_Simple
from metrics import exact_match_metric, exact_match_metric_index
from callbacks import NValidationSetsCallback, GradientLogger
from src.models.generator import DataGenerator, DataGeneratorAttention
from src.models.utils import get_sequence_data
from tqdm import tqdm

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
# Report the TensorFlow build in use, then whether it can see a GPU.
print(tf.__version__)
gpu_available = tf.test.is_gpu_available()
print("GPU Available: ", gpu_available)

W0816 16:04:58.048052 4706751936 deprecation_wrapper.py:118] From /Users/lewtun/git/deep-math/notebooks/baselines/lstm.py:4: The name tf.keras.layers.CuDNNLSTM is deprecated. Please use tf.compat.v1.keras.layers.CuDNNLSTM instead.



2.0.0-beta1
GPU Available:  False


In [3]:
# Project locations, all expressed relative to the repository root so the
# notebook does not depend on an absolute path.
ROOT = Path('../../')
MODELS = ROOT.joinpath('models/')
DATA = ROOT.joinpath('data/')
SETTINGS = ROOT.joinpath('settings/')

In [4]:
# Read the run configuration (module name, batch size, layer sizes, ...).
with open(SETTINGS/'settings_local.json') as settings_file:
    settings_dict = json.load(settings_file)

In [5]:
# Display the loaded configuration so the run parameters are recorded in the output.
settings_dict

{'math_module': 'arithmetic__add_sub',
 'train_level': '*',
 'batch_size': 1024,
 'thinking_steps': 16,
 'epochs': 1,
 'num_encoder_units': 512,
 'num_decoder_units': 2048,
 'embedding_dim': 256,
 'save_path': '/artifacts/',
 'data_path': 'data/'}

In [6]:
# Load the pre-processed sequences. open() does not expand wildcards, so the
# '*' below is matched literally as part of the file name (it coincides with
# the 'train_level' value '*' in the settings shown above).
sequence_path = DATA/'processed/arithmetic__add_sub-*.pkl'
with sequence_path.open('rb') as pickle_file:
    sequence_data = pickle.load(pickle_file)

In [7]:
# List the top-level entries stored in the pickled sequence data.
sequence_data.keys()

dict_keys(['input_token_index', 'target_token_index', 'input_texts', 'target_texts', 'max_encoder_seq_length', 'max_decoder_seq_length', 'num_encoder_tokens', 'num_decoder_tokens', 'num_thinking_steps'])

## Define evaluation class

In [8]:
class LSTMWithAttentionEvaluator:
    """Load a saved ``LSTMWithAttention`` model and score it by exact match.

    The evaluator reads its settings and pre-processed sequence data from the
    project directory passed to ``__init__``, rebuilds the network, loads the
    saved weights and exposes helpers to evaluate it with or without teacher
    forcing.
    """

    def __init__(self, path):
        """Build the model and load its weights.

        Args:
            path: ``pathlib.Path`` of the project root. The following files are
                read relative to it: ``settings/settings_local.json``,
                ``data/processed/arithmetic__add_sub-*.pkl`` (the ``*`` is a
                literal character of the file name; ``open`` does not glob) and
                ``models/model.h5``.
        """
        with open(str(path/'settings/settings_local.json'), 'r') as file:
            self.settings_dict = json.load(file)

        with open(str(path/'data/processed/arithmetic__add_sub-*.pkl'), 'rb') as file:
            self.sequence_data = pickle.load(file)

        self.token_index = self.sequence_data['input_token_index']
        self.num_tokens = len(self.token_index)

        adam = Adam(lr=6e-4, beta_1=0.9, beta_2=0.995, epsilon=1e-9, decay=0.0, amsgrad=False, clipnorm=0.1)

        # Use the data and settings loaded from `path` above rather than
        # notebook-level globals, so the instance is self-contained.
        self.lstm = LSTMWithAttention(
            self.sequence_data['num_encoder_tokens'],
            self.sequence_data['num_decoder_tokens'],
            self.sequence_data['max_encoder_seq_length'],
            self.sequence_data['max_decoder_seq_length'],
            self.settings_dict['num_encoder_units'],
            self.settings_dict['num_decoder_units'],
            self.settings_dict['embedding_dim'],
        )
        self.model = self.lstm.get_model()
        self.model.load_weights(str(path/'models'/'model.h5'))
        self.model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=[exact_match_metric_index])

    def evaluate_model(self, input_texts, output_texts, teacher_forcing=True, batch_size=128, n_samples=1000):
        """Return the exact-match score of the model on the given texts.

        Args:
            input_texts: sequence of input strings.
            output_texts: sequence of target strings, aligned with ``input_texts``.
            teacher_forcing: when True, score with ``model.evaluate_generator``
                and return the compiled metric; when False, decode ``n_samples``
                examples with ``predict_without_teacher`` and compare strings.
            batch_size: accepted for interface compatibility but currently
                unused; the generator batch size comes from the settings file.
            n_samples: number of examples decoded when ``teacher_forcing`` is False.

        Returns:
            The exact-match score (fraction of matching examples, or the second
            entry of the Keras evaluation result when teacher forcing is used).
        """
        max_seq_length = max([len(txt_in)+len(txt_out) for txt_in, txt_out in zip(input_texts, output_texts)])

        data_gen_pars = {
            "batch_size": self.settings_dict["batch_size"],
            "max_encoder_seq_length": self.sequence_data['max_encoder_seq_length'],
            "max_decoder_seq_length": self.sequence_data['max_decoder_seq_length'],
            "num_encoder_tokens": self.sequence_data['num_encoder_tokens'],
            "num_decoder_tokens": self.sequence_data['num_decoder_tokens'],
            "input_token_index": self.sequence_data['input_token_index'],
            "target_token_index": self.sequence_data['target_token_index'],
            "num_thinking_steps": self.settings_dict["thinking_steps"],
        }

        self.data_generator = DataGeneratorAttention(input_texts=input_texts,
                                                     target_texts=output_texts,
                                                     **data_gen_pars)

        if not teacher_forcing:
            outputs_true, outputs_preds = self.predict_without_teacher(n_samples, max_seq_length)
            # Compare after stripping surrounding whitespace on both sides.
            n_matches = sum(
                out_true.strip() == out_pred.strip()
                for out_true, out_pred in zip(outputs_true, outputs_preds)
            )
            exact_match = n_matches / len(outputs_true)
        else:
            result = self.model.evaluate_generator(self.data_generator, verbose=1)
            # result[0] is the loss; result[1] is the single compiled metric.
            exact_match = result[1]

        return exact_match

    def predict_on_string(self, text, max_output_length=100):
        """Decode a single input string without teacher forcing.

        Args:
            text: the input string to decode.
            max_output_length: upper bound on the decoded length; also the
                length of the placeholder target handed to the generator.

        Returns:
            The first decoded prediction, stripped of surrounding whitespace.
        """
        max_seq_length = len(text) + max_output_length

        params = {'batch_size': 1,
                  'max_seq_length': max_seq_length,
                  'num_tokens': self.num_tokens,
                  'token_index': self.token_index,
                  'num_thinking_steps': self.settings_dict["thinking_steps"]
                 }

        # NOTE(review): DataGeneratorSeq is not imported in this notebook's
        # import cell, so this raises NameError unless the name is provided
        # elsewhere -- confirm where it lives and import it.
        self.data_generator = DataGeneratorSeq(input_texts=[text],
                                               target_texts=['0'*max_output_length],
                                               **params)

        outputs_true, outputs_preds = self.predict_without_teacher(1, max_seq_length)

        return outputs_preds[0].strip()

    def predict_without_teacher(self, n_samples, max_seq_length, random=True):
        """Decode examples from ``self.data_generator`` without teacher forcing.

        Args:
            n_samples: number of examples to decode.
            max_seq_length: passed through to ``self.lstm.decode_sample``.
            random: when True, draw ``n_samples`` distinct indices from the
                generator's ``indexes``; otherwise use the first ``n_samples``.

        Returns:
            A tuple ``(outputs_true, outputs_preds)``: the target texts of the
            selected examples and whatever ``decode_sample`` returns for them.
        """
        encoded_texts = []
        outputs_true = []
        if random:
            samples = np.random.choice(self.data_generator.indexes, n_samples, replace=False)
        else:
            samples = list(range(n_samples))
        for i in samples:
            # NOTE(review): the name-mangled attribute is hard-coded for a
            # class called DataGeneratorSeq; confirm that the generator built
            # in evaluate_model (DataGeneratorAttention) exposes it.
            sample = self.data_generator._DataGeneratorSeq__data_generation([i])
            input_len = len(self.data_generator.input_texts[i])
            outputs_true.append(self.data_generator.target_texts[i])
            # Keep the input, the thinking steps and one further position.
            x = sample[0][0][:input_len+self.settings_dict["thinking_steps"]+1]
            encoded_texts.append(np.expand_dims(x, axis=0))

        outputs_preds = self.lstm.decode_sample(encoded_texts, self.token_index, max_seq_length)
        return outputs_true, outputs_preds

## Evaluate datasets

In [9]:
# Build the evaluator from the project root; its constructor loads the settings,
# the sequence data and the saved model weights.
lstm_eval = LSTMWithAttentionEvaluator(ROOT)

W0816 16:05:18.711623 4706751936 deprecation.py:323] From /Users/lewtun/git/deep-math/env/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:3868: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [10]:
# Number of leading examples taken from each split in the evaluations below.
test_sample = 1024

In [11]:
# Show which data splits are available for evaluation.
sequence_data['input_texts'].keys()

dict_keys(['train', 'interpolate', 'extrapolate'])

In [12]:
# Exact-match score on the first `test_sample` training examples
# (teacher forcing is on by default).
lstm_eval.evaluate_model(
    sequence_data['input_texts']['train'][:test_sample],
    sequence_data['target_texts']['train'][:test_sample],
)



0.81069744

In [61]:
# Exact-match score on the first `test_sample` examples of the 'interpolate' split
# (teacher forcing is on by default).
lstm_eval.evaluate_model(
    sequence_data['input_texts']['interpolate'][:test_sample],
    sequence_data['target_texts']['interpolate'][:test_sample],
)



0.75037444

In [62]:
# Exact-match score on the first `test_sample` examples of the 'extrapolate' split
# (teacher forcing is on by default).
lstm_eval.evaluate_model(
    sequence_data['input_texts']['extrapolate'][:test_sample],
    sequence_data['target_texts']['extrapolate'][:test_sample],
)



0.69989574