## Load dependencies

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import glob
import pickle
import json
import matplotlib.pyplot as plt
from lstm import LSTM_Simple
from metrics import exact_match_metric
from callbacks import NValidationSetsCallback, GradientLogger
from generator import DataGenerator, DataGeneratorSeq
from tqdm import tqdm

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
print(tf.__version__)
print("GPU Available: ", tf.test.is_gpu_available())

2.0.0-alpha0
GPU Available:  True


## Define evaluation class

In [3]:
class LSTM_Evaluator:
    """Load a saved ``LSTM_Simple`` model and score it on question/answer text pairs.

    The model directory passed to the constructor must contain ``settings.json``
    and ``model.h5``. If it also contains ``stoi.pkl`` the token index is read
    from there; otherwise it is rebuilt from the raw data files referenced by
    the settings.
    """

    def __init__(self, path, ):
        """Restore settings, token index and model weights from ``path``.

        Args:
            path: ``pathlib.Path`` of the model directory.
        """
        with open(str(path/'settings.json'), 'r') as file:
            self.settings_dict = json.load(file)

        if (path/'stoi.pkl').is_file():
            # SECURITY: unpickling can execute arbitrary code. Only point
            # `path` at model directories you trust.
            with open(str(path/'stoi.pkl'), 'rb') as file:
                self.token_index = pickle.load(file)
        else:
            self.token_index = self.__get_stoi_from_data()
        self.num_tokens = len(self.token_index)

        adam = Adam(lr=6e-4, beta_1=0.9, beta_2=0.995, epsilon=1e-9, decay=0.0, amsgrad=False, clipnorm=0.1)
        self.lstm = LSTM_Simple(self.num_tokens, self.settings_dict['latent_dim'])
        # get_model() is called for its side effect of populating self.lstm.model.
        _ = self.lstm.get_model()
        self.lstm.model.load_weights(str(path/'model.h5'))
        self.lstm.model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=[exact_match_metric])

    def _build_generator(self, input_texts, target_texts, batch_size, max_seq_length):
        """Create a ``DataGeneratorSeq`` configured with this model's vocabulary and settings."""
        return DataGeneratorSeq(input_texts=input_texts,
                                target_texts=target_texts,
                                batch_size=batch_size,
                                max_seq_length=max_seq_length,
                                num_tokens=self.num_tokens,
                                token_index=self.token_index,
                                num_thinking_steps=self.settings_dict["thinking_steps"])

    def evaluate_model(self, input_texts, output_texts, teacher_forcing=True, batch_size=128, n_samples=1000):
        """Return the exact-match score of the model on the given pairs.

        Args:
            input_texts: sequence of question strings.
            output_texts: sequence of answer strings, aligned with ``input_texts``.
            teacher_forcing: if True, score with ``model.evaluate_generator`` and
                return the compiled metric; if False, decode answers with
                ``predict_without_teacher`` and compare stripped strings.
            batch_size: batch size handed to the data generator.
            n_samples: number of randomly drawn samples to decode when
                ``teacher_forcing`` is False (capped at the number available).

        Returns:
            The exact-match value. Without teacher forcing this is the fraction
            of decoded samples whose stripped prediction equals the stripped
            target, or 0.0 when no sample could be drawn.
        """
        max_seq_length = max(len(txt_in) + len(txt_out) for txt_in, txt_out in zip(input_texts, output_texts))

        self.data_generator = self._build_generator(input_texts, output_texts, batch_size, max_seq_length)

        if teacher_forcing:
            result = self.lstm.model.evaluate_generator(self.data_generator, verbose=1)
            # Index 1 is the first compiled metric (index 0 being the loss).
            return result[1]

        outputs_true, outputs_preds = self.predict_without_teacher(n_samples, max_seq_length)
        if not outputs_true:
            # Nothing was sampled; avoid a ZeroDivisionError.
            return 0.0
        n_correct = sum(out_true.strip() == out_pred.strip()
                        for out_true, out_pred in zip(outputs_true, outputs_preds))
        return n_correct / len(outputs_true)

    def predict_on_string(self, text, max_output_length=100):
        """Decode the model's answer for a single question string.

        Args:
            text: the question.
            max_output_length: length of the placeholder target, which also
                bounds the sequence length passed to the decoder.

        Returns:
            The decoded prediction with surrounding whitespace stripped.
        """
        max_seq_length = len(text) + max_output_length

        # The generator needs a target; a dummy string of zeros stands in for it.
        self.data_generator = self._build_generator([text], ['0'*max_output_length], 1, max_seq_length)

        outputs_true, outputs_preds = self.predict_without_teacher(1, max_seq_length)

        return outputs_preds[0].strip()

    def predict_without_teacher(self, n_samples, max_seq_length, random=True):
        """Decode predictions for samples of ``self.data_generator`` without teacher forcing.

        Args:
            n_samples: how many samples to decode. Capped at the number of
                samples available so that asking for more than exist does not
                raise.
            max_seq_length: forwarded to ``self.lstm.decode_sample``.
            random: if True, draw distinct indices at random from
                ``self.data_generator.indexes``; otherwise use the first
                ``n_samples`` indices.

        Returns:
            ``(outputs_true, outputs_preds)``: the target strings of the chosen
            samples and whatever ``decode_sample`` returns for them.
        """
        thinking_steps = self.settings_dict["thinking_steps"]

        if random:
            population = self.data_generator.indexes
            n_samples = min(n_samples, len(population))
            samples = np.random.choice(population, n_samples, replace=False)
        else:
            n_samples = min(n_samples, len(self.data_generator.input_texts))
            samples = list(range(n_samples))

        encoded_texts = []
        outputs_true = []
        for i in samples:
            # Name-mangled private method of DataGeneratorSeq, called for one index.
            sample = self.data_generator._DataGeneratorSeq__data_generation([i])
            input_len = len(self.data_generator.input_texts[i])
            outputs_true.append(self.data_generator.target_texts[i])
            # Keep only the question, the thinking steps and one further position
            # (presumably the start-of-answer token - TODO confirm against the generator).
            x = sample[0][0][:input_len + thinking_steps + 1]
            encoded_texts.append(np.expand_dims(x, axis=0))

        outputs_preds = self.lstm.decode_sample(encoded_texts, self.token_index, max_seq_length)
        return outputs_true, outputs_preds

    def __get_stoi_from_data(self):
        """
        Rebuild the string-to-integer map (stoi) from the raw data files.

        Reads the train, interpolate and extrapolate files selected by
        ``self.settings_dict`` (keys ``data_path``, ``math_module`` and
        ``train_level``) and indexes every distinct character in sorted order.

        Returns:
            dict mapping each character to its position in the sorted vocabulary.
        """

        def concatenate_texts(path, pattern):
            """Read every '<pattern>*.txt' under ``path`` as alternating question/answer lines."""
            input_texts = []
            target_texts = []

            for file_path in path.glob('{}*.txt'.format(pattern)):
                with open(str(file_path), 'r', encoding='utf-8') as f:
                    lines = f.read().split('\n')
                # Drop only the empty remainder after a trailing newline, so a
                # file without one does not lose its last real line.
                if lines and lines[-1] == '':
                    lines.pop()

                input_texts.extend(lines[0::2])
                target_texts.extend('\t' + target_text + '\n' for target_text in lines[1::2])
            return input_texts, target_texts

        # Use the settings loaded by this instance, not a notebook-level global.
        raw_path = Path(self.settings_dict['data_path'])
        math_module = self.settings_dict["math_module"]
        train_level = self.settings_dict["train_level"]
        datasets = {
            'train': (raw_path, 'train-' + train_level + '/' + math_module),
            'interpolate': (raw_path/'interpolate', math_module),
            'extrapolate': (raw_path/'extrapolate', math_module),
        }

        # Accumulate characters incrementally rather than joining every text
        # into one giant string first.
        characters = set()
        for data_dir, pattern in datasets.values():
            inputs, targets = concatenate_texts(data_dir, pattern)
            for text in inputs:
                characters.update(text)
            for text in targets:
                characters.update(text)

        return {char: i for i, char in enumerate(sorted(characters))}
        
        
        



## Load datasets

In [4]:
# Directory of the trained run to evaluate. LSTM_Evaluator reads settings.json
# and model.h5 from it, plus stoi.pkl when that file is present.
model_path = Path('../../models/js0kldpwp1nhos/')

In [5]:
# Read the settings that were stored next to the model.
settings_path = model_path/'settings.json'

with settings_path.open('r') as file:
    settings_dict = json.load(file)

# Every dataset folder lives under the raw-data root recorded in the settings.
raw_path = Path(settings_dict['data_path'])
train_easy_path = raw_path/'train-easy/'
interpolate_path = raw_path/'interpolate'
extrapolate_path = raw_path/'extrapolate'

# Display the loaded settings as the cell output.
settings_dict

{'batch_size': 1024,
 'data_path': '/storage/git/deep-math/data/raw/v1.0/',
 'epochs': 1,
 'latent_dim': 2048,
 'math_module': 'arithmetic',
 'save_path': '/artifacts/',
 'saved_model': '/storage/artifacts/j4bu146wamlr9/model.h5',
 'thinking_steps': 16,
 'train_level': '*'}

In [6]:
def concatenate_texts(path, pattern):
    """Read every '<pattern>*.txt' file under ``path`` into two flat lists.

    Each file is expected to alternate question and answer lines. Answers are
    wrapped with a leading tab and a trailing newline.

    Args:
        path: ``pathlib.Path`` of the directory to search.
        pattern: filename prefix (may contain glob syntax or sub-directories).

    Returns:
        ``(input_texts, target_texts)``: the questions and the wrapped answers,
        concatenated over all matching files in the order ``glob`` yields them.
    """
    input_texts = []
    target_texts = []

    for file_path in path.glob('{}*.txt'.format(pattern)):
        with open(str(file_path), 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')

        # Drop only the empty remainder left by a trailing newline. Slicing off
        # the last element unconditionally would discard the final answer of a
        # file that does not end with a newline.
        if lines and lines[-1] == '':
            lines.pop()

        input_texts.extend(lines[0::2])
        target_texts.extend('\t' + target_text + '\n' for target_text in lines[1::2])

    return input_texts, target_texts

In [30]:
def concatenate_texts_individual(path, pattern):
    """Read every '<pattern>*.txt' file under ``path``, keeping files separate.

    Each file is expected to alternate question and answer lines. Answers are
    wrapped with a leading tab and a trailing newline.

    Args:
        path: ``pathlib.Path`` of the directory to search.
        pattern: filename prefix (may contain glob syntax or sub-directories).

    Returns:
        ``(input_texts, target_texts)``: two dicts keyed by the matched file
        path, holding that file's questions and wrapped answers respectively.
    """
    input_texts = {}
    target_texts = {}

    for file_path in path.glob('{}*.txt'.format(pattern)):
        with open(str(file_path), 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')

        # Drop only the empty remainder left by a trailing newline, so a file
        # that does not end with a newline keeps its final answer.
        if lines and lines[-1] == '':
            lines.pop()

        input_texts[file_path] = lines[0::2]
        target_texts[file_path] = ['\t' + target_text + '\n' for target_text in lines[1::2]]

    return input_texts, target_texts

In [7]:
# Module and difficulty selectors come from the saved training settings.
math_module = settings_dict["math_module"]
train_level = settings_dict["train_level"]

# Each split maps to (directory, filename prefix); files are matched as '<prefix>*.txt'.
datasets = dict(
    train=(raw_path, 'train-' + train_level + '/' + math_module),
    interpolate=(interpolate_path, math_module),
    extrapolate=(extrapolate_path, math_module),
)

input_texts, target_texts = {}, {}

# Load every split and report how many question/answer pairs it holds.
for k, v in datasets.items():
    input_texts[k], target_texts[k] = concatenate_texts(*v)
    print('Length of set {} is {}'.format(k, len(input_texts[k])))

Length of set train is 17999982
Length of set interpolate is 90000
Length of set extrapolate is 60000


In [8]:
# Hold out 20% of the training pairs for validation; the fixed seed makes the
# split repeatable for a given input order.
(input_texts_train,
 input_texts_valid,
 target_texts_train,
 target_texts_valid) = train_test_split(
    input_texts['train'],
    target_texts['train'],
    test_size=0.2,
    random_state=42,
)

In [9]:
# Report the size of each side of the train/validation split.
print('Number of training samples:', len(input_texts_train))
print('Number of validation samples:', len(input_texts_valid))

Number of training samples: 14399985
Number of validation samples: 3599997


Data samples:

In [10]:
# Show one question/answer pair. strip() removes the '\t' prefix and '\n'
# suffix that concatenate_texts wraps around every target.
print('INPUT:', input_texts['train'][42])
print('OUTPUT:', target_texts['train'][42].strip())

INPUT: What is 2 - (1 + -5) - 11?
OUTPUT: -5


## Evaluate datasets

In [11]:
# Build the evaluator: loads the settings, the token index and the model weights from model_path.
lstm_eval = LSTM_Evaluator(model_path)

In [19]:
# Number of leading samples of each dataset that the evaluation cells below score.
test_sample=1024

In [20]:
# Exact-match metric on the first test_sample training pairs (teacher forcing is the default).
lstm_eval.evaluate_model(input_texts_train[:test_sample], target_texts_train[:test_sample])



0.7285156

In [21]:
# Exact-match metric on the first test_sample held-out validation pairs (teacher forcing is the default).
lstm_eval.evaluate_model(input_texts_valid[:test_sample], target_texts_valid[:test_sample])



0.7363281

In [22]:
# Exact-match metric on the first test_sample pairs of the interpolate set (teacher forcing is the default).
lstm_eval.evaluate_model(input_texts['interpolate'][:test_sample], target_texts['interpolate'][:test_sample])



0.6591797

In [23]:
# Exact-match metric on the first test_sample pairs of the extrapolate set (teacher forcing is the default).
lstm_eval.evaluate_model(input_texts['extrapolate'][:test_sample], target_texts['extrapolate'][:test_sample])



0.20703125

## We can also test an individual string:

In [29]:
# Decode the model's answer for one raw question string, without teacher forcing.
lstm_eval.predict_on_string('1 / 7')

'-4'