In [1]:
from functools import partial
import os
import pandas
import pandavro
import re
import numpy as np
import dvc.api
import numpy
import sys

# Move to the project root so that the relative paths used throughout the
# notebook and the `src` package import below resolve. The guard keeps this
# cell re-runnable: an unconditional chdir would climb two more directories
# on every re-execution.
if not os.path.isdir('src'):
    os.chdir('../..')
sys.path.insert(1, '.')
from src.features.data_to_features_function import data_to_features

params = dvc.api.params_show(stages=['validate', 'generate_features'])
label_column = params['label_column']

# Load data (from multiple files)
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of val_data reproducible between runs and machines.
validation_files = sorted(
    file_name for file_name in os.listdir('data/validation/')
    if re.search("^validation_complete-", file_name)
)
if not validation_files:
    # Fail here with a clear message instead of leaving val_data as None and
    # crashing with an unrelated error in a later cell.
    raise FileNotFoundError(
        "No 'validation_complete-*' files found in data/validation/"
    )

# Read every part first and concatenate once: growing a frame with concat
# inside the loop copies all previously loaded rows on each iteration.
# The literal string "None" is mapped to NaN; np.nan is the canonical
# lowercase spelling available on every NumPy version.
partial_dfs = [
    pandavro.from_avro('data/validation/' + file_name).replace("None", np.nan)
    for file_name in validation_files
]
val_data = pandas.concat(partial_dfs, sort=False, ignore_index=True)

Separate the label column from the feature columns

In [2]:
# The label column on its own.
val_data_label = val_data[label_column]
# Everything except the label; drop() returns a new frame, val_data is untouched.
val_data_features = val_data.drop(columns=label_column)

Generate features

In [3]:
# Apply the project's data_to_features helper (imported from
# src.features.data_to_features_function) to the label-free validation frame.
# The result overwrites val_data_features, so re-running this cell on its own
# passes the already-transformed frame back into the helper.
val_data_features = data_to_features(val_data_features)

Ensure that exactly the fields listed in the feature schema are present: drop any other columns and add missing ones filled with 0

In [4]:
import json

# Read the list of field definitions; each entry is used via its "name" key.
# The with-block closes the file, so no explicit close() is needed.
with open('docs/feature_schema.json', mode='r') as field_file:
    fields = json.load(field_file)

# All schema field names except the label, in schema order.
schema_names = [field["name"] for field in fields if field["name"] != label_column]

# Compute membership once against a set instead of once per field.
available_columns = set(val_data_features.columns)
present_names = [name for name in schema_names if name in available_columns]
missing_names = [name for name in schema_names if name not in available_columns]

# Keep only schema columns. The explicit copy makes the frame independent of
# the one it was selected from, so adding columns below is a plain assignment
# rather than a write into a derived selection (SettingWithCopyWarning).
val_data_features = val_data_features[present_names].copy()

# Schema fields absent from the data are added as constant-0 columns.
# NOTE(review): these are appended after the present columns, so the final
# column order can differ from the schema order when fields are missing.
# This order is kept as-is; confirm it matches what the model was trained on.
for name in missing_names:
    val_data_features[name] = 0

Evaluate the trained model on the validation data

In [5]:
import tensorflow as tf

# Restore the saved Keras model from the 'trained_model' path.
trained_model = tf.keras.models.load_model('trained_model')

# Hand the features to Keras as a plain float32 array instead of a DataFrame.
feature_matrix = np.asarray(val_data_features).astype('float32')

# return_dict=True requests the results as a dict rather than a list.
evaluation = trained_model.evaluate(
    feature_matrix,
    val_data_label,
    return_dict=True,
)



Save validation results

In [6]:
# Serialise the evaluation results to JSON and write them to the metrics file.
# Leaving the with-block closes the file.
with open('metrics/validation_results.json', mode='w') as results_file:
    serialized_results = json.dumps(evaluation)
    results_file.write(serialized_results)