# Part III: Simulate batch / offline inferencing against a directory of data

Now that we have two models (a trained one and a tuned one), we can 
simulate an offline batch inference against 50 random images in our processed/real data and compare the output. This may help us eyeball the output in bulk to see the difference in model performance.

In [None]:
import os

# IPython magic: set the SCRATCH environment variable for this kernel session.
%env SCRATCH=../scratch
# Shell escape: create the scratch directory only if nothing exists at that path yet.
! [ -e "${SCRATCH}" ] || mkdir -p "${SCRATCH}"

# Read the location back into Python, falling back to ./scratch when SCRATCH is unset.
scratch_path = os.environ.get('SCRATCH', './scratch')

In [None]:
%%writefile ../src/batch_inference_images.py

import os
import tensorflow as tf
import keras
import numpy as np
import pandas as pd

def predict_images(model_version: str, image_count: int, seed=0) -> None:
    """Classify a random sample of images and save the predictions to a CSV file.

    Loads the Keras model stored at
    ``$SCRATCH/models/hand/<model_version>/model.savedmodel``, runs it on up to
    ``image_count`` randomly chosen files from ``$SCRATCH/processed/real``,
    prints one line per image, and writes every result row to
    ``$SCRATCH/model_v<model_version>_predictions.csv``. ``SCRATCH`` defaults
    to ``./scratch`` when the environment variable is not set.

    Args:
        model_version: Name of the model version directory to load (e.g. "1").
        image_count: Maximum number of images to classify. If the directory
            holds fewer files, all of them are used; values below zero are
            treated as zero.
        seed: Seed for the random selection. The fixed default picks the same
            images on every call, so different model versions are scored on
            an identical sample. Pass ``None`` for a fresh sample each call.

    Raises:
        FileNotFoundError: If the sample directory does not exist
            (raised by ``os.listdir``).
    """
    import random  # local import: only the sampling step below needs it

    # Set scratch directory
    scratch_path = os.environ.get('SCRATCH', './scratch')

    # Load the model
    model_path = os.path.join(
        scratch_path, "models", "hand", model_version, "model.savedmodel"
    )
    model = keras.models.load_model(model_path)

    # Directory containing the images for prediction
    samples_path = os.path.join(scratch_path, "processed", "real")

    # Keep regular, non-hidden files only: sub-directories or dot-files in the
    # folder would make load_img fail. Sort because os.listdir returns entries
    # in arbitrary order, which would make a seeded sample irreproducible.
    image_files = sorted(
        name
        for name in os.listdir(samples_path)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(samples_path, name))
    )

    # Draw a genuinely random sample, capped at the number of available files.
    sample_size = max(0, min(image_count, len(image_files)))
    selected_images = random.Random(seed).sample(image_files, sample_size)

    # Index -> label mapping; update this to match the classes of your model.
    class_mapping = {0: 'left', 1: 'right'}

    # One dict per classified image, in processing order.
    predictions = []

    for image_file in selected_images:
        img_path = os.path.join(samples_path, image_file)

        # Load as a 96x96 single-channel PIL image.
        img = tf.keras.utils.load_img(
            img_path,
            color_mode="grayscale",
            target_size=(96, 96),
        )

        # Convert to an array and add the leading batch dimension.
        img_array = tf.keras.utils.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)

        # NOTE(review): softmax is applied to the model output, which assumes
        # the model emits raw logits -- confirm against the model definition.
        predictions_arr = model.predict(img_array)
        score = tf.nn.softmax(predictions_arr[0])

        predicted_label = class_mapping[int(np.argmax(score))]
        confidence = 100 * np.max(score)

        # File names are expected to contain the true label, so a substring
        # match serves as a quick correctness check.
        is_label_in_image_name = predicted_label.lower() in image_file.lower()

        predictions.append({
            'Image': image_file,
            'Predicted_Label': predicted_label,
            'Confidence': confidence,
            'Is_Label_In_Image_Name': is_label_in_image_name,
        })

        print(f"This image {image_file} is predicted as {predicted_label} with a {confidence:.2f}% confidence.")

    # Convert predictions to a DataFrame
    predictions_df = pd.DataFrame(predictions)

    # Save predictions to a CSV file with the model version in the filename
    filename = os.path.join(
        scratch_path, f"model_v{model_version}_predictions.csv"
    )
    predictions_df.to_csv(filename, index=False)
    print(f"Predictions saved to {filename}")

In [None]:
# Run the script file so that predict_images is defined in this notebook session.
%run ../src/batch_inference_images.py

In [None]:
# Run batch inference with model version "1" on 50 images.
# Change the version string or the image count as needed; the dataset holds
# about 6000 images, so running against all of them can take a while.
predict_images("1", 50)  # arguments: model version, number of images

In [None]:
# Run batch inference with model version "2" on 50 images.
# Change the version string or the image count as needed; the dataset holds
# about 6000 images, so running against all of them can take a while.
predict_images("2", 50)  # arguments: model version, number of images