# Example: Predict body part positions from an MP4 file
This notebook presents an example pipeline for applying a trained LEAP network to ~360k frames read from an MP4 video.

You can download the data to reproduce the benchmarking results below.

**Input data:** [072212_163153.mp4](https://1drv.ms/v/s!AnmpIqqfwz3zgcgekCxNp-MN76p1UQ) (254 MiB)

**Output data:** [072212_163153.preds.h5](https://1drv.ms/u/s!AnmpIqqfwz3zgcgdDhQrKRsBaxvCXQ) (46.9 MiB)

The trained network can be found in the repository [models folder](https://github.com/talmo/leap/tree/master/models/BermanFlies/FlyAging-DiegoCNN_v1.0_filters%3D64_rot%3D15_lrfactor%3D0.1_lrmindelta%3D1e-05_01).

In [1]:
# Locate the repository root (the directory that contains the "leap" package)
# so the package can be imported without being installed.
import os, sys
print(os.getcwd())

# Candidates, in priority order: the current working directory (notebook
# launched from the repository root) and its parent (notebook launched from a
# sub-folder such as "examples").
leap_dir = None
for _candidate in (os.getcwd(), os.path.dirname(os.getcwd())):
    if os.path.exists(os.path.join(_candidate, "leap", "predict_box.py")):
        leap_dir = _candidate
        break

if leap_dir is None:
    # Previously this case left leap_dir undefined and failed with a NameError.
    # Fall back to the parent directory and tell the user what to change.
    leap_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    print("WARNING: could not find leap/predict_box.py; falling back to %s. "
          "Set leap_dir to the absolute path of the repository root if imports fail." % leap_dir)
# leap_dir = ".." # replace this with the absolute path if imports are not working

# Add the repository root to the import path (only once, so re-running the
# cell does not accumulate duplicate entries).
if leap_dir not in sys.path:
    sys.path.append(leap_dir)

import numpy as np
import cv2
import h5py
from time import time

import keras
import keras.models
from leap.predict_box import convert_to_peak_outputs
from leap.utils import versions

# Print the platform, library versions and available compute devices so the
# benchmark numbers below can be tied to a specific environment.
versions(list_devices=True)

D:\OneDrive\code\leap\examples


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Platform: Windows-10-10.0.16299-SP0
h5py:
Summary of the h5py configuration
---------------------------------

h5py    2.7.1
HDF5    1.10.1
Python  3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 10:22:32) [MSC v.1900 64 bit (AMD64)]
sys.platform    win32
sys.maxsize     9223372036854775807
numpy   1.14.1

Keras: 2.1.4
Tensorflow: 1.5.0
Devices:
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 8438345468058077117
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 9143884186
locality {
  bus_id: 1
}
incarnation: 3667598826921976706
physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1"
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 9143884186
locality {
  bus_id: 1
}
incarnation: 1568816340487803239
physical_device_desc: "device: 1, name: GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1"
]


# Parameters

In [2]:
# Media file path: the input video that predictions are computed for.
# NOTE: this is an absolute path on the original author's machine -- edit it
# to point at your local copy of the video before running.
video_path = "D:/tmp/072212_163153.mp4"

# Trained network path: model file passed to keras.models.load_model().
# NOTE: machine-specific absolute path -- edit before running.
model_path = "D:/OneDrive/code/leap/models/BermanFlies/FlyAging-DiegoCNN_v1.0_filters=64_rot=15_lrfactor=0.1_lrmindelta=1e-05_01/final_model.h5"

# Predictions output path: HDF5 file to write. An existing file at this
# location is deleted and replaced.
save_path = "D:/tmp/072212_163153.preds.h5"

# Number of frames to read before predicting (higher = faster, but limited by RAM).
# All buffered frames are stacked into a single array before being predicted on.
chunk_size = 10000

# Number of frames to evaluate at once on the GPU (higher = faster, but limited by GPU memory)
batch_size = 64

# Processing

In [3]:
# Run the trained network over the whole video, chunk by chunk, then save the
# predicted peak coordinates and confidences (plus timing stats) to HDF5.
t0_all = time()

# Load model and convert to peak-coordinate output
model = convert_to_peak_outputs(keras.models.load_model(model_path))
print("Model:", model_path)
print("    Input:", str(model.input_shape))
print("    Output:", str(model.output_shape))

# model = keras.utils.multi_gpu_model(model, gpus=2)

# Open video for reading; fail early with a clear message instead of a
# confusing error further down when nothing could be decoded.
reader = cv2.VideoCapture(video_path)
if not reader.isOpened():
    reader.release()
    raise IOError("Could not open video: %s" % video_path)
num_samples = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))

# Initialize
positions_pred = []
conf_pred = []
buffer = []
samples_predicted = 0
reading_runtime = 0
prediction_runtime = 0
done = False

# Process video chunk-by-chunk. The try/finally guarantees the capture handle
# is released even if reading or prediction raises.
try:
    while not done:
        t0_reading = time()
        # Read and finish if no frame was retrieved
        returned_frame, I = reader.read()
        done = not returned_frame
        reading_runtime += time() - t0_reading

        # Add current frame to buffer (first channel only -> single-channel image)
        if not done:
            buffer.append(I[..., 0])

        # Predict once the buffer is full, or flush whatever is left at the end
        if len(buffer) >= chunk_size or (done and len(buffer) > 0):
            t0_prediction = time()

            # Predict on buffer: stack to (sample, row, column) and add a channel axis
            Y = model.predict(np.stack(buffer, axis=0)[..., None], batch_size=batch_size)

            # Save: rows 0-1 of the output are the peak coordinates, row 2 the
            # confidence at the peak. Integer indexing already yields a
            # (sample, joint) array; do NOT squeeze() it, since that would drop
            # the sample axis for a chunk holding a single frame and break the
            # concatenation below.
            positions_pred.append(Y[:, :2, :].astype("int32"))
            conf_pred.append(Y[:, 2, :])

            # Empty out buffer container
            buffer = []

            # Performance stats
            samples_predicted += len(Y)
            prediction_runtime += time() - t0_prediction
            elapsed = time() - t0_all
            fps = samples_predicted / elapsed
            # The reported frame count may not match the number of frames
            # actually decoded, so never report a negative ETA.
            remaining = max(num_samples - samples_predicted, 0)
            print("Predicted: %d/%d frames | Elapsed: %.1f min / %.1f FPS / ETA: %.1f min" %
                  (samples_predicted, num_samples, elapsed / 60, fps, remaining / fps / 60))
finally:
    # Close video reader
    reader.release()

# Nothing decoded: stop here with an explicit message rather than failing
# inside np.concatenate or dividing by zero in the stats below.
if samples_predicted == 0:
    raise ValueError("No frames could be read from video: %s" % video_path)

# Merge arrays
positions_pred = np.concatenate(positions_pred, axis=0)
conf_pred = np.concatenate(conf_pred, axis=0)

# Report performance stats
print("Finished predicting %d frames." % samples_predicted)
print("    Prediction | Runtime: %.2f min / %.3f FPS" % (prediction_runtime / 60, samples_predicted / prediction_runtime))
print("    Reading    | Runtime: %.2f min / %.3f FPS" % (reading_runtime / 60, samples_predicted / reading_runtime))

# Save (any previous output file is removed first)
if os.path.exists(save_path):
    os.remove(save_path)
with h5py.File(save_path, "w") as f:
    f.attrs["num_samples"] = num_samples
    f.attrs["video_path"] = video_path
    f.attrs["model_path"] = model_path

    ds_pos = f.create_dataset("positions_pred", data=positions_pred, compression="gzip", compression_opts=1)
    ds_pos.attrs["description"] = "coordinate of peak at each sample"
    ds_pos.attrs["dims"] = "(sample, [x, y], joint) === (sample, [column, row], joint)"

    ds_conf = f.create_dataset("conf_pred", data=conf_pred, compression="gzip", compression_opts=1)
    ds_conf.attrs["description"] = "confidence map value in [0, 1.0] at peak"
    ds_conf.attrs["dims"] = "(sample, joint)"

    # Total runtime is measured after the datasets are written so it includes
    # the time spent compressing and saving them.
    total_runtime = time() - t0_all
    f.attrs["reading_runtime_secs"] = reading_runtime
    f.attrs["prediction_runtime_secs"] = prediction_runtime
    f.attrs["total_runtime_secs"] = total_runtime

print("Saved:", save_path)

print("Total runtime: %.1f mins" % (total_runtime / 60))
print("Total performance: %.3f FPS" % (samples_predicted / total_runtime))

Model: D:/OneDrive/code/leap/models/BermanFlies/FlyAging-DiegoCNN_v1.0_filters=64_rot=15_lrfactor=0.1_lrmindelta=1e-05_01/final_model.h5
    Input: (None, 192, 192, 1)
    Output: (None, 3, 32)
Predicted: 10000/361000 frames | Elapsed: 0.7 min / 231.6 FPS / ETA: 25.3 min
Predicted: 20000/361000 frames | Elapsed: 1.4 min / 245.6 FPS / ETA: 23.1 min
Predicted: 30000/361000 frames | Elapsed: 2.0 min / 250.4 FPS / ETA: 22.0 min
Predicted: 40000/361000 frames | Elapsed: 2.6 min / 253.0 FPS / ETA: 21.1 min
Predicted: 50000/361000 frames | Elapsed: 3.3 min / 254.4 FPS / ETA: 20.4 min
Predicted: 60000/361000 frames | Elapsed: 3.9 min / 255.5 FPS / ETA: 19.6 min
Predicted: 70000/361000 frames | Elapsed: 4.6 min / 256.2 FPS / ETA: 18.9 min
Predicted: 80000/361000 frames | Elapsed: 5.2 min / 257.3 FPS / ETA: 18.2 min
Predicted: 90000/361000 frames | Elapsed: 5.8 min / 258.3 FPS / ETA: 17.5 min
Predicted: 100000/361000 frames | Elapsed: 6.4 min / 259.2 FPS / ETA: 16.8 min
Predicted: 110000/361000 