In [102]:
import os
import shutil
from tqdm import tqdm
from random import shuffle
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from glob import glob

import tensorflow as tf
from tensorflow.keras.models import load_model

import torch

In [103]:
# Object detector: the 'yolov5m' entry point of the 'ultralytics/yolov5'
# torch.hub repository, loaded with pretrained weights.
# It is called by get_detection_bbox().
detection_model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)
# Keras models read from the local 'models/' directory (paths are relative to
# the notebook's working directory). params_model is used by get_params() and
# dist_model by get_dist().
params_model = load_model('models/params.h5')
dist_model = load_model('models/dist.h5')

Using cache found in /Users/neironeiro/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2021-11-19 torch 1.9.0 CPU

Fusing layers... 
Model Summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


In [104]:
def get_detection_bbox(image):
    """Return the bounding box of the first detected airplane in ``image``.

    Runs the module-level ``detection_model`` on ``image`` and keeps only the
    detections whose ``name`` column equals ``'airplane'``.

    Parameters
    ----------
    image
        Image passed straight to ``detection_model``.

    Returns
    -------
    tuple
        ``(x1, y1, x2, y2)`` as ints (the first four columns of the first
        airplane row, truncated with ``int``), or four ``np.nan`` values when
        no airplane was detected.
    """
    detections = detection_model(image).pandas().xyxy[0]
    airplanes = detections[detections['name'] == 'airplane']

    # Explicit "nothing found" branch instead of a bare ``except:``, so that
    # unrelated failures are not silently turned into a NaN result.
    if airplanes.empty:
        return np.nan, np.nan, np.nan, np.nan

    # Only the first matching row is used, in the detector's output order.
    x1, y1, x2, y2 = (int(value) for value in airplanes.iloc[0, :4])
    return x1, y1, x2, y2

In [105]:
def get_params(image, x1, y1, x2, y2):
    """Predict the three orientation values for the object inside a bounding box.

    The box is enlarged, cropped out of a copy of ``image``, resized to
    300x150 (width x height), scaled by 1/255 and fed to the module-level
    ``params_model``.

    Parameters
    ----------
    image
        Image array indexed as ``[row, column, channel]``.
    x1, y1, x2, y2
        Bounding-box corners in pixels.

    Returns
    -------
    tuple
        ``(tang, kren, course)``: the first three model outputs mapped
        through ``28*p - 14``, ``5*p - 2`` and ``66*p`` respectively.
    """
    frame = image.copy()

    # NOTE(review): the margin is 5% of each *coordinate*, not of the box
    # size. Kept as is; presumably the model was trained on crops produced
    # this way - confirm before changing.
    top = int(y1 - y1 * 0.05)
    bottom = int(y2 + y2 * 0.05)
    left = int(x1 - x1 * 0.05)
    right = int(x2 + x2 * 0.05)

    crop = frame[top:bottom, left:right, :]
    net_input = cv2.resize(crop, (300, 150)) / 255.

    outputs = params_model.predict(net_input[np.newaxis, ...])[0]

    tang = 28 * outputs[0] - 14
    kren = 5 * outputs[1] - 2
    course = 66 * outputs[2]
    return tang, kren, course

In [106]:
def get_dist(image, x1, y1, x2, y2):
    """Predict the distance value for the object inside a bounding box.

    Unlike a plain crop, the full frame is kept: everything outside the
    enlarged box is zeroed, so the object stays at its original position.
    The masked frame is resized to 300x150 (width x height), scaled by
    1/255 and fed to the module-level ``dist_model``.

    Parameters
    ----------
    image
        Image array indexed as ``[row, column, channel]``.
    x1, y1, x2, y2
        Bounding-box corners in pixels.

    Returns
    -------
    The first model output multiplied by 10000 and rounded to one decimal.
    """
    frame = image.copy()
    masked = np.zeros(frame.shape)

    # Same enlarged-box arithmetic as the original: 5% of each coordinate.
    rows = slice(int(y1 - y1 * 0.05), int(y2 + y2 * 0.05))
    cols = slice(int(x1 - x1 * 0.05), int(x2 + x2 * 0.05))

    # Copy only the boxed region onto the black canvas.
    masked[rows, cols, :] = frame[rows, cols, :]

    net_input = cv2.resize(masked, (300, 150)) / 255.
    raw_dist = dist_model.predict(net_input[np.newaxis, ...])[0][0]

    return round(10000 * raw_dist, 1)

In [107]:
def predict_all(image):
    """Run detection, distance and orientation prediction on a single image.

    Parameters
    ----------
    image
        Image array; ``image.shape[0]`` is used as the height and
        ``image.shape[1]`` as the width.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``x1, y1, x2, y2, dist, tang, kren, course,
        azim, terr_angle``. When ``get_detection_bbox`` reports no detection
        (NaN coordinates), every value in the row is NaN.
    """
    columns = ['x1', 'y1', 'x2', 'y2', 'dist', 'tang', 'kren', 'course', 'azim', 'terr_angle']

    x1, y1, x2, y2 = get_detection_bbox(image)

    # get_detection_bbox signals "nothing found" with NaNs. The downstream
    # helpers convert the coordinates with int(), which raises on NaN, so
    # return an explicit empty measurement instead.
    if any(np.isnan(value) for value in (x1, y1, x2, y2)):
        return pd.DataFrame([[np.nan] * len(columns)], columns=columns)

    dist = get_dist(image, x1, y1, x2, y2)
    tang, kren, course = get_params(image, x1, y1, x2, y2)

    # Bounding-box centre in pixels.
    x_c = (x1 + x2) / 2
    y_c = (y1 + y2) / 2

    # Offsets of the box centre from the image centre, turned into angles in
    # degrees (np.degrees replaces the hand-typed 57.2958 factor).
    # NOTE(review): this divides a pixel offset by the predicted distance;
    # confirm that the units are intended to match.
    azim = np.degrees(np.arctan((x_c - image.shape[1] / 2) / dist))
    terr_angle = np.degrees(np.arctan((y_c - image.shape[0] / 2) / dist))

    return pd.DataFrame(
        [[x1, y1, x2, y2, dist, tang, kren, course, azim, terr_angle]],
        columns=columns,
    )

In [108]:
def predict_video(video_file):
    """Annotate every frame of a video and collect the per-frame predictions.

    Each frame is passed to ``predict_all``. The detected bounding box (if
    any) and the predicted values are drawn on the frame, and the annotated
    frame is written to ``'result/<video_file>'``.

    Parameters
    ----------
    video_file : str
        Path of the input video. It is also used as the relative path of the
        output file under ``result/``.

    Returns
    -------
    pandas.DataFrame
        One row per processed frame, in frame order, with the columns that
        ``predict_all`` produces. Empty when no frame could be read.
    """
    bbox_columns = ['x1', 'y1', 'x2', 'y2']
    # (label drawn on the frame, column of the prediction row)
    overlay_fields = [
        ('DIST', 'dist'),
        ('TANG', 'tang'),
        ('KREN', 'kren'),
        ('COURSE', 'course'),
        ('AZIM', 'azim'),
        ('T_ANGLE', 'terr_angle'),
    ]
    red = (0, 0, 255)

    output_path = 'result/{}'.format(video_file)
    output_dir = os.path.dirname(output_path)
    if output_dir:
        # The writer cannot produce a file in a directory that does not exist.
        os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_file)
    out = None
    frame_predictions = []

    try:
        frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        # Keep the source frame rate; fall back to the previous fixed value
        # when the container does not report one.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps and fps > 0):
            fps = 25.0

        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # NOTE(review): frames from cv2.VideoCapture are BGR, and the
            # commented-out code in earlier revisions converted to RGB before
            # inference - confirm which channel order the models expect.
            pred_df = predict_all(frame)
            frame_predictions.append(pred_df)

            # Draw the box only when a detection exists (coordinates not NaN).
            bbox = pred_df[bbox_columns].iloc[0]
            if bbox.notna().all():
                x1, y1, x2, y2 = (int(bbox[name]) for name in bbox_columns)
                frame = cv2.rectangle(frame, (x1, y1), (x2, y2), red)

            # One text line per value, 30 px apart, starting at y=50.
            for line_no, (label, column) in enumerate(overlay_fields):
                text = '{}={}'.format(label, round(pred_df[column].iloc[0], 3))
                frame = cv2.putText(frame, text, (30, 50 + 30 * line_no),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, red, 2, cv2.LINE_AA)

            # Frames are written at the size the writer was opened with; a
            # mismatching size makes the writer drop them.
            out.write(frame)
    finally:
        # Release the handles even if prediction or drawing fails mid-video.
        cap.release()
        if out is not None:
            out.release()

    if not frame_predictions:
        return pd.DataFrame()

    # Single concat instead of growing the frame inside the loop.
    return pd.concat(frame_predictions, ignore_index=True)

In [109]:
%%time
# Process the sample clip: the annotated video is written to
# 'result/AirbusA330.mp4' and the per-frame predictions are kept in result_data.
result_data = predict_video('AirbusA330.mp4')

CPU times: user 38.1 s, sys: 7.95 s, total: 46.1 s
Wall time: 33.4 s
