## Analyzing a scene

We are going to apply the different capabilities built so far in order to predict which actors appear in a scene.

### Importing libraries

In [None]:
import sys
import os
import json
import pandas as pd
import numpy as np
!pip install pandasql
from pandasql import sqldf
import matplotlib.pyplot as plt
from time import time, strftime, localtime, gmtime
sys.path.append('/opt/workspace/src/python_scripts/')
from ops_face_recognition import train_recognizer,get_probabilities_for_folder
from ops_files_operations import read_pickle_file, read_json_file, get_element_from_metadata
from ops_face_detection import process_video, get_video_embeddings,get_frame_from_video, get_embeddings_from_image
from ops_results_interpretation import get_frames_df, get_actors_probs_query, get_predicted_timeline, get_summarized_timeline

### Creating best recognizers

Based on the results obtained in Notebook "10_training_models"

In [None]:
# Passed to train_recognizer as `embeddings_folder` in the cells below.
embeddings_folder = "./models/embeddings/actor_faces"
# Passed to train_recognizer as `output_folder` in the cells below.
recognizer_path = "./models/recognizers"

#### Linear recognizer

In [None]:
# Settings for the linear-kernel recognizer, gathered in one mapping so they
# can be read at a glance before being expanded into the call.
linear_recognizer_params = {
    "embeddings_folder": embeddings_folder,
    "model_id": None,
    "C": 1.0,
    "kernel": "linear",
    "probability": True,
    "save_to_pickle": True,
    "output_folder": recognizer_path,
}
train_recognizer(**linear_recognizer_params)

#### RBF recognizer

In [None]:
# Settings for the RBF-kernel recognizer, gathered in one mapping so they can
# be read at a glance before being expanded into the call.
rbf_recognizer_params = {
    "embeddings_folder": embeddings_folder,
    "model_id": None,
    "kernel": "rbf",
    "C": 1.0,
    "gamma": 1.0,
    "probability": True,
    "save_to_pickle": True,
    "output_folder": recognizer_path,
}
train_recognizer(**rbf_recognizer_params)

#### Poly recognizer

In [None]:
# Settings for the polynomial-kernel recognizer, gathered in one mapping so
# they can be read at a glance before being expanded into the call.
poly_recognizer_params = {
    "embeddings_folder": embeddings_folder,
    "model_id": None,
    "kernel": "poly",
    "C": 1.0,
    "degree": 3,
    "probability": True,
    "save_to_pickle": True,
    "output_folder": recognizer_path,
}
train_recognizer(**poly_recognizer_params)

#### Getting recognizers ids

In [None]:
recognizers_metadata_path = './models/recognizers/recognizer_metadata.json'

# Look up the latest metadata entry for each kernel (linear, rbf, poly, in
# that order) and keep only its "recognizer_id".
latest_recognizer_ids = {
    kernel_name: get_element_from_metadata(
        metadata_file_path=recognizers_metadata_path,
        key='kernel',
        value=kernel_name,
        latest=True,
    )["recognizer_id"]
    for kernel_name in ('linear', 'rbf', 'poly')
}

linear_recognizer_id = latest_recognizer_ids['linear']
rbf_recognizer_id = latest_recognizer_ids['rbf']
poly_recognizer_id = latest_recognizer_ids['poly']
print(f'Linear = {linear_recognizer_id}, rbf = {rbf_recognizer_id}, poly = {poly_recognizer_id}.')

### Processing a scene

#### Obtaining scene embeddings

In [None]:
# Video to analyse and the folder handed to get_video_embeddings as `results_path`.
scene_path = "./datasets/videos/the_final_kick.mp4"
output_path = "./models/embeddings/processed_videos"

# Processing options forwarded unchanged to get_video_embeddings.
video_embedding_options = {"partitions": 4, "desired_fps": 4}
get_video_embeddings(
    video_path=scene_path,
    results_path=output_path,
    **video_embedding_options,
)

#### Getting embeddings "processed_video_id"

In [None]:
probs_metadata_path = (
    './models/embeddings/processed_videos/the_final_kick/processed_videos_metadata.json'
)

# Fetch the latest entry of the processed-videos metadata and keep its id.
latest_processed_video = get_element_from_metadata(
    metadata_file_path=probs_metadata_path,
    latest=True,
)
processed_video_id = latest_processed_video["processed_video_id"]
print(f'Processed video id = {processed_video_id}.')

#### Predicting results for that scene

In [None]:
scene_embeddings_path = "./models/embeddings/processed_videos/the_final_kick"
results_folder = "./models/results"

# Score the scene embeddings with the linear recognizer; the call returns a
# pair that is unpacked into the probabilities and their metadata.
prediction_output = get_probabilities_for_folder(
    folder_path=scene_embeddings_path,
    recognizer_folder=recognizer_path,
    save_to_pickle=True,
    output_folder=results_folder,
    processed_video_id=processed_video_id,
    recognizer_id=linear_recognizer_id,
)
folder_probabilities, folder_probabilities_metadata = prediction_output

In [None]:
# Keep the id stored under "results_id" in the metadata returned by
# get_probabilities_for_folder.
results_id = folder_probabilities_metadata['results_id']
# Bare expression: shown as the cell output when run in a notebook.
results_id

### Understanding results for one actor

The following is a walkthrough of the process used to determine whether an actor is present or not at a specific timestamp of a video.

#### Getting results dict

In [None]:
# Metadata files consulted in this cell.
results_metadata_path = "./models/results/probabilities_metadata.json"
processed_videos_metadata_path = (
    "./models/embeddings/processed_videos/the_final_kick/processed_videos_metadata.json"
)
recognizer_metadata_path = "./models/recognizers/recognizer_metadata.json"

# Take the latest results entry. A lookup by key='results_id' /
# value=results_id existed here only as disabled code, so it is not applied.
results_metadata = get_element_from_metadata(
    metadata_file_path=results_metadata_path,
    latest=True,
)
pickle_path = results_metadata["pickle_path"]
results = read_pickle_file(pickle_path)

# Resolve the processed video and the recognizer referenced by that results
# entry, each by its own id.
processed_videos_metadata = get_element_from_metadata(
    metadata_file_path=processed_videos_metadata_path,
    key="processed_video_id",
    value=results_metadata["processed_video_id"],
)
recognizer_metadata = get_element_from_metadata(
    metadata_file_path=recognizer_metadata_path,
    key="recognizer_id",
    value=results_metadata["recognizer_id"],
)

This is how the results are stored:

In [None]:
# Show the first two "probabilities" entries of the first chunk.
results[0]["probabilities"][:2]

#### Transforming dict into pd dataframe

In [None]:
# Flatten the nested results into one row per entry of each chunk's
# "probabilities" list. Every row carries the frame number, the timestamp and
# the TOP_N_PREDICTIONS best-scored names with their values, stored as
# pred_<rank>_name / pred_<rank>_value.
TOP_N_PREDICTIONS = 10

full_probs = []
for chunk in results:
    for preds in chunk["probabilities"]:
        # Only the first element of "predictions" is read.
        # NOTE(review): presumably one element per detected face - confirm
        # whether the remaining elements should be used as well.
        scores = preds["predictions"][0]

        # Rank the candidate names once, best score first. sorted() is stable,
        # so ties keep the dict's own order.
        ranked_names = sorted(scores, key=scores.get, reverse=True)

        preds_dict = {
            "frame_number": preds["frame_number"],
            "timestamp": preds["timestamp"],
        }
        for rank in range(1, TOP_N_PREDICTIONS + 1):
            # With fewer candidates than TOP_N_PREDICTIONS the missing ranks
            # are filled with None instead of raising IndexError, so the
            # DataFrame always exposes the same pred_<rank>_* columns.
            if rank <= len(ranked_names):
                name = ranked_names[rank - 1]
                value = scores[name]
            else:
                name = None
                value = None
            preds_dict[f"pred_{rank}_name"] = name
            preds_dict[f"pred_{rank}_value"] = value
        full_probs.append(preds_dict)

results_df = pd.DataFrame(full_probs)

In [None]:
# Show the first two rows of the flattened results.
results_df.head(2)

#### Generating frames for the processed video

In [None]:
# Build the frames table from the processed-video metadata; it is the left
# side of the joins in the per-actor SQL queries further down.
frames_df = get_frames_df(processed_videos_metadata)
# Bare expression: shown as the cell output when run in a notebook.
frames_df

#### Getting probs by frame for a single actor

In [None]:
actor = '2037_cillian_murphy'

# Single template for the "probability at rank N" query. For every frame of
# frames_df it keeps the rows where the rank-N name matches the actor, plus
# the rows where that name is null (coalesce turns their value into 0).
RANK_PROBS_QUERY = """
    select
        frame_number,
        coalesce(pred_{rank}_value, 0) as pred_{rank}_value
    from frames_df
    left join results_df using(frame_number)
    where
        (pred_{rank}_name like '%{actor}%' or pred_{rank}_name is null)
"""


def _rank_probs_query(rank, actor_name):
    """Return the SQL text of the rank-*rank* probability query for *actor_name*."""
    return RANK_PROBS_QUERY.format(rank=rank, actor=actor_name)


# One DataFrame per rank (1..10). globals() is passed explicitly on every
# call so that all queries resolve frames_df / results_df the same way.
preds_dfs = [
    sqldf(_rank_probs_query(rank, actor), globals())
    for rank in range(1, 11)
]

# Query texts kept under their original names so they can still be inspected.
q1 = _rank_probs_query(1, actor)
q2 = _rank_probs_query(2, actor)
q3 = get_actors_probs_query(actor)

# Ranks 1 and 2 were already computed above with the very same queries, so
# they are reused instead of being run a second time.
actor_pred1 = preds_dfs[0]
actor_pred2 = preds_dfs[1]
actor_pred = sqldf(q3, globals())

#### Plotting the results

In [None]:
size = 7  # not used by the plot calls in this cell
f = plt.figure(figsize=(15, 5))

# One line per prediction rank: frame number on x, probability on y.
for rank_df, value_column, legend_label in (
    (actor_pred1, 'pred_1_value', "pred 1"),
    (actor_pred2, 'pred_2_value', "pred 2"),
):
    plt.plot(rank_df['frame_number'], rank_df[value_column], label=legend_label)

# Zoom on frames 900-1500. The (400, 500) and (2050, 2100) ranges, and an
# extra "overall pred" line, were present only as disabled code.
plt.xlim(900, 1500)
plt.legend()

In [None]:
# Marker area used by the scatter plot, and a 15x5 inch figure.
size = 30
f = plt.figure(figsize=(15, 5))

# Marker symbols, indexed by prediction rank position.
all_poss = [
    'o', '*', 's', 'v', '^', '>', '<', 'p',
    'h', 'H', 'D', 'd', '.', '1', '', '',
]

# Rank positions (0-based) to draw.
allowed_preds = range(2)

# Switches for the optional layers of the figure.
plot_zone_limites = False
plot_vertical_zone_limits = False
plot_general_pred = True
plot_delimited_zones = True
plot_zone_metrics = True

# Scatter the non-zero probabilities of every selected rank; optionally draw
# a +/-12 frame window around each point.
for idx, dfs in enumerate(preds_dfs):
    # Skip ranks that are not selected instead of stopping at the first one:
    # with `break`, a selection such as [1, 3] would plot nothing at all.
    if idx not in allowed_preds:
        continue

    rank = idx + 1
    value_column = f'pred_{rank}_value'

    # Zero means the actor was not predicted at this rank for that frame.
    filtered_df = dfs[dfs[value_column] != 0]
    plt.scatter(
        filtered_df['frame_number'],
        filtered_df[value_column],
        label=f"Predicted as #{rank}",
        marker=all_poss[idx],
        s=size,
    )

    if plot_zone_limites:
        for index, row in filtered_df.iterrows():
            # Horizontal segment spanning 12 frames on each side of the point.
            plt.hlines(
                y=row[value_column],
                xmin=row['frame_number'] - 12,
                xmax=row['frame_number'] + 12,
                linewidth=1,
                color='gray',
            )
            if plot_vertical_zone_limits:
                # NOTE(review): the two vertical limits use different heights
                # (0.53 and 0.55); kept as found - confirm whether intended.
                plt.vlines(
                    x=row['frame_number'] - 12,
                    ymin=0,
                    ymax=0.53,
                    linewidth=1,
                    color='gray',
                    linestyle='dotted',
                )
                plt.vlines(
                    x=row['frame_number'] + 12,
                    ymin=0,
                    ymax=0.55,
                    linewidth=1,
                    color='gray',
                    linestyle='dotted',
                )


if plot_general_pred:
    # Dotted gray line with the actor's overall probability per frame.
    overall_line_style = {
        "label": "overall pred",
        "color": 'gray',
        "linestyle": 'dotted',
    }
    plt.plot(
        actor_pred['frame_number'],
        actor_pred['pred_value'],
        **overall_line_style,
    )

if plot_delimited_zones:
    # Each zone: x position of its label, label text, exclusive frame bounds
    # of the shaded area, and fill color. Drawn in order: label, then fill.
    highlighted_zones = (
        (1080, 'Zone A', 1000, 1200, "green"),
        (1357, 'Zone B', 1300, 1400, "blue"),
    )
    for label_x, zone_name, lower_frame, upper_frame, zone_color in highlighted_zones:
        plt.text(
            label_x,
            0.05,
            zone_name,
            style='italic',
            bbox={'facecolor': 'white', 'alpha': 0.9, 'pad': 5},
        )

        # Shade the area under the overall prediction inside the zone.
        plt.fill_between(
            x=actor_pred['frame_number'],
            y1=actor_pred['pred_value'],
            where=(lower_frame < actor_pred['frame_number'])
            & (actor_pred['frame_number'] < upper_frame),
            hatch="//",
            color=zone_color,
            alpha=0.2,
        )
    
if plot_zone_metrics:

    def _red_box(pad):
        """Return the bbox style of a solid red label with the given padding."""
        return {'facecolor': 'red', 'alpha': 1, 'pad': pad}

    def _zone_span(first_frame, last_frame):
        """Draw the red horizontal bar marking a zone's extent at y=0.1."""
        plt.hlines(y=0.1, xmin=first_frame, xmax=last_frame, linewidth=2, color='red')

    def _metric_label(x, y, caption, fontsize, pad):
        """Write a white-on-red metric caption at (x, y)."""
        plt.text(x, y, caption, fontsize=fontsize, color='white', bbox=_red_box(pad))

    def _peak_callout(caption, peak_xy, caption_xy, fontsize):
        """Annotate a point with a white-on-red caption and a red arrow."""
        plt.annotate(
            caption,
            xy=peak_xy,
            xytext=caption_xy,
            fontsize=fontsize,
            color='white',
            bbox=_red_box(5),
            arrowprops=dict(facecolor='red', shrink=0.04),
        )

    # All positions and figures below are hard-coded annotations.

    # Zone A
    _zone_span(1020, 1164)
    _metric_label(1072, 0.12, 'Length: 37', 10, 5)
    _peak_callout('Max value: 51,1%', (1044, 0.511), (970, 0.45), 10)
    _metric_label(1072, 0.25, 'Sum: 15.0', 10, 5)

    # Zone B
    _zone_span(1356, 1380)
    _metric_label(1357, 0.115, 'Length: 7', 8, 2)
    _metric_label(1357, 0.15, 'Sum: 1.85', 8, 2)
    _peak_callout('Max value: 32,2%', (1368, 0.322), (1280, 0.25), 8)
    
# Finish the current axes: zoom on frames 950-1450, add legend and axis labels.
ax = plt.gca()
ax.set_xlim(950, 1450)
ax.legend(loc='upper right')
ax.set_xlabel("Frame number")
ax.set_ylabel("Probability")

#### Checking a specific frame/timestamp

In [None]:
scene_path = "./datasets/videos/the_final_kick.mp4"

# Grab frame 1050 of the scene. A `timestamp = '00:00:12.000'` argument was
# present only as a disabled alternative to the frame number.
frame = get_frame_from_video(
    video_path=scene_path,
    frame_number=1050,
)

# Run embedding extraction on that frame with multiple faces allowed and
# display enabled.
embs, embs_metadata = get_embeddings_from_image(
    provided_image=frame,
    multiple_faces=True,
    display=True,
)

### Understanding results for all actors in a video

In [None]:
# Both timelines are built from the results entry loaded earlier.
results_id = results_metadata["results_id"]
predicted_tl = get_predicted_timeline(
    results_id,
    avoid_nulls=False,
    only_boundaries=False,
)
summarized_timeline = get_summarized_timeline(results_id)

In [None]:
# Show the first eight entries of the summarized timeline.
summarized_timeline[:8]

In [None]:
# Bare expression: shows predicted_tl as the cell output when run in a notebook.
predicted_tl

In [None]:
# A zone keeps its predictions when it reaches at least ONE of these thresholds.
length_threshold = 12
sum_pred_value_threshold = 4
max_pred_value_threshold = 0.6

# Attach to every predicted frame the summary of the zone it falls in (same
# actor, frame between the zone bounds) and zero out pred_value when the zone
# reaches none of the thresholds or when no zone matches.
final_timeline_query = """
    select
        predicted_tl.actor,
        frame_number,
        timestamp,
        
        length,
        max_pred_value,
        sum_pred_value,
        
        case
            when length >= {0} or sum_pred_value >= {1} or max_pred_value >= {2} then pred_value
            else 0
        end as pred_value
        
    from predicted_tl 
    left join summarized_timeline on predicted_tl.actor = summarized_timeline.actor
        and predicted_tl.frame_number between summarized_timeline.frame_number_from and summarized_timeline.frame_number_to

""".format(length_threshold, sum_pred_value_threshold, max_pred_value_threshold)
final_timeline = sqldf(final_timeline_query)

f = plt.figure(figsize=(15, 5))

# One line per actor, in order of first appearance in the timeline.
# NOTE(review): this loop rebinds the notebook-level `actor` variable that the
# single-actor section sets.
for actor in pd.unique(final_timeline['actor']):
    actor_rows = final_timeline[final_timeline['actor'] == actor]
    plt.plot(actor_rows['frame_number'], actor_rows['pred_value'], label=actor)

# Full frame range is shown. Zooms on (400, 700), (1000, 1200), (1330, 1390)
# and (300, 3000) were present only as disabled plt.xlim calls.
plt.legend()