# Assignment 4: Search Objects In A Video Footage - kelvin.ahiakpor & emmanuel.acquaye

## Computer Vision

### Imports

In [1]:
import os
import shutil
import threading
import time

import cv2
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import display, clear_output, Javascript
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
from tqdm import tqdm

### Setting job timeout for computation

In [2]:
# Export both joblib-related environment variables for this process in one call.
os.environ.update({
    'JOBLIB_START_METHOD': 'loky',
    'JOBLIB_TIMEOUT': '300',
})

### Custom recipes for DataFrame inspection
Object predictions will be stored in a dataframe so it can be displayed in a friendly manner in this notebook

In [3]:
# Display settings applied to every DataFrame rendered in this notebook:
# no column/row/cell-width truncation and 3-digit float precision.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('display.max_colwidth', None),
    ('display.precision', 3),
):
    pd.set_option(option_name, option_value)

# Task 1
Use a pretrained Google Inception V3 deep learning model

Downloading the weights pretrained on the ImageNet dataset, which we use for our image classification purposes

In [4]:
# Build InceptionV3 with the 'imagenet' weight set. This module-level `model`
# is the object `get_predictions` calls `.predict` on.
model = InceptionV3(weights='imagenet')

**Defining a function to preprocess images for InceptionV3**

In [5]:
def preprocess_frame(frame_path):
    """Load one frame from disk and turn it into an InceptionV3 input batch.

    Args:
        frame_path: Path of the image file to load.

    Returns:
        The result of `preprocess_input` applied to the image array after a
        leading batch axis has been added (the image is loaded at 299x299).
    """
    loaded = image.load_img(frame_path, target_size=(299, 299))
    pixels = image.img_to_array(loaded)
    # The model consumes batches, so wrap the single image in a batch of one.
    batch = np.expand_dims(pixels, axis=0)
    return preprocess_input(batch)

**Defining a function to get top 5 predictions for each frame**

In [6]:
def get_predictions(frame_path):
    """Classify one frame and return its five highest-ranked predictions.

    Args:
        frame_path: Path of the frame image to classify.

    Returns:
        The first (and only) entry of `decode_predictions(preds, top=5)`;
        the notebook unpacks each item as `(class_code, object_name, score)`.
    """
    x = preprocess_frame(frame_path)
    # verbose=0: this function is called once per frame inside a tqdm loop, and
    # the default per-call Keras progress bar would flood the cell output.
    preds = model.predict(x, verbose=0)
    return decode_predictions(preds, top=5)[0]

## Task 2 
Ensure the implementation is general enough to accept any user-provided video

**Accepting all video formats**  

In [7]:
# Pattern passed as the `accept` argument of the FileUpload widget; intended to
# allow any video type rather than one specific container format.
accepted_formats = 'video/*'

## Task 3
Allow users to upload videos through Colab or any other method  

Here we define **3** sets of functions to allow uploads whether this notebook is run in Colab or Jupyter  
**1** The first takes user input and runs the desired upload method  
**2** The second set of functions handles uploads from Google Colab  
**3** The third set handles uploads from Jupyter   

Eventually we deploy in Streamlit where we can have even more upload generalization capabilities  
**Note:** Platform-specific modules (e.g. `google.colab`) are imported inside the functions that need them, so the import only happens on the platform where that code path actually runs and the notebook does not crash elsewhere.

**Setting a video size threshold of 100MB**

In [8]:
# Largest upload accepted, in bytes. Both the Colab and the Jupyter validators
# receive this value and reject any file whose size exceeds it.
memory_size_threshold = 100 * 1024 * 1024  # 100 MB

**Global variables**  
Dictionary of upload information   
Video path  
Upload Widget
Upload Status

In [9]:
# Placeholder for upload metadata; none of the cells visible in this notebook
# assign or read it after this point.
upload_info = None
# Path of the video to analyse. Written by the upload handlers and read by the
# frame-extraction cell.
video_path = None

In [10]:
# Single-file upload control whose `accept` filter is `accepted_formats`.
upload_widget = widgets.FileUpload(accept=accepted_formats, multiple=False)
# Initial button object; `show_upload_widget` rebinds this global to a fresh
# "Upload Video" button before displaying it.
upload_button = widgets.Button(description="Process Upload")

In [11]:
# Flag that `process_files` sets to True after a valid upload; `wait_for_video`
# polls it to decide when to stop waiting.
upload_completed = False

**Function Set 1**

In [12]:
def select_run_choice():
    """Prompt for the upload method and start it.

    Keeps asking until the user enters '1' or '2'. Choice '1' runs the Colab
    upload and stores its result in the global `video_path`; choice '2' shows
    the Jupyter upload widget, which sets `video_path` from its button handler.
    """
    global video_path
    prompt = ("Enter '1' for Google Colab upload or '2' for Jupyter upload."
              " 2 is recommended however as it works across platforms : ")

    choice = input(prompt)
    while choice not in ('1', '2'):
        print("Invalid choice. Please enter '1' or '2'.")
        choice = input(prompt)

    # Remove the prompt text before the chosen uploader draws its own output.
    clear_output(wait=False)
    if choice == '1':
        video_path = upload_files_colab()  # Make sure video_path is updated
    else:
        show_upload_widget()

**Function Set 2 (Google Colab)**

In [13]:
def handle_file_upload_colab(uploaded_files, memory_size_threshold):
    """Validate the first file of a Colab upload and record it as the current video.

    Only the first entry of the mapping is examined: the function returns as
    soon as that entry has been accepted or rejected.

    Args:
        uploaded_files: Mapping of filename -> file content. `len()` of the
            content is used as the file size in bytes.
        memory_size_threshold: Maximum accepted size in bytes.

    Returns:
        The accepted filename, or None when the file is too large or the
        mapping is empty.

    Side effects:
        Sets the global `video_path` to the accepted filename and prints a
        status line.
    """
    global video_path
    for filename, content in uploaded_files.items():
        file_size = len(content)
        if file_size > memory_size_threshold:
            print(f'The file "{filename}" exceeds the memory size threshold of {memory_size_threshold / (1024 * 1024)} MB.')
            return None
        # Use the name being validated. The earlier code read `uploaded`, which
        # is not defined in this function, so every accepted upload raised NameError.
        video_path = filename
        print(f'User uploaded file "{filename}" with size {file_size} bytes')
        return filename  # Return the filename for further processing
    # Nothing was uploaded.
    return None

In [14]:
def upload_files_colab():
    """Run the Colab upload dialog and validate what the user selected.

    Returns:
        Whatever `handle_file_upload_colab` returns for the uploaded mapping
        and the module-level `memory_size_threshold`.
    """
    # Imported here, not at the top of the notebook, to avoid a crash on
    # platforms where `google.colab` is not installed.
    from google.colab import files
    return handle_file_upload_colab(files.upload(), memory_size_threshold)

**Function Set 3 (Jupyter)**

In [15]:
def _iter_widget_uploads(uploaded_files):
    """Yield `(filename, size, content)` for each entry of a FileUpload value."""
    if isinstance(uploaded_files, dict):
        # ipywidgets 7.x layout: {filename: {'metadata': {'size': ...}, 'content': ...}}
        for filename, file_info in uploaded_files.items():
            yield filename, file_info['metadata']['size'], file_info['content']
    else:
        # ipywidgets 8.x layout: a sequence of {'name': ..., 'size': ..., 'content': ...}
        for file_info in uploaded_files:
            yield file_info['name'], file_info['size'], file_info['content']


def validate_file_upload(uploaded_files, memory_size_threshold):
    """Check the first uploaded file against the size limit and save it.

    Accepts both shapes of `FileUpload.value`: the filename-keyed dict and the
    sequence of per-file dicts. Only the first entry is examined; the function
    returns as soon as that entry has been accepted or rejected.

    Args:
        uploaded_files: The value of the upload widget (may be empty).
        memory_size_threshold: Maximum accepted size in bytes.

    Returns:
        The saved filename, or None when the file is too large or nothing
        was uploaded.
    """
    if not uploaded_files:
        return None
    for filename, file_size, content in _iter_widget_uploads(uploaded_files):
        if file_size > memory_size_threshold:
            print(f'The file "{filename}" exceeds the memory size threshold of {memory_size_threshold / (1024 * 1024)} MB.')
            return None  # Indicates that the file is too large
        print(f'User uploaded file "{filename}" with size {file_size} bytes')
        save_file_jupyter(filename, content)
        return filename  # Indicates that the file is within the limit
    return None

In [16]:
def save_file_jupyter(filename, file_content):
    """Write uploaded bytes to `filename` and report that the file was saved.

    Args:
        filename: Destination path; opened in binary write mode, so an
            existing file at that path is overwritten.
        file_content: Bytes-like payload to write.
    """
    with open(filename, mode='wb') as destination:
        destination.write(file_content)
    print(f'File "{filename}" has been saved.')

In [17]:
def process_files(button):
    """Button click handler: validate and save the file held by the upload widget.

    Args:
        button: The widget that triggered the click (not used).

    Side effects:
        Clears the cell output, sets the global `video_path` to the result of
        `validate_file_upload`, and sets the global `upload_completed` to
        whether that result is a usable path.
    """
    global upload_completed
    global video_path
    clear_output(wait=False)
    uploaded_files = upload_widget.value
    # Handle file validation and save
    video_path = validate_file_upload(uploaded_files, memory_size_threshold)
    # Recompute the flag on every click. Previously it was only ever set to
    # True, so a rejected upload after a successful one still printed
    # "Upload complete" while `video_path` had been reset to None.
    upload_completed = bool(video_path)
    if upload_completed:
        print(f"Video path set to: {video_path}")
        print()
        print("Upload complete. You can now proceed to the run the next cells.")
    else:
        print("No valid video file uploaded.")

In [18]:
def show_upload_widget():
    """Display the upload control with a fresh "Upload Video" button, then wait.

    Rebinds the global `upload_button`, registers `process_files` as its click
    handler, shows both widgets and finally calls `wait_for_video`.
    """
    global upload_button
    upload_button = widgets.Button(description="Upload Video")
    upload_button.on_click(process_files)

    # Display the widgets: upload control first, then the button.
    for widget in (upload_widget, upload_button):
        display(widget)

    print("Please upload a file and then click the 'Upload Video' button.")
    wait_for_video()

In [19]:
def wait_for_video(timeout_seconds=15, poll_interval=3):
    """Poll the global `upload_completed` flag until it is set or time runs out.

    Args:
        timeout_seconds: Total time to wait before giving up (default 15).
        poll_interval: Seconds to sleep between two checks of the flag (default 3).

    Raises:
        Whatever `raiseBreak()` raises, once `timeout_seconds` have elapsed
        without `upload_completed` becoming true.
    """
    print("Waiting for file upload...")
    print(f"You have {timeout_seconds} seconds to upload your video")
    print("If you have already uploaded and clicked 'Upload Video' please be patient.")
    print()
    elapsed = 0
    while not upload_completed:
        # Give up only after the full advertised time. The earlier counter
        # aborted one interval early (after 12 of the 15 seconds).
        if elapsed >= timeout_seconds:
            raiseBreak()
        time.sleep(poll_interval)  # Check once per poll interval
        elapsed += poll_interval
        print(f"{max(timeout_seconds - elapsed, 0)} seconds left...")
    if video_path:
        print("File upload detected. Proceeding with processing...")

**Video upload happens here**

A polling mechanism with a timeout: it waits for the upload, then stops the cell so the user can manually continue cell execution

In [20]:
class VideoUploadBreak(Exception):
    """Raised to stop the upload cell when no video arrived within the wait time.

    The text is kept on the `message` attribute and is also what `str()` of
    the exception returns.
    """

    def __init__(self, message="""The program has halted here to take video,  
    No video detected yet.
    Did you click 'Upload Video'?
    You can still go ahead even though the given time elapsed
    If you're using Jupyter, clicking the button should fix the issue.
    If you're using Google Colab, run this cell again after clicking the button`"""):
        super().__init__(message)
        self.message = message

    def __str__(self):
        # Return the stored text itself, not the default Exception rendering.
        return self.message

In [21]:
def raiseBreak():
    """Stop execution by raising `VideoUploadBreak` with its default message."""
    raise VideoUploadBreak

Now we can allow user to upload video

**Allowing user to upload video**

In [22]:
# Ask which upload path to use and start it; the chosen path is responsible for
# setting the global `video_path` that the frame-extraction cell reads.
select_run_choice()

User uploaded file "truck.MOV" with size 8881893 bytes
File "truck.MOV" has been saved.
Video path set to: truck.MOV

Upload complete. You can now proceed to the run the next cells.


## Task 4
Split the uploaded video into frames and feed the frames into the Google Inception V3 model to detect objects

In [None]:
def extract_frames(video_path, output_folder):
    """Write every frame of a video into `output_folder` as numbered JPEG files.

    Frames are saved as `frame0.jpg`, `frame1.jpg`, ... in read order. The
    folder is created if it does not exist.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the frame images.

    Returns:
        The number of frames read from the video (0 when the first read fails).
    """
    os.makedirs(output_folder, exist_ok=True)
    vidcap = cv2.VideoCapture(video_path)
    count = 0
    try:
        # The local is called `frame`, not `image`, so it does not shadow the
        # Keras `image` module imported at the top of the notebook.
        success, frame = vidcap.read()
        while success:
            frame_path = os.path.join(output_folder, f"frame{count}.jpg")
            cv2.imwrite(frame_path, frame)
            count += 1
            success, frame = vidcap.read()
    finally:
        # Always give the capture handle back, even if writing a frame fails.
        vidcap.release()
    return count

**Defining a function to clear output folder**  
We need to use this whenever user uploads a new video

In [None]:
def clear_output_folder(output_folder):
    """Delete everything inside `output_folder`, keeping the folder itself.

    Does nothing when `output_folder` is not an existing directory.
    Sub-directories are removed together with their contents; symbolic links
    are unlinked, never followed.

    Args:
        output_folder: Directory to empty.
    """
    if not os.path.isdir(output_folder):
        return
    # Snapshot the listing first so nothing is deleted during iteration.
    with os.scandir(output_folder) as scan:
        entries = list(scan)
    for entry in entries:
        if entry.is_dir(follow_symlinks=False):
            # os.rmdir only removes empty directories; rmtree handles nested content.
            shutil.rmtree(entry.path)
        else:
            os.unlink(entry.path)

**Extracting frames from the video and storing in a local output folder**  
Here we clear the output folder before writing the frames of the new video, so the user's search only applies to the current video

In [None]:
# Folder (relative to the working directory) that receives the extracted frames.
output_folder = 'frames'
clear_output_folder(output_folder) #clearing output folder
# `extract_frames` returns how many frames it read; the prediction cells loop
# over this count to rebuild each frame's file name.
frame_count = extract_frames(video_path, output_folder)
print(f'Extracted {frame_count} frames from the video.')

**Getting predictions**  
Here we pass the frames into InceptionV3 and store its prediction for each frame in a dataframe 

In [None]:
# One row per (frame, predicted class): [frame path, class code, object name, score].
predictions_list = []

# Run the classifier on each extracted frame, in frame order.
for frame_index in tqdm(range(frame_count)):
    frame_path = os.path.join(output_folder, f"frame{frame_index}.jpg")
    predictions_list.extend(
        [frame_path, code, object_name, score]
        for code, object_name, score in get_predictions(frame_path)
    )

In [None]:
# Per-frame summary: frame path -> (list of object names, list of scores).
# It regroups the rows already collected in `predictions_list` and no longer
# runs the model a second time over every frame, which doubled the inference
# cost of the notebook for the same information.
predictions_dict = {}
for frame_path, _class_code, object_name, score in predictions_list:
    objects, scores = predictions_dict.setdefault(frame_path, ([], []))
    objects.append(object_name)
    scores.append(score)

Here we store the predictions in a dataframe

In [None]:
# Long format: one row per (frame, predicted class) entry of `predictions_list`.
object_predictions = pd.DataFrame(predictions_list, columns=['Frame', 'Class Code', 'Object', 'Score'])

In [None]:
# Wide format: one row per frame, with its object names and scores each joined
# into a single ', '-separated string.
summary_rows = [
    (frame, ', '.join(objs), ', '.join(str(score) for score in scores))
    for frame, (objs, scores) in predictions_dict.items()
]
object_predictions_summarized = pd.DataFrame(summary_rows, columns=['Frame', 'Objects', 'Scores'])

**Friendly displays of object predictions**

In [None]:
# Preview: first five frames with their joined object names and scores.
object_predictions_summarized.head(5)

In [None]:
# Preview: first ten rows of the one-row-per-prediction table.
object_predictions.head(10)

## Task 5
Allow users to type a search query for an object that might be in the uploaded video

In [None]:
def search_for_object(df, search_query):
    """Return the rows whose 'Objects' cell contains an object matching the query.

    Matching is a case-insensitive substring test against each ', '-separated
    object name. Underscores and spaces are treated as the same character, so
    the query "trailer truck" matches the label "trailer_truck". A query that
    is empty after normalisation matches nothing.

    Args:
        df: DataFrame with an 'Objects' column of ', '-joined object names.
        search_query: Text typed by the user.

    Returns:
        The matching rows of `df` (an empty frame when nothing matches).
    """
    def _normalise(text):
        # Lower-case, map '_' to ' ' and collapse runs of whitespace.
        return ' '.join(text.lower().replace('_', ' ').split())

    query = _normalise(search_query)
    if not query:
        # '' is a substring of every name, so without this guard a blank
        # query would select every frame.
        return df.iloc[0:0]

    def _row_matches(objects_cell):
        return any(query in _normalise(name) for name in objects_cell.split(', '))

    # astype(bool) keeps the mask boolean even when `df` has no rows.
    mask = df['Objects'].map(_row_matches).astype(bool)
    return df[mask]

In [None]:
# Read the search term, trimmed and lower-cased before it is used for matching.
search_query = input("Enter the object you want to search for: ").strip().lower()

## Task 6
The application should return and display the frame(s) with the object searched by the user, if it exists. If the object doesn't exist, display an error message: "Object doesn't exist!!!".

**Defining a function to display a frame using matplotlib**

In [None]:
def display_frame(frame_path):
    """Show one saved frame with matplotlib, axes hidden.

    Args:
        frame_path: Path of the image file to display.

    Raises:
        OSError: If the image cannot be read (missing file or undecodable image).
    """
    img = cv2.imread(frame_path)
    if img is None:
        # cv2.imread reports failure by returning None, not by raising;
        # fail here with the path instead of an opaque cvtColor error.
        raise OSError(f"Could not read frame image: {frame_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB
    plt.imshow(img)
    plt.axis('off')
    plt.show()

**Searching for objects**

In [None]:
# Rows of the per-frame summary table whose objects match the user's query.
search_results = search_for_object(object_predictions_summarized, search_query)

**Displaying them if they exist**

In [None]:
# Report the outcome of the search: an error message when nothing matched,
# otherwise a count followed by every matching frame.
if search_results.empty:
    print("Object doesn't exist!!!")
else:
    print(f'Found {search_query} in {len(search_results)} frames.')
    print('The frames will be displayed below')
    for frame_path in search_results['Frame']:
        display_frame(frame_path)

# References

**Bibliography**  
[1]Snowflake Inc. 2024. Connect Streamlit to Google Cloud Storage - Streamlit Docs. docs.streamlit.io. Retrieved July 19, 2024 from https://docs.streamlit.io/develop/tutorials/databases/gcs  
[2]Keras Team. 2024. Keras documentation: InceptionV3. keras.io. Retrieved July 19, 2024 from https://keras.io/api/applications/inceptionv3/  
[3]TensorFlow. 2024. Load video data | TensorFlow Core. TensorFlow. Retrieved July 19, 2024 from https://www.tensorflow.org/tutorials/load_data/video#create_frames_from_each_video_file  
[4]TensorFlow. 2024. tf.keras.applications.inception_v3.decode_predictions | TensorFlow v2.16.1. TensorFlow. Retrieved July 19, 2024 from https://www.tensorflow.org/api_docs/python/tf/keras/applications/inception_v3/decode_predictions