# Defining Path


In [None]:
import sys

# Make the parent folder and its sibling `examples` folder importable from
# this notebook (both paths are relative to the notebook's working directory).
sys.path.extend(['..', '../examples'])

# Importing Libraries

In [None]:
# Pytorch 
import torch
import torchvision
# General Libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import re
import random

# System and Path Libraries
import os
from pathlib import Path

# Libraries for transforming data into Yolov7 format
from yolov7.dataset import Yolov7Dataset
from yolov7.dataset import create_yolov7_transforms
from yolov7.plotting import show_image
# Train Test Split
from sklearn.model_selection import train_test_split

# Libraries for Training:
from functools import partial
from func_to_script import script
from PIL import Image
from pytorch_accelerated.callbacks import (
    ModelEmaCallback,
    ProgressBarCallback,
    SaveBestModelCallback,
    get_default_callbacks,
    EarlyStoppingCallback
)
from pytorch_accelerated.schedulers import CosineLrScheduler
from torch.utils.data import Dataset
from yolov7 import create_yolov7_model
from yolov7.dataset import (
    Yolov7Dataset,
    create_base_transforms,
    create_yolov7_transforms,
    yolov7_collate_fn,
)
from yolov7.evaluation import CalculateMeanAveragePrecisionCallback
from yolov7.loss_factory import create_yolov7_loss
from yolov7.mosaic import MosaicMixupDataset, create_post_mosaic_transform
from yolov7.trainer import Yolov7Trainer, filter_eval_predictions
from yolov7.utils import SaveBatchesCallback, Yolov7ModelEma

######################
%load_ext autoreload
%autoreload 2

# Report the installed torch / torchvision versions, one per line.
print(torch.__version__, torchvision.__version__, sep="\n")

# Data Loading

First, let's take a look at how to load our dataset in the format that Yolov7 expects.

## Selecting a dataset

Throughout this article, we shall use the [Kaggle cars object detection dataset](https://www.kaggle.com/datasets/sshikamaru/car-object-detection); however, as our aim is to demonstrate how Yolov7 can be applied to any problem, this is really the least important part of this work. Additionally, as the images are quite similar to COCO, it will enable us to experiment with a pretrained model before we do any training.


In [7]:
from pathlib import Path
import os
import pandas as pd

In [8]:
# Location of the data folder and of the video that inference will run on.
# The '//' separators are kept as-is: the frame-extraction code derives its
# output folder name from this exact string.
data_path = "C://Users//endo//Desktop//Yolov7-training-main//Yolov7-training-main//data//papilla"
video_path = "//".join((data_path, "cropped_ben_ami_exalt_1.mp4"))  # all_images

# Converting the Video into required format

In [9]:
# Import the video
# Split it into frames and save them (number the frames sequentially)
# Build a dataframe and sort it so the rows are in increasing frame order
# Convert the data into a dataset adaptor
# Transform the data into Yolov7 format
# Return the final dataset of image tensors in Yolov7 format

In [10]:
import os
import random
from functools import partial
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from func_to_script import script
from PIL import Image
from pytorch_accelerated.callbacks import (
    EarlyStoppingCallback,
    SaveBestModelCallback,
    get_default_callbacks,
)
from pytorch_accelerated.schedulers import CosineLrScheduler
from torch.utils.data import Dataset

from yolov7 import create_yolov7_model
from yolov7.dataset import Yolov7Dataset, create_yolov7_transforms, yolov7_collate_fn
from yolov7.evaluation import CalculateMeanAveragePrecisionCallback
from yolov7.loss_factory import create_yolov7_loss
from yolov7.trainer import Yolov7Trainer, filter_eval_predictions

INFO:pytorch_accelerated:Setting random seeds


In [11]:
# Main code: extract the video frames and prepare the dataset that inference is run on
import cv2
import os
import numpy as np
import pandas as pd
from train_cars import CarsDatasetAdaptor
from yolov7.dataset import Yolov7Dataset
from yolov7.dataset import create_yolov7_transforms

def _frame_index(file_name):
    """Return N for a file named ``frame_N.jpg``, or ``None`` for any other name."""
    stem, extension = os.path.splitext(file_name)
    prefix, _, number = stem.partition("_")
    if extension.lower() != ".jpg" or prefix != "frame" or not number.isdecimal():
        return None
    return int(number)


def _write_cropped_frames(video_path, frames_dir, crop_box):
    """Decode ``video_path`` and save each (optionally cropped) frame as ``frame_N.jpg``.

    Returns the number of frames written. Raises ``IOError`` when the video
    cannot be opened or a frame cannot be written.
    """
    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            raise IOError(f"Could not open video file '{video_path}'.")

        # Only create the folder once the video is known to be readable, so a
        # failed run does not leave an empty folder behind.
        os.makedirs(frames_dir, exist_ok=True)

        frame_count = 0
        while True:
            success, frame = video.read()
            if not success:
                break
            if crop_box is not None:
                xmin, ymin, xmax, ymax = crop_box
                frame = frame[ymin:ymax, xmin:xmax]
            frame_path = os.path.join(frames_dir, f"frame_{frame_count}.jpg")
            if not cv2.imwrite(frame_path, frame):
                raise IOError(f"Could not write frame to '{frame_path}'.")
            frame_count += 1
    finally:
        # Release the capture even when decoding or writing fails.
        video.release()
    return frame_count


def extract_frames_from_video(
    video_path,
    output_directory,
    crop_box=(560, 60, 1680, 1027),
    target_image_size=640,
):
    """Split a video into cropped frames and wrap them in a Yolov7 dataset.

    Frames are saved as ``frame_<N>.jpg`` inside
    ``<output_directory>/<video file stem>_frames``. If that folder already
    contains frame files, extraction is skipped and the existing frames are
    reused.

    Args:
        video_path: Path of the video file to read.
        output_directory: Folder in which the ``*_frames`` folder is created.
            When empty/None, the folder containing the video is used.
        crop_box: ``(xmin, ymin, xmax, ymax)`` pixel box cut out of every
            frame, or ``None`` to keep full frames. The default values are
            specific to the recording setup (noted in the original code as
            being for "Exalt D"); other sources may need different values.
        target_image_size: Side length passed to ``create_yolov7_transforms``
            as a square ``(size, size)`` image size.

    Returns:
        A ``Yolov7Dataset`` over the frames, ordered by frame number.

    Raises:
        IOError: If the video cannot be opened or a frame cannot be saved.
    """
    # Accept both '/' and '\\' separators regardless of the host platform.
    video_file_name = str(video_path).replace("\\", "/").rstrip("/").rsplit("/", 1)[-1]
    video_stem = os.path.splitext(video_file_name)[0]
    base_directory = output_directory if output_directory else os.path.dirname(str(video_path))
    frames_dir = os.path.join(base_directory, f"{video_stem}_frames")

    existing_files = os.listdir(frames_dir) if os.path.isdir(frames_dir) else []
    if any(_frame_index(name) is not None for name in existing_files):
        print(f"Output directory '{frames_dir}' already exists.")
    else:
        _write_cropped_frames(video_path, frames_dir, crop_box)

    # Keep only real frame files and order them by frame number so that the
    # dataset follows the video's temporal order (os.listdir order is arbitrary).
    indexed_frames = sorted(
        (_frame_index(name), name)
        for name in os.listdir(frames_dir)
        if _frame_index(name) is not None
    )
    frame_total = len(indexed_frames)

    # The frames carry no labels: the annotation columns are placeholders that
    # keep the dataframe layout handed to CarsDatasetAdaptor unchanged.
    frames_df = pd.DataFrame(
        {
            'image': [name for _, name in indexed_frames],
            'xmin': [np.nan] * frame_total,
            'ymin': [np.nan] * frame_total,
            'xmax': [np.nan] * frame_total,
            'ymax': [np.nan] * frame_total,
            'class_name': ['doesnt_matter'] * frame_total,
            'has_annotation': [False] * frame_total,
            'image_id': [index for index, _ in indexed_frames],
            'class_id': [np.nan] * frame_total,
        }
    )

    # `display` only exists inside IPython/Jupyter; fall back to print elsewhere.
    try:
        display(frames_df)
    except NameError:
        print(frames_df)

    # Converting dataframe to dataset adaptor
    frames_adaptor = CarsDatasetAdaptor(frames_dir, frames_df)
    # Converting into yolov7 format (transforming)
    return Yolov7Dataset(
        frames_adaptor,
        transforms=create_yolov7_transforms(
            image_size=(target_image_size, target_image_size)
        ),
    )


In [12]:
# Specify the path to your MP4 file
my_video_path = video_path
# Specify the output directory where frames will be saved
output_directory = data_path
# Extract the frames and wrap them in a Yolov7-format dataset. Pass the
# variable defined above so that editing `my_video_path` actually takes effect.
yolo_format_data = extract_frames_from_video(my_video_path, output_directory)

Unnamed: 0,image,xmin,ymin,xmax,ymax,class_name,has_annotation,image_id,class_id
0,frame_0.jpg,,,,,doesnt_matter,False,0,
1,frame_1.jpg,,,,,doesnt_matter,False,1,
2,frame_2.jpg,,,,,doesnt_matter,False,2,
3,frame_3.jpg,,,,,doesnt_matter,False,3,
4,frame_4.jpg,,,,,doesnt_matter,False,4,
...,...,...,...,...,...,...,...,...,...
1522,frame_1522.jpg,,,,,doesnt_matter,False,1522,
1523,frame_1523.jpg,,,,,doesnt_matter,False,1523,
1524,frame_1524.jpg,,,,,doesnt_matter,False,1524,
1525,frame_1525.jpg,,,,,doesnt_matter,False,1525,


In [13]:
# Defining model
# Build the 'yolov7' architecture configured for a single object class.
best_model = create_yolov7_model('yolov7', num_classes=1)
# Call eval() on the model before running inference; the trailing semicolon
# stops the notebook from echoing the call's return value.
best_model.eval();

Transferred 555/566 items from https://github.com/Chris-hughes10/Yolov7-training/releases/download/0.1.0/yolov7_training_state_dict.pt


In [14]:
## Loading Weights
# Change the pt file (weights file) here
best_model_path= 'C:\\Users\\endo\\Desktop\\Yolov7-training-main\\Yolov7-training-main\\examples\\v7_annotations_finetune.pt'
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# machine without CUDA; the model in this notebook is never moved off the CPU.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
checkpoint = torch.load(best_model_path, map_location='cpu')
# The checkpoint is a dict; the network weights live under 'model_state_dict'.
state_dict = checkpoint['model_state_dict']
best_model.load_state_dict(state_dict) # Loading the weights

<All keys matched successfully>

## Running inference on Video:

In [15]:
# Gather the image tensor of every frame, in dataset order, into one list so
# that each tensor can later be passed to the model for inference.
# Items are fetched by position; only the first element of each dataset item
# (the image tensor) is kept - labels, image id and image size are not needed.
image_tensor_collection = [
    yolo_format_data[frame_idx][0] for frame_idx in range(len(yolo_format_data))
]

In [16]:
%cd "C:\Users\endo\Desktop\Yolov7-training-main\Yolov7-training-main\examples\"

C:\Users\endo\Desktop\Yolov7-training-main\Yolov7-training-main\examples


In [17]:
import cv2
import numpy as np
import torch

# Set up video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

## Change the video output name here
output_inference_file_name= "output__cropped_ben_ami_exalt_1_finetune.mp4"
output_fps = 30.0
output_frame_size = (640, 640)  # (width, height) of the frames written below
out = cv2.VideoWriter(output_inference_file_name, fourcc, output_fps, output_frame_size)
if not out.isOpened():
    # Without this check an unusable writer would silently produce no video.
    raise IOError(f"Could not open '{output_inference_file_name}' for writing.")

try:
    # Loop over the image tensors
    for image_tensor in image_tensor_collection:
        # Perform inference on the image tensor (add a batch dimension first)
        with torch.no_grad():
            model_outputs = best_model(image_tensor[None])
            # Postprocess the output to get the predictions
            preds = best_model.postprocess(model_outputs, conf_thres=0., multiple_labels_per_box=False)

        # Filter the predictions using NMS and a confidence threshold
        nms_predictions = filter_eval_predictions(preds, confidence_threshold=0.1)

        # Several boxes can survive the filtering; keep only one per frame.
        # Pick the most confident box rather than an arbitrary row.
        # NOTE(review): assumes column 4 of each prediction row is the
        # confidence score (columns 0-3 are the box) - confirm against
        # filter_eval_predictions.
        frame_predictions = nms_predictions[0]
        if len(frame_predictions) > 1:
            best_row = torch.argmax(frame_predictions[:, 4])
            frame_predictions = frame_predictions[best_row].reshape(1, -1)

        # Get the predicted boxes
        pred_boxes = frame_predictions[:, :4].cpu().numpy()

        # Load the image as a channels-last NumPy array
        img = image_tensor.permute(1, 2, 0).cpu().numpy()

        # The tensor values are normalized, so scale them up to 0-255 and
        # convert to uint8, which is the format required for writing into the
        # video file. Clipping prevents wrap-around for values slightly
        # outside the expected range.
        img = np.clip(img * 255, 0, 255).astype(np.uint8)

        # Resizing to make sure the frame matches the writer's frame size
        img = cv2.resize(img, output_frame_size)

        # Convert color space to BGR (OpenCV's channel order)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # Draw the predicted boxes on the image
        for box in pred_boxes:
            xmin, ymin, xmax, ymax = box.astype(int)
            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

        # For interactive debugging, the frame can be shown here with
        # cv2.imshow('image', img) followed by cv2.waitKey(0).

        # Write frame to video
        out.write(img)
finally:
    # Release the video writer and close all windows, even if a frame failed,
    # so the output file is finalized.
    out.release()
    cv2.destroyAllWindows()


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
