<a href="https://colab.research.google.com/github/mabonmn/Google_Net-and-ResNet-Feature-Extractor/blob/main/Features_colab_P2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
from torchvision import models
from torchvision import transforms
import cv2
import h5py
import numpy as np
import os
import time
from PIL import Image
from tqdm import tqdm
# Colab-only step: mount Google Drive at /content/drive so that files stored
# under /content/drive/MyDrive are reachable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
'''
Defining the class FeatureExtractor
Methods:
     1._initialize_transformation:
         Define the image preprocessing. Resize, Centercrop, Convert to tensor followed by Normalization.
 
     2._initialize_major_model:
         Define GoogLeNet (import the model)
         Remove the last FC layer of the model to extract feature from Pool5 layer
         
     3._initialize_dataset:
         Create h5 file to store features of each video
         
     4._initialize_video_paths:
         Define each video's path
         
     5._is_filename_valid:
         Verify that videos are in the correct format '.mp4'
         
     6._get_video_name:
         Store the name of video
         
     7._extract_features:
         Function to extract features from the pool layer as determined by the model.
         It is in this function that the image is preprocessed and then sent to the model for feature extraction
         
     8._create_attributes:
         Define the attributes and the format of the h5 File.
         
     9.perform_extraction:
        Extract frames from the video and downsample to 1 frame per 15. This then calls the feature extraction function.
        The extracted features are then stored in the h5 file.
    
        
'''

"\nDefining the Class FeaturesEXtractor\nMethods:\n     1._initialize_transformation:\n         Define the image preprocessing. Resize, Centercrop, Convert to tensor followed by Normalization.\n \n     2._initialize_major_model:\n         Define GoogLeNet (import the model)\n         Remove the last FC layer of the model to extract feature from Pool5 layer\n         \n     3._initialize_dataset:\n         Create h5 file to store features of each video\n         \n     4._initialize_video_paths:\n         Define each videos path\n         \n     5._is_filename_valid:\n         Verify that vidoes are in the correct format '.mp4'\n         \n     6._get_video_name:\n         Store the name of video\n         \n     7._extract_features:\n         Function to extract feature from the the pool layer as determined from the model. \n         It is at this function where the image is preprocessed and the sent to the model for features extraction\n         \n     8._create_attributes:\n         

In [None]:

class FeatureExtractor():
    """Extract per-frame CNN features from videos and store them in an HDF5 file.

    Two pretrained torchvision networks are loaded, each with its last child
    module removed:
      * the "major" model, built from ``models.googlenet``
      * the "minor" model, built from ``models.resnet152``

    ``perform_extraction`` walks over one video (or every ``.mp4`` file in a
    directory), keeps one frame out of every ``sample_rate`` frames, runs both
    networks on each kept frame and writes one HDF5 group per video.
    """

    # Keep one frame out of every DEFAULT_SAMPLE_RATE frames unless told otherwise.
    DEFAULT_SAMPLE_RATE = 15
    # Only files with this extension are picked up when scanning a directory.
    VIDEO_EXTENSION = '.mp4'

    def __init__(self):
        """Build the preprocessing pipeline and load both truncated networks."""
        self.preprocess = self._initialize_transformation()
        self.major_model = self._initialize_major_model()
        self.minor_model = self._initialize_minor_model()

    # initialize the transformation function for preprocessing images
    def _initialize_transformation(self):
        """Return the preprocessing pipeline applied to every PIL frame.

        Resize to 256, center-crop to 224, convert to a tensor, then normalise
        each channel with the mean/std values commonly used with torchvision's
        pretrained classification models.
        """
        transformation = transforms.Compose([transforms.Resize(256),
                                             transforms.CenterCrop(224),
                                             transforms.ToTensor(),
                                             transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])])
        return transformation

    def _truncate_and_prepare(self, model):
        """Drop the last child module of ``model``, switch to eval mode and move to GPU if present.

        NOTE(review): rebuilding the network as ``nn.Sequential`` of its children
        bypasses the original ``forward`` method, so anything that method does
        besides calling the children in order is not applied - confirm this
        matches the intended features.
        """
        truncated = nn.Sequential(*list(model.children())[:-1])
        truncated.eval()
        if torch.cuda.is_available():
            truncated.to('cuda')
        return truncated

    def _initialize_major_model(self):
        """Load pretrained GoogLeNet without its last child module (the "major" model)."""
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
        # of `weights=` - confirm against the installed version before changing.
        return self._truncate_and_prepare(models.googlenet(pretrained = True))

    def _initialize_minor_model(self):
        """Load pretrained ResNet-152 without its last child module (the "minor" model)."""
        return self._truncate_and_prepare(models.resnet152(pretrained=True))

    def _initialize_dataset(self, save_path):
        """Create (truncating if it exists) the HDF5 file at ``save_path`` and return it."""
        dataset = h5py.File(save_path, 'w')
        return dataset

    def _initialize_video_paths(self, video_path):
        """Return the list of video files to process.

        If ``video_path`` is a directory, every entry accepted by
        ``_is_filename_valid`` is returned (sorted, so runs are reproducible);
        otherwise ``video_path`` itself is returned as a single-element list.
        """
        if os.path.isdir(video_path):
            video_paths = [os.path.join(video_path, filename)
                           for filename in sorted(os.listdir(video_path))
                           if self._is_filename_valid(filename)]
        else:
            video_paths = [video_path]
        return video_paths

    def _is_filename_valid(self, filename):
        """Return True when ``filename`` ends with the '.mp4' extension (case-insensitive).

        A suffix check is used so that names such as 'clip.mp4.part' are rejected.
        """
        return filename.lower().endswith(self.VIDEO_EXTENSION)

    def _get_video_name(self, video_path):
        """Return the file name of ``video_path`` without directory and extension.

        Only the final extension is stripped, so 'take.2.final.mp4' yields
        'take.2.final' rather than being cut at the first dot.
        """
        return os.path.splitext(os.path.basename(video_path))[0]

    # preprocess and extract the features from the image
    def _extract_features(self, image):
        """Run both networks on one image.

        Parameters:
            image: the frame to process; it is passed straight to ``self.preprocess``.

        Returns:
            (major_feature, minor_feature): the flattened output of the major
            and the minor model for this image, as numpy arrays.
        """
        image_tensor = self.preprocess(image)
        image_batch = image_tensor.unsqueeze(0) # create a mini-batch as expected by the model

        # Only the input needs moving: both models were already placed on the
        # GPU when they were initialised.
        if torch.cuda.is_available():
            image_batch = image_batch.to('cuda')

        with torch.no_grad():
            major_output = self.major_model(image_batch)
            minor_output = self.minor_model(image_batch)
        return major_output[0].cpu().view(-1).numpy(), minor_output[0].cpu().view(-1).numpy()

    def _create_attributes(self, video_name, fps, number_of_frames, major_features,minor_features):
        """Write one HDF5 group named ``video_name`` holding the video's metadata and features.

        Datasets written: 'name', 'fps', 'number_of_frames' (the frame count
        passed in by the caller), 'number_of_downsampled_frames'
        (``len(major_features)``), 'major_features' and 'minor_features'.
        """
        group = self.dataset.create_group(video_name)
        group['name'] = video_name
        group['fps'] = fps
        group['number_of_frames'] = number_of_frames
        group['number_of_downsampled_frames'] = len(major_features)
        group['major_features'] = major_features                           # (number_of_downsampled_frames x feature_dimension)
        group['minor_features'] = minor_features                           # (number_of_downsampled_frames x feature_dimension)

    def _get_h5py_empty(self):
        """Return an ``h5py.Empty`` placeholder with dtype int64."""
        return h5py.Empty(dtype = np.int64)

    def perform_extraction(self, save_path, video_path, sample_rate=None):
        """Extract features for one video or for every '.mp4' file in a directory.

        Parameters:
            save_path: path of the HDF5 file to create.
            video_path: a single video file or a directory of videos.
            sample_rate: keep one frame out of every ``sample_rate`` frames;
                defaults to ``DEFAULT_SAMPLE_RATE`` (15).

        Raises:
            ValueError: if ``sample_rate`` is smaller than 1.

        The HDF5 file is flushed but left open so that ``get_dataset`` can
        hand it to the caller.
        """
        if sample_rate is None:
            sample_rate = self.DEFAULT_SAMPLE_RATE
        if sample_rate < 1:
            raise ValueError('sample_rate must be a positive integer')

        self.dataset = self._initialize_dataset(save_path)
        self.video_paths = self._initialize_video_paths(video_path)

        for video_path in tqdm(self.video_paths):
            major_features = []
            minor_features = []
            video_name = self._get_video_name(video_path)
            print(video_name)

            video_capture = cv2.VideoCapture(video_path)
            try:
                fps = video_capture.get(cv2.CAP_PROP_FPS) #from opencv to find frames per sec
                number_of_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))

                for index in range(number_of_frames):
                    is_successful, frame = video_capture.read()
                    # The reported frame count can exceed what is actually
                    # decodable; stop instead of passing an invalid frame on.
                    if not is_successful:
                        break

                    # Down sampled to 1 frame per `sample_rate`.
                    if index % sample_rate != 0:
                        continue

                    # OpenCV delivers BGR; convert only the frames that are used.
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    major_feature, minor_feature = self._extract_features(Image.fromarray(frame))
                    major_features.append(major_feature)
                    minor_features.append(minor_feature)
            finally:
                # Release the decoder even if feature extraction fails midway.
                video_capture.release()

            self._create_attributes(video_name, fps, number_of_frames, major_features,minor_features)

        # Push everything to disk; the file stays open for get_dataset().
        self.dataset.flush()

    def get_dataset(self):
        """Return the h5py file created by the last ``perform_extraction`` call."""
        return self.dataset
    

In [None]:
if __name__ == '__main__':
    # Output HDF5 file; its name carries the current Unix timestamp.
    save_path = 'summe_gnet_{}.h5'.format(int(time.time()))

    # Directory containing the videos to process.
    video_directory = "/content/drive/MyDrive/Feature Extraction - Mabon/GoogLeNet_Features-P2"

    # Create the feature extractor and run it over the video directory.
    feature_extractor = FeatureExtractor()
    feature_extractor.perform_extraction(save_path, video_directory)

    # Keep a handle on the h5py file that holds the extracted features.
    dataset = feature_extractor.get_dataset()

  0%|          | 0/1 [00:00<?, ?it/s]

1


100%|██████████| 1/1 [08:03<00:00, 483.82s/it]
