In [1]:
import os
from ensure import ensure_annotations

In [2]:
# Move the working directory one level up; later cells resolve paths such as
# the config files and ``artifacts/`` relative to it.
# The starting directory is remembered on the first run so that re-executing
# this cell is idempotent instead of climbing one level higher every time.
NOTEBOOK_START_DIR = globals().get("NOTEBOOK_START_DIR", os.getcwd())
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [3]:
# Sanity check: print the working directory after the chdir in the previous cell.
!pwd

/Users/ngkuissi/Dev/Image_Search_Engine


In [4]:
from dataclasses import dataclass
from pathlib import Path
from typing import List

@dataclass(frozen=True)
class FeatureRetrivalConfig:
    """Immutable settings for the feature-retrieval (embedding extraction) stage.

    The field comments describe how each value is consumed by the code
    in this notebook.
    """
    # Output directory of this stage; passed to create_directories before use.
    root_dir: Path
    # Dataset root: listed with os.listdir, one entry per label, each expected
    # to hold an ``images`` sub-folder.
    data_path: Path
    # Despite the ``_dir`` suffix this is opened as a file: the pickle that
    # receives the list of feature vectors.
    feature_dir: Path
    # Also opened as a file: the pickle that receives the list of image paths.
    image_path_list_dir: Path
    # Pickle file that receives the label -> image file names mapping.
    image_labels: Path
    # Forwarded to ResNet50(include_top=...).
    include_top: bool
    # Forwarded to ResNet50(pooling=...).
    pooling: str
    # Forwarded to ResNet50(input_shape=...).
    input_shape: List
    # Presumably the (height, width) images are resized to when loaded
    # -- TODO confirm against params.yaml.
    target_size: List

In [5]:
from imageSearchEngine.constants import *
from imageSearchEngine.utils.file_helpers import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Loads the YAML config and params files and builds per-stage config objects."""

    @ensure_annotations
    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH,
    ):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    @ensure_annotations
    def get_feature_retrival_config(self) -> FeatureRetrivalConfig:
        """Build the config for the feature-retrieval stage.

        Reads the ``feature_representation`` section of both the config and
        the params files and passes the stage's ``root_dir`` to
        ``create_directories``.

        Returns:
            A frozen ``FeatureRetrivalConfig``. Path-like entries are wrapped
            in ``Path`` so the values match the dataclass annotations
            (the YAML loader hands them over as plain values).
        """
        config = self.config.feature_representation
        params = self.params.feature_representation
        create_directories([config.root_dir])

        return FeatureRetrivalConfig(
            root_dir=Path(config.root_dir),
            data_path=Path(config.data_path),
            feature_dir=Path(config.feature_dir),
            image_path_list_dir=Path(config.image_path_list_dir),
            image_labels=Path(config.image_labels),
            include_top=params.include_top,
            pooling=params.pooling,
            input_shape=params.input_shape,
            target_size=params.target_size,
        )

In [7]:
import os
from tqdm import tqdm
import pickle
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow .keras.applications import resnet50
import numpy as np
from imageSearchEngine.logging.logger import log
from imageSearchEngine.exception import CustomException

In [10]:
class FeatureRetrival:
    """Extracts ResNet50 feature vectors for every image under ``config.data_path``.

    The directory walks below expect this layout::

        <data_path>/<label>/images/<image file>
    """

    def __init__(self, config: FeatureRetrivalConfig):
        """Store the stage configuration; no I/O happens here."""
        self.config = config

    def _label_image_dirs(self) -> List:
        """Return ``(label, images_dir)`` pairs for every label folder.

        Entries of ``data_path`` that have no ``images`` sub-directory
        (stray files such as ``.DS_Store``) are skipped instead of making
        ``os.listdir`` raise. Labels are sorted so the result does not depend
        on the file system's listing order.
        """
        label_dirs = []
        for label in sorted(os.listdir(self.config.data_path)):
            images_dir = os.path.join(self.config.data_path, f'{label}/images')
            if os.path.isdir(images_dir):
                label_dirs.append((label, images_dir))
        return label_dirs

    def retrive_labels(self) -> None:
        """Pickle a ``{label: [image file names]}`` mapping to ``config.image_labels``."""
        labels = {
            label: sorted(os.listdir(images_dir))
            for label, images_dir in self._label_image_dirs()
        }
        log.info('all the labels and images were retrived')
        # Context manager guarantees the file is flushed and closed.
        with open(self.config.image_labels, 'wb') as file:
            pickle.dump(labels, file)
        log.info(f'all the labels were saved to {self.config.image_labels}')

    def retrive_path(self) -> List:
        """Collect every image path, pickle the list and return it.

        Returns:
            List of image paths, ordered by label and then by file name. The
            same list is written to ``config.image_path_list_dir``.
        """
        image_path_list = [
            os.path.join(images_dir, image)
            for _, images_dir in self._label_image_dirs()
            for image in sorted(os.listdir(images_dir))
        ]
        log.info('all the image_path were retrived')
        with open(self.config.image_path_list_dir, 'wb') as file:
            pickle.dump(image_path_list, file)
        log.info('all image paths were saved')
        return image_path_list

    def retrive_embedding(self) -> None:
        """Compute one flattened ResNet50 feature vector per image and pickle them.

        The i-th saved feature belongs to the i-th path returned (and saved)
        by ``retrive_path``. The result is written to ``config.feature_dir``.
        """
        model = resnet50.ResNet50(
            include_top=self.config.include_top,
            input_shape=self.config.input_shape,
            pooling=self.config.pooling
        )
        log.info('got resnet50 model from tensorflow')
        image_path_list = self.retrive_path()
        # Honour the configured size instead of a hard-coded (224, 224);
        # load_img expects a tuple.
        target_size = tuple(self.config.target_size)
        image_feature = []
        for image_path in tqdm(image_path_list):
            image = load_img(image_path, target_size=target_size)
            image = img_to_array(image)
            # The model expects a leading batch axis.
            image = np.expand_dims(image, axis=0)
            preprocess_image = resnet50.preprocess_input(image)
            feature = model.predict(preprocess_image, verbose=0).flatten()
            image_feature.append(feature)
        log.info('all the features were loaded')
        with open(self.config.feature_dir, 'wb') as file:
            pickle.dump(image_feature, file)
        log.info('all the feauture representaion where saved')
        

In [11]:
# Run the feature-retrieval stage end to end: load the configuration, persist
# the label -> image listing, then compute and save the embeddings.
try:
    config = ConfigurationManager()
    feature_retrival_config = config.get_feature_retrival_config()
    feature_retrival = FeatureRetrival(config=feature_retrival_config)
    feature_retrival.retrive_labels()
    feature_retrival.retrive_embedding()
except Exception as e:
    # Chain explicitly so the original error is reported as the direct cause.
    raise CustomException(e) from e

[2023-09-11 18:11:35,898: INFO: file_helpers: yaml file: config/config.yaml loaded successfully]
[2023-09-11 18:11:35,899: INFO: file_helpers: yaml file: params.yaml loaded successfully]
[2023-09-11 18:11:35,899: INFO: file_helpers: created directory at: artifacts]
[2023-09-11 18:11:35,900: INFO: file_helpers: created directory at: artifacts/embeddings]
[2023-09-11 18:11:35,952: INFO: 405414696: all the labels and images were retrived]
[2023-09-11 18:11:35,962: INFO: 405414696: all the labels were saved to artifacts/embeddings/labels.pkl]
[2023-09-11 18:11:36,824: INFO: 405414696: got resnet50 model from tensorflow]
[2023-09-11 18:11:36,905: INFO: 405414696: all the image_path were retrived]
[2023-09-11 18:11:36,915: INFO: 405414696: all image paths were saved]


100%|██████████| 100000/100000 [1:58:44<00:00, 14.04it/s]  

[2023-09-11 20:10:21,385: INFO: 405414696: all the features were loaded]





[2023-09-11 20:10:45,195: INFO: 405414696: all the feauture representaion where saved]


In [18]:
# Sanity check: print the current working directory.
!pwd

/Users/ngkuissi/Dev/Image_Search_Engine


In [13]:
# Size of artifacts/embeddings/embedding.pkl on disk in MiB (bytes / 2**20).
os.stat('artifacts/embeddings/embedding.pkl').st_size / (1 << 20)

784.600076675415