# In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf

# Helper function to check if a file is an image
def is_image_file(filename):
    """Tell whether *filename* looks like an image, judged only by its extension.

    The comparison ignores case; the file itself is never opened.

    Args:
        filename: File name or path as a string.

    Returns:
        True when the name ends with one of the known image suffixes,
        False otherwise.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif")
    # str.endswith accepts a tuple and succeeds if any suffix matches.
    return filename.lower().endswith(image_suffixes)

# Analyze dataset to count images and categorize them
def analyze_dataset(directory):
    """Count the image files under *directory* and group them by label.

    The label of an image is built from the last two components of the
    folder that contains it, joined with an underscore
    (``.../<category>/<style>/img.jpg`` -> ``"<category>_<style>"``).
    A folder whose path has a single component (for example the root
    itself when *directory* is a bare relative name such as ``'Train'``)
    is labelled with that single component instead of raising
    ``IndexError``.

    Args:
        directory: Root folder to walk recursively.

    Returns:
        A ``(count, categories)`` tuple: the total number of image files
        found and a dict mapping each label to its number of images.
    """
    count = 0
    categories = {}
    for subdir, _dirs, files in os.walk(directory):
        num_images = sum(1 for name in files if is_image_file(name))
        if not num_images:
            # Folders without images must not create empty labels.
            continue
        # Slicing (unlike indexing with -2) tolerates one-component paths.
        label = "_".join(subdir.split(os.sep)[-2:])
        categories[label] = categories.get(label, 0) + num_images
        count += num_images
    return count, categories

# Summarise the "Train" directory: overall total first, then one line per label
image_count, category_details = analyze_dataset('Train')
print(f"Total images: {image_count}")
for category in category_details:
    num_images = category_details[category]
    print(f"{category}: {num_images} images")

# Feature extraction using EfficientNetB0
def extract_features(img_path, model):
    """Return the L2-normalised feature vector of a single image.

    The image is loaded at 224x224, turned into a batch of one, passed
    through ``preprocess_input`` and then through ``model.predict``. The
    prediction is flattened to 1-D and scaled to unit Euclidean length.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing ``predict``; this file passes the network
            returned by ``build_model``.

    Returns:
        A 1-D NumPy array. It has unit norm unless the raw prediction is
        all zeros, in which case it is returned unscaled.
    """
    img = image.load_img(img_path, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    raw_features = model.predict(preprocess_input(batch)).flatten()

    norm = np.linalg.norm(raw_features)
    if norm == 0:
        # Dividing by a zero norm would produce NaNs, which would then
        # propagate into the category averages and distance computations.
        return raw_features
    return raw_features / norm

# Compute average features for each category
def compute_average_features(directory, model):
    """Compute the mean feature vector of every category under *directory*.

    A category label is the last two components of an image's folder
    joined with an underscore (``<category>_<style>``). Folders whose path
    has fewer than two components are skipped: no two-part label can be
    built for them (indexing ``[-2]`` used to raise ``IndexError``), and
    ``extract_features_from_category`` ignores such folders as well, so a
    label derived from them could never be looked up again.

    Args:
        directory: Root folder to walk recursively.
        model: Forwarded to ``extract_features`` for every image.

    Returns:
        A dict mapping each category label to the element-wise mean of
        the feature vectors of its images. Empty when no image is found.
    """
    feature_sums = {}
    image_counts = {}
    for subdir, _dirs, files in os.walk(directory):
        parts = subdir.split(os.sep)
        if len(parts) < 2:
            continue
        category = "_".join(parts[-2:])
        for file in files:
            if not is_image_file(file):
                continue
            features = extract_features(os.path.join(subdir, file), model)
            if category not in feature_sums:
                feature_sums[category] = np.zeros_like(features)
                image_counts[category] = 0
            feature_sums[category] += features
            image_counts[category] += 1

    # Turn the running sums into means in place.
    for category, total in image_counts.items():
        feature_sums[category] /= total
    return feature_sums

# Find the closest category for a given image
def find_closest_category(image_path, category_features, model):
    """Pick the category whose feature vector lies nearest to an image.

    Args:
        image_path: Path of the query image.
        category_features: Mapping of category label to feature vector.
        model: Forwarded to ``extract_features``.

    Returns:
        The label with the smallest Euclidean distance to the image's
        features (the first one encountered wins on ties), or ``None``
        when no category yields a distance below infinity, e.g. for an
        empty mapping.
    """
    query = extract_features(image_path, model)
    best_label, best_gap = None, float('inf')
    for label, centroid in category_features.items():
        gap = np.linalg.norm(query - centroid)
        if not gap < best_gap:
            # Not strictly closer than the current best: keep the old one.
            continue
        best_label, best_gap = label, gap
    return best_label

# Extract features from all images in a specified category
def extract_features_from_category(directory, category, model):
    """Collect paths and feature vectors of every image in one category.

    A folder belongs to *category* when the last two components of its
    path, joined with an underscore, equal *category*.

    Args:
        directory: Root folder to walk recursively.
        category: Label of the form ``<parent>_<folder>`` to match.
        model: Forwarded to ``extract_features`` for every image.

    Returns:
        A ``(image_paths, features_list)`` tuple of two parallel lists.
    """
    image_paths, features_list = [], []
    for folder, _subfolders, filenames in os.walk(directory):
        components = folder.split(os.sep)
        # A label needs two path components; shallower folders never match.
        if len(components) < 2:
            continue
        if '_'.join(components[-2:]) != category:
            continue
        for filename in filenames:
            if not is_image_file(filename):
                continue
            img_path = os.path.join(folder, filename)
            image_paths.append(img_path)
            features_list.append(extract_features(img_path, model))
    return image_paths, features_list

# Recommend n similar images based on feature similarity
def recommend_similar_images(features, all_features, all_paths, n=10):
    """Return the paths of the images most similar to a query vector.

    Similarity is the Euclidean distance between *features* and each
    entry of *all_features*.

    Args:
        features: Feature vector of the query image.
        all_features: Sequence of candidate feature vectors.
        all_paths: Paths aligned index-by-index with *all_features*.
        n: Maximum number of recommendations to return.

    Returns:
        Up to *n* paths, nearest first. Fewer are returned when there are
        fewer candidates, and an empty list when there are no candidates
        or *n* is not positive.
    """
    # The neighbour search rejects requests for more neighbours than
    # fitted samples, so never ask for more than are available.
    k = min(n, len(all_features))
    if k <= 0:
        return []
    neighbors = NearestNeighbors(n_neighbors=k, metric='euclidean')
    neighbors.fit(all_features)
    _distances, indices = neighbors.kneighbors([features])
    return [all_paths[idx] for idx in indices.flatten()]

# Display images in a grid
def display_images(image_paths, columns=5):
    """Show the given images in a grid, each titled with its file name.

    Up to ``2 * columns`` images are laid out on two rows (the original
    2x5 arrangement with the default *columns*). Additional rows are
    added for longer inputs instead of failing on an out-of-range
    subplot index.

    Args:
        image_paths: Iterable of image file paths to display.
        columns: Number of images per row; must be positive.

    Raises:
        ValueError: If *columns* is not positive.
    """
    if columns <= 0:
        raise ValueError("columns must be a positive integer")
    image_paths = list(image_paths)  # allow any iterable, we need its length
    # Ceiling division, but never fewer than the two rows used historically.
    rows = max(2, -(-len(image_paths) // columns))
    # Scales with the grid; equals the former (15, 10) for the default 2x5.
    plt.figure(figsize=(3 * columns, 5 * rows))
    for position, img_path in enumerate(image_paths, start=1):
        plt.subplot(rows, columns, position)
        plt.imshow(mpimg.imread(img_path))
        plt.axis('off')
        plt.title(os.path.basename(img_path))
    plt.show()

# Build and fine-tune EfficientNetB0 model
def build_model():
    """Assemble and compile the EfficientNetB0-based feature extractor.

    The network is EfficientNetB0 with ImageNet weights and no
    classification top, for 224x224x3 inputs, followed by global max
    pooling. Every base layer except the last 20 is frozen.

    Returns:
        The compiled Keras ``Model`` whose output is the pooled features.
    """
    backbone = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
    pooled_output = GlobalMaxPooling2D()(backbone.output)
    extractor = Model(inputs=backbone.input, outputs=pooled_output)

    # Only the final 20 backbone layers remain trainable.
    frozen_layers = backbone.layers[:-20]
    for frozen in frozen_layers:
        frozen.trainable = False

    # NOTE(review): the output is pooled features, not class scores -
    # confirm that this loss/metric pair is what training should use.
    extractor.compile(
        optimizer=tf.keras.optimizers.Adam(1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return extractor

# Build the network once; every step below reuses this instance
model = build_model()

# Callbacks for an optional training run: stop after 10 epochs without
# val_loss improvement (restoring the best weights) and keep only the best
# checkpoint in 'best_model.keras'
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss')

# Training is left disabled: nothing in this file creates train_data or
# val_data, so they have to be prepared first
# Example:
# train_data, val_data = ...  # Prepare your data here

# Uncomment the following line once train_data and val_data are prepared
# model.fit(train_data, epochs=50, validation_data=val_data, callbacks=[early_stopping, model_checkpoint])

# Load previously saved weights when the checkpoint file exists; otherwise
# only print a notice and continue with the model as build_model() returned it
if os.path.exists('best_model.keras'):
    model.load_weights('best_model.keras')
else:
    print("Best model weights not found. Ensure you have trained the model and saved the weights.")

# Average the features of every category in 'Train', then pick the category
# closest to the query image 'test-pic.jpg'
# NOTE(review): find_closest_category already extracts the query features
# internally, so the query image is run through the model twice here
category_features = compute_average_features('Train', model)
category = find_closest_category('test-pic.jpg', category_features, model)
features = extract_features('test-pic.jpg', model)

# Extract features from all images in the closest category
image_paths, features_list = extract_features_from_category('Train', category, model)

# Recommend similar images; n is left at the function's default of 10,
# which is the number the message below hard-codes
recommended_images = recommend_similar_images(features, features_list, image_paths)
print(f"10 recommended images similar to your image in category {category}:")
for img in recommended_images:
    print(img)

# Display the recommended images in a grid
display_images(recommended_images)