In [4]:
import os
import numpy as np
#from keras.preprocessing import image
from tensorflow.keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import VGG16

# Directory that is scanned for the page images (files ending in ".jpg").
# "/folder_path" is a placeholder: replace it with the real folder before running.
image_directory = "/folder_path"


# Load VGG16 with ImageNet weights. include_top=False drops the fully connected
# classifier head, so the model is used here as a feature extractor rather than
# as an ImageNet classifier.
model = VGG16(weights='imagenet', include_top=False)

# Function to preprocess an image for prediction
def preprocess_image(img_path):
    """Load an image file and turn it into a single-image batch for VGG16.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, and passed through the VGG16 ``preprocess_input`` routine.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The preprocessed array, with the batch axis in position 0.
    """
    loaded = image.load_img(img_path, target_size=(224, 224))
    # Insert the batch axis in front of the per-image array.
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    return preprocess_input(batch)

# Function to extract features from an image using VGG16
def extract_features(img_path):
    """Return the flattened VGG16 output for the image at ``img_path``.

    Args:
        img_path: Path of the image file to describe.

    Returns:
        A 1-D array holding the model's prediction for the preprocessed image.
    """
    # Preprocess, run the module-level model, and collapse the result to 1-D.
    return model.predict(preprocess_image(img_path)).flatten()

# Function to calculate the cosine similarity between two feature vectors
def cosine_similarity(a, b):
    """Return the cosine similarity between two 1-D feature vectors.

    Args:
        a: First vector.
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``. If either vector has zero norm the
        cosine is undefined; 0.0 is returned instead of dividing by zero
        (which would otherwise produce ``nan`` and a RuntimeWarning).
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # An all-zero vector has no direction, so report "no similarity".
        return 0.0
    return np.dot(a, b) / denominator

# Get the list of JPEG files in the directory.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the train/test split below reproducible from run to run.
# NOTE: the sort is lexicographic, so e.g. "10th" sorts before "8th".
files = sorted(f for f in os.listdir(image_directory) if f.endswith('.jpg'))

# Select the first 12 pages for training
train_files = files[:12]
if not train_files:
    # Averaging an empty list would silently produce NaN; fail loudly instead.
    raise FileNotFoundError(f"No .jpg files found in {image_directory!r}")

# Extract features from the training images
train_features = []
for file in train_files:
    file_path = os.path.join(image_directory, file)
    features = extract_features(file_path)
    train_features.append(features)

# Calculate the average feature vector for training set
# (element-wise mean across the training images).
avg_feature = np.mean(train_features, axis=0)

# Classify the remaining 2 pages by comparing each one's feature vector with
# the training-set average.
test_files = files[12:14]
for file in test_files:
    file_path = os.path.join(image_directory, file)
    features = extract_features(file_path)
    similarity = cosine_similarity(features, avg_feature)
    print(f'{file}: Similarity - {similarity}')



Ultra_enhanced_8th.jpg: Similarity - 0.9334545731544495
Ultra_enhanced_9th.jpg: Similarity - 0.8353602290153503
