In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image


2025-11-28 11:27:18.627331: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
%run createDerivation.py

In [3]:
def image_to_vector(image: np.ndarray, final_length: int = 2048, max_features_to_detect: int = 5000) -> np.ndarray | None:
    """
    Converts an image into a fixed-length 1D feature vector using ORB.

    The function detects ORB features, selects the strongest ones, and flattens
    their descriptors into a single vector.

    Args:
        image (np.ndarray): The input image (can be color or grayscale).
        final_length (int): The desired length of the output vector.
                              MUST be a multiple of 32. Defaults to 1600 (50 features * 32).
        max_features_to_detect (int): The maximum number of features for ORB to detect initially.
                                      Should be significantly larger than (final_length / 32).

    Returns:
        np.ndarray | None: A 1D NumPy array of dtype uint8 with the specified length,
                          or None if not enough features could be detected in the image.
    """
    # 1. Validate inputs
    if image is None:
        print("Error: Input image is None.")
        return None

    if final_length % 32 != 0:
        raise ValueError("Error: final_length must be a multiple of 32.")

    # Calculate the exact number of features we need to sample
    num_features_to_sample = final_length // 32

    # 2. Ensure the image is grayscale
    if len(image.shape) == 3 and image.shape[2] == 3:
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray_image = image # Assume it's already grayscale

    # 3. Initialize ORB detector
    orb = cv2.ORB_create(nfeatures=max_features_to_detect)

    # 4. Detect keypoints and compute descriptors
    kps, des = orb.detectAndCompute(gray_image, None)

    # 5. Check if enough descriptors were found
    if des is None or len(des) < num_features_to_sample:
        print(f"Warning: Found only {0 if des is None else len(des)} features, but need {num_features_to_sample}. Cannot create a vector of length {final_length}.")
        return None

    # 6. Sort features by response score (strongest first)
    kp_des_pairs = sorted(zip(kps, des), key=lambda x: x[0].response, reverse=True)

    # 7. Select the top N features
    top_pairs = kp_des_pairs[:num_features_to_sample]
    
    # We only need the descriptors from the top pairs
    _, sampled_des = zip(*top_pairs)
    
    # 8. Flatten the list of descriptors into a single 1D vector
    feature_vector = np.array(sampled_des).flatten()

    return feature_vector

# Notebook-level ResNet50 instance used by image_to_vector2 below: ImageNet weights,
# no classification head (include_top=False). pooling='avg' requests global average
# pooling -- per the Keras docs this makes predict() yield one flat vector per image.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

def image_to_vector2(path: str):
    """
    Extracts a feature vector for the image stored at ``path`` using the
    notebook-level ``model``.

    The image is loaded at 224x224 (the input size ResNet50 was trained on),
    turned into an array, given a leading batch axis, run through
    ``preprocess_input`` and fed to ``model.predict``.

    Args:
        path (str): The file path to the image.

    Returns:
        The first (and only) row of the prediction batch.
    """
    loaded = image.load_img(path, target_size=(224, 224))

    # Add a batch axis of size one in front of the image array
    batch = image.img_to_array(loaded)[np.newaxis, ...]
    batch = preprocess_input(batch)

    # verbose=0 keeps the per-call progress bar out of the notebook output
    return model.predict(batch, verbose=0)[0]
def keep_all(path):
    """Predicate that returns True for any ``path``; the argument is ignored."""
    return True
def path_2_features(path: str) -> str | None:
    """
    Reads an image from the given path and converts it to a string representation of its feature vector.
    in comma-separated format.
    Args:
        path (str): The file path to the image.
    Returns:
        str | None: A comma-separated string of the feature vector, or None if conversion fails
    """ 
    try:
        feature_vector = image_to_vector2(path)
        if feature_vector is None:
            return "0"
        return ','.join(map(str, feature_vector.tolist())).encode('utf-8')
    except Exception as e:
        print(f"Exception occurred while processing image at path {path}: {e}")
        return None


I0000 00:00:1764329256.261104    9607 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3282 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060, pci bus id: 0000:01:00.0, compute capability: 8.9


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step


In [4]:
# final_variation.txt is a pointer file: its whole content, stripped of
# surrounding whitespace, is used as the path of the CSV to load.
file_content = ""
with open("final_variation.txt", 'r') as file:
    file_content = file.read()

# Strip the trailing newline/whitespace before treating the text as a path.
filtered2_path = file_content.strip()
filtered2_df = pd.read_csv(filtered2_path)

In [5]:
# create_dataset_variation is not defined in any visible cell -- presumably it
# comes from `%run createDerivation.py`. Its return value is printed and written
# to a file below as the path of the generated CSV.
# NOTE(review): the tag says "orb_features_2048", but path_2_features extracts
# ResNet50 features through image_to_vector2, not ORB descriptors. The tag is left
# unchanged because it may feed the variation id/path -- confirm before renaming.
processed = create_dataset_variation(filtered2_df, keep_all, path_2_features, variation_tag="orb_features_2048")
print("Created variation CSV with features:", processed)

# Record the CSV path in final_features.txt. The context manager closes the
# handle even if write() raises, which the manual open/close pair did not.
with open("final_features.txt", "w") as extracted_features_path:
    extracted_features_path.write(processed)

2025-11-28 11:28:32.610495: I external/local_xla/xla/service/service.cc:163] XLA service 0x7f3fc8006290 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-28 11:28:32.610526: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4060, Compute Capability 8.9
2025-11-28 11:28:32.785570: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-28 11:28:33.336342: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91002




I0000 00:00:1764329314.973973   33020 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Wrote 3826 rows to /home/fadhlan/Normal2/DeepLearningRepo/steps/variations/var_b7376b7ae18d395b/b7376b7ae18d395b.csv (variation b7376b7ae18d395b)
Created variation CSV with features: /home/fadhlan/Normal2/DeepLearningRepo/steps/variations/var_b7376b7ae18d395b/b7376b7ae18d395b.csv
