In [None]:
%pip install easyocr

import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from skimage.feature import hog
from skimage.feature import local_binary_pattern
import easyocr
from sklearn.feature_extraction.text import TfidfVectorizer
from imutils.object_detection import non_max_suppression
from sklearn.preprocessing import StandardScaler


# Directory whose sub-folders hold the cropped images that main() and
# extract_text_features() iterate over.
CROPS_DIR = "organized_crops"
# Directory of per-image label ``.txt`` files read by extract_spatial_features().
LABELS_DIR = "runs/detect/exp/labels/"
# File loaded by vectorize_text_features() (pickled dict of OCR strings).
TEXT_FEATURES_FILE = "text_features.npy"
# Text Detection Parameters
# Width/height the image is resized to when building the EAST input blob.
INPUT_WIDTH = 320
INPUT_HEIGHT = 320
# Minimum score a cell needs in decode_predictions() to yield a candidate box.
CONFIDENCE_THRESHOLD = 0.6
# Overlap threshold handed to non_max_suppression() in detect_text_east().
NMS_THRESHOLD = 0.5

# Load ResNet50 with ImageNet weights, without the classification head and
# with average pooling. NOTE: this runs at import time.
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
# Wraps base_model using the same input and output tensors; this is the model
# cnn_extract_features() calls.
model = Model(inputs=base_model.input, outputs=base_model.output)

# Load Pre-trained EAST Detector
# NOTE(review): absolute path, presumably a mounted Google Drive in Colab -
# confirm it exists in the target environment before importing this module.
net = cv2.dnn.readNet("/content/drive/MyDrive/Object_Recognition/frozen_east_text_detection.pb")

def read_image(image_path):
    """Load an image from disk and return it together with a grayscale copy.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``(img, gray)`` tuple: the array returned by ``cv2.imread`` and the
        result of converting it with ``cv2.COLOR_BGR2GRAY``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising. Fail
    # here with the offending path rather than with an opaque cvtColor error.
    if img is None:
        raise FileNotFoundError(
            f"Error: Image file '{image_path}' not found or unable to load!")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img, gray

def cnn_extract_features(image_path):
    """Run one image file through the module-level ``model`` and flatten the result.

    The file is loaded at 224x224, turned into a single-image batch and passed
    through the ResNet50 ``preprocess_input`` before prediction.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        The prediction for the image as a flattened (1-D) array.
    """
    loaded = image.load_img(image_path, target_size=(224, 224))
    # Add the leading batch axis expected by predict().
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    batch = tf.keras.applications.resnet50.preprocess_input(batch)
    prediction = model.predict(batch)
    return prediction.flatten()

def extract_color_histogram(image):
    """Return a flattened, normalized 8x8x8 HSV histogram of an image.

    Note that the parameter name shadows the imported Keras ``image`` module
    inside this function; the module is not used here.

    Args:
        image: Image array, converted with ``cv2.COLOR_BGR2HSV`` (so it is
            treated as BGR).

    Returns:
        A 1-D array with 8 * 8 * 8 = 512 entries, normalized by
        ``cv2.normalize`` with its default settings.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    channels = [0, 1, 2]
    bins_per_channel = [8, 8, 8]
    # Flat list of (low, high) pairs: H in [0, 180), S and V in [0, 256).
    value_ranges = [0, 180, 0, 256, 0, 256]

    histogram = cv2.calcHist([hsv], channels, None, bins_per_channel, value_ranges)
    # The histogram is passed as both source and destination.
    histogram = cv2.normalize(histogram, histogram)
    return histogram.flatten()

def extract_shape_features(gray_image):
    """Compute a HOG descriptor for a grayscale image.

    The image is first resized to 128x128 so that every input yields a
    descriptor of the same length.

    Args:
        gray_image: Single-channel image array.

    Returns:
        The HOG feature vector (8 orientations, 8x8-pixel cells, 2x2-cell
        blocks) as a 1-D array.
    """
    resized = cv2.resize(gray_image, (128, 128))  # Normalize size for consistency
    # visualize=False: only the descriptor is needed. Asking for the HOG
    # visualisation image as well just to discard it is wasted work per crop.
    return hog(resized, orientations=8, pixels_per_cell=(8, 8),
               cells_per_block=(2, 2), visualize=False, feature_vector=True)

def extract_texture_features(gray_image):
    """Build a fixed-length texture vector from a Gabor response and LBP codes.

    The vector is the mean of the Gabor-filtered image followed by the first
    256 values of the flattened LBP code image. Images with fewer than 256
    pixels are zero-padded so the result always has 257 entries; without the
    padding, small crops yield shorter vectors and the per-image features can
    no longer be stacked into one matrix.

    NOTE(review): the LBP part is raw per-pixel codes from the start of the
    image, not a histogram of codes - confirm this is the intended descriptor.

    Args:
        gray_image: Single-channel image array.

    Returns:
        A 1-D array of length 257.
    """
    lbp_length = 256

    # Gabor Filter
    gabor_kernel = cv2.getGaborKernel((21, 21), 5.0, np.pi/4, 10.0, 0.5, 0, ktype=cv2.CV_32F)
    # NOTE(review): the output-depth argument is kept exactly as in the
    # original; an 8-bit result cannot hold negative filter responses -
    # confirm that is acceptable for the mean computed below.
    filtered_img = cv2.filter2D(gray_image, cv2.CV_8UC3, gabor_kernel)

    # Local Binary Patterns (LBP)
    lbp = local_binary_pattern(gray_image, P=24, R=3, method="uniform")
    lbp_codes = lbp.flatten()[:lbp_length]  # Limit feature vector size

    # Zero-length for any image with at least 256 pixels, so the usual output
    # is unchanged.
    padding = np.zeros(lbp_length - lbp_codes.size)

    return np.hstack([filtered_img.mean(), lbp_codes, padding])

def extract_spatial_features():
    """Derive per-crop spatial features from the label files in ``LABELS_DIR``.

    Every non-blank row of a ``.txt`` label file is read as
    ``<class> <x_center> <y_center> <width> <height> [extra...]``. Columns
    after the fifth (for example a confidence value) are ignored. The n-th
    row of ``<base>.txt`` is stored under ``<base>.jpg`` for n == 1 and
    ``<base><n>.jpg`` otherwise.

    Each stored value is the list
    ``[x_center, y_center, width, height, dist_to_center]`` where
    ``dist_to_center`` is the Euclidean distance of the box centre from
    (0.5, 0.5). The resulting dict is saved to ``spatial_features.npy``.

    Raises:
        ValueError: If a non-blank row has fewer than five columns or a
            non-numeric coordinate.
    """
    spatial_features = {}

    for label_file in os.listdir(LABELS_DIR):
        if not label_file.endswith(".txt"):
            continue

        file_path = os.path.join(LABELS_DIR, label_file)
        # splitext drops only the trailing extension; str.replace would also
        # remove a ".txt" occurring in the middle of the name.
        base_name = os.path.splitext(label_file)[0]

        with open(file_path, "r") as f:
            index = 0
            for line in f:
                data = line.split()
                if not data:
                    # Blank lines (e.g. a trailing newline) carry no box and
                    # must not advance the crop counter.
                    continue
                index += 1

                # Take exactly four coordinates so an optional trailing
                # column does not break the unpacking.
                x_center, y_center, width, height = map(float, data[1:5])
                if index == 1:
                    cropped_image_name = f"{base_name}.jpg"
                else:
                    cropped_image_name = f"{base_name}{index}.jpg"
                dist_to_center = np.sqrt((x_center - 0.5) ** 2 + (y_center - 0.5) ** 2)
                spatial_features[cropped_image_name] = [x_center, y_center, width, height, dist_to_center]

    np.save("spatial_features.npy", spatial_features)
    print("Save successfullt to spatial_features.npy")


def decode_predictions(scores, geometry, confidence_threshold=None):
    """
    Decode the EAST model's output into bounding boxes.

    ``geometry`` is read following the layout used by the reference EAST
    decoders: channels 0-3 are the distances from a cell to the top, right,
    bottom and left edges of its box, and channel 4 is the rotation angle.
    The box height is therefore ``top + bottom`` and the width
    ``right + left``. Using only channels 0 and 1 for the size yields boxes
    that are too small and misplaced.

    Args:
        scores: Score volume indexed as ``scores[0, 0, y, x]``.
        geometry: Geometry volume indexed as ``geometry[0, channel, y, x]``.
        confidence_threshold: Minimum score for a cell to produce a box.
            Defaults to the module-level ``CONFIDENCE_THRESHOLD``.

    Returns:
        ``(rects, confidences)``: a list of ``(start_x, start_y, end_x,
        end_y)`` integer tuples in network-input coordinates, and the
        matching list of scores as floats.
    """
    if confidence_threshold is None:
        confidence_threshold = CONFIDENCE_THRESHOLD

    rows, cols = scores.shape[2:4]
    score_map = scores[0, 0]
    d_top, d_right, d_bottom, d_left, angles = (geometry[0, c] for c in range(5))

    rects = []
    confidences = []

    for y in range(rows):
        for x in range(cols):
            score = score_map[y, x]
            if score < confidence_threshold:
                continue

            # Each output cell is mapped to a 4-pixel step in the input image.
            offset_x, offset_y = x * 4.0, y * 4.0
            angle = angles[y, x]
            cos, sin = np.cos(angle), np.sin(angle)

            # Full box size is the sum of the opposite edge distances.
            h = d_top[y, x] + d_bottom[y, x]
            w = d_right[y, x] + d_left[y, x]

            # Bottom-right corner: step right/down from the cell by the
            # right and bottom distances, rotated by the predicted angle.
            end_x = int(offset_x + (cos * d_right[y, x]) + (sin * d_bottom[y, x]))
            end_y = int(offset_y - (sin * d_right[y, x]) + (cos * d_bottom[y, x]))
            start_x = int(end_x - w)
            start_y = int(end_y - h)

            rects.append((start_x, start_y, end_x, end_y))
            confidences.append(float(score))

    return rects, confidences

def detect_text_east(image_path):
    """
    Apply EAST text detection on an input image.

    The image is resized to ``INPUT_WIDTH`` x ``INPUT_HEIGHT`` for the
    network, candidate boxes are decoded and filtered with non-maximum
    suppression, and the surviving boxes are scaled back to the original
    image size.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A list of ``(start_x, start_y, end_x, end_y)`` integer tuples in
        original-image pixel coordinates. Coordinates are not clamped to the
        image bounds.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Error: Image file '{image_path}' not found or unable to load!")

    h, w = image.shape[:2]
    # Factors that map network-input coordinates back to the original image.
    ratio_w, ratio_h = w / INPUT_WIDTH, h / INPUT_HEIGHT

    # Resize image for the EAST model
    blob = cv2.dnn.blobFromImage(image, 1.0, (INPUT_WIDTH, INPUT_HEIGHT),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)

    # Get EAST outputs
    scores, geometry = net.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])

    # Decode predictions
    rects, confidences = decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences, overlapThresh=NMS_THRESHOLD)

    # Scale boxes back to original image size. The former debug drawing on a
    # copy of the image was never returned or saved, so it has been dropped.
    return [
        (int(start_x * ratio_w), int(start_y * ratio_h),
         int(end_x * ratio_w), int(end_y * ratio_h))
        for (start_x, start_y, end_x, end_y) in boxes
    ]

def merge_overlapping_boxes(boxes, iou_threshold=0.3):
    """
    Greedily fuse boxes whose IoU with a growing seed box exceeds a threshold.

    Boxes are ordered by their left coordinate. The first remaining box
    becomes the seed; every other remaining box whose IoU with the (possibly
    already enlarged) seed is above ``iou_threshold`` is absorbed into it.
    The seed is then emitted and the process repeats on what is left.

    Args:
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
        iou_threshold: IoU above which two boxes are fused.

    Returns:
        A list of ``(x1, y1, x2, y2)`` tuples; an empty list for empty input.
    """
    if len(boxes) == 0:
        return []

    # Work on an array ordered by the left (x1) edge.
    pending = np.array(boxes)
    pending = pending[np.argsort(pending[:, 0])]

    merged_boxes = []
    while len(pending) > 0:
        # Pop the seed box off the front.
        left, top, right, bottom = pending[0]
        pending = pending[1:]

        # True for every box that survives this round untouched.
        keep = np.ones(len(pending), dtype=bool)
        for position, (c_left, c_top, c_right, c_bottom) in enumerate(pending):
            overlap = compute_iou((left, top, right, bottom),
                                  (c_left, c_top, c_right, c_bottom))
            if overlap > iou_threshold:
                # Grow the seed to the union of both boxes.
                left, top = min(left, c_left), min(top, c_top)
                right, bottom = max(right, c_right), max(bottom, c_bottom)
                keep[position] = False

        pending = pending[keep]
        merged_boxes.append((left, top, right, bottom))

    return merged_boxes

def compute_iou(box1, box2):
    """
    Return the Intersection over Union of two ``(x1, y1, x2, y2)`` boxes.

    The result is ``0`` when the union area is not positive.
    """
    a_x1, a_y1, a_x2, a_y2 = box1
    b_x1, b_y1, b_x2, b_y2 = box2

    # Overlap extent along each axis, floored at zero for disjoint boxes.
    overlap_w = max(0, min(a_x2, b_x2) - max(a_x1, b_x1))
    overlap_h = max(0, min(a_y2, b_y2) - max(a_y1, b_y1))
    intersection = overlap_w * overlap_h

    area_a = (a_x2 - a_x1) * (a_y2 - a_y1)
    area_b = (b_x2 - b_x1) * (b_y2 - b_y1)
    union = area_a + area_b - intersection

    if union > 0:
        return intersection / union
    return 0

def merge_vertical_boxes(boxes, y_threshold=10):
    """
    Chain boxes that follow each other closely along the vertical axis.

    Boxes are visited in order of their top edge. A box is folded into the
    running box when the distance between its top edge and the running box's
    bottom edge is at most ``y_threshold``; otherwise the running box is
    emitted and the new box starts the next group.

    Args:
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
        y_threshold: Largest top-to-bottom gap that still merges two boxes.

    Returns:
        A list of boxes; an empty list for empty input.
    """
    if len(boxes) == 0:
        return []

    ordered = sorted(boxes, key=lambda box: box[1])

    merged_boxes = []
    running = ordered[0]

    for candidate in ordered[1:]:
        r_x1, r_y1, r_x2, r_y2 = running
        c_x1, c_y1, c_x2, c_y2 = candidate

        gap = abs(c_y1 - r_y2)
        if gap <= y_threshold:
            # Extend the running box to cover the candidate as well.
            running = (min(r_x1, c_x1), min(r_y1, c_y1),
                       max(r_x2, c_x2), max(r_y2, c_y2))
            continue

        merged_boxes.append(running)
        running = candidate

    merged_boxes.append(running)
    return merged_boxes

def crop_text_regions(image_path, boxes, margin=5):
    """Cut the given boxes, enlarged by ``margin`` pixels, out of an image.

    All four coordinates are clamped to the image bounds. Previously only the
    lower bound of the start corner and the upper bound of the end corner
    were enforced, so a negative end coordinate was interpreted by NumPy as a
    from-the-end index and produced a crop unrelated to the box. Boxes that
    are empty after clamping are skipped.

    Args:
        image_path: Path of the image file to crop from.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes in pixel coordinates.
        margin: Number of pixels added on every side of each box.

    Returns:
        A list of image arrays, one per box that has a non-empty area inside
        the image. The list may be shorter than ``boxes``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(
            f"Error: Image file '{image_path}' not found or unable to load!")

    img_h, img_w = image.shape[:2]
    cropped_texts = []
    for (x1, y1, x2, y2) in boxes:
        left = min(max(0, int(x1) - margin), img_w)
        top = min(max(0, int(y1) - margin), img_h)
        right = min(max(0, int(x2) + margin), img_w)
        bottom = min(max(0, int(y2) + margin), img_h)

        # Nothing of the box lies inside the image (or the box is inverted).
        if right <= left or bottom <= top:
            continue

        cropped_texts.append(image[top:bottom, left:right])
    return cropped_texts

def extract_text_features():
    """Run text detection and OCR over every crop and save the recognised strings.

    For each file in each sub-folder of ``CROPS_DIR`` the EAST detections are
    merged (overlap first, then vertical proximity), the merged regions are
    cropped and each crop is read with EasyOCR. The result maps the image
    file name to the list of recognised strings and is saved to
    ``TEXT_FEATURES_FILE``, the same file ``vectorize_text_features`` loads.

    Note that entries are keyed by file name only, so files with the same
    name in different sub-folders overwrite each other.
    """
    reader = easyocr.Reader(['en'])
    text_features_dict = {}

    for folder in os.listdir(CROPS_DIR):
        folder_path = os.path.join(CROPS_DIR, folder)
        if not os.path.isdir(folder_path):
            continue

        for img_file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img_file)

            detected_boxes = detect_text_east(img_path)
            merged_boxes = merge_overlapping_boxes(detected_boxes, iou_threshold=0.3)
            merged_boxes = merge_vertical_boxes(merged_boxes, y_threshold=15)

            text_results = []
            for cropped_img in crop_text_regions(img_path, merged_boxes):
                # Zero-size crops (degenerate boxes) contain no pixels to
                # read, so they are not handed to the OCR engine.
                if isinstance(cropped_img, np.ndarray) and cropped_img.size > 0:
                    # readtext yields (bbox, text, confidence); keep the text.
                    text_results.extend(
                        text for _bbox, text, _conf in reader.readtext(cropped_img))
            text_features_dict[img_file] = text_results

    np.save(TEXT_FEATURES_FILE, text_features_dict, allow_pickle=True)

def vectorize_text_features(max_features=50):
    """Turn the saved OCR strings into fixed-width TF-IDF vectors.

    Loads the dict stored in ``TEXT_FEATURES_FILE``, joins each image's
    strings into one document and fits a ``TfidfVectorizer``. Rows follow the
    dict's key order. The matrix is zero-padded on the right to exactly
    ``max_features`` columns, so its width does not depend on how many
    distinct tokens the OCR happened to find, and it is saved to
    ``text_features_vectorized.npy``.

    If no document contains a usable token, an all-zero matrix is saved
    instead of failing.

    Args:
        max_features: Vocabulary size limit and width of the saved matrix.
    """
    text_features = np.load(TEXT_FEATURES_FILE, allow_pickle=True).item()
    text_list = [" ".join(texts) for texts in text_features.values()]

    vectorizer = TfidfVectorizer(max_features=max_features)
    try:
        text_vectors = vectorizer.fit_transform(text_list).toarray()
    except ValueError:
        # Raised by scikit-learn when the vocabulary is empty, i.e. no text
        # was recognised in any image. Fall back to zero-width rows that are
        # padded below.
        print("No usable text found; saving all-zero text vectors")
        text_vectors = np.zeros((len(text_list), 0))

    missing_columns = max_features - text_vectors.shape[1]
    if missing_columns > 0:
        padding = np.zeros((text_vectors.shape[0], missing_columns))
        text_vectors = np.hstack([text_vectors, padding])

    np.save("text_features_vectorized.npy", text_vectors)

def combine_all_features():
    """Concatenate every feature type, including text, into one matrix.

    Loads the per-image feature dicts and the TF-IDF matrix from their
    ``.npy`` files, keeps the images present in every dict and, for each of
    them in sorted name order, concatenates
    ``cnn + color + shape + texture + spatial + text``. The stacked result is
    saved to ``final_combined_features.npy``.

    Rows of the TF-IDF matrix are matched to images through the key order of
    the dict in ``text_features.npy``.

    Returns:
        None. If any input file is missing, the missing names are printed and
        nothing is written.
    """
    # text_features.npy is required too (it provides the row order of the
    # TF-IDF matrix), so it is part of the up-front existence check.
    feature_files = ["cnn_features.npy", "color_features.npy", "shape_features.npy",
                     "texture_features.npy", "spatial_features.npy", "text_features_vectorized.npy",
                     "text_features.npy"]

    missing_files = [f for f in feature_files if not os.path.exists(f)]
    if missing_files:
        print(f"Missing files: {missing_files}")
        return

    cnn_features = np.load("cnn_features.npy", allow_pickle=True).item()
    color_features = np.load("color_features.npy", allow_pickle=True).item()
    shape_features = np.load("shape_features.npy", allow_pickle=True).item()
    texture_features = np.load("texture_features.npy", allow_pickle=True).item()
    spatial_features = np.load("spatial_features.npy", allow_pickle=True).item()
    text_vectors = np.load("text_features_vectorized.npy")
    text_features = np.load("text_features.npy", allow_pickle=True).item()

    # Name -> row index in text_vectors; replaces a list.index() scan per image.
    text_row = {name: row for row, name in enumerate(text_features)}

    all_image_keys = sorted(set(cnn_features) & set(color_features) &
                            set(shape_features) & set(texture_features) &
                            set(spatial_features) & set(text_features))

    # Every key is present in every dict by construction, so plain indexing
    # is safe and no zero-vector fallbacks are needed.
    combined_features = [
        np.concatenate([cnn_features[name], color_features[name],
                        shape_features[name], texture_features[name],
                        spatial_features[name], text_vectors[text_row[name]]])
        for name in all_image_keys
    ]

    np.save("final_combined_features.npy", np.array(combined_features))

    print("Feature fusion complete, save as `final_combined_features.npy`")

def combined_all_features_without_text():
    """Concatenate the five non-text feature types into one matrix.

    Loads the cnn, color, shape, texture and spatial feature dicts, keeps the
    image names present in all of them and, in sorted name order,
    concatenates the five vectors of each image in that sequence. The stacked
    matrix is saved to ``final_combined_features.npy`` and the matching row
    names to ``image_names.npy``.
    """
    source_files = ("cnn_features.npy", "color_features.npy", "shape_features.npy",
                    "texture_features.npy", "spatial_features.npy")
    tables = [np.load(path, allow_pickle=True).item() for path in source_files]

    # Only images that have every feature type can be combined.
    shared_names = set(tables[0])
    for table in tables[1:]:
        shared_names &= set(table)
    image_names = sorted(shared_names)

    combined_features = [
        np.concatenate([table[name] for table in tables])
        for name in image_names
    ]

    np.save("final_combined_features.npy", np.array(combined_features))
    np.save("image_names.npy", np.array(image_names))

    print("Sucessfully combine all features")

def normalized_all_features():
    """Standardize each non-text feature type and save them alone and combined.

    Loads the cnn, color, shape, texture and spatial feature dicts, keeps the
    images present in all of them (sorted by name) and standardizes every
    feature type separately with ``StandardScaler``. Color and spatial
    features are multiplied by 2 after scaling.

    Files written:
        * ``<type>_only_features.npy`` for each of the five types,
        * ``image_names.npy`` with the row order,
        * one file per entry of ``combinations`` below, named
          ``<type>_<type>..._features.npy``, plus ``all_features.npy`` for
          the combination of all five types.

    Each combined matrix is built right before it is saved, so only one of
    them is held in memory at a time, and every file is written once (the
    color+spatial combination used to be built and saved twice).
    """
    feature_types = ("cnn", "color", "shape", "texture", "spatial")
    # Multiplier applied after standardization; types not listed keep 1.
    weights = {"color": 2, "spatial": 2}

    raw = {
        "cnn": np.load("cnn_features.npy", allow_pickle=True).item(),
        "color": np.load("color_features.npy", allow_pickle=True).item(),
        "shape": np.load("shape_features.npy", allow_pickle=True).item(),
        "texture": np.load("texture_features.npy", allow_pickle=True).item(),
        "spatial": np.load("spatial_features.npy", allow_pickle=True).item(),
    }

    all_image_keys = sorted(set.intersection(*(set(raw[kind]) for kind in feature_types)))

    # Standardize each type on its own; the scaler is refitted on every call.
    scaler = StandardScaler()
    scaled = {}
    for kind in feature_types:
        matrix = np.array([raw[kind][name] for name in all_image_keys])
        scaled[kind] = scaler.fit_transform(matrix) * weights.get(kind, 1)

    for kind in feature_types:
        np.save(f"{kind}_only_features.npy", scaled[kind])

    np.save("image_names.npy", np.array(all_image_keys))

    # Define feature combinations
    combinations = [
        ("cnn", "color"),
        ("cnn", "shape"),
        ("cnn", "texture"),
        ("cnn", "spatial"),
        ("color", "shape"),
        ("color", "texture"),
        ("color", "spatial"),
        ("cnn", "color", "shape"),
        ("cnn", "color", "texture"),
        ("cnn", "color", "spatial"),
        ("cnn", "shape", "texture"),
        ("cnn", "shape", "spatial"),
        ("cnn", "texture", "spatial"),
        ("cnn", "color", "shape", "texture"),
        ("cnn", "color", "shape", "spatial"),
        ("color", "shape", "texture", "spatial"),
        feature_types,
    ]

    # Save each combination as a separate .npy file
    for combination in combinations:
        if combination == feature_types:
            name = "all_features"
        else:
            name = "_".join(combination) + "_features"
        np.save(f"{name}.npy", np.hstack([scaled[kind] for kind in combination]))
        print(f"Saved {name}.npy successfully!")


def normalized_all_features_with_text():
    """Standardize all feature types, including text, and save text-based sets.

    Loads the five non-text feature dicts, the OCR text dict and the TF-IDF
    matrix. Images present in every dict are processed in sorted name order;
    rows of the TF-IDF matrix are matched through the key order of the dict
    in ``text_features.npy``. Every feature type is standardized separately
    with ``StandardScaler``; color and spatial features are multiplied by 2
    afterwards.

    Files written: ``text_only_features.npy``, ``cnn_text_features.npy``,
    ``cnn_color_text_features.npy``, ``all_features_with_text.npy`` and
    ``image_names_with_text.npy`` (the row order).
    """
    cnn_features = np.load("cnn_features.npy", allow_pickle=True).item()
    color_features = np.load("color_features.npy", allow_pickle=True).item()
    shape_features = np.load("shape_features.npy", allow_pickle=True).item()
    texture_features = np.load("texture_features.npy", allow_pickle=True).item()
    spatial_features = np.load("spatial_features.npy", allow_pickle=True).item()
    text_features = np.load("text_features.npy", allow_pickle=True).item()
    text_vectors = np.load("text_features_vectorized.npy")

    # Name -> row index in text_vectors; replaces a list.index() scan per image.
    text_row = {name: row for row, name in enumerate(text_features)}

    all_image_keys = sorted(set(cnn_features) & set(color_features) &
                            set(shape_features) & set(texture_features) &
                            set(spatial_features) & set(text_features))

    # Every key exists in every dict by construction, so no fallbacks are
    # needed when gathering the rows.
    cnn_data = [cnn_features[name] for name in all_image_keys]
    color_data = [color_features[name] for name in all_image_keys]
    shape_data = [shape_features[name] for name in all_image_keys]
    texture_data = [texture_features[name] for name in all_image_keys]
    spatial_data = [spatial_features[name] for name in all_image_keys]
    text_data = [text_vectors[text_row[name]] for name in all_image_keys]

    # The scaler is refitted for each feature type.
    scaler = StandardScaler()
    cnn_data = scaler.fit_transform(cnn_data)
    color_data = scaler.fit_transform(color_data) * 2
    shape_data = scaler.fit_transform(shape_data)
    texture_data = scaler.fit_transform(texture_data)
    spatial_data = scaler.fit_transform(spatial_data) * 2
    text_data = scaler.fit_transform(text_data)

    # Save only TEXT
    np.save("text_only_features.npy", text_data)

    # Save combinations that include text
    np.save("cnn_text_features.npy", np.hstack([cnn_data, text_data]))
    np.save("cnn_color_text_features.npy", np.hstack([cnn_data, color_data, text_data]))
    np.save("all_features_with_text.npy", np.hstack([cnn_data, color_data, shape_data, texture_data, spatial_data, text_data]))

    np.save("image_names_with_text.npy", np.array(all_image_keys))

    print("Store complete")


def main():
    """Run the whole feature-extraction pipeline.

    Steps, in order:
        1. For every file in every sub-folder of ``CROPS_DIR``, extract CNN,
           color, shape and texture features. Results are keyed by file name
           only, so equal names in different sub-folders overwrite each other.
        2. Save the four dicts to their ``.npy`` files.
        3. Extract spatial features from the label files.
        4. Build the normalized non-text feature sets.
        5. Extract and vectorize text features.
        6. Build the normalized feature sets that include text.
    """
    extracted = {"cnn": {}, "color": {}, "shape": {}, "texture": {}}

    for entry in os.listdir(CROPS_DIR):
        crop_dir = os.path.join(CROPS_DIR, entry)
        if not os.path.isdir(crop_dir):
            continue

        for file_name in os.listdir(crop_dir):
            file_path = os.path.join(crop_dir, file_name)
            bgr, gray = read_image(file_path)
            extracted["cnn"][file_name] = cnn_extract_features(file_path)
            extracted["color"][file_name] = extract_color_histogram(bgr)
            extracted["shape"][file_name] = extract_shape_features(gray)
            extracted["texture"][file_name] = extract_texture_features(gray)

    # (output file, feature type, confirmation message), written in this order.
    save_plan = (
        ("cnn_features.npy", "cnn", "CNN feature is extracted successfully！"),
        ("color_features.npy", "color", "Color feature is extracted successfully！"),
        ("shape_features.npy", "shape", "Shape feature is extracted successfully！"),
        ("texture_features.npy", "texture", "Texture feature is extracted successfully！"),
    )
    for out_path, kind, message in save_plan:
        np.save(out_path, extracted[kind], allow_pickle=True)
        print(message)

    extract_spatial_features()

    print("\n Running normalized_all_features() (baseline)...")
    normalized_all_features()

    print("\n Extracting and vectorizing text features...")
    extract_text_features()
    vectorize_text_features()

    print("\n Running normalized_all_features_with_text()...")
    normalized_all_features_with_text()

    print("\nAll features (with and without text) have been saved.")

# Run the full pipeline only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()