In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import numpy as np
from sklearn.cluster import KMeans
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

  warn(


In [2]:
# Function to load and preprocess images from the dataset
def load_and_preprocess_data(data_dir, image_size=(256, 256)):
    """Read every decodable image in ``data_dir`` and resize it.

    Args:
        data_dir: Directory whose entries are passed to ``cv2.imread``.
        image_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument.

    Returns:
        ``np.ndarray`` built from the resized images, ordered by sorted
        entry name. Entries that OpenCV cannot decode are skipped.
    """
    images = []
    # os.listdir yields entries in arbitrary order; sorting keeps the sample
    # order (and every index derived from it later) stable between runs.
    for image_file in sorted(os.listdir(data_dir)):
        image_path = os.path.join(data_dir, image_file)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure (sub-directory, non-image file,
            # corrupt data) by returning None instead of raising; passing
            # that to cv2.resize would abort the whole load.
            continue
        # NOTE(review): cv2.imread decodes colour images in BGR channel order;
        # consumers expecting RGB should convert first - confirm.
        images.append(cv2.resize(image, image_size))
    return np.array(images)

# Read the unlabeled dataset from disk (point 'data_dir' at your own folder)
data_dir = "img/not_labeled_stones"
images = load_and_preprocess_data(data_dir)

# Keras generator describing the random geometric perturbations to apply
datagen = ImageDataGenerator(
    fill_mode='nearest',
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
)

# One randomly transformed version per source image, in the same order
augmented_images = np.array(list(map(datagen.random_transform, images)))

In [4]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense

def create_encoder(input_shape, embedding_size):
    """Build an EfficientNetB0-based model that outputs embedding vectors.

    Args:
        input_shape: Forwarded to ``EfficientNetB0`` as ``input_shape``.
        embedding_size: Number of units in the final ``Dense`` layer.

    Returns:
        A ``Model`` mapping the backbone's input to the ``Dense`` output.
    """
    # ImageNet-initialised backbone without its classification head
    backbone = EfficientNetB0(include_top=False, weights='imagenet', input_shape=input_shape)
    # Pool the backbone's output, then project it with a Dense layer
    pooled = GlobalAveragePooling2D()(backbone.output)
    projection = Dense(embedding_size)(pooled)
    return Model(inputs=backbone.input, outputs=projection)

# Encoder hyper-parameters: input tensor shape and width of the embedding
input_shape, embedding_size = (256, 256, 3), 128

# Instantiate the encoder network
encoder = create_encoder(input_shape=input_shape, embedding_size=embedding_size)

In [5]:
import tensorflow.keras.backend as K

def contrastive_loss(y_true, y_pred, margin=1.0):
    """Mean of ``y_true * y_pred**2 + (1 - y_true) * max(margin - y_pred, 0)**2``.

    Args:
        y_true: Weights the squared-prediction term; ``1 - y_true`` weights
            the margin term.
        y_pred: Model output the loss is computed on.
        margin: Threshold subtracted from in the hinge term.

    Returns:
        The backend mean over all elements of the combined expression.
    """
    # Term active where y_true is non-zero: squared prediction
    positive_term = y_true * K.square(y_pred)
    # Term active where y_true is not 1: squared hinge on (margin - prediction)
    hinge = K.maximum(margin - y_pred, 0)
    negative_term = (1 - y_true) * K.square(hinge)
    return K.mean(positive_term + negative_term)

In [6]:
# Prepare data for contrastive learning: the augmented set stacked on itself
augmented_data = np.concatenate((augmented_images, augmented_images))

# Labels: 1 for the first copy of the augmented set, 0 for the second copy.
# NOTE(review): both halves contain identical pixels, and the loss is applied
# directly to the encoder's embedding output rather than to a distance between
# a pair of embeddings - confirm this is the intended training signal.
contrastive_labels = np.concatenate((np.ones(len(augmented_images)), np.zeros(len(augmented_images))))

# Compile the model with the contrastive loss
encoder.compile(optimizer='adam', loss=contrastive_loss)

# Train the model
epochs = 10
batch_size = 32

# A single fit call runs all epochs (reshuffling each one), so Keras reports
# progress as "Epoch k/10" instead of ten separate one-epoch runs.
encoder.fit(augmented_data, contrastive_labels, batch_size=batch_size, epochs=epochs, shuffle=True)

: 

In [None]:
#Bounding Box Generation
import numpy as np

def generate_bounding_boxes(embeddings, cluster_ids):
    """Compute one axis-aligned bounding box per cluster in embedding space.

    Args:
        embeddings: Array-like of shape ``(n_samples, n_dims)``.
        cluster_ids: Array-like with one cluster id per sample. Samples with
            id ``-1`` are ignored.

    Returns:
        A list with one box per cluster id (in ``np.unique`` order). Each box
        is a list holding the per-dimension minima followed by the
        per-dimension maxima, i.e. ``[x_min, y_min, x_max, y_max]`` when
        ``n_dims == 2``. Empty input yields an empty list.
    """
    embeddings = np.asarray(embeddings)
    cluster_ids = np.asarray(cluster_ids)
    bounding_boxes = []

    for cluster_id in np.unique(cluster_ids):
        if cluster_id == -1:
            continue
        indices = np.where(cluster_ids == cluster_id)[0]
        members = embeddings[indices]
        # Work on whole min/max vectors instead of unpacking exactly two
        # values, so embeddings of any width are handled.
        lower = np.min(members, axis=0)
        upper = np.max(members, axis=0)
        bounding_boxes.append(list(np.concatenate((lower, upper))))

    return bounding_boxes


In [None]:
# Get embeddings for the entire dataset
embeddings = encoder.predict(images)

# Apply K-means clustering to group embeddings
num_clusters = 5  # You can adjust the number of clusters based on your dataset
# Fix the seed: K-means centroid initialisation is random, so without it the
# cluster ids (and everything derived from them) change on every run.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
cluster_ids = kmeans.fit_predict(embeddings)

# Generate one bounding box per cluster
bounding_boxes = generate_bounding_boxes(embeddings, cluster_ids)

# bounding_boxes is a list with one entry per cluster, in the format returned by generate_bounding_boxes.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Visualize detected objects and bounding boxes
def visualize_objects_with_bboxes(images, cluster_ids, embeddings=None):
    """Show each image with one red rectangle per cluster drawn on top.

    Args:
        images: Iterable of images accepted by ``plt.imshow``.
        cluster_ids: Array-like with one cluster id per embedding row.
            Id ``-1`` is skipped.
        embeddings: Array-like of shape ``(n_samples, 2)`` from which the
            rectangle extents are taken. When omitted, the module-level
            ``embeddings`` variable is used, as earlier versions of this
            function did.

    Raises:
        NameError: ``embeddings`` was not passed and no module-level
            ``embeddings`` exists.
        ValueError: ``embeddings`` is not two-dimensional with exactly two
            columns, so no (x, y) extents can be derived from it.
    """
    if embeddings is None:
        # Backward compatibility with the two-argument call, which relied on
        # a global defined by another cell.
        try:
            embeddings = globals()["embeddings"]
        except KeyError:
            raise NameError("name 'embeddings' is not defined") from None
    embeddings = np.asarray(embeddings)
    cluster_ids = np.asarray(cluster_ids)
    if embeddings.ndim != 2 or embeddings.shape[1] != 2:
        raise ValueError(
            "embeddings must have shape (n_samples, 2) to be drawn as "
            f"rectangles, got {embeddings.shape}"
        )

    # The rectangles depend only on embeddings and cluster_ids, so compute
    # them once instead of once per image.
    boxes = []
    for cluster_id in np.unique(cluster_ids):
        if cluster_id == -1:
            continue
        indices = np.where(cluster_ids == cluster_id)[0]
        x_min, y_min = (int(v) for v in np.min(embeddings[indices], axis=0))
        x_max, y_max = (int(v) for v in np.max(embeddings[indices], axis=0))
        boxes.append((x_min, y_min, x_max - x_min, y_max - y_min))

    for image in images:
        plt.imshow(image)
        ax = plt.gca()
        for x, y, width, height in boxes:
            # A patch can belong to only one axes, so build a fresh one per image.
            rect = patches.Rectangle((x, y), width, height, linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
        plt.show()

# Visualize the detected objects and bounding boxes
visualize_objects_with_bboxes(images, cluster_ids, embeddings=embeddings)