In [None]:
!pip install ipyfilechooser
!pip install scikit-image
import os
import numpy as np
from PIL import Image
from skimage.metrics import structural_similarity as ssim
import pandas as pd
from IPython.display import display, HTML
from ipywidgets import widgets
from ipyfilechooser import FileChooser
 
# Helper function to calculate similarity
def calculate_similarity(image1_path, image2_path):
    """Return the structural similarity (SSIM) index of two image files.

    Each image is converted to grayscale ('L' mode) and resized to 256x256 so
    both arrays have the same shape before they are compared.

    Args:
        image1_path: Path of the first image file.
        image2_path: Path of the second image file.

    Returns:
        The mean SSIM value computed by ``structural_similarity``.
    """
    def load_gray_array(image_path):
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied; multi-frame formats (GIF, TIFF) otherwise keep
        # the handle open, which adds up when a whole catalogue is scanned.
        with Image.open(image_path) as img:
            return np.array(img.convert('L').resize((256, 256)))

    # Only the mean index is needed, so the per-pixel SSIM map (full=True) is
    # not requested.
    return ssim(load_gray_array(image1_path), load_gray_array(image2_path))

# Widget for choosing the query image: a file chooser created with the upload
# folder as its path. compare_uploaded_image() reads the chosen path from
# file_chooser.selected when the compare button is clicked.
file_chooser = FileChooser('/lakehouse/default/Files/upload_files') 
display(file_chooser)

def find_sole_images(root_dir):
    """Collect every image file named ``sole`` anywhere below ``root_dir``.

    A file qualifies when its name without extension equals ``sole``
    (case-insensitive) and its lower-cased extension is one of the accepted
    image extensions.

    Args:
        root_dir: Directory whose tree is traversed with ``os.walk``.

    Returns:
        A list of paths, each the containing directory joined with the file name.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.jfif', '.bmp', '.gif', '.tiff')

    def is_sole_image(name):
        stem, suffix = os.path.splitext(name)
        return stem.lower() == 'sole' and suffix.lower() in image_suffixes

    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(root_dir)
        for name in names
        if is_sole_image(name)
    ]

def compare_uploaded_image(file_chooser, root_dir):
    """Rank the sole images under ``root_dir`` by similarity to the selected image.

    The selected path is read from ``file_chooser.selected`` and scored against
    every path returned by ``find_sole_images(root_dir)`` with
    ``calculate_similarity``. The ranking (highest similarity first) is written
    to a CSV file and displayed as an HTML table.

    Args:
        file_chooser: Object exposing the chosen file path as ``.selected``.
        root_dir: Directory searched for sole images; result paths are shown
            relative to it.

    Returns:
        None. A message is printed instead of a ranking when no file has been
        selected or when no sole image is found under ``root_dir``.
    """
    uploaded_image_path = file_chooser.selected

    if not uploaded_image_path:
        print("Please upload an image first.")
        return

    print(f"Selected file: {uploaded_image_path}")

    sole_images = find_sole_images(root_dir)
    if not sole_images:
        # Usually a sign of a wrong root_dir; report it rather than writing
        # and displaying an empty ranking.
        print(f"No sole images found under {root_dir}; nothing to compare.")
        return

    # Pair each candidate (path relative to root_dir) with its similarity score
    similarities = [
        (os.path.relpath(sole_image, root_dir),
         calculate_similarity(uploaded_image_path, sole_image))
        for sole_image in sole_images
    ]

    # Sort the results by similarity in descending order - the most likely first
    similarities.sort(key=lambda x: x[1], reverse=True)

    # Create a DataFrame to store the results
    df = pd.DataFrame(similarities, columns=['File Path', 'Similarity'])

    # Save the results to a CSV file, creating the target folder on first use
    csv_path = '/lakehouse/default/Files/data/similarity_results.csv'
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    df.to_csv(csv_path, index=False)

    # Display the results as a table
    display(HTML(df.to_html(index=False)))

    print(f'Results saved to {csv_path}')

# Root folder that is searched for sole images.
root_dir = '/lakehouse/default/Files/Shoe'  # TODO: check this is the right file path

compare_button = widgets.Button(description="Compare Images")
# Anything printed or displayed inside a widget callback is only shown reliably
# when it is directed into an Output widget, so one is placed under the button.
results_output = widgets.Output()
display(compare_button)
display(results_output)

def on_compare_button_clicked(b):
    """Run the comparison for the currently selected file.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    # Replace the previous run's output instead of stacking results per click
    results_output.clear_output(wait=True)
    with results_output:
        compare_uploaded_image(file_chooser, root_dir)

compare_button.on_click(on_compare_button_clicked)


In [None]:
!pip install tensorflow ipyfilechooser scikit-image opencv-python-headless

import os
import numpy as np
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from IPython.display import display, HTML
from ipywidgets import widgets
from ipyfilechooser import FileChooser
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from skimage import feature
import cv2  # Import OpenCV

# Load the ResNet50 model: ImageNet weights, no classification head, global
# average pooling so predict() yields one feature vector per image.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Helper function to calculate similarity using edge detection and CNN feature extraction
def calculate_similarity(image1_path, image2_path):
    """Return the cosine similarity of the CNN features of two images' edge maps.

    Each image is loaded at 224x224, reduced to a Canny edge map, replicated
    to three channels and passed through the module-level ``model``.

    Args:
        image1_path: Path of the first image file.
        image2_path: Path of the second image file.

    Returns:
        The single entry of the 1x1 ``cosine_similarity`` matrix.
    """
    def preprocess_and_extract_features(img_path):
        # Load and resize; load_img yields an RGB image, so the array's
        # channel order is RGB (not OpenCV's BGR).
        img = image.load_img(img_path, target_size=(224, 224))
        img_data = image.img_to_array(img)

        # Convert to grayscale with the RGB weighting and scale to [0, 1]:
        # canny's default thresholds are fractions of the dtype maximum,
        # which is 1.0 for floating point input.
        gray_img = cv2.cvtColor(img_data, cv2.COLOR_RGB2GRAY) / 255.0

        # Edge map as 0/255 so that it matches the 0-255 pixel range that
        # preprocess_input expects; a 0/1 map would be almost constant after
        # the ImageNet mean is subtracted.
        edges = feature.canny(gray_img).astype(np.float32) * 255.0

        # Convert back to 3 channels and add the batch dimension
        edges = np.stack([edges, edges, edges], axis=-1)
        edges = np.expand_dims(edges, axis=0)

        # Preprocess for ResNet
        edges = preprocess_input(edges)

        # Extract features using CNN; verbose=0 keeps the per-call progress
        # bar from flooding the notebook output.
        features = model.predict(edges, verbose=0)
        return features

    features1 = preprocess_and_extract_features(image1_path)
    features2 = preprocess_and_extract_features(image2_path)

    similarity = cosine_similarity(features1, features2)[0][0]
    return similarity

# Widget for choosing the query image: a file chooser created with the upload
# folder as its path. compare_uploaded_image() reads the chosen path from
# file_chooser.selected when the compare button is clicked.
file_chooser = FileChooser('/lakehouse/default/Files/upload_files')
display(file_chooser)

def find_sole_images(root_dir):
    """Return the paths of all ``sole`` image files in the tree under ``root_dir``.

    Matching ignores case: the file name without its extension must be
    ``sole`` and the extension must be one of the accepted image extensions.

    Args:
        root_dir: Directory whose tree is traversed with ``os.walk``.

    Returns:
        A list of matching paths (containing directory joined with file name).
    """
    accepted = frozenset(('.png', '.jpg', '.jpeg', '.jfif', '.bmp', '.gif', '.tiff'))

    def matching_paths():
        for current_dir, _dirs, entries in os.walk(root_dir):
            for entry in entries:
                base, ext = os.path.splitext(entry)
                if base.lower() != 'sole':
                    continue
                if ext.lower() in accepted:
                    yield os.path.join(current_dir, entry)

    return list(matching_paths())

def compare_uploaded_image(file_chooser, root_dir):
    """Rank the sole images under ``root_dir`` by similarity to the selected image.

    The selected path is read from ``file_chooser.selected`` and scored against
    every path returned by ``find_sole_images(root_dir)`` with
    ``calculate_similarity``. The ranking (highest similarity first) is written
    to a CSV file and displayed as an HTML table.

    Args:
        file_chooser: Object exposing the chosen file path as ``.selected``.
        root_dir: Directory searched for sole images; result paths are shown
            relative to it.

    Returns:
        None. A message is printed instead of a ranking when no file has been
        selected or when no sole image is found under ``root_dir``.
    """
    uploaded_image_path = file_chooser.selected

    if not uploaded_image_path:
        print("Please upload an image first.")
        return

    print(f"Selected file: {uploaded_image_path}")

    sole_images = find_sole_images(root_dir)
    if not sole_images:
        # Usually a sign of a wrong root_dir; report it rather than writing
        # and displaying an empty ranking.
        print(f"No sole images found under {root_dir}; nothing to compare.")
        return

    # Pair each candidate (path relative to root_dir) with its similarity score.
    # NOTE(review): calculate_similarity is called once per candidate, so the
    # selected image is processed again for every comparison.
    similarities = [
        (os.path.relpath(sole_image, root_dir),
         calculate_similarity(uploaded_image_path, sole_image))
        for sole_image in sole_images
    ]

    # Sort the results by similarity in descending order - the most likely first
    similarities.sort(key=lambda x: x[1], reverse=True)

    # Create a DataFrame to store the results
    df = pd.DataFrame(similarities, columns=['File Path', 'Similarity'])

    # Save the results to a CSV file, creating the target folder on first use
    csv_path = '/lakehouse/default/Files/data/similarity_results.csv'
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    df.to_csv(csv_path, index=False)

    # Display the results as a table
    display(HTML(df.to_html(index=False)))

    print(f'Results saved to {csv_path}')

# Root folder that is searched for sole images.
root_dir = '/lakehouse/default/Files/Shoe'  # TODO: ensure this is the right file path

compare_button = widgets.Button(description="Compare Images")
# Anything printed or displayed inside a widget callback is only shown reliably
# when it is directed into an Output widget, so one is placed under the button.
results_output = widgets.Output()
display(compare_button)
display(results_output)

def on_compare_button_clicked(b):
    """Run the comparison for the currently selected file.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    # Replace the previous run's output instead of stacking results per click
    results_output.clear_output(wait=True)
    with results_output:
        compare_uploaded_image(file_chooser, root_dir)

compare_button.on_click(on_compare_button_clicked)


In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from IPython.display import display, HTML
from ipywidgets import widgets
from ipyfilechooser import FileChooser
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from skimage import feature
import cv2

# Load the ResNet50 model: ImageNet weights, no classification head, global
# average pooling so predict() yields one feature vector per image.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Helper function to calculate similarity using edge detection and CNN feature extraction
def calculate_similarity(image1_path, image2_path):
    """Return the cosine similarity of the CNN features of two images' edge maps.

    Each image is loaded at 224x224, reduced to a Canny edge map, replicated
    to three channels and passed through the module-level ``model``.

    Args:
        image1_path: Path of the first image file.
        image2_path: Path of the second image file.

    Returns:
        The single entry of the 1x1 ``cosine_similarity`` matrix.
    """
    def preprocess_and_extract_features(img_path):
        # load_img yields an RGB image, so the array's channel order is RGB
        # (not OpenCV's BGR).
        img = image.load_img(img_path, target_size=(224, 224))
        img_data = image.img_to_array(img)

        # Grayscale with the RGB weighting, scaled to [0, 1]: canny's default
        # thresholds are fractions of the dtype maximum, which is 1.0 for
        # floating point input.
        gray_img = cv2.cvtColor(img_data, cv2.COLOR_RGB2GRAY) / 255.0

        # Edge map as 0/255 so that it matches the 0-255 pixel range that
        # preprocess_input expects; a 0/1 map would be almost constant after
        # the ImageNet mean is subtracted.
        edges = feature.canny(gray_img).astype(np.float32) * 255.0

        # Three identical channels plus a leading batch dimension
        edges = np.stack([edges, edges, edges], axis=-1)
        edges = np.expand_dims(edges, axis=0)
        edges = preprocess_input(edges)

        # verbose=0 keeps the per-call progress bar out of the notebook output
        features = model.predict(edges, verbose=0)
        return features

    features1 = preprocess_and_extract_features(image1_path)
    features2 = preprocess_and_extract_features(image2_path)

    similarity = cosine_similarity(features1, features2)[0][0]
    return similarity

# File chooser for picking the query image, created with the upload folder as
# its path. compare_uploaded_image() reads the chosen path from
# file_chooser.selected when the compare button is clicked.
file_chooser = FileChooser('/lakehouse/default/Files/upload_files')
display(file_chooser)

def find_sole_images(root_dir):
    """Walk ``root_dir`` and gather the paths of image files named ``sole``.

    The comparison is case-insensitive for both the base name (``sole``) and
    the extension, which must be one of the accepted image extensions.

    Args:
        root_dir: Directory whose tree is traversed with ``os.walk``.

    Returns:
        A list with one joined ``directory/file`` path per matching file.
    """
    image_extensions = ['.png', '.jpg', '.jpeg', '.jfif', '.bmp', '.gif', '.tiff']
    matches = []

    for parent, _children, filenames in os.walk(root_dir):
        for filename in filenames:
            name_part, ext_part = os.path.splitext(filename)
            is_sole = name_part.lower() == 'sole'
            is_image = ext_part.lower() in image_extensions
            if is_sole and is_image:
                matches += [os.path.join(parent, filename)]

    return matches

def compare_uploaded_image(file_chooser, root_dir):
    """Rank the sole images under ``root_dir`` and offer a correction form.

    The selected path is read from ``file_chooser.selected`` and scored against
    every path returned by ``find_sole_images(root_dir)`` with
    ``calculate_similarity``. The ranking (highest similarity first) is written
    to a CSV file and displayed as an HTML table. A text box and button are
    then shown; submitting appends ``(selected path, entered path)`` as one
    row to the corrections CSV.

    Args:
        file_chooser: Object exposing the chosen file path as ``.selected``.
        root_dir: Directory searched for sole images; result paths are shown
            relative to it.

    Returns:
        None. A message is printed instead of a ranking when no file has been
        selected or when no sole image is found under ``root_dir``.
    """
    import csv  # local import: only the corrections log needs it

    uploaded_image_path = file_chooser.selected

    if not uploaded_image_path:
        print("Please upload an image first.")
        return

    print(f"Selected file: {uploaded_image_path}")

    sole_images = find_sole_images(root_dir)
    if not sole_images:
        # Usually a sign of a wrong root_dir; report it rather than writing
        # and displaying an empty ranking.
        print(f"No sole images found under {root_dir}; nothing to compare.")
        return

    # Pair each candidate (path relative to root_dir) with its similarity score.
    # NOTE(review): calculate_similarity is called once per candidate, so the
    # selected image is processed again for every comparison.
    similarities = [
        (os.path.relpath(sole_image, root_dir),
         calculate_similarity(uploaded_image_path, sole_image))
        for sole_image in sole_images
    ]
    similarities.sort(key=lambda x: x[1], reverse=True)

    df = pd.DataFrame(similarities, columns=['File Path', 'Similarity'])

    # Create the data folder on first use so neither CSV write fails on a
    # fresh lakehouse.
    csv_path = '/lakehouse/default/Files/data/similarity_results.csv'
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    df.to_csv(csv_path, index=False)

    display(HTML(df.to_html(index=False)))

    print(f'Results saved to {csv_path}')

    # Create widgets for feedback to help improve the model
    feedback_button = widgets.Button(description="Submit Correction")
    correction_text = widgets.Text(description="Correct File Path:")
    # Messages printed from the button callback are routed into this Output
    # widget so they appear next to the form.
    feedback_output = widgets.Output()
    display(feedback_button, correction_text, feedback_output)

    def on_feedback_button_clicked(b):
        """Append the entered correction for the selected image to the corrections CSV."""
        correct_path = correction_text.value.strip()
        with feedback_output:
            if correct_path:
                # csv.writer quotes fields as needed, so a comma inside either
                # path no longer splits the row into extra columns.
                with open('/lakehouse/default/Files/data/corrections.csv', 'a', newline='') as f:  # TODO: consider a dedicated folder for corrections
                    csv.writer(f, lineterminator='\n').writerow([uploaded_image_path, correct_path])
                print(f"Correction submitted: {correct_path}")
            else:
                print("Please enter a correction.")

    feedback_button.on_click(on_feedback_button_clicked)

# Root folder that is searched for sole images.
root_dir = '/lakehouse/default/Files/Shoe'

compare_button = widgets.Button(description="Compare Images")
# Anything printed or displayed inside a widget callback is only shown reliably
# when it is directed into an Output widget, so one is placed under the button.
results_output = widgets.Output()
display(compare_button)
display(results_output)

def on_compare_button_clicked(b):
    """Run the comparison for the currently selected file.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    # Replace the previous run's output instead of stacking results per click
    results_output.clear_output(wait=True)
    with results_output:
        compare_uploaded_image(file_chooser, root_dir)

compare_button.on_click(on_compare_button_clicked)


In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load corrections for learning.
# The file has no header row; it is read as two columns. The feedback form in
# this notebook appends "<selected image path>,<entered correct path>", so the
# column named 'incorrect_path' here holds the path of the selected image.
corrections = pd.read_csv('/lakehouse/default/Files/data/corrections.csv', header=None, names=['incorrect_path', 'correct_path'])

# Prepare training data
# NOTE(review): only 'correct_path' is used, both as the training image and as
# the source of the label; the selected image's path is never read. Confirm
# that this is the intended training set.
train_data = []
train_labels = []

for raw_path in corrections['correct_path'].dropna().astype(str):
    correct_path = raw_path.strip()
    path_parts = correct_path.split('/')
    # The label is the second last path component (the folder holding the
    # file). Entries without such a folder cannot be labelled, so they are
    # skipped instead of raising IndexError.
    if len(path_parts) < 2 or not path_parts[-2]:
        print(f"Skipping correction without a label folder: {correct_path!r}")
        continue
    train_data.append(correct_path)
    train_labels.append(path_parts[-2])

if not train_data:
    raise ValueError("corrections.csv contains no usable 'correct_path' entries to train on.")

# One frame shared by the training and validation generators
training_frame = pd.DataFrame({'filename': train_data, 'class': train_labels})

# Create an ImageDataGenerator (20% of the rows are reserved for validation)
datagen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.2)

flow_options = dict(
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

train_generator = datagen.flow_from_dataframe(training_frame, subset='training', **flow_options)
validation_generator = datagen.flow_from_dataframe(training_frame, subset='validation', **flow_options)

if train_generator.samples == 0:
    raise ValueError("None of the corrected file paths could be loaded as training images.")

# Fine-tune the ResNet50 model: frozen ImageNet backbone plus a new softmax
# head with one unit per label found in the corrections.
base_model = ResNet50(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(len(train_generator.class_indices), activation='softmax')(x)

# NOTE(review): this rebinds the notebook-level name `model`, which
# calculate_similarity() in the comparison cells reads when it is called.
# Comparisons run after this cell would therefore use this classifier;
# confirm whether that is intended.
model = Model(inputs=base_model.input, outputs=predictions)

# Only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# With only a few corrections the 20% validation subset can be empty, and an
# empty generator cannot be iterated; validate only when it has samples.
model.fit(
    train_generator,
    validation_data=validation_generator if validation_generator.samples > 0 else None,
    epochs=10,
)

# Save the fine-tuned model
model.save('/lakehouse/default/Files/data/fine_tuned_model.h5')  # TODO: might need to change this file path too
