## Step 1: Clean Data Loader for Regression

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.image import load_img, img_to_array

class RegressionDataLoader:
    def __init__(self, data_path='./Materials_data'):
        self.data_path = data_path
        self.param_scaler = MinMaxScaler()
        
    def extract_parameters_from_folder(self, folder_name):
        """Extract continuous parameters from folder name format: 1-20.20.20.20-1"""
        try:
            # Remove any prefix before the first number
            parts = folder_name.split('-')
            if len(parts) >= 2:
                # Extract the parameter string (e.g., "20.20.20.20")
                param_str = parts[1]
                # Split into individual parameters
                params = [float(x) for x in param_str.split('.')]
                if len(params) == 4:
                    return params  # [弯曲强度, 强度, 形变强度, 形变率]
        except:
            pass
        return None
    
    def load_dual_view_images(self, folder_path):
        """Load top-view (-1) and side-view (-2) images from folder"""
        top_images = []
        side_images = []
        
        # Find all image files in folder
        if not os.path.exists(folder_path):
            return [], []
            
        for filename in os.listdir(folder_path):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                if filename.endswith('-1.jpg'):
                    # Top view image
                    img_path = os.path.join(folder_path, filename)
                    try:
                        img = load_img(img_path, target_size=(224, 224))
                        img_array = img_to_array(img) / 255.0
                        top_images.append(img_array)
                    except:
                        continue
                elif filename.endswith('-2.jpg'):
                    # Side view image
                    img_path = os.path.join(folder_path, filename)
                    try:
                        img = load_img(img_path, target_size=(224, 224))
                        img_array = img_to_array(img) / 255.0
                        side_images.append(img_array)
                    except:
                        continue
        
        return top_images, side_images
    
    def load_regression_data(self):
        """Load all data for regression training"""
        top_images = []
        side_images = []
        parameters = []
        folder_names = []
        
        print("Loading regression data from folder structure...")
        
        # Scan all folders in data path
        for folder_name in os.listdir(self.data_path):
            folder_path = os.path.join(self.data_path, folder_name)
            
            if not os.path.isdir(folder_path):
                continue
                
            # Extract parameters from folder name
            params = self.extract_parameters_from_folder(folder_name)
            if params is None:
                continue
                
            # Load images from this folder
            folder_top_images, folder_side_images = self.load_dual_view_images(folder_path)
            
            # Match top and side images (assuming same count)
            min_count = min(len(folder_top_images), len(folder_side_images))
            
            for i in range(min_count):
                top_images.append(folder_top_images[i])
                side_images.append(folder_side_images[i])
                parameters.append(params)
                folder_names.append(folder_name)
                
        # Convert to numpy arrays
        top_images = np.array(top_images)
        side_images = np.array(side_images)
        parameters = np.array(parameters)
        
        # Normalize parameters to [0,1] range for training
        parameters_normalized = self.param_scaler.fit_transform(parameters)
        
        print(f"Loaded {len(top_images)} samples")
        print(f"Parameter ranges: {np.min(parameters, axis=0)} to {np.max(parameters, axis=0)}")
        print(f"Parameter names: ['弯曲强度', '强度', '形变强度', '形变率']")
        
        return top_images, side_images, parameters_normalized, parameters, folder_names

# Test the data loader
data_loader = RegressionDataLoader()
top_imgs, side_imgs, params_norm, params_orig, folders = data_loader.load_regression_data()

# Display sample
if len(top_imgs) > 0:
    print(f"\nSample data:")
    print(f"Top image shape: {top_imgs[0].shape}")
    print(f"Side image shape: {side_imgs[0].shape}")
    print(f"Parameters (original): {params_orig[0]}")
    print(f"Parameters (normalized): {params_norm[0]}")
    print(f"Folder: {folders[0]}")

### Step 2: Create Pairs for Siamese Network

In [73]:
import random

def create_pairs(images):
    image_pairs = []
    labels = []
    # Pair images within the same group (similar) and across groups (dissimilar)
    for i in range(len(images)):
        for j in range(i+1, len(images)):
            if images[i].group_name == images[j].group_name:
                label = 1  # Similar
            else:
                label = 0  # Dissimilar
            image_pairs.append((images[i], images[j]))
            labels.append(label)
    return image_pairs, labels

image_pairs, labels = create_pairs(images)


### Step 3: Preprocess and Load Image Data

In [75]:
def preprocess_image_pair(image_pair):
    img1 = image_pair[0].load_top_image()  # Load the top view for the first image
    img2 = image_pair[1].load_side_image()  # Load the side view for the second image
    return img1, img2

# Process all image pairs
left_images = []
right_images = []

for pair in image_pairs:
    img1, img2 = preprocess_image_pair(pair)
    left_images.append(img1)
    right_images.append(img2)

# Convert lists to numpy arrays for model input
left_images = np.array(left_images)
right_images = np.array(right_images)
labels = np.array(labels)


2024-11-10 00:08:39.526199: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at whole_file_read_ops.cc:116 : NOT_FOUND: ./Materials_data/第七组/30.40.40.20-2.jpg; No such file or directory


NotFoundError: {{function_node __wrapped__ReadFile_device_/job:localhost/replica:0/task:0/device:CPU:0}} ./Materials_data/第七组/30.40.40.20-2.jpg; No such file or directory [Op:ReadFile]

### Step 4: Define and Compile the Siamese Network Model

In [None]:
def build_base_model():
    model = tf.keras.Sequential([
        layers.Conv2D(64, (10, 10), activation='relu', input_shape=(105, 105, 3)),
        layers.MaxPooling2D(),
        layers.Conv2D(128, (7, 7), activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, (4, 4), activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(256, (4, 4), activation='relu'),
        layers.Flatten(),
        layers.Dense(4096, activation='sigmoid')
    ])
    return model

def build_siamese_model():
    input_left = layers.Input(name="left_input", shape=(105, 105, 3))
    input_right = layers.Input(name="right_input", shape=(105, 105, 3))

    base_model = build_base_model()

    output_left = base_model(input_left)
    output_right = base_model(input_right)

    # Compute L1 distance between features
    l1_distance = tf.abs(output_left - output_right)
    output = layers.Dense(1, activation='sigmoid')(l1_distance)

    siamese_model = Model(inputs=[input_left, input_right], outputs=output)
    return siamese_model

# Compile the model
siamese_model = build_siamese_model()
siamese_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


### Step 5: Train the model

In [4]:
# Train the model
siamese_model.fit([left_images, right_images], labels, batch_size=32, epochs=10)


### Step 6: Make Predictions

In [5]:
def predict_similarity(img_data1, img_data2):
    img1 = img_data1.load_image()
    img2 = img_data2.load_image()
    img1 = np.expand_dims(img1, axis=0)
    img2 = np.expand_dims(img2, axis=0)
    return siamese_model.predict([img1, img2])[0][0]

# Example usage
similarity_score = predict_similarity(images[0], images[1])
print("Similarity score:", similarity_score)
