In [111]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics.pairwise import cosine_similarity
from collections import Counter
from sklearn.preprocessing import LabelEncoder

# Step 1: Load and Preprocess Images

In [112]:
import imghdr

def _has_jpeg_or_png_signature(path):
    """Return True when the file at ``path`` starts with a JPEG or PNG signature."""
    with open(path, 'rb') as handle:
        header = handle.read(8)
    # JPEG streams start with the SOI marker FF D8 followed by another FF marker
    # byte; PNG files start with a fixed 8-byte signature.
    return header.startswith(b'\xff\xd8\xff') or header == b'\x89PNG\r\n\x1a\n'


def load_images_from_folder(folder):
    """Load every JPEG/PNG found in ``folder/<label>/`` as a normalized grayscale array.

    Each immediate sub-directory of ``folder`` is treated as one class; its
    name becomes the label of every image stored directly inside it. Entries
    that are not regular files, files without a JPEG/PNG signature, and files
    that fail to load are skipped (load failures are printed, not raised).

    Directory listings are sorted so the returned sample order does not depend
    on the filesystem, which keeps the later seeded shuffle/split reproducible.

    The file signature is checked directly instead of through ``imghdr``:
    that module is deprecated (removed in Python 3.13) and does not recognise
    JPEG files that lack a JFIF/Exif tag.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. ``images`` stacks the
        ``img_to_array`` result of every image (loaded with
        ``target_size=(128, 128)`` and ``color_mode="grayscale"``, divided by
        255.0); ``labels`` holds the matching sub-directory names. Both are
        empty arrays when nothing was loaded.
    """
    images = []
    labels = []
    for label in sorted(os.listdir(folder)):
        label_path = os.path.join(folder, label)
        if not os.path.isdir(label_path):
            continue
        for file in sorted(os.listdir(label_path)):
            file_path = os.path.join(label_path, file)
            # Nested directories and other non-files cannot be opened as images.
            if not os.path.isfile(file_path):
                continue
            try:
                if not _has_jpeg_or_png_signature(file_path):
                    continue
                img = load_img(file_path, target_size=(128, 128), color_mode="grayscale")
                img_array = img_to_array(img) / 255.0  # Normalize pixel values
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Error loading image {file_path}: {e}")
                continue
            images.append(img_array)
            labels.append(label)
    return np.array(images), np.array(labels)


# Read the whole dataset from disk (one sub-folder per class)
data_folder = './Materials_data'
images, labels = load_images_from_folder(data_folder)

# Randomize the sample order with a fixed seed so the run is repeatable
images, labels = shuffle(images, labels, random_state=42)

# Map every folder-name label to an integer class id
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Hold out 20% of the samples for validation, stratified on the class id
(images_train, images_val,
 labels_train, labels_val) = train_test_split(
    images,
    encoded_labels,
    stratify=encoded_labels,
    test_size=0.2,
    random_state=42,
)

# Report how many samples of each class landed in each split
print("Training set class distribution:", Counter(labels_train))
print("Validation set class distribution:", Counter(labels_val))



Training set class distribution: Counter({0: 37, 7: 33, 12: 33, 1: 33, 5: 33, 13: 32, 18: 29, 11: 29, 15: 29, 2: 29, 19: 29, 16: 29, 9: 28, 8: 28, 14: 28, 17: 26, 6: 26, 10: 26, 4: 24, 21: 24, 20: 24, 3: 22, 22: 20, 23: 19, 24: 16, 40: 16, 25: 16, 29: 16, 31: 16, 38: 16, 35: 16, 33: 16, 39: 16, 44: 16, 27: 16, 34: 16, 43: 16, 42: 16, 32: 16, 37: 16, 28: 16, 41: 16, 36: 16, 30: 16, 26: 16})
Validation set class distribution: Counter({0: 10, 5: 9, 12: 9, 7: 9, 13: 8, 1: 8, 19: 7, 11: 7, 15: 7, 9: 7, 18: 7, 14: 7, 16: 7, 2: 7, 8: 7, 17: 6, 3: 6, 6: 6, 21: 6, 10: 6, 4: 6, 20: 6, 22: 5, 23: 5, 39: 4, 29: 4, 26: 4, 35: 4, 33: 4, 34: 4, 41: 4, 31: 4, 38: 4, 40: 4, 24: 4, 36: 4, 44: 4, 27: 4, 43: 4, 28: 4, 37: 4, 25: 4, 42: 4, 32: 4, 30: 4})


In [113]:
# Summarize the dataset that is already in memory. Re-reading every image from
# disk here only repeated the expensive load and rebound the shuffled
# `images`/`labels` arrays to a fresh unshuffled copy; the counts and the set
# of labels are the same either way.
print(f"Number of images: {len(images)}")
print(f"Number of labels: {len(labels)}")
print(f"Unique labels: {set(labels)}")

Number of images: 1258
Number of labels: 1258
Unique labels: {'六-0.20.40.20', '1-50.20.20.20-2', '三-90.60.20.20', '九-60.80.40.20', '1-60.20.20.20-1', '1-40.20.20.20-2', '十三-40.60.60.20', '十五-100.100.60.20', '六-10.20.40.20', '十一-70.20.60.20', '三-10.60.20.20', '1-50.20.20.20-1', '八-30.60.40.20', '1-10.20.20.20-2', '2-0.40.20.20-2', '五-100.100.20.20', '1-0.20.20.20-1', '十五-20.100.40.20', '2-0.40.20.20-1', '十二-40.40.60.20', '1-0.20.20.20-2', '七-40.40.40.20', '十-0.100.40.20', '1-100.20.20.20-2', '1-90.20.20.20-1', '五-20.100.20.20', '六-90.20.40.20', '十四-70.80.60.20', '1-100.20.20.20-1', '二-50.40.20.20', '1-90.20.20.20-2', '1-80.20.20.20-2', '四-10.80.20.20', '1-70.20.20.20.1', '1-60.20.20.20-2', '1-30.20.20.20-1', '四-90.80.20.20', '1-80.20.20.20-1', '1-70.20.20.20-2', '1-10.20.20.20-1', '1-20.20.20.20-2', '1-30.20.20.20-2', '1-20.20.20.20-1', '十-90.100.40.20', '1-40.20.20.20-1'}


# Step 2: Build CNN Model

In [114]:
# Step 3: Build CNN Model
def build_model():
    """Create and compile the CNN classifier.

    The network is four Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
    32/64/128/256 filters, then Flatten, two Dense(relu) + Dropout(0.5) stages
    with 256 and 128 units, and a softmax output with one unit per distinct
    value in the notebook-level ``encoded_labels``.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.0001),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    n_outputs = len(np.unique(encoded_labels))

    stack = []
    # Convolutional stages; only the first layer declares the input shape.
    for depth, filters in enumerate((32, 64, 128, 256)):
        first_layer_args = {'input_shape': (128, 128, 1)} if depth == 0 else {}
        stack.append(Conv2D(filters, (3, 3), activation='relu', **first_layer_args))
        stack.append(MaxPooling2D((2, 2)))

    # Fully connected head with dropout after each hidden layer.
    stack.append(Flatten())
    for units in (256, 128):
        stack.append(Dense(units, activation='relu'))
        stack.append(Dropout(0.5))
    stack.append(Dense(n_outputs, activation='softmax'))

    model = Sequential(stack)
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

model = build_model()

# Step 4: Training
# The generator is created with no arguments, so no augmentation options are
# set here; it is only used to batch the arrays (32 samples per batch).
data_gen = ImageDataGenerator()

train_gen = data_gen.flow(x=images_train, y=labels_train, batch_size=32)
val_gen = data_gen.flow(x=images_val, y=labels_val, batch_size=32)

model.fit(
    x=train_gen,
    validation_data=val_gen,
    epochs=100,
)

# Step 5: Feature Extraction and Similarity Calculation
# Reuse every trained layer except the final softmax layer as the feature extractor.
feature_extractor = Sequential(model.layers[:-1])

def extract_features(image):
    """Run one image through the notebook-level ``feature_extractor``.

    Args:
        image: Array holding a single image, without a batch axis.

    Returns:
        Whatever ``feature_extractor.predict`` returns for a batch that
        contains only this image.
    """
    # predict() works on batches, so add a leading axis of length one.
    batch_of_one = np.expand_dims(image, axis=0)
    return feature_extractor.predict(batch_of_one)





Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300


KeyboardInterrupt: 

# Step 6: Find Similar Material

In [103]:
def format_material_details(label):
    """Turn a dataset label into a human-readable material description.

    Supported label layouts (``a.b.c.d`` are the four attribute values):

    * ``<id>-a.b.c.d-<face>``  e.g. ``'1-20.20.20.20-1'``
    * ``<id>-a.b.c.d.<face>``  e.g. ``'1-70.20.20.20.1'`` (face after a dot)
    * ``<id>-a.b.c.d``         e.g. ``'六-0.20.40.20'`` (no face information)

    A face token of ``"1"`` is reported as "正面"; any other face token as
    "背面". When the label carries no face token the face is left out, instead
    of mistaking the attribute string for a face token and reporting "背面".

    Args:
        label: Label string (a folder name of the dataset).

    Returns:
        The formatted description string.

    Raises:
        ValueError: If the label has no ``-`` separated attribute section or
            fewer than four dot-separated attribute values.
    """
    parts = str(label).split("-")  # Example: '1-20.20.20.20-1'
    if len(parts) < 2:
        raise ValueError(f"Label {label!r} has no '-' separated attribute section")

    material_id = parts[0]
    attributes = parts[1].split(".")
    if len(attributes) < 4:
        raise ValueError(f"Label {label!r} needs four '.' separated attribute values")
    bend_strength, strength, deformation_strength, deformation_rate = attributes[:4]

    if len(parts) >= 3:
        face_token = parts[-1]
    elif len(attributes) > 4:
        # Variant such as '1-70.20.20.20.1': the face follows the attributes after a dot.
        face_token = attributes[4]
    else:
        face_token = None

    details = f"材料{material_id}，弯曲强度：{bend_strength}，强度：{strength}，变形强度：{deformation_strength}，变形率：{deformation_rate}"
    if face_token is None:
        return details
    face = "正面" if face_token == "1" else "背面"
    return f"{details}, {face}"

def find_most_similar(input_image_path, top_k=3):
    """Rank the loaded materials by feature similarity to one query image.

    The query is loaded as a 128x128 grayscale image scaled by 1/255 and
    embedded with ``extract_features``. The notebook-level ``images`` gallery
    is embedded with a single batched ``feature_extractor.predict`` call
    rather than one call per gallery image. Cosine similarity between the
    query and every gallery image is then computed, and the best-scoring
    image of each distinct label is kept.

    Args:
        input_image_path: Path of the query image file.
        top_k: Maximum number of distinct labels to return (default 3).

    Returns:
        List of at most ``top_k`` strings, best match first, each of the form
        ``"<rank>. 相似度：<percent>%：<material details>"``. The list is empty
        when the gallery is empty or ``top_k`` is below 1.
    """
    input_img = load_img(input_image_path, target_size=(128, 128), color_mode="grayscale")
    input_array = img_to_array(input_img) / 255.0
    input_features = np.asarray(extract_features(input_array)).reshape(1, -1)

    gallery = np.asarray(images)
    if top_k < 1 or len(gallery) == 0:
        return []

    # One batched forward pass over the gallery; verbose=0 keeps the cell output clean.
    gallery_features = np.asarray(feature_extractor.predict(gallery, verbose=0))
    gallery_features = gallery_features.reshape(len(gallery), -1)
    similarities = cosine_similarity(input_features, gallery_features)[0]

    # Stable descending order: ties keep their gallery order.
    ranking = np.argsort(-similarities, kind="stable")

    # Keep only the best-scoring image of each label.
    unique_results = []
    seen_labels = set()
    for idx in ranking:
        label = labels[idx]
        if label in seen_labels:
            continue
        seen_labels.add(label)
        unique_results.append((label, similarities[idx] * 100))  # Convert to percentage
        if len(unique_results) == top_k:
            break

    # Format results
    return [
        f"{rank}. 相似度：{percentage:.2f}%：{format_material_details(label)}"
        for rank, (label, percentage) in enumerate(unique_results, start=1)
    ]

In [106]:
# Path to the query image used for this single-image check
input_image_path = './第一组测试素材/1-10.20.20.20-2.jpg'

# find_most_similar returns a list of already formatted result strings
# (at most three, one per distinct label, best match first)
top_3_results = find_most_similar(input_image_path)

# Print a header line followed by one formatted match per line
print("最相似材料：")
for result in top_3_results:
    print(result)

最相似材料：
1. 相似度：91.13%：材料1，弯曲强度：50，强度：20，变形强度：20，变形率：20, 背面
2. 相似度：75.45%：材料1，弯曲强度：10，强度：20，变形强度：20，变形率：20, 背面
3. 相似度：44.01%：材料1，弯曲强度：60，强度：20，变形强度：20，变形率：20, 背面


In [109]:
def test_folder(folder_path):
    total_images = 0
    correct_predictions = 0
    incorrect_results = []  # To log incorrect predictions

    # Loop through all images in the folder
    for file in os.listdir(folder_path):
        if file.endswith('.jpg') or file.endswith('.png'):  # Ensure only image files are processed
            total_images += 1
            image_path = os.path.join(folder_path, file)

            # Extract the true label from the filename
            true_label = file.split('-')[0]  # Assuming label is before the first '-'

            # Predict the most similar material
            top_3_results = find_most_similar(image_path)

            # Extract the predicted label from the top result
            top_result = top_3_results[0]
            predicted_label = top_result.split("：")[-1].split("，")[0].strip("材料")  # Extract the predicted label

            # Check if the prediction is correct
            if true_label == predicted_label:
                correct_predictions += 1
            else:
                incorrect_results.append((file, true_label, predicted_label))

    # Calculate accuracy
    accuracy = (correct_predictions / total_images) * 100 if total_images > 0 else 0

    # Print results
    print(f"Total Images Tested: {total_images}")
    print(f"Correct Predictions: {correct_predictions}")
    print(f"Incorrect Predictions: {len(incorrect_results)}")
    print(f"Accuracy: {accuracy:.2f}%")

    # Log incorrect results
    if incorrect_results:
        print("\nIncorrect Predictions:")
        for file, true_label, predicted_label in incorrect_results:
            print(f"File: {file}, True Label: {true_label}, Predicted Label: {predicted_label}")

    return accuracy

In [110]:
# Folder holding the held-out test images
folder_path = "./第一组测试素材"
# test_folder prints a per-file report and returns the accuracy as a percentage
accuracy = test_folder(folder_path)
print(f"Overall Test Accuracy: {accuracy:.2f}%")

Total Images Tested: 23
Correct Predictions: 0
Incorrect Predictions: 23
Accuracy: 0.00%

Incorrect Predictions:
File: 1-10.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-100.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 正面
File: 1-10.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 正面
File: 1-100.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-90.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-60.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 正面
File: 1-90.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-60.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-80.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-70.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 背面
File: 28.20.20.20-1.jpg, True Label: 28.20.20.20, Predicted Label: 20, 正面
File: 1-80.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 背面
File: 2-0.40.20.20-1.jpg, True Label: 2, Predicted Label: 20, 正面
File: 1-70.20.20.20-