In [12]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics.pairwise import cosine_similarity
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import random
import shutil
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.utils.class_weight import compute_class_weight

# Step 1: Load and Preprocess Images

In [4]:
import imghdr

def load_images_from_folder(folder):
    images = []
    labels = []
    for label in os.listdir(folder):
        label_path = os.path.join(folder, label)
        if os.path.isdir(label_path):
            for file in os.listdir(label_path):
                file_path = os.path.join(label_path, file)
                # Check if the file is a valid image
                if imghdr.what(file_path) in ['jpeg', 'png', 'jpg']:
                    try:
                        img = load_img(file_path, target_size=(128, 128), color_mode="grayscale")
                        img_array = img_to_array(img) / 255.0  # Normalize pixel values
                        images.append(img_array)
                        labels.append(label)
                    except Exception as e:
                        print(f"Error loading image {file_path}: {e}")
    return np.array(images), np.array(labels)


# Load data
data_folder = './Materials_data'
images, labels = load_images_from_folder(data_folder)

# Shuffle data to ensure proper distribution
images, labels = shuffle(images, labels, random_state=42)

# Encode string labels to integers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the encoded labels
images_train, images_val, labels_train, labels_val = train_test_split(
    images, encoded_labels, test_size=0.2, random_state=42, stratify=encoded_labels
)

# Verify class distribution
print("Training set class distribution:", Counter(labels_train))
print("Validation set class distribution:", Counter(labels_val))



Training set class distribution: Counter({0: 37, 5: 33, 12: 33, 7: 33, 1: 33, 13: 32, 18: 29, 2: 29, 15: 29, 19: 29, 11: 29, 16: 29, 8: 28, 14: 28, 9: 28, 17: 26, 6: 26, 10: 26, 20: 24, 4: 24, 21: 24, 3: 22, 89: 20, 90: 19, 48: 16, 118: 16, 40: 16, 24: 16, 25: 16, 44: 16, 79: 16, 149: 16, 173: 16, 97: 16, 144: 16, 54: 16, 102: 16, 129: 16, 99: 16, 101: 16, 103: 16, 60: 16, 160: 16, 168: 16, 68: 16, 147: 16, 72: 16, 83: 16, 94: 16, 51: 16, 122: 16, 70: 16, 176: 16, 56: 16, 152: 16, 136: 16, 123: 16, 110: 16, 114: 16, 171: 16, 28: 16, 178: 16, 62: 16, 157: 16, 64: 16, 87: 16, 159: 16, 166: 16, 132: 16, 45: 16, 151: 16, 91: 16, 146: 16, 52: 16, 76: 16, 170: 16, 109: 16, 27: 16, 115: 16, 80: 16, 141: 16, 153: 16, 108: 16, 42: 16, 165: 16, 150: 16, 67: 16, 111: 16, 112: 16, 161: 16, 142: 16, 140: 16, 26: 16, 116: 16, 22: 16, 172: 16, 75: 16, 78: 16, 119: 16, 57: 16, 92: 16, 177: 16, 30: 16, 128: 16, 117: 16, 105: 16, 138: 16, 69: 16, 86: 16, 47: 16, 98: 16, 63: 16, 156: 16, 95: 16, 100: 16,

In [5]:
images, labels = load_images_from_folder(data_folder)
print(f"Number of images: {len(images)}")
print(f"Number of labels: {len(labels)}")
print(f"Unique labels: {set(labels)}")

Number of images: 3938
Number of labels: 3938
Unique labels: {'11-60.20.60.20', '6-50.20.40.20', '2-100.40.20.20', '6-80.20.40.20', '13-30.60.60.200', '10-10.100.40.20', '8-10.60.40.20', '1-90.20.20.20-2', '1-60.20.20.20-2', '12-100.40.60.20', '13-10.60.60.20', '13-20.60.60.20', '3-90.60.20.20', '8-90.60.40.20', '7-80.40.40.20', '9-30.80.40.20', '4-50.80.20.20', '4-0.80.20.20', '5-40.100.20.20', '6-0.20.40.20', '11-90.20.60.20', '8-20.60.40.20', '3-100.60.20.20', '15-0.100.60.20', '15-80.100.60.20', '10-40.100.40.20', '13-0.60.60.20', '10-60.100.40.20', '2-40.40.20.20', '4-60.80.20.20', '8-50.60.40.20', '13-90.60.60.20', '14-60.80.60.20', '5-80.100.20.20', '1-30.20.20.20-1', '3-20.60.20.20', '1-90.20.20.20-1', '1-20.20.20.20-2', '3-60.60.20.20', '2-90.40.20.20', '7-90.40.40.20', '2-60.40.20.20', '5-10.100.20.20', '9-40.80.40.20', '1-70.20.20.20.1', '9-80.80.40.20', '13-60.60.60.20', '14-0.80.60.20', '1-0.20.20.20-1', '12-10.40.60.20', '12-20.40.60.20', '7-60.40.40.20', '2-20.40.20.20',

# Step 2: Build CNN Model

In [18]:
# Step 3: Build CNN Model
def build_model():
    num_classes = len(set(encoded_labels))
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(256, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.00005), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

model = build_model()

# Step 4: Data Augmentation and Training
data_gen = ImageDataGenerator()

train_gen = data_gen.flow(images_train, labels_train, batch_size=32)
val_gen = data_gen.flow(images_val, labels_val, batch_size=32)

model.fit(train_gen, validation_data=val_gen, epochs=600)

# Step 5: Feature Extraction and Similarity Calculation
feature_extractor = Sequential(model.layers[:-1])  # Remove the output layer for feature extraction

def extract_features(image):
    return feature_extractor.predict(image[np.newaxis, ...])





Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

# Step 6: Find Similar Material

In [119]:
def format_material_details(label):
    # Parse the label to extract material details
    parts = label.split("-")  # Example: '1-20.20.20.20-1'
    material_id = parts[0]
    attributes = parts[1].split(".")
    bend_strength = attributes[0]
    strength = attributes[1]
    deformation_strength = attributes[2]
    deformation_rate = attributes[3]
    face = "正面" if parts[-1] == "1" else "背面"
    return f"材料{material_id}，弯曲强度：{bend_strength}，强度：{strength}，变形强度：{deformation_strength}，变形率：{deformation_rate}, {face}"

def find_most_similar(input_image_path):
    input_img = load_img(input_image_path, target_size=(128, 128), color_mode="grayscale")
    input_array = img_to_array(input_img) / 255.0
    input_features = extract_features(input_array)

    similarities = []
    for idx, image in enumerate(images):
        image_features = extract_features(image)
        similarity = cosine_similarity(input_features.reshape(1, -1), image_features.reshape(1, -1))[0][0]
        similarities.append((similarity, labels[idx]))

    # Sort similarities in descending order
    similarities = sorted(similarities, key=lambda x: x[0], reverse=True)

    # Extract unique labels with their percentage matches
    unique_results = []
    seen_labels = set()
    for similarity, label in similarities:
        if label not in seen_labels:
            seen_labels.add(label)
            percentage = similarity * 100  # Convert to percentage
            unique_results.append((label, percentage))
        if len(unique_results) == 3:  # Limit to top 3 unique labels
            break

    # Format results
    formatted_results = []
    for i, (label, percentage) in enumerate(unique_results, start=1):
        material_details = format_material_details(label)
        formatted_results.append(f"{i}. 相似度：{percentage:.2f}%：{material_details}")
    
    return formatted_results

In [1]:
# Path to the input test image
result_string = ""
for path in os.listdir('./第一组测试素材'):
    input_image_path = os.path.join('./第一组测试素材', path)
    # Call the function to find the top 3 most similar unique materials
    top_3_results = find_most_similar(input_image_path)

    # Print the results
    result_string += "True label:" + path + "\n"
    result_string += "最相似材料:"
    for result in top_3_results:
        result_string += result
    result_string += "\n"

print(result_string)
# input_image_path = './第一组测试素材/1-10.20.20.20-2.jpg'

# # Call the function to find the top 3 most similar unique materials
# top_3_results = find_most_similar(input_image_path)

# # Print the results
# print("最相似材料：")
# for result in top_3_results:
#     print(result)

NameError: name 'os' is not defined

In [109]:
def test_folder(folder_path):
    total_images = 0
    correct_predictions = 0
    incorrect_results = []  # To log incorrect predictions

    # Loop through all images in the folder
    for file in os.listdir(folder_path):
        if file.endswith('.jpg') or file.endswith('.png'):  # Ensure only image files are processed
            total_images += 1
            image_path = os.path.join(folder_path, file)

            # Extract the true label from the filename
            true_label = file.split('-')[0]  # Assuming label is before the first '-'

            # Predict the most similar material
            top_3_results = find_most_similar(image_path)

            # Extract the predicted label from the top result
            top_result = top_3_results[0]
            predicted_label = top_result.split("：")[-1].split("，")[0].strip("材料")  # Extract the predicted label

            # Check if the prediction is correct
            if true_label == predicted_label:
                correct_predictions += 1
            else:
                incorrect_results.append((file, true_label, predicted_label))

    # Calculate accuracy
    accuracy = (correct_predictions / total_images) * 100 if total_images > 0 else 0

    # Print results
    print(f"Total Images Tested: {total_images}")
    print(f"Correct Predictions: {correct_predictions}")
    print(f"Incorrect Predictions: {len(incorrect_results)}")
    print(f"Accuracy: {accuracy:.2f}%")

    # Log incorrect results
    if incorrect_results:
        print("\nIncorrect Predictions:")
        for file, true_label, predicted_label in incorrect_results:
            print(f"File: {file}, True Label: {true_label}, Predicted Label: {predicted_label}")

    return accuracy

In [110]:
folder_path = "./第一组测试素材"
accuracy = test_folder(folder_path)
print(f"Overall Test Accuracy: {accuracy:.2f}%")

Total Images Tested: 23
Correct Predictions: 0
Incorrect Predictions: 23
Accuracy: 0.00%

Incorrect Predictions:
File: 1-10.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-100.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 正面
File: 1-10.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 正面
File: 1-100.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-90.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-60.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 正面
File: 1-90.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-60.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-80.20.20.20-2.jpg, True Label: 1, Predicted Label: 20, 背面
File: 1-70.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 背面
File: 28.20.20.20-1.jpg, True Label: 28.20.20.20, Predicted Label: 20, 正面
File: 1-80.20.20.20-1.jpg, True Label: 1, Predicted Label: 20, 背面
File: 2-0.40.20.20-1.jpg, True Label: 2, Predicted Label: 20, 正面
File: 1-70.20.20.20-