## Разделение данных

In [None]:
import os
import shutil

# Root of the input classification dataset and the two per-view output roots.
source_dir = '/kaggle/input/pallet-classification/classification_dataset'
destination_dir_bottom = '/kaggle/working/pallet_bottom_dataset'
destination_dir_side = '/kaggle/working/pallet_side_dataset'

# Make sure both output roots exist before anything is copied into them.
for output_root in (destination_dir_bottom, destination_dir_side):
    os.makedirs(output_root, exist_ok=True)

# Copy the files of one source folder into a target folder.
def copy_class_files(source, dest):
    """Copy every regular file found directly inside ``source`` into ``dest``.

    Args:
        source: Path of the directory whose files are copied.
        dest: Path of the directory that receives the copies. It is created
            (including missing parents) when it does not exist yet.

    Entries of ``source`` that are not regular files (for example nested
    directories) are skipped, because ``shutil.copy2`` can only copy files.
    Files are copied with their metadata; an existing file of the same name
    in ``dest`` is overwritten.
    """
    os.makedirs(dest, exist_ok=True)
    for file_name in os.listdir(source):
        src_file = os.path.join(source, file_name)
        # A sub-directory would make shutil.copy2 raise, so skip it.
        if not os.path.isfile(src_file):
            continue
        dest_file = os.path.join(dest, file_name)
        shutil.copy2(src_file, dest_file)

# Class folders expected under each pallet view.
bottom_classes = ['good_pallet', 'replace_pallet']
side_classes = ['good_pallet', 'replace_pallet']

# (source sub-folder, destination root, class names); bottom is handled
# first, then side.
copy_plan = (
    ('pallet_bottom', destination_dir_bottom, bottom_classes),
    ('pallet_side', destination_dir_side, side_classes),
)

for view_folder, view_destination, view_classes in copy_plan:
    for class_name in view_classes:
        source_class_path = os.path.join(source_dir, view_folder, class_name)
        dest_class_path = os.path.join(view_destination, class_name)
        # Each class gets its own folder below the view's destination root.
        os.makedirs(dest_class_path, exist_ok=True)
        copy_class_files(source_class_path, dest_class_path)

print("Разделение датасета завершено.")


## Балансировка классов

In [None]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array

# Bring every class folder of the bottom-view dataset up to `target_count`
# images by writing augmented copies next to the original files.
dataset_dir = '/kaggle/working/pallet_bottom_dataset'
class_folders = ['good_pallet', 'replace_pallet']

target_count = 50

# Random geometric transformations applied to produce each augmented copy.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

for class_folder in class_folders:
    folder_path = os.path.join(dataset_dir, class_folder)
    # Snapshot taken before any file is written, so only files that already
    # exist are used as augmentation sources.
    images = os.listdir(folder_path)
    current_count = len(images)
    # A folder that already holds enough images needs nothing (never negative).
    num_to_add = max(0, target_count - current_count)

    print(f"Папка '{class_folder}', текущее количество: {current_count}, нужно добавить: {num_to_add}")

    if num_to_add and not images:
        # Nothing to augment from; the modulo below would divide by zero.
        print(f"Папка '{class_folder}' пуста, аугментация пропущена.")
        continue

    for i in range(num_to_add):
        # Cycle through the source images so the augmented copies are spread
        # over all of them instead of all being derived from the first one.
        img_name = images[i % len(images)]
        img_path = os.path.join(folder_path, img_name)

        img = load_img(img_path)
        img_array = img_to_array(img)
        # flow() expects a batch dimension in front of the image dimensions.
        img_array = img_array.reshape((1,) + img_array.shape)

        # A distinct prefix per copy keeps a later file from overwriting an
        # earlier one (Keras only appends a short random suffix to the prefix).
        flow = datagen.flow(
            img_array,
            batch_size=1,
            save_to_dir=folder_path,
            save_prefix=f'aug_{i}',
            save_format='jpeg',
        )
        # flow() iterates endlessly; drawing a single batch writes exactly
        # one augmented image to `folder_path`.
        next(iter(flow))

print(f"Аугментация завершена. Теперь в каждой папке по {target_count} изображений.")


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from tensorflow.keras.optimizers import AdamW
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
import random


# Seed NumPy, TensorFlow and Python's `random` with the same value.
seed = 42
for seed_fn in (np.random.seed, tf.random.set_seed, random.seed):
    seed_fn(seed)

# Enable on-demand GPU memory allocation when a GPU is visible.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("GPU not available, using CPU.")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Using GPU: {gpus[0]}")
    except RuntimeError as err:
        # Reported instead of raised so the notebook keeps running.
        print(err)

# Dataset location and training hyper-parameters.
data_dir = '/kaggle/working/pallet_bottom_dataset'

img_size = (224, 224)
batch_size = 2
epochs = 50

def create_data_generators(data_dir, img_size, batch_size):
    """Create the training and validation iterators for a class-folder dataset.

    Args:
        data_dir: Directory that contains one sub-folder per class.
        img_size: ``(height, width)`` every image is resized to.
        batch_size: Number of images per batch.

    Returns:
        A ``(train_generator, val_generator)`` tuple. Both yield images
        rescaled by 1/255 together with binary labels. 20% of the files are
        reserved for validation.
    """
    validation_split = 0.2

    # Random augmentation is applied to training images only.
    train_datagen = ImageDataGenerator(
        rescale=1.0 / 255,
        validation_split=validation_split,
        horizontal_flip=True,
        zoom_range=0.3,
        rotation_range=30,
        brightness_range=[0.3, 1.3]
    )

    # Validation images are only rescaled, so evaluation does not depend on
    # random transformations. The same `validation_split` keeps the two
    # subsets complementary.
    val_datagen = ImageDataGenerator(
        rescale=1.0 / 255,
        validation_split=validation_split
    )

    train_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='binary',
        subset='training'
    )

    val_generator = val_datagen.flow_from_directory(
        data_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='binary',
        subset='validation',
        # The default is shuffle=True, which would put predictions in a
        # different order than `val_generator.classes` and corrupt every
        # metric computed from that pair.
        shuffle=False
    )

    return train_generator, val_generator

# Build both iterators once; `val_gen` is also read by the metrics callback
# and by the final report.
train_gen, val_gen = create_data_generators(
    data_dir=data_dir,
    img_size=img_size,
    batch_size=batch_size,
)

def build_efficientnetb3_model(input_shape, dropout_rate=0.5, l2_lambda=0.0002,
                               unfreeze_fraction=0.25):
    """Build a binary classifier on an ImageNet-pretrained MobileNetV2 backbone.

    The name is kept for existing callers; the backbone used here is
    MobileNetV2, not EfficientNet-B3.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
        dropout_rate: Dropout rate applied after global average pooling.
        l2_lambda: L2 regularization factor for the output layer's kernel.
        unfreeze_fraction: Fraction (0..1) of the backbone's layers, counted
            from the top, that are fine-tuned. The remaining layers are frozen.

    Returns:
        An uncompiled Keras ``Model`` with a single sigmoid output.

    Raises:
        ValueError: If ``unfreeze_fraction`` is outside ``[0, 1]``.
    """
    if not 0.0 <= unfreeze_fraction <= 1.0:
        raise ValueError(
            f"unfreeze_fraction must be within [0, 1], got {unfreeze_fraction}"
        )

    inputs = Input(shape=input_shape)
    # NOTE(review): the ImageNet weights of MobileNetV2 are documented for
    # inputs scaled to [-1, 1], while the generators in this notebook rescale
    # to [0, 1] - confirm this is intended.
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_tensor=inputs)

    # Index of the first backbone layer that is fine-tuned.
    num_layers = len(base_model.layers)
    num_unfreeze = int(num_layers * unfreeze_fraction)
    first_trainable = num_layers - num_unfreeze

    for index, layer in enumerate(base_model.layers):
        # BatchNormalization layers stay frozen even inside the fine-tuned
        # range, so their pretrained statistics are not overwritten by
        # estimates from very small batches.
        layer.trainable = (
            index >= first_trainable
            and not isinstance(layer, tf.keras.layers.BatchNormalization)
        )

    # Classification head on top of the backbone features.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    outputs = Dense(1, activation='sigmoid', kernel_regularizer=l2(l2_lambda))(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

# Instantiate the classifier for 3-channel images of the configured size.
model_input_shape = (img_size[0], img_size[1], 3)
model = build_efficientnetb3_model(input_shape=model_input_shape)

# Binary cross-entropy pairs with the model's single sigmoid output.
optimizer = AdamW(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

class MetricsCallback(tf.keras.callbacks.Callback):
    """Print precision, recall, F1 and the confusion matrix after each epoch.

    The metrics are computed on the module-level ``val_gen`` iterator with a
    decision threshold of 0.5 on the model's output.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Score the model on ``val_gen`` and print the additional metrics.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            logs: Metric dictionary supplied by Keras (not used here).
        """
        val_gen.reset()

        # Labels are taken from the very batches that are scored, so they
        # stay aligned with the predictions even if the iterator shuffles.
        true_batches, prob_batches = [], []
        for batch_index in range(len(val_gen)):
            batch = val_gen[batch_index]
            prob_batches.append(np.ravel(self.model.predict_on_batch(batch[0])))
            true_batches.append(np.ravel(batch[1]))

        if not true_batches:
            # No validation batches: there is nothing to score.
            return

        val_true = np.concatenate(true_batches).astype(int)
        val_pred = (np.concatenate(prob_batches) > 0.5).astype(int)

        # zero_division=0 reports 0 instead of warning when a class is never
        # predicted; labels=[0, 1] guarantees a 2x2 confusion matrix even if
        # only one class occurs, so the FN/FP lookups below cannot fail.
        precision = precision_score(val_true, val_pred, average='binary', zero_division=0)
        recall = recall_score(val_true, val_pred, average='binary', zero_division=0)
        f1 = f1_score(val_true, val_pred, average='binary', zero_division=0)
        cm = confusion_matrix(val_true, val_pred, labels=[0, 1])

        print(f"\nEpoch {epoch + 1} - Additional Metrics:")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print("Confusion Matrix:")
        print(cm)
        print(f"False Negatives (FN): {cm[1, 0]}, False Positives (FP): {cm[0, 1]}")

# Train the model; the callback prints extra validation metrics every epoch.
epoch_callbacks = [MetricsCallback()]
history = model.fit(
    x=train_gen,
    validation_data=val_gen,
    epochs=epochs,
    callbacks=epoch_callbacks,
)

# Final evaluation on the validation iterator.
val_gen.reset()

# Labels are collected from the same batches that are scored, so they stay
# aligned with the predictions even if the iterator shuffles its samples.
val_true_batches, val_prob_batches = [], []
for batch_index in range(len(val_gen)):
    batch = val_gen[batch_index]
    val_prob_batches.append(np.asarray(model.predict_on_batch(batch[0])))
    val_true_batches.append(np.ravel(batch[1]))

val_pred_prob = np.concatenate(val_prob_batches, axis=0)
val_pred = (val_pred_prob > 0.5).astype(int)
val_true = np.concatenate(val_true_batches).astype(int)

print("\nFinal Classification Report:")
# labels=[0, 1] keeps the report in step with `target_names` even when one
# class is missing from the validation labels and predictions.
print(classification_report(
    val_true,
    val_pred.ravel(),
    labels=[0, 1],
    target_names=['good_pallet', 'replace_pallet'],
    zero_division=0,
))


In [None]:
import keras
# Persist the trained model under a relative path (the current working
# directory of the notebook).
# NOTE(review): the ".h5" suffix presumably selects the legacy HDF5 format -
# confirm against the installed Keras version.
keras.saving.save_model(model, 'pallet_bottom_classifier_mn_optuna.h5')