In [None]:
from google.colab import files
import os
import zipfile

# Upload the Kaggle API key file (kaggle.json) via google.colab's files.upload()
files.upload()

# Copy the key into /root/.kaggle and restrict it to owner read/write (chmod 600)
os.makedirs('/root/.kaggle', exist_ok=True)
!cp kaggle.json /root/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

# Install the Kaggle API client (disabled)
# !pip install kaggle

# Download and unzip the first dataset (disabled)
# !kaggle datasets download -d nexuswho/tomatod -p /content/tomato_dataset_1
# !unzip -q /content/tomato_dataset_1/tomatod.zip -d /content/tomato_dataset_1

# Download and unzip the second dataset into /content/tomato_dataset_2
!kaggle datasets download -d nexuswho/laboro-tomato -p /content/tomato_dataset_2
!unzip -q /content/tomato_dataset_2/laboro-tomato.zip -d /content/tomato_dataset_2

# Create the folders that will hold the merged images and labels
os.makedirs('/content/tomato_dataset/Images', exist_ok=True)
os.makedirs('/content/tomato_dataset/labels', exist_ok=True)

# Copy-and-rename helper for the first dataset (modified version; currently disabled)
# def copy_and_rename_1(src_folder, dst_folder, prefix, process_labels=False):
#     for root, dirs, files in os.walk(src_folder):
#         for file in files:
#             src_path = os.path.join(root, file)
#             relative_path = os.path.relpath(root, src_folder)
#             dst_dir = os.path.join(dst_folder, os.path.dirname(relative_path))
#             os.makedirs(dst_dir, exist_ok=True)
#             dst_path = os.path.join(dst_dir, f"{prefix}_{file}")

#             if process_labels:
#                 with open(src_path, 'r') as f:
#                     lines = f.readlines()
#                 with open(dst_path, 'w') as f:
#                     for line in lines:
#                         parts = line.strip().split()
#                         class_id = int(parts[0])
#                         if class_id in [2]:
#                             parts[0] = '0'
#                         elif class_id in [1]:
#                             parts[0] = '1'
#                         else:
#                             parts[0] = '2'
#                         f.write(' '.join(parts) + '\n')
#             else:
#                 os.rename(src_path, dst_path)

def copy_and_rename_2(src_folder, dst_folder, prefix, process_labels=False):
    """Transfer every file under ``src_folder`` into ``dst_folder`` with a name prefix.

    Each file ``name`` is written as ``f"{prefix}_{name}"``. The destination
    directory is ``dst_folder`` joined with the *parent* of the walked
    directory's path relative to ``src_folder``; as a result, files located
    directly in ``src_folder`` or in one of its immediate subdirectories all
    land directly in ``dst_folder``.

    Args:
        src_folder: Directory tree to walk with ``os.walk``.
        dst_folder: Root directory that receives the renamed files.
        prefix: String prepended (followed by ``_``) to every file name.
        process_labels: When False (default) each file is *moved* with
            ``os.rename`` (the source file no longer exists afterwards).
            When True each file is read as whitespace-separated text, the
            first token of every line is remapped as a class id, and the
            result is written to the destination; the source file is left
            in place.

    Class id remapping (``process_labels=True``):
        0 and 3 -> 0, 1 and 4 -> 1, any other id -> 2.

    Raises:
        ValueError: If the first token of a non-blank label line is not an
            integer literal.
    """
    # Source class ids collapsed onto merged ids; anything not listed becomes '2'.
    class_id_map = {0: '0', 3: '0', 1: '1', 4: '1'}
    fallback_class = '2'

    # The file list is named `filenames` (not `files`) so it does not shadow
    # the `files` module imported from google.colab at the top of the cell.
    for root, _dirs, filenames in os.walk(src_folder):
        if not filenames:
            # Only directories that contain files get a destination directory.
            continue

        relative_path = os.path.relpath(root, src_folder)
        dst_dir = os.path.join(dst_folder, os.path.dirname(relative_path))
        os.makedirs(dst_dir, exist_ok=True)

        for filename in filenames:
            src_path = os.path.join(root, filename)
            dst_path = os.path.join(dst_dir, f"{prefix}_{filename}")

            if not process_labels:
                os.rename(src_path, dst_path)
                continue

            with open(src_path, 'r') as src_file:
                lines = src_file.readlines()

            with open(dst_path, 'w') as dst_file:
                for line in lines:
                    parts = line.split()
                    if not parts:
                        # Blank / whitespace-only line: nothing to remap, and
                        # indexing parts[0] would raise IndexError.
                        continue
                    parts[0] = class_id_map.get(int(parts[0]), fallback_class)
                    dst_file.write(' '.join(parts) + '\n')

# Dataset 1: move and rename files (disabled)
# copy_and_rename_1('/content/tomato_dataset_1/images/train', '/content/tomato_dataset/Images', 'dataset1_train')
# copy_and_rename_1('/content/tomato_dataset_1/labels/train', '/content/tomato_dataset/labels', 'dataset1_train', process_labels=True)

# copy_and_rename_1('/content/tomato_dataset_1/images/val', '/content/tomato_dataset/Images', 'dataset1_val')
# copy_and_rename_1('/content/tomato_dataset_1/labels/val', '/content/tomato_dataset/labels', 'dataset1_val', process_labels=True)

# Dataset 2: for each split (train, then val) transfer the images first,
# then write the class-remapped label files, all prefixed with the split name.
for split in ('train', 'val'):
    split_root = f'/content/tomato_dataset_2/{split}'
    split_prefix = f'dataset2_{split}'
    copy_and_rename_2(f'{split_root}/images', '/content/tomato_dataset/Images', split_prefix)
    copy_and_rename_2(f'{split_root}/labels', '/content/tomato_dataset/labels', split_prefix, process_labels=True)

# Sanity check: count the entries that ended up in each merged folder
images_count = len(os.listdir('/content/tomato_dataset/Images'))
labels_count = len(os.listdir('/content/tomato_dataset/labels'))

print(f"Total number of images: {images_count}")
print(f"Total number of labels: {labels_count}")


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/nexuswho/laboro-tomato
License(s): CC-BY-NC-SA-4.0
Downloading laboro-tomato.zip to /content/tomato_dataset_2
100% 1.53G/1.53G [00:26<00:00, 15.5MB/s]
100% 1.53G/1.53G [00:26<00:00, 63.3MB/s]
Total number of images: 804
Total number of labels: 804


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os
import numpy as np
import shutil  # 파일 압축을 위한 모듈
from google.colab import drive, files

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# Root folder of the prepared dataset
processed_dir = '/content/tomato_dataset'

# Augmentation / preprocessing settings for ImageDataGenerator.
# Every generator created from `datagen` rescales pixels by 1/255 and applies
# these random transforms.
augmentation_options = {
    'rescale': 1. / 255,
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)

# Build the classifier on top of DenseNet121 (ImageNet pre-trained weights)
def create_model(input_shape=(256, 256, 3), num_classes=3, learning_rate=0.0001):
    """Build and compile a DenseNet121-based softmax classifier.

    The DenseNet121 backbone is loaded with ImageNet weights, without its
    top classifier and with global average pooling, and is frozen
    (``trainable = False``) so only the dense head added here is trained.

    Args:
        input_shape: Shape passed to DenseNet121 as ``input_shape``.
            Defaults to ``(256, 256, 3)``.
        num_classes: Number of units in the final softmax layer.
            Defaults to 3.
        learning_rate: Learning rate for the Adam optimizer.
            Defaults to 0.0001.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        and reporting accuracy.
    """
    conv_base = DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
        pooling='avg',
    )
    # Freeze the pre-trained backbone; only the head below receives updates.
    conv_base.trainable = False

    model = Sequential([
        conv_base,
        BatchNormalization(),
        Dense(256, activation='relu'),
        Dropout(0.35),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dense(64, activation='relu'),
        Dropout(0.35),
        Dense(32, activation='relu'),
        BatchNormalization(),
        Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Data generators
# Training data goes through the augmenting `datagen`.
train_generator = datagen.flow_from_directory(
    os.path.join(processed_dir, 'train'),
    target_size=(256, 256),
    batch_size=16,
    class_mode='categorical'  # multi-class problem, so labels are one-hot encoded
)

# Validation data must not be randomly augmented, otherwise validation metrics
# change from run to run and do not measure performance on the real images.
# Only the same 1/255 rescaling as the training generator is applied.
valid_datagen = ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_directory(
    os.path.join(processed_dir, 'val'),
    target_size=(256, 256),
    batch_size=16,
    class_mode='categorical',  # multi-class problem, so labels are one-hot encoded
    shuffle=False  # fixed order keeps evaluation deterministic
)

# Build the model
model = create_model()

# Checkpoint location on Google Drive
checkpoint_dir = os.path.join('/content/drive/MyDrive/checkpoints', 'best_checkpoint')
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_path = os.path.join(checkpoint_dir, 'cp-{epoch:04d}.weights.h5')

# ModelCheckpoint callback: keep weights only when validation accuracy improves.
# Monitoring training accuracy would select the weights that fit the training
# set best rather than the ones that generalize best.
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# EarlyStopping: stop after 5 epochs without validation-accuracy improvement
# and ask Keras to restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    verbose=1,
    restore_best_weights=True
)

# Train the model
history = model.fit(
    train_generator,
    epochs=50,
    validation_data=valid_generator,
    callbacks=[early_stopping, model_checkpoint]
)

# Evaluate on the validation set
loss, accuracy = model.evaluate(valid_generator)
print(f"Validation Accuracy: {accuracy}")

# Save the model in its state after training.
# NOTE(review): whether this equals the best epoch depends on EarlyStopping
# having restored the best weights — confirm for the installed Keras version.
best_model_path = os.path.join('/content/drive/MyDrive/checkpoints', 'best_model.keras')
model.save(best_model_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 2530 images belonging to 3 classes.
Found 465 images belonging to 3 classes.
Epoch 1/50
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308ms/step - accuracy: 0.4310 - loss: 1.2920
Epoch 1: accuracy improved from -inf to 0.49526, saving model to /content/drive/MyDrive/checkpoints/best_checkpoint/cp-0001.weights.h5
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 440ms/step - accuracy: 0.4314 - loss: 1.2911 - val_accuracy: 0.6731 - val_loss: 0.7745
Epoch 2/50
[1m158/159[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 262ms/step - accuracy: 0.6108 - loss: 0.8717
Epoch 2: accuracy improved from 0.49526 to 0.64585, saving model to /content/drive/MyDrive/checkpoints/best_checkpoint/cp-0002.weights.h5
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 324ms/step - accuracy: 0.6112 - loss: 0.8708 - val

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>