In [4]:
import os
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50V2, EfficientNetB0, InceptionV3
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, GlobalAveragePooling2D, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

import seaborn as sns
import gc

In [5]:
import tensorflow as tf
# Count the GPUs TensorFlow can see before doing any configuration.
_visible_gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(_visible_gpus))

Num GPUs Available:  1


In [6]:
# GPU configuration
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Expose the first physical GPU as a single logical device whose
        # memory is capped at 7680 (annotated as 7.5GB in the original cell).
        _memory_cap = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=7680)
        tf.config.experimental.set_virtual_device_configuration(gpus[0], [_memory_cap])

        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Report the configuration failure instead of aborting the notebook.
        print(e)

print("GPU Available: ", tf.config.list_physical_devices('GPU'))

# Memory cleanup callback
class MemoryCleanupCallback(tf.keras.callbacks.Callback):
    """Keras callback that runs Python garbage collection after every epoch.

    Pass an instance in the ``callbacks`` list of ``model.fit`` to reclaim
    unreachable Python objects between epochs.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Collect garbage once the epoch has finished.

        Args:
            epoch: Index of the epoch that just ended (unused).
            logs: Metric dictionary supplied by Keras (unused).
        """
        # tf.keras.backend.clear_session() is intentionally NOT called here:
        # it resets the global state Keras created, and doing that while the
        # model is still being fitted can invalidate state the running model
        # depends on. Call clear_session() between building separate models.
        gc.collect()

1 Physical GPUs, 1 Logical GPUs
GPU Available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [8]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

def load_all_data(base_dir, img_size=(299, 299)):
    """Load every image under ``base_dir`` as a resized grayscale array.

    The function reads ``<base_dir>/<subset>/<category>`` for the subsets
    ``train``, ``test`` and ``val`` and the categories ``NORMAL`` (label 0)
    and ``PNEUMONIA`` (label 1), pooling all subsets into one dataset.

    Args:
        base_dir: Root folder containing the subset/category folders.
        img_size: Target size handed unchanged to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays; ``labels[i]`` is the
        category index of ``images[i]``.

    Raises:
        OSError: Propagated from ``os.listdir`` when an expected
            subset/category folder cannot be listed.
    """
    images = []
    labels = []
    categories = ['NORMAL', 'PNEUMONIA']

    for class_num, category in enumerate(categories):
        for subset in ['train', 'test', 'val']:
            subset_dir = os.path.join(base_dir, subset, category)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # dataset order (and any seeded split made from it) reproducible.
            for img in sorted(os.listdir(subset_dir)):
                img_path = os.path.join(subset_dir, img)
                try:
                    img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                    if img_array is None:
                        # cv2.imread reports an unreadable file by returning
                        # None instead of raising, so skip it explicitly.
                        print(f"Error loading image {img}: could not be read as an image")
                        continue
                    # Resize before appending so both lists stay in sync if
                    # the resize fails.
                    images.append(cv2.resize(img_array, img_size))
                    labels.append(class_num)
                except Exception as e:
                    # Best effort: report the bad file and keep loading.
                    print(f"Error loading image {img}: {e}")

    return np.array(images), np.array(labels)

def split_data(X, y, test_size=0.2, val_size=0.2, random_state=42):
    """Split a dataset into stratified train, validation and test parts.

    Args:
        X: Samples.
        y: Labels aligned with ``X``; used for stratification.
        test_size: Fraction of the full dataset reserved for testing.
        val_size: Fraction of the full dataset reserved for validation.
        random_state: Seed forwarded to both ``train_test_split`` calls so
            the split is repeatable. Defaults to 42, the previously
            hard-coded value.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

    # val_size is expressed relative to the full dataset, so rescale it to a
    # fraction of what remains once the test part has been removed.
    val_fraction = val_size / (1 - test_size)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val,
        y_train_val,
        test_size=val_fraction,
        stratify=y_train_val,
        random_state=random_state,
    )

    return X_train, y_train, X_val, y_val, X_test, y_test

In [9]:
# Folder that holds the dataset.
base_dir = "datasets/Pneumonia"

# X holds every image; y holds the class label of each image.
X, y = load_all_data(base_dir)

X_train, y_train, X_val, y_val, X_test, y_test = split_data(X, y)

In [10]:
# Report the array shapes of every split, one line per split.
for _split_name, _split_images, _split_labels in (
    ("Train", X_train, y_train),
    ("Validation", X_val, y_val),
    ("Test", X_test, y_test),
):
    print(f"{_split_name} data shape: {_split_images.shape}, {_split_name} labels shape: {_split_labels.shape}")

Train data shape: (3513, 299, 299), Train labels shape: (3513,)
Validation data shape: (1171, 299, 299), Validation labels shape: (1171,)
Test data shape: (1172, 299, 299), Test labels shape: (1172,)


In [12]:
def check_class_distribution(y):
    """Print how many samples each class has and its share of ``y``.

    One line is printed per distinct value found in ``y``, showing the
    value, its count and its percentage of ``len(y)`` with two decimals.
    """
    total = len(y)
    classes, occurrences = np.unique(y, return_counts=True)
    for idx in range(len(classes)):
        share = occurrences[idx] / total * 100
        print(f"Class {classes[idx]}: {occurrences[idx]} ({share:.2f}%)")

# Print the class balance of each split under its own heading.
for _split_title, _split_targets in (
    ("Train", y_train),
    ("Validation", y_val),
    ("Test", y_test),
):
    print(f"\n{_split_title} set class distribution:")
    check_class_distribution(_split_targets)


Train set class distribution:
Class 0: 950 (27.04%)
Class 1: 2563 (72.96%)

Validation set class distribution:
Class 0: 316 (26.99%)
Class 1: 855 (73.01%)

Test set class distribution:
Class 0: 317 (27.05%)
Class 1: 855 (72.95%)


In [14]:
def _as_model_input(images):
    """Return ``images`` as float32 divided by 255 with a trailing channel axis.

    A 4-D array is returned unchanged: it has already been converted by an
    earlier run of this cell, and converting it again would divide by 255 a
    second time. The spatial size is taken from the array itself instead of
    being hard-coded.
    """
    if images.ndim == 4:
        return images
    return images[..., np.newaxis].astype('float32') / 255


# Add the single grayscale channel axis and rescale the pixel values.
X_train = _as_model_input(X_train)
X_val = _as_model_input(X_val)
X_test = _as_model_input(X_test)

In [21]:
from tensorflow.keras.utils import to_categorical
# Convert the labels to one-hot encoding.
def _to_one_hot(labels):
    """One-hot encode integer class labels into two columns.

    Labels that are already 2-D are returned unchanged so that re-running
    this cell does not encode one-hot rows a second time.
    """
    if np.ndim(labels) == 2:
        return labels
    return to_categorical(labels, num_classes=2)


y_train = _to_one_hot(y_train)
y_val = _to_one_hot(y_val)
y_test = _to_one_hot(y_test)

In [22]:
# Vanilla CNN
model = Sequential([
    # Declare the input with an explicit Input object; passing input_shape=
    # to the first layer is deprecated in Keras 3.
    Input(shape=(299, 299, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    # Flatten the feature maps to 1-D so they can be fed to the fully
    # connected layers.
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(.5),  # general case: 0.2~0.3
    Dense(2, activation='softmax')  # two output values, one per class
])

In [23]:
# The labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [24]:
# Train for 10 epochs and evaluate on the validation split after each one.
# NOTE(review): in the recorded output val_accuracy stays at 0.7301 in every
# epoch, which equals the class-1 share of the validation set (73.01%) --
# presumably the network is predicting a single class; verify.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
    verbose=1,
)

Epoch 1/10


2024-07-08 17:29:31.393028: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 309ms/step - accuracy: 0.7144 - loss: 0.6121 - val_accuracy: 0.7301 - val_loss: 1.3017
Epoch 2/10
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 284ms/step - accuracy: 0.7310 - loss: 0.6085 - val_accuracy: 0.7301 - val_loss: 0.6492
Epoch 3/10
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 281ms/step - accuracy: 0.7322 - loss: 0.5889 - val_accuracy: 0.7301 - val_loss: 0.6431
Epoch 4/10
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 282ms/step - accuracy: 0.7380 - loss: 0.5855 - val_accuracy: 0.7301 - val_loss: 0.6336
Epoch 5/10
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 279ms/step - accuracy: 0.7374 - loss: 0.5843 - val_accuracy: 0.7301 - val_loss: 0.6530
Epoch 6/10
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 280ms/step - accuracy: 0.7373 - loss: 0.5836 - val_accuracy: 0.7301 - val_loss: 0.6499
Epoch 7/10
[1m110/11