<a href="https://colab.research.google.com/github/sazaqa0901/test/blob/main/deepfake_resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

-중간발표 전에 짠 코드
-데이터 분할 동욱이가 짠 코드 썼어

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import zipfile
zip_file_name = '/content/drive/MyDrive/기학기/Dataset.zip'
extraction_dir = '/content/dataset'

with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall(extraction_dir)

In [3]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (GlobalAveragePooling2D, Dense, Dropout)
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import os
import glob
import numpy as np
from sklearn.model_selection import train_test_split

In [4]:
MODEL_SAVE_PATH = '/content/drive/MyDrive/기학기/best_alexnet_model.h5'
face_real_dir = '/content/dataset/Dataset/Train/Real'
face_fake_dir = '/content/dataset/Dataset/Train/Fake'

real_paths = glob.glob(os.path.join(face_real_dir, "*.*"))
fake_paths = glob.glob(os.path.join(face_fake_dir, "*.*"))

all_paths = real_paths + fake_paths
labels = [0] * len(real_paths) + [1] * len(fake_paths)

In [5]:
IMG_SIZE = 224
EPOCHS = 20
BATCH_SIZE = 64
NUM_SAMPLES = 1000 # 사용할 총 샘플 수
AUTOTUNE = tf.data.AUTOTUNE

In [6]:
 #--- 데이터 개수 샘플링 ---
_, target_paths, _, target_labels = train_test_split(
        all_paths, labels,
        test_size=NUM_SAMPLES,
        random_state=42,
        stratify=labels
    )
# --- 7 : 3으로 분할 ---
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
        target_paths, target_labels,
        test_size=0.3,
        random_state=42,
        stratify=target_labels
    )
# --- 2 : 1로 분할 ---
val_paths, test_paths, val_labels, test_labels = train_test_split(
        temp_paths, temp_labels,
        test_size=(1/3),
        random_state=42,
        stratify=temp_labels
    )

In [7]:
def load_and_resize_images(image_path, label):
    image = tf.io.read_file(image_path)             #이미지 읽어오기
    image = tf.image.decode_jpeg(image, channels=3) #jpg
    image = tf.image.resize_with_pad(image, IMG_SIZE, IMG_SIZE) #resize+padding(이미지 비율 유지)
    image = tf.cast(image, tf.float32) / 255.0      #정규화
    return image, label

#데이터 증강
def data_augmentation(image, label):
    image = tf.image.random_flip_left_right(image)               #좌우 반전
    image = tf.image.random_brightness(image, max_delta=0.2)     #밝기 조절
    image = tf.image.random_contrast(image, 0.8, 1.2)            #대비 조절
    #image = tf.image.random_crop(image, size=[IMG_SIZE, IMG_SIZE, 3])  #무작위로 자르기
    return image, label

def create_dataset(paths, labels, is_training=True):
    """
    이미지 경로 리스트로부터 tf.data.Dataset 파이프라인을 생성합니다.
    """
    # 1. 경로 리스트로부터 데이터셋 생성
    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))
    # 2. 실시간 로드 및 전처리 (병렬 처리)
    dataset = dataset.map(load_and_resize_images, num_parallel_calls=AUTOTUNE)
    if is_training:
        # 3. 학습용 데이터셋: 셔플 및 데이터 증강
        dataset = dataset.shuffle(buffer_size=len(paths))
        # 데이터 증강
        dataset = dataset.map(data_augmentation, num_parallel_calls=AUTOTUNE)
    # 4. 배치 나누기
    dataset = dataset.batch(BATCH_SIZE)
    # 5. Prefetch: GPU가 연산하는 동안 CPU가 다음 배치를 준비
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)

    return dataset

train_ds = create_dataset(train_paths, train_labels, is_training=True)
val_ds = create_dataset(val_paths, val_labels, is_training=False)
test_ds = create_dataset(test_paths, test_labels, is_training=False)

In [8]:
#resnet
base_model = ResNet50(
    weights='imagenet',    #ImageNet 사전학습 가중치
    include_top=False,     #분류층 제외함
    input_shape=(224, 224, 3)
)

#사전학습된 가중치(나중에 fine-tuning?)
#base_model.trainable = False

#마지막 20~40 layers만 풀어서 재학습
base_model.trainable = True
for layer in base_model.layers[:-40]:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)

x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=output)

#컴파일
model.compile(
    optimizer=RMSprop(learning_rate=le-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

#콜백
callbacks = [ModelCheckpoint(MODEL_SAVE_PATH,
                             monitor='val_loss',
                             save_best_only=True,
                             mode='min'),
             EarlyStopping( #val_loss 개선X -> 학습 중단
                            monitor='val_loss',
                            patience=5,
                            restore_best_weights=True)
            ]

#정확도 기준
#EarlyStopping(
#    monitor='val_accuracy',
#    patience=5,
#    mode='max',
#    restore_best_weights=True,
#)

# 모델학습
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1
)

print("✅ 학습 완료!")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17s/step - accuracy: 0.4990 - loss: 0.8891 



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 23s/step - accuracy: 0.4997 - loss: 0.8899 - val_accuracy: 0.5000 - val_loss: 0.6977
Epoch 2/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 20s/step - accuracy: 0.5522 - loss: 0.7216 - val_accuracy: 0.5000 - val_loss: 0.6983
Epoch 3/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17s/step - accuracy: 0.6426 - loss: 0.6594 



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 21s/step - accuracy: 0.6420 - loss: 0.6601 - val_accuracy: 0.5150 - val_loss: 0.6926
Epoch 4/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17s/step - accuracy: 0.6624 - loss: 0.6146 



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 21s/step - accuracy: 0.6606 - loss: 0.6164 - val_accuracy: 0.5150 - val_loss: 0.6921
Epoch 5/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18s/step - accuracy: 0.6588 - loss: 0.6094 



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 22s/step - accuracy: 0.6607 - loss: 0.6077 - val_accuracy: 0.5350 - val_loss: 0.6907
Epoch 6/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 21s/step - accuracy: 0.6635 - loss: 0.6290 - val_accuracy: 0.5000 - val_loss: 0.7033
Epoch 7/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17s/step - accuracy: 0.7156 - loss: 0.5665 



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 22s/step - accuracy: 0.7174 - loss: 0.5637 - val_accuracy: 0.5000 - val_loss: 0.6898
Epoch 8/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17s/step - accuracy: 0.7340 - loss: 0.5271 



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m236s[0m 21s/step - accuracy: 0.7331 - loss: 0.5283 - val_accuracy: 0.5200 - val_loss: 0.6881
Epoch 9/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17s/step - accuracy: 0.6996 - loss: 0.5595 



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 20s/step - accuracy: 0.7015 - loss: 0.5582 - val_accuracy: 0.5250 - val_loss: 0.6808
Epoch 10/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 21s/step - accuracy: 0.7370 - loss: 0.5117 - val_accuracy: 0.5250 - val_loss: 0.7128
Epoch 11/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17s/step - accuracy: 0.7487 - loss: 0.4651 



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 21s/step - accuracy: 0.7499 - loss: 0.4642 - val_accuracy: 0.5850 - val_loss: 0.6518
Epoch 12/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m233s[0m 21s/step - accuracy: 0.7866 - loss: 0.4538 - val_accuracy: 0.5300 - val_loss: 0.9474
Epoch 13/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 20s/step - accuracy: 0.7252 - loss: 0.5236 - val_accuracy: 0.5300 - val_loss: 1.3009
Epoch 14/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 21s/step - accuracy: 0.8090 - loss: 0.4043 - val_accuracy: 0.5250 - val_loss: 2.0173
Epoch 15/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m230s[0m 21s/step - accuracy: 0.8065 - loss: 0.4525 - val_accuracy: 0.5400 - val_loss: 1.5894
Epoch 16/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 21s/step - accuracy: 0.7802 - loss: 0.4134 - v

In [10]:
# 테스트 평가
from tensorflow.keras.models import load_model

try:
    best_model = load_model(MODEL_SAVE_PATH)
    best_model.compile(optimizer='RMSprop', loss='binary_crossentropy', metrics=['accuracy'])
    print("저장된 모델 로드 성공.")
    test_loss, test_accuracy = best_model.evaluate(test_ds)
    print(f"\n===== 최종 테스트 결과 =====")
    print(f"  Test Loss: {test_loss:.4f}")
    print(f"  Test Accuracy: {test_accuracy * 100:.2f}%")
except Exception as e:
    print(f"모델 로드 또는 평가 중 오류 발생: {e}")



저장된 모델 로드 성공.
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 6s/step - accuracy: 0.6217 - loss: 0.6336

===== 최종 테스트 결과 =====
  Test Loss: 0.6430
  Test Accuracy: 62.00%
