In [None]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50

In [None]:
# Locations of the train, test, and validation splits on the mounted Drive.
# All three live under the same dataset folder and differ only in the split name.
train_dir, test_dir, val_dir = (
    '/content/drive/MyDrive/MIS 548 Porject Dataset/%s/' % split_name
    for split_name in ('Train', 'Test', 'Validation')
)

In [None]:
# Show what each split directory contains (expected: the class folders).
for _split_label, _split_path in (
    ("train", train_dir),
    ("test", test_dir),
    ("validation", val_dir),
):
    print("Files in %s directory:" % _split_label)
    print(os.listdir(_split_path))

Files in train directory:
['Fake', 'Real']
Files in test directory:
['Real', 'Fake']
Files in validation directory:
['Fake', 'Real']


In [None]:
# Count the number of files in each class subdirectory of the train directory
train_real_files = len(os.listdir(os.path.join(train_dir, 'Real')))
train_fake_files = len(os.listdir(os.path.join(train_dir, 'Fake')))

# Count the number of files in each class subdirectory of the test directory
test_real_files = len(os.listdir(os.path.join(test_dir, 'Real')))
test_fake_files = len(os.listdir(os.path.join(test_dir, 'Fake')))

# Count the number of files in each class subdirectory of the validation directory
val_real_files = len(os.listdir(os.path.join(val_dir, 'Real')))
val_fake_files = len(os.listdir(os.path.join(val_dir, 'Fake')))

# Print the counts, labelled with the split they actually belong to
# (the test and validation counts were previously printed as "train").
print("Number of real images in train directory:", train_real_files)
print("Number of fake images in train directory:", train_fake_files)

print("Number of real images in test directory:", test_real_files)
print("Number of fake images in test directory:", test_fake_files)

print("Number of real images in validation directory:", val_real_files)
print("Number of fake images in validation directory:", val_fake_files)


In [None]:
# Randomly sample an equal number of images from each class folder of a split.
# A split directory only contains the class folders ('Fake', 'Real'), so
# sampling os.listdir(split_dir) itself raises
# "ValueError: Sample larger than population"; sample inside the folders instead.
def _sample_split(split_dir, total):
    """Return `total` image paths, half from 'Real' and half from 'Fake'.

    The paths are relative to `split_dir` (e.g. 'Real/<file name>'), so they
    resolve against a generator's `directory` argument.

    Raises:
        ValueError: If a class folder holds fewer than `total // 2` entries.
    """
    per_class = total // 2
    picked = []
    for class_name in ('Real', 'Fake'):
        # os.listdir order is arbitrary; sort so a fixed seed picks the same files.
        names = sorted(os.listdir(os.path.join(split_dir, class_name)))
        picked += [os.path.join(class_name, name)
                   for name in random.sample(names, per_class)]
    return picked


train_images = _sample_split(train_dir, 10000)  # 5,000 per class
test_images = _sample_split(test_dir, 2000)     # 1,000 per class
val_images = _sample_split(val_dir, 2000)       # 1,000 per class

ValueError: Sample larger than population or is negative

In [None]:
# Define image dimensions and batch size
img_width, img_height = 224, 224
batch_size = 32

In [None]:
# Create data generators for train, test, and validation data.
# ResNet50's ImageNet weights expect resnet50.preprocess_input (RGB -> BGR and
# per-channel mean subtraction), not a plain 1/255 rescale.
_resnet_preprocess = tf.keras.applications.resnet50.preprocess_input
train_datagen = ImageDataGenerator(preprocessing_function=_resnet_preprocess)
test_datagen = ImageDataGenerator(preprocessing_function=_resnet_preprocess)
val_datagen = ImageDataGenerator(preprocessing_function=_resnet_preprocess)


def _labelled_frame(filenames):
    """Build the frame consumed by flow_from_dataframe.

    Each entry of `filenames` must be a path relative to the split directory
    whose parent folder is the class name ('Real/...' or 'Fake/...'); that
    folder name becomes the "class" column.

    Raises:
        ValueError: If an entry has no parent folder to take the class from.
    """
    classes = [os.path.basename(os.path.dirname(name)) for name in filenames]
    if not all(classes):
        raise ValueError("filenames must look like '<class folder>/<image file>'")
    return pd.DataFrame({"filename": filenames, "class": classes})


def _make_generator(datagen, filenames, directory, shuffle_batches):
    """Return a generator yielding (image batch, binary label batch)."""
    # class_mode=None would yield images only, which leaves model.fit and
    # model.evaluate without targets for the binary cross-entropy loss.
    return datagen.flow_from_dataframe(
        dataframe=_labelled_frame(filenames),
        directory=directory,
        x_col="filename",
        y_col="class",
        target_size=(img_height, img_width),  # Keras expects (height, width)
        batch_size=batch_size,
        classes=["Fake", "Real"],  # Fake -> 0, Real -> 1
        class_mode="binary",
        shuffle=shuffle_batches,
    )


train_generator = _make_generator(train_datagen, train_images, train_dir, True)

test_generator = _make_generator(test_datagen, test_images, test_dir, False)

val_generator = _make_generator(val_datagen, val_images, val_dir, False)

In [None]:
# Load the pre-trained ResNet50 model without the top classification layer
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

In [None]:
# Freeze the convolutional layers
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Add custom classification layers on top of ResNet50
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

In [None]:
# Define the model
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_images) // batch_size,
    epochs=10,
    validation_data=val_generator,
    validation_steps=len(val_images) // batch_size
)

In [None]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(test_generator)
print("Test Accuracy:", test_accuracy)

分隔線

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import shuffle

In [None]:
# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Path to the directory containing the dataset
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Return `sample_size` randomly chosen entries of `directory` as full paths.

    Args:
        directory: Folder whose entries are sampled.
        sample_size: Number of entries to draw without replacement.

    Returns:
        A list of `sample_size` paths, each `directory` joined with an entry name.

    Raises:
        ValueError: If `sample_size` is negative or exceeds the number of
            entries in `directory`.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # the same random seed can select different files from one run to the next.
    files = sorted(os.listdir(directory))
    if not 0 <= sample_size <= len(files):
        raise ValueError(
            f"Cannot sample {sample_size} files from {directory!r}: "
            f"it contains {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Draw a class-balanced, shuffled sample of image paths with labels.

    Half of `sample_size` (integer division) is sampled from the "Real"
    subfolder of `directory` and labelled 1, the other half from "Fake" and
    labelled 0. Paths and labels are then shuffled together.

    Returns:
        The shuffled paths and their matching labels.
    """
    per_class = sample_size // 2
    paths = []
    for class_name in ("Real", "Fake"):
        paths += sample_data(os.path.join(directory, class_name), per_class)
    targets = [1] * per_class + [0] * per_class
    paths, targets = shuffle(paths, targets)
    return paths, targets

train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Mount Google Drive so the dataset under /content/drive is readable.
# NOTE(review): this cell sits after cells that already read from
# /content/drive (directory listings, load_data); on a fresh runtime it has
# to be run before them.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Data preprocessing
def preprocess_image(image):
    """Decode JPEG bytes into a 224x224 RGB tensor preprocessed for ResNet."""
    decoded = tf.image.decode_jpeg(image, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return tf.keras.applications.resnet.preprocess_input(resized)

def load_and_preprocess_image(path, label):
    """Read the file at `path` and return (preprocessed image, label)."""
    pixels = preprocess_image(tf.io.read_file(path))
    return pixels, label

In [None]:
# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_samples, train_labels))
train_dataset = train_dataset.map(load_and_preprocess_image)
train_dataset = train_dataset.shuffle(buffer_size=len(train_samples)).batch(32)

val_dataset = tf.data.Dataset.from_tensor_slices((val_samples, val_labels))
val_dataset = val_dataset.map(load_and_preprocess_image)
val_dataset = val_dataset.batch(32)

In [None]:
# Define the ResNet model architecture
base_model = ResNet50(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
model.fit(train_dataset,
          epochs=10,
          validation_data=val_dataset)

Epoch 1/10


In [None]:
# Save the model
model.save("resnet_model.h5")

  saving_api.save_model(


In [None]:
# Evaluate the model
test_samples, test_labels = load_data(os.path.join(data_dir, "Test"), 2000)
test_dataset = tf.data.Dataset.from_tensor_slices((test_samples, test_labels))
test_dataset = test_dataset.map(load_and_preprocess_image)
test_dataset = test_dataset.batch(32)

loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.9596732258796692
Test Accuracy: 0.7229999899864197


Section divider: ResNet parameter tuning 1 (分隔線 - ResNet 參數調整 1)

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.utils import shuffle

In [None]:
# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Path to the directory containing the dataset
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Return `sample_size` randomly chosen entries of `directory` as full paths.

    Args:
        directory: Folder whose entries are sampled.
        sample_size: Number of entries to draw without replacement.

    Returns:
        A list of `sample_size` paths, each `directory` joined with an entry name.

    Raises:
        ValueError: If `sample_size` is negative or exceeds the number of
            entries in `directory`.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # the same random seed can select different files from one run to the next.
    files = sorted(os.listdir(directory))
    if not 0 <= sample_size <= len(files):
        raise ValueError(
            f"Cannot sample {sample_size} files from {directory!r}: "
            f"it contains {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Draw a class-balanced, shuffled sample of image paths with labels.

    Half of `sample_size` (integer division) is sampled from the "Real"
    subfolder of `directory` and labelled 1, the other half from "Fake" and
    labelled 0. Paths and labels are then shuffled together.

    Returns:
        The shuffled paths and their matching labels.
    """
    per_class = sample_size // 2
    paths = []
    for class_name in ("Real", "Fake"):
        paths += sample_data(os.path.join(directory, class_name), per_class)
    targets = [1] * per_class + [0] * per_class
    paths, targets = shuffle(paths, targets)
    return paths, targets

train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing and augmentation
# NOTE(review): the generators below read every image under Train/ and
# Validation/ (see the "Found ... images" output); the train_samples /
# val_samples lists sampled earlier in this section are not used here.
# Training images get ResNet preprocessing plus random geometric augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet.preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Labels come from the class subfolder names (class_mode='binary').
# target_size is disabled below, so Keras falls back to its default (256, 256).
train_dataset = train_datagen.flow_from_directory(
    os.path.join(data_dir, "Train"),
    #target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=True
)

# Validation images are only preprocessed, never augmented.
val_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet.preprocess_input
)

# shuffle=False keeps the validation batches in a fixed order.
val_dataset = val_datagen.flow_from_directory(
    os.path.join(data_dir, "Validation"),
    #target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

Found 140110 images belonging to 2 classes.
Found 39428 images belonging to 2 classes.


In [None]:
# Define the ResNet model architecture
base_model = ResNet50(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Freeze every layer of the pre-trained base so that only the new Dense head
# is trained (feature extraction; no base weights are fine-tuned here).
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Compile the model with a custom learning rate scheduler
initial_learning_rate = 0.001
epochs = 10

lr_schedule = LearningRateScheduler(lambda epoch: initial_learning_rate * 0.9 ** epoch)

model.compile(optimizer=Adam(learning_rate=initial_learning_rate),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
model.fit(train_dataset,
          epochs=epochs,
          validation_data=val_dataset,
          callbacks=[lr_schedule])

Epoch 1/10
 577/4379 [==>...........................] - ETA: 25:48:20 - loss: 0.5019 - accuracy: 0.7506

In [None]:
# Evaluate the model
test_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet.preprocess_input
)

test_dataset = test_datagen.flow_from_directory(
    os.path.join(data_dir, "Test"),
    #target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


Section divider: ResNet parameter tuning 2 (分隔線 - ResNet 參數調整 2)

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from sklearn.utils import shuffle

In [None]:
# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Path to the directory containing the dataset
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Return `sample_size` randomly chosen entries of `directory` as full paths.

    Args:
        directory: Folder whose entries are sampled.
        sample_size: Number of entries to draw without replacement.

    Returns:
        A list of `sample_size` paths, each `directory` joined with an entry name.

    Raises:
        ValueError: If `sample_size` is negative or exceeds the number of
            entries in `directory`.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # the same random seed can select different files from one run to the next.
    files = sorted(os.listdir(directory))
    if not 0 <= sample_size <= len(files):
        raise ValueError(
            f"Cannot sample {sample_size} files from {directory!r}: "
            f"it contains {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Draw a class-balanced, shuffled sample of image paths with labels.

    Half of `sample_size` (integer division) is sampled from the "Real"
    subfolder of `directory` and labelled 1, the other half from "Fake" and
    labelled 0. Paths and labels are then shuffled together.

    Returns:
        The shuffled paths and their matching labels.
    """
    per_class = sample_size // 2
    paths = []
    for class_name in ("Real", "Fake"):
        paths += sample_data(os.path.join(directory, class_name), per_class)
    targets = [1] * per_class + [0] * per_class
    paths, targets = shuffle(paths, targets)
    return paths, targets

train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing and augmentation
train_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet.preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_dataset = train_datagen.flow_from_directory(
    os.path.join(data_dir, "Train"),
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=True
)

val_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet.preprocess_input
)

val_dataset = val_datagen.flow_from_directory(
    os.path.join(data_dir, "Validation"),
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

Found 140110 images belonging to 2 classes.
Found 39428 images belonging to 2 classes.


In [None]:
# Define the ResNet model architecture
base_model = ResNet50(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Freeze every layer of the pre-trained base so that only the new Dense head
# is trained (feature extraction; no base weights are fine-tuned here).
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Compile the model with a custom learning rate scheduler and early stopping
initial_learning_rate = 0.001
epochs = 5

lr_schedule = LearningRateScheduler(lambda epoch: initial_learning_rate * 0.9 ** epoch)
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

model.compile(optimizer=Adam(learning_rate=initial_learning_rate),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
history = model.fit(train_dataset,
                    epochs=epochs,
                    validation_data=val_dataset,
                    callbacks=[lr_schedule, early_stopping])

Epoch 1/5
 673/4379 [===>..........................] - ETA: 4:01:38 - loss: 0.4915 - accuracy: 0.7550

KeyboardInterrupt: 

In [None]:
# Evaluate the model
test_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet.preprocess_input
)

test_dataset = test_datagen.flow_from_directory(
    os.path.join(data_dir, "Test"),
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

VGG 16

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import shuffle

In [None]:
# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Path to the directory containing the dataset
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Return `sample_size` randomly chosen entries of `directory` as full paths.

    Args:
        directory: Folder whose entries are sampled.
        sample_size: Number of entries to draw without replacement.

    Returns:
        A list of `sample_size` paths, each `directory` joined with an entry name.

    Raises:
        ValueError: If `sample_size` is negative or exceeds the number of
            entries in `directory`.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # the same random seed can select different files from one run to the next.
    files = sorted(os.listdir(directory))
    if not 0 <= sample_size <= len(files):
        raise ValueError(
            f"Cannot sample {sample_size} files from {directory!r}: "
            f"it contains {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Draw a class-balanced, shuffled sample of image paths with labels.

    Half of `sample_size` (integer division) is sampled from the "Real"
    subfolder of `directory` and labelled 1, the other half from "Fake" and
    labelled 0. Paths and labels are then shuffled together.

    Returns:
        The shuffled paths and their matching labels.
    """
    per_class = sample_size // 2
    paths = []
    for class_name in ("Real", "Fake"):
        paths += sample_data(os.path.join(directory, class_name), per_class)
    targets = [1] * per_class + [0] * per_class
    paths, targets = shuffle(paths, targets)
    return paths, targets

train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing
def preprocess_image(image):
    """Decode JPEG bytes into a 224x224 RGB tensor preprocessed for VGG16."""
    decoded = tf.image.decode_jpeg(image, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return tf.keras.applications.vgg16.preprocess_input(resized)

def load_and_preprocess_image(path, label):
    """Read the file at `path` and return (preprocessed image, label)."""
    pixels = preprocess_image(tf.io.read_file(path))
    return pixels, label

In [None]:
# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_samples, train_labels))
train_dataset = train_dataset.map(load_and_preprocess_image)
train_dataset = train_dataset.shuffle(buffer_size=len(train_samples)).batch(32)

val_dataset = tf.data.Dataset.from_tensor_slices((val_samples, val_labels))
val_dataset = val_dataset.map(load_and_preprocess_image)
val_dataset = val_dataset.batch(32)

In [None]:
# Define the VGG16 model architecture
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the ImageNet-pretrained convolutional base so that only the new
# classification head is trained. With every VGG16 layer trainable under the
# default Adam learning rate, the recorded run of this notebook ended at
# chance level (test accuracy 0.5, loss ~0.693), which is the usual symptom
# of the pretrained features being destroyed early in training.
base_model.trainable = False
x = base_model.output
# Collapse the spatial feature map to one vector per image.
x = GlobalAveragePooling2D()(x)
# Single sigmoid unit: probability of the positive class (label 1).
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
model.fit(train_dataset,
          epochs=5,
          validation_data=val_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7aa7379b94b0>

In [None]:
# Save the model
model.save("vgg16_model.h5")

  saving_api.save_model(


In [None]:
# Evaluate the model
test_samples, test_labels = load_data(os.path.join(data_dir, "Test"), 2000)
test_dataset = tf.data.Dataset.from_tensor_slices((test_samples, test_labels))
test_dataset = test_dataset.map(load_and_preprocess_image)
test_dataset = test_dataset.batch(32)

loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


Test Loss: 0.6931784152984619
Test Accuracy: 0.5


ResNet 101




In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet101
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import shuffle

In [None]:
# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Path to the directory containing the dataset
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Return `sample_size` randomly chosen entries of `directory` as full paths.

    Args:
        directory: Folder whose entries are sampled.
        sample_size: Number of entries to draw without replacement.

    Returns:
        A list of `sample_size` paths, each `directory` joined with an entry name.

    Raises:
        ValueError: If `sample_size` is negative or exceeds the number of
            entries in `directory`.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # the same random seed can select different files from one run to the next.
    files = sorted(os.listdir(directory))
    if not 0 <= sample_size <= len(files):
        raise ValueError(
            f"Cannot sample {sample_size} files from {directory!r}: "
            f"it contains {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Draw a class-balanced, shuffled sample of image paths with labels.

    Half of `sample_size` (integer division) is sampled from the "Real"
    subfolder of `directory` and labelled 1, the other half from "Fake" and
    labelled 0. Paths and labels are then shuffled together.

    Returns:
        The shuffled paths and their matching labels.
    """
    per_class = sample_size // 2
    paths = []
    for class_name in ("Real", "Fake"):
        paths += sample_data(os.path.join(directory, class_name), per_class)
    targets = [1] * per_class + [0] * per_class
    paths, targets = shuffle(paths, targets)
    return paths, targets

train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing
def preprocess_image(image):
    """Decode JPEG bytes into a 224x224 RGB tensor preprocessed for ResNet."""
    decoded = tf.image.decode_jpeg(image, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return tf.keras.applications.resnet.preprocess_input(resized)

def load_and_preprocess_image(path, label):
    """Read the file at `path` and return (preprocessed image, label)."""
    pixels = preprocess_image(tf.io.read_file(path))
    return pixels, label

In [None]:
# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_samples, train_labels))
train_dataset = train_dataset.map(load_and_preprocess_image)
train_dataset = train_dataset.shuffle(buffer_size=len(train_samples)).batch(32)

val_dataset = tf.data.Dataset.from_tensor_slices((val_samples, val_labels))
val_dataset = val_dataset.map(load_and_preprocess_image)
val_dataset = val_dataset.batch(32)

In [None]:
# Define the ResNet101 model architecture
base_model = ResNet101(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
model.fit(train_dataset,
          epochs=5,
          validation_data=val_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7aa684194ca0>

In [None]:
# Save the model
model.save("resnet101_model.h5")

In [None]:
# Evaluate the model
test_samples, test_labels = load_data(os.path.join(data_dir, "Test"), 2000)
test_dataset = tf.data.Dataset.from_tensor_slices((test_samples, test_labels))
test_dataset = test_dataset.map(load_and_preprocess_image)
test_dataset = test_dataset.batch(32)

loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.36679160594940186
Test Accuracy: 0.8700000047683716


Inception V3

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import shuffle

In [None]:
# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Path to the directory containing the dataset
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Return `sample_size` randomly chosen entries of `directory` as full paths.

    Args:
        directory: Folder whose entries are sampled.
        sample_size: Number of entries to draw without replacement.

    Returns:
        A list of `sample_size` paths, each `directory` joined with an entry name.

    Raises:
        ValueError: If `sample_size` is negative or exceeds the number of
            entries in `directory`.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # the same random seed can select different files from one run to the next.
    files = sorted(os.listdir(directory))
    if not 0 <= sample_size <= len(files):
        raise ValueError(
            f"Cannot sample {sample_size} files from {directory!r}: "
            f"it contains {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Draw a class-balanced, shuffled sample of image paths with labels.

    Half of `sample_size` (integer division) is sampled from the "Real"
    subfolder of `directory` and labelled 1, the other half from "Fake" and
    labelled 0. Paths and labels are then shuffled together.

    Returns:
        The shuffled paths and their matching labels.
    """
    per_class = sample_size // 2
    paths = []
    for class_name in ("Real", "Fake"):
        paths += sample_data(os.path.join(directory, class_name), per_class)
    targets = [1] * per_class + [0] * per_class
    paths, targets = shuffle(paths, targets)
    return paths, targets

train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing
def preprocess_image(image):
    """Decode JPEG bytes into a 299x299 RGB tensor preprocessed for InceptionV3."""
    decoded = tf.image.decode_jpeg(image, channels=3)
    # 299x299 matches the input_shape the InceptionV3 base is built with.
    resized = tf.image.resize(decoded, (299, 299))
    return tf.keras.applications.inception_v3.preprocess_input(resized)

def load_and_preprocess_image(path, label):
    """Read the file at `path` and return (preprocessed image, label)."""
    pixels = preprocess_image(tf.io.read_file(path))
    return pixels, label

In [None]:
# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_samples, train_labels))
train_dataset = train_dataset.map(load_and_preprocess_image)
train_dataset = train_dataset.shuffle(buffer_size=len(train_samples)).batch(32)

val_dataset = tf.data.Dataset.from_tensor_slices((val_samples, val_labels))
val_dataset = val_dataset.map(load_and_preprocess_image)
val_dataset = val_dataset.batch(32)

In [None]:
# Define the InceptionV3 model architecture (InceptionV3 is a later revision
# of the GoogLeNet / Inception-v1 design, not GoogLeNet itself).
# The ImageNet-pretrained base is loaded without its classification top.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
x = base_model.output
# Collapse the spatial feature map to one vector per image.
x = GlobalAveragePooling2D()(x)
# Single sigmoid unit for the binary Real/Fake decision.
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
model.fit(train_dataset,
          epochs=5,
          validation_data=val_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7aa684282980>

In [None]:
# Save the model
model.save("inceptionv3_model.h5")

In [None]:
# Evaluate the model
test_samples, test_labels = load_data(os.path.join(data_dir, "Test"), 2000)
test_dataset = tf.data.Dataset.from_tensor_slices((test_samples, test_labels))
test_dataset = test_dataset.map(load_and_preprocess_image)
test_dataset = test_dataset.batch(32)

loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


Test Loss: 0.4633050262928009
Test Accuracy: 0.8669999837875366


EfficientNet

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import shuffle

In [None]:
# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Path to the directory containing the dataset
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Return `sample_size` randomly chosen entries of `directory` as full paths.

    Args:
        directory: Folder whose entries are sampled.
        sample_size: Number of entries to draw without replacement.

    Returns:
        A list of `sample_size` paths, each `directory` joined with an entry name.

    Raises:
        ValueError: If `sample_size` is negative or exceeds the number of
            entries in `directory`.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # the same random seed can select different files from one run to the next.
    files = sorted(os.listdir(directory))
    if not 0 <= sample_size <= len(files):
        raise ValueError(
            f"Cannot sample {sample_size} files from {directory!r}: "
            f"it contains {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Draw a class-balanced, shuffled sample of image paths with labels.

    Half of `sample_size` (integer division) is sampled from the "Real"
    subfolder of `directory` and labelled 1, the other half from "Fake" and
    labelled 0. Paths and labels are then shuffled together.

    Returns:
        The shuffled paths and their matching labels.
    """
    per_class = sample_size // 2
    paths = []
    for class_name in ("Real", "Fake"):
        paths += sample_data(os.path.join(directory, class_name), per_class)
    targets = [1] * per_class + [0] * per_class
    paths, targets = shuffle(paths, targets)
    return paths, targets

train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing
def preprocess_image(image):
    """Decode JPEG bytes into a 224x224 RGB tensor preprocessed for EfficientNet."""
    decoded = tf.image.decode_jpeg(image, channels=3)
    # 224x224 matches the input_shape the EfficientNetB0 base is built with.
    resized = tf.image.resize(decoded, (224, 224))
    return tf.keras.applications.efficientnet.preprocess_input(resized)

def load_and_preprocess_image(path, label):
    """Read the file at `path` and return (preprocessed image, label)."""
    pixels = preprocess_image(tf.io.read_file(path))
    return pixels, label

In [None]:
# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_samples, train_labels))
train_dataset = train_dataset.map(load_and_preprocess_image)
train_dataset = train_dataset.shuffle(buffer_size=len(train_samples)).batch(32)

val_dataset = tf.data.Dataset.from_tensor_slices((val_samples, val_labels))
val_dataset = val_dataset.map(load_and_preprocess_image)
val_dataset = val_dataset.batch(32)

In [None]:
# Define the EfficientNet model architecture
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
model.fit(train_dataset,
          epochs=5,
          validation_data=val_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7be06e1f2ce0>

In [None]:
# Save the model
model.save("efficientnet_model.h5")

  saving_api.save_model(


In [None]:
# Evaluate the model
test_samples, test_labels = load_data(os.path.join(data_dir, "Test"), 2000)
test_dataset = tf.data.Dataset.from_tensor_slices((test_samples, test_labels))
test_dataset = test_dataset.map(load_and_preprocess_image)
test_dataset = test_dataset.batch(32)

loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.37379613518714905
Test Accuracy: 0.8809999823570251


Based on the provided output, here are some insights that can help identify which technique might be more suitable:

**Training and Validation Performance:**

The training accuracy starts at 93.45% and increases to 98.67% over the epochs.
The validation accuracy starts at 94.90% and fluctuates slightly but remains relatively high, ranging between 94.05% and 95.75%.
This indicates that the model is learning well from the training data and generalizing reasonably well to unseen validation data.

**Test Performance:**

The test accuracy is 88.10%, which is lower than the validation accuracy.
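The gap of roughly 6–8 percentage points between validation accuracy (94.05%–95.75%) and test accuracy (88.10%) suggests some overfitting, or a difference between the validation and test samples, since the model does not perform as well on the unseen test data as it did on the validation set.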
Given these insights, the following techniques may be suitable to improve test accuracy:

* Regularization: Since the training accuracy is significantly higher than the test accuracy, it indicates that the model might be overfitting. Regularization techniques such as dropout or weight decay can help reduce overfitting and improve generalization to the test set.
* Data Augmentation: If the validation accuracy remains stable but the test accuracy is significantly lower, it might suggest that the model is not exposed to enough variations in the training data. Data augmentation techniques can help introduce more diversity into the training data, potentially leading to better generalization to unseen test data.
* Learning Rate Scheduling: If the training and validation accuracy plateaus or fluctuates significantly, adjusting the learning rate schedule may help stabilize the training process and improve convergence, which can indirectly lead to better test accuracy.

Given these insights, a combination of regularization and data augmentation is likely to be the most effective way to improve test accuracy while keeping the number of sampled images unchanged.

EfficientNetB1

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB1  # Change import statement
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import shuffle

In [None]:
# Seed every random number generator used below so reruns start from the same state
SEED = 42
random.seed(SEED)         # Python's `random` module (random.sample in sample_data)
np.random.seed(SEED)      # NumPy's global generator
tf.random.set_seed(SEED)  # TensorFlow's global seed

In [None]:
# Path to the directory containing the dataset
# Root folder of the dataset (Colab-style Drive mount path); the "Train",
# "Validation" and "Test" sub-folders are joined onto it in later cells.
# "Porject" is the folder's actual spelling -- the directory cells at the top
# of the notebook list the same path successfully -- so do not correct it here.
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Randomly pick `sample_size` entries from `directory` and return their paths.

    Args:
        directory: folder whose entries are sampled (no filtering by file type).
        sample_size: number of distinct entries to draw.

    Returns:
        List of `sample_size` paths, each `directory` joined with an entry name,
        in the order chosen by `random.sample`.

    Raises:
        ValueError: if `directory` holds fewer than `sample_size` entries
            (or `sample_size` is negative, raised by `random.sample`).
    """
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting first makes the draw depend only on the `random` seed, so a
    # seeded run selects the same files every time.
    files = sorted(os.listdir(directory))
    if sample_size > len(files):
        raise ValueError(
            f"Requested {sample_size} samples but '{directory}' "
            f"contains only {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Build a shuffled, class-balanced list of image paths with labels.

    Draws `sample_size // 2` paths from `directory/Real` (label 1) and the same
    number from `directory/Fake` (label 0), so an odd `sample_size` yields
    `sample_size - 1` items. Paths and labels are shuffled together.

    Returns:
        (paths, labels) as two parallel sequences.
    """
    per_class = sample_size // 2
    paths, targets = [], []
    # "Real" is sampled before "Fake", matching the order of the labels.
    for class_name, class_label in (("Real", 1), ("Fake", 0)):
        paths.extend(sample_data(os.path.join(directory, class_name), per_class))
        targets.extend([class_label] * per_class)
    shuffled_paths, shuffled_targets = shuffle(paths, targets)
    return shuffled_paths, shuffled_targets

# Draw class-balanced random subsets via load_data: 10000 paths from "Train"
# and 2000 from "Validation" (half "Real", half "Fake" in each).
train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing
def preprocess_image(image, target_size=(224, 224)):
    """Decode encoded image bytes and resize them for an EfficientNet backbone.

    Args:
        image: scalar string tensor with the encoded file contents
            (e.g. the result of tf.io.read_file).
        target_size: (height, width) to resize to. Defaults to (224, 224),
            the value this notebook has always used. 224 is the native
            resolution of EfficientNetB0; the Keras documentation lists
            240 for B1, so pass (240, 240) to train at the native size.

    Returns:
        Image tensor of shape (target_size[0], target_size[1], 3).
    """
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, target_size)
    # Per the Keras docs this is a pass-through for EfficientNet (rescaling is
    # part of the model itself); it is kept so the pipeline stays explicit.
    image = tf.keras.applications.efficientnet.preprocess_input(image)
    return image

def load_and_preprocess_image(path, label):
    """Read the file at `path` and return (preprocessed image, label).

    Takes a (path, label) pair, as mapped over a tf.data.Dataset below; the
    label is passed through untouched.
    """
    raw_bytes = tf.io.read_file(path)
    processed = preprocess_image(raw_bytes)
    return processed, label

In [None]:
# Create TensorFlow datasets
# Shuffle the (path, label) pairs *before* decoding. Shuffling after map() with
# buffer_size=len(train_samples) keeps every decoded image in the shuffle
# buffer -- 10000 images of 224x224x3 float32 is roughly 6 GB of RAM -- whereas
# shuffling the path strings is essentially free and still reshuffles the full
# training set each epoch.
train_dataset = tf.data.Dataset.from_tensor_slices((train_samples, train_labels))
train_dataset = train_dataset.shuffle(buffer_size=len(train_samples))
# Decode images in parallel and overlap input preparation with training.
train_dataset = train_dataset.map(load_and_preprocess_image,
                                  num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

# Validation data keeps its order; only parallel decoding and prefetch are added.
val_dataset = tf.data.Dataset.from_tensor_slices((val_samples, val_labels))
val_dataset = val_dataset.map(load_and_preprocess_image,
                              num_parallel_calls=tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

In [None]:
# Define the EfficientNetB1 model architecture:
# ImageNet-pretrained backbone without its classification head, followed by
# global average pooling and one sigmoid unit (1 = Real, 0 = Fake).
base_model = EfficientNetB1(include_top=False, weights='imagenet')
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb1_notop.h5


In [None]:
# Compile the model
# The network ends in a single sigmoid unit, hence binary cross-entropy.
# optimizer='adam' selects Adam by name; no custom learning rate is set here.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
# Keep the returned History so per-epoch loss/accuracy can be inspected or
# plotted afterwards (efficientnet_b1_history.history); assigning it also stops
# the bare History repr from being echoed as the cell output.
efficientnet_b1_history = model.fit(train_dataset,
                                    epochs=5,
                                    validation_data=val_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7d1cdc692320>

In [None]:
# Save the model
# Written relative to the current working directory; the ".h5" extension
# selects Keras's HDF5 format.
# NOTE(review): the truncated `saving_api.save_model(` warning in this cell's
# output is presumably Keras's legacy-HDF5 notice -- confirm before changing
# the format, since other code may load this exact filename. The name style
# also differs from the B2/B3 cells ("efficientnet_b2_model.h5").
model.save("efficientnetB1_model.h5")

  saving_api.save_model(


In [None]:
# Evaluate the model on a freshly drawn, class-balanced subset of the Test split
test_samples, test_labels = load_data(os.path.join(data_dir, "Test"), 2000)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_samples, test_labels))
    .map(load_and_preprocess_image)
    .batch(32)
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


Test Loss: 0.8706620335578918
Test Accuracy: 0.8550000190734863


EfficientNetB2

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB2  # Change import statement
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import shuffle

In [None]:
# Re-seed every random number generator so this section starts from a known state
SEED = 42
random.seed(SEED)         # Python's `random` module (random.sample in sample_data)
np.random.seed(SEED)      # NumPy's global generator
tf.random.set_seed(SEED)  # TensorFlow's global seed

In [None]:
# Path to the directory containing the dataset
# Root folder of the dataset (Colab-style Drive mount path); the "Train",
# "Validation" and "Test" sub-folders are joined onto it in later cells.
# "Porject" is the folder's actual spelling -- the directory cells at the top
# of the notebook list the same path successfully -- so do not correct it here.
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Randomly pick `sample_size` entries from `directory` and return their paths.

    Args:
        directory: folder whose entries are sampled (no filtering by file type).
        sample_size: number of distinct entries to draw.

    Returns:
        List of `sample_size` paths, each `directory` joined with an entry name,
        in the order chosen by `random.sample`.

    Raises:
        ValueError: if `directory` holds fewer than `sample_size` entries
            (or `sample_size` is negative, raised by `random.sample`).
    """
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting first makes the draw depend only on the `random` seed, so a
    # seeded run selects the same files every time.
    files = sorted(os.listdir(directory))
    if sample_size > len(files):
        raise ValueError(
            f"Requested {sample_size} samples but '{directory}' "
            f"contains only {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Build a shuffled, class-balanced list of image paths with labels.

    Draws `sample_size // 2` paths from `directory/Real` (label 1) and the same
    number from `directory/Fake` (label 0), so an odd `sample_size` yields
    `sample_size - 1` items. Paths and labels are shuffled together.

    Returns:
        (paths, labels) as two parallel sequences.
    """
    per_class = sample_size // 2
    paths, targets = [], []
    # "Real" is sampled before "Fake", matching the order of the labels.
    for class_name, class_label in (("Real", 1), ("Fake", 0)):
        paths.extend(sample_data(os.path.join(directory, class_name), per_class))
        targets.extend([class_label] * per_class)
    shuffled_paths, shuffled_targets = shuffle(paths, targets)
    return shuffled_paths, shuffled_targets

# Draw class-balanced random subsets via load_data: 10000 paths from "Train"
# and 2000 from "Validation" (half "Real", half "Fake" in each).
train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing
def preprocess_image(image, target_size=(224, 224)):
    """Decode encoded image bytes and resize them for an EfficientNet backbone.

    Args:
        image: scalar string tensor with the encoded file contents
            (e.g. the result of tf.io.read_file).
        target_size: (height, width) to resize to. Defaults to (224, 224),
            the value this notebook has always used. 224 is the native
            resolution of EfficientNetB0; the Keras documentation lists
            260 for B2, so pass (260, 260) to train at the native size.

    Returns:
        Image tensor of shape (target_size[0], target_size[1], 3).
    """
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, target_size)
    # Per the Keras docs this is a pass-through for EfficientNet (rescaling is
    # part of the model itself); it is kept so the pipeline stays explicit.
    image = tf.keras.applications.efficientnet.preprocess_input(image)
    return image

def load_and_preprocess_image(path, label):
    """Read the file at `path` and return (preprocessed image, label).

    Takes a (path, label) pair, as mapped over a tf.data.Dataset below; the
    label is passed through untouched.
    """
    raw_bytes = tf.io.read_file(path)
    processed = preprocess_image(raw_bytes)
    return processed, label

In [None]:
# Create TensorFlow datasets
# Shuffle the (path, label) pairs *before* decoding. Shuffling after map() with
# buffer_size=len(train_samples) keeps every decoded image in the shuffle
# buffer -- 10000 images of 224x224x3 float32 is roughly 6 GB of RAM -- whereas
# shuffling the path strings is essentially free and still reshuffles the full
# training set each epoch.
train_dataset = tf.data.Dataset.from_tensor_slices((train_samples, train_labels))
train_dataset = train_dataset.shuffle(buffer_size=len(train_samples))
# Decode images in parallel and overlap input preparation with training.
train_dataset = train_dataset.map(load_and_preprocess_image,
                                  num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

# Validation data keeps its order; only parallel decoding and prefetch are added.
val_dataset = tf.data.Dataset.from_tensor_slices((val_samples, val_labels))
val_dataset = val_dataset.map(load_and_preprocess_image,
                              num_parallel_calls=tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

In [None]:
# Define the EfficientNetB2 model architecture:
# ImageNet-pretrained backbone without its classification head, followed by
# global average pooling and one sigmoid unit (1 = Real, 0 = Fake).
base_model = EfficientNetB2(include_top=False, weights='imagenet')
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb2_notop.h5


In [None]:
# Compile the model
# The network ends in a single sigmoid unit, hence binary cross-entropy.
# optimizer='adam' selects Adam by name; no custom learning rate is set here.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
# Keep the returned History so per-epoch loss/accuracy can be inspected or
# plotted afterwards (efficientnet_b2_history.history); assigning it also stops
# the bare History repr from being echoed as the cell output.
efficientnet_b2_history = model.fit(train_dataset,
                                    epochs=5,
                                    validation_data=val_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7d1bb3e112a0>

In [None]:
# Save the model
# Written relative to the current working directory; the ".h5" extension
# selects Keras's HDF5 format.
# NOTE(review): the B1 cell uses a different name style
# ("efficientnetB1_model.h5") -- keep that in mind when loading the files.
model.save("efficientnet_b2_model.h5")

In [None]:
# Evaluate the model on a freshly drawn, class-balanced subset of the Test split
test_samples, test_labels = load_data(os.path.join(data_dir, "Test"), 2000)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_samples, test_labels))
    .map(load_and_preprocess_image)
    .batch(32)
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


Test Loss: 0.393665075302124
Test Accuracy: 0.8769999742507935


EfficientNetB3

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3  # Update import statement
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import shuffle

In [None]:
# Re-seed every random number generator so this section starts from a known state
SEED = 42
random.seed(SEED)         # Python's `random` module (random.sample in sample_data)
np.random.seed(SEED)      # NumPy's global generator
tf.random.set_seed(SEED)  # TensorFlow's global seed

In [None]:
# Path to the directory containing the dataset
# Root folder of the dataset (Colab-style Drive mount path); the "Train",
# "Validation" and "Test" sub-folders are joined onto it in later cells.
# "Porject" is the folder's actual spelling -- the directory cells at the top
# of the notebook list the same path successfully -- so do not correct it here.
data_dir = "/content/drive/MyDrive/MIS 548 Porject Dataset"

In [None]:
# Data sampling and loading
def sample_data(directory, sample_size):
    """Randomly pick `sample_size` entries from `directory` and return their paths.

    Args:
        directory: folder whose entries are sampled (no filtering by file type).
        sample_size: number of distinct entries to draw.

    Returns:
        List of `sample_size` paths, each `directory` joined with an entry name,
        in the order chosen by `random.sample`.

    Raises:
        ValueError: if `directory` holds fewer than `sample_size` entries
            (or `sample_size` is negative, raised by `random.sample`).
    """
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting first makes the draw depend only on the `random` seed, so a
    # seeded run selects the same files every time.
    files = sorted(os.listdir(directory))
    if sample_size > len(files):
        raise ValueError(
            f"Requested {sample_size} samples but '{directory}' "
            f"contains only {len(files)} entries"
        )
    sampled_files = random.sample(files, sample_size)
    return [os.path.join(directory, file) for file in sampled_files]

def load_data(directory, sample_size):
    """Build a shuffled, class-balanced list of image paths with labels.

    Draws `sample_size // 2` paths from `directory/Real` (label 1) and the same
    number from `directory/Fake` (label 0), so an odd `sample_size` yields
    `sample_size - 1` items. Paths and labels are shuffled together.

    Returns:
        (paths, labels) as two parallel sequences.
    """
    per_class = sample_size // 2
    paths, targets = [], []
    # "Real" is sampled before "Fake", matching the order of the labels.
    for class_name, class_label in (("Real", 1), ("Fake", 0)):
        paths.extend(sample_data(os.path.join(directory, class_name), per_class))
        targets.extend([class_label] * per_class)
    shuffled_paths, shuffled_targets = shuffle(paths, targets)
    return shuffled_paths, shuffled_targets

# Draw class-balanced random subsets via load_data: 10000 paths from "Train"
# and 2000 from "Validation" (half "Real", half "Fake" in each).
train_samples, train_labels = load_data(os.path.join(data_dir, "Train"), 10000)
val_samples, val_labels = load_data(os.path.join(data_dir, "Validation"), 2000)

In [None]:
# Data preprocessing
def preprocess_image(image, target_size=(224, 224)):
    """Decode encoded image bytes and resize them for an EfficientNet backbone.

    Args:
        image: scalar string tensor with the encoded file contents
            (e.g. the result of tf.io.read_file).
        target_size: (height, width) to resize to. Defaults to (224, 224),
            the value this notebook has always used. 224 is the native
            resolution of EfficientNetB0; the Keras documentation lists
            300 for B3, so pass (300, 300) to train at the native size.

    Returns:
        Image tensor of shape (target_size[0], target_size[1], 3).
    """
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, target_size)
    # Per the Keras docs this is a pass-through for EfficientNet (rescaling is
    # part of the model itself); it is kept so the pipeline stays explicit.
    image = tf.keras.applications.efficientnet.preprocess_input(image)
    return image

def load_and_preprocess_image(path, label):
    """Read the file at `path` and return (preprocessed image, label).

    Takes a (path, label) pair, as mapped over a tf.data.Dataset below; the
    label is passed through untouched.
    """
    raw_bytes = tf.io.read_file(path)
    processed = preprocess_image(raw_bytes)
    return processed, label

In [None]:
# Create TensorFlow datasets
# Shuffle the (path, label) pairs *before* decoding. Shuffling after map() with
# buffer_size=len(train_samples) keeps every decoded image in the shuffle
# buffer -- 10000 images of 224x224x3 float32 is roughly 6 GB of RAM -- whereas
# shuffling the path strings is essentially free and still reshuffles the full
# training set each epoch.
train_dataset = tf.data.Dataset.from_tensor_slices((train_samples, train_labels))
train_dataset = train_dataset.shuffle(buffer_size=len(train_samples))
# Decode images in parallel and overlap input preparation with training.
train_dataset = train_dataset.map(load_and_preprocess_image,
                                  num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

# Validation data keeps its order; only parallel decoding and prefetch are added.
val_dataset = tf.data.Dataset.from_tensor_slices((val_samples, val_labels))
val_dataset = val_dataset.map(load_and_preprocess_image,
                              num_parallel_calls=tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

In [None]:
# Define the EfficientNetB3 model architecture:
# ImageNet-pretrained backbone without its classification head, followed by
# global average pooling and one sigmoid unit (1 = Real, 0 = Fake).
base_model = EfficientNetB3(include_top=False, weights='imagenet')
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Compile the model
# The network ends in a single sigmoid unit, hence binary cross-entropy.
# optimizer='adam' selects Adam by name; no custom learning rate is set here.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
# Keep the returned History so per-epoch loss/accuracy can be inspected or
# plotted afterwards (efficientnet_b3_history.history); assigning it also stops
# the bare History repr from being echoed as the cell output.
efficientnet_b3_history = model.fit(train_dataset,
                                    epochs=5,
                                    validation_data=val_dataset)

Epoch 1/5

In [None]:
# Save the model
# Written relative to the current working directory; the ".h5" extension
# selects Keras's HDF5 format.
# NOTE(review): the B1 cell uses a different name style
# ("efficientnetB1_model.h5") -- keep that in mind when loading the files.
model.save("efficientnet_b3_model.h5")

In [None]:
# Evaluate the model on a freshly drawn, class-balanced subset of the Test split
test_samples, test_labels = load_data(os.path.join(data_dir, "Test"), 2000)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_samples, test_labels))
    .map(load_and_preprocess_image)
    .batch(32)
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
loss, accuracy = model.evaluate(test_dataset)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)