In [None]:
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/
!git clone https://{token}@github.com/kytomic/fake-image-classification.git
# Please Enter Your GitHub Token for Cloning the Project
token = ''


In [None]:
# Mount Google Drive at /content/drive so files under MyDrive are reachable.
# NOTE(review): this repeats the mount from the first cell — presumably so the
# clone cell can be skipped in later sessions; confirm before removing.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Switch the working directory to the cloned project so that the relative paths
# used later in the notebook ("data/", "models") resolve inside the repository.
%cd /content/drive/MyDrive/fake-image-classification/

In [None]:
import torch
from torch import nn
import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

In [None]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

# Data Preparation
## real vs fake images (casia dataset)
https://www.kaggle.com/code/shaft49/real-vs-fake-images-casia-dataset

Please download the dataset from the link on the Discord forum, as there are requirements for the file structure.

In [None]:
import requests
import zipfile
from pathlib import Path

# Dataset root, relative to the current working directory
data_path = Path("data/")
image_path = data_path.joinpath("real_and_fake_face")

# Report whether the expected dataset folder is present
print(
    f"{image_path} directory exists."
    if image_path.is_dir()
    else f"Did not find {image_path} directory"
)

In [None]:
# The training and test splits are sibling sub-folders of the dataset root
train_dir, test_dir = image_path / "training", image_path / "test"

train_dir, test_dir

In [None]:
import random
from PIL import Image

# Fix the RNG so the same sample image is picked on every run
random.seed(42)

# Candidate files: every .jpg that sits two folder levels below the dataset root
image_path_list = list(image_path.glob("*/*/*.jpg"))
random_image_path = random.choice(image_path_list)

# The class label is taken from the name of the folder holding the image
image_class = random_image_path.parent.stem
img = Image.open(random_image_path)

# Summarise the sample, then display it as the cell output
print(
    f"Random image path: {random_image_path}",
    f"Image class: {image_class}",
    f"Image height: {img.height}",
    f"Image width: {img.width}",
    sep="\n",
)
img

In [None]:
# Preprocessing applied to every image: resize to 256x256, then convert to a tensor
data_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ]
)

In [None]:
# Build both datasets from their folders; they share one preprocessing pipeline
# and leave the targets untransformed.
train_data = datasets.ImageFolder(train_dir, transform=data_transform)
test_data = datasets.ImageFolder(test_dir, transform=data_transform)

In [None]:
# Class labels as exposed by the training ImageFolder dataset; their count is
# used as the model's output size when the model is instantiated.
class_names = train_data.classes
class_names

In [None]:
# Batched loaders (32 images per batch, one worker process each).
# Only the training loader shuffles; the test loader keeps the dataset order.
train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=1)

test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=1)

# Model Creation


In [None]:
class FakeImageModel(nn.Module):
  """Small two-block convolutional classifier.

  Each block is two 3x3, stride-1, padding-1 convolutions with ReLU followed
  by a 2x2 max-pool, so the spatial size is halved once per block. The result
  is flattened and mapped to the output logits by a single linear layer.

  Args:
    input_shape: Number of channels of the input images.
    hidden_units: Number of channels produced by every convolution.
    output_shape: Number of output logits.

  Note:
    The classifier's ``in_features`` is hard-coded to ``hidden_units*64*64``,
    which matches 256x256 inputs only (256 / 2 / 2 = 64).
  """

  def __init__(self, input_shape, hidden_units, output_shape):
    super().__init__()
    self.block_1 = nn.Sequential(
        nn.Conv2d(in_channels=input_shape, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2, 2)
    )
    self.block_2 = nn.Sequential(
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2, 2)
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_units*64*64, out_features=output_shape)
    )

  def xavier_initialization(self, m):
    """Initialise one module in place; only ``nn.Conv2d`` modules are touched.

    Convolution weights get Xavier-uniform values and biases are set to 0.01.
    Intended to be passed to ``nn.Module.apply``.

    Args:
      m: The module to (possibly) initialise.
    """
    if isinstance(m, nn.Conv2d):
      # Use the in-place `_` variant; the un-suffixed name is deprecated.
      nn.init.xavier_uniform_(m.weight)
      # Convolutions created with bias=False have no bias tensor to fill.
      if m.bias is not None:
        nn.init.constant_(m.bias, 0.01)

  def init_weights(self):
    """Apply ``xavier_initialization`` to both convolutional blocks.

    The linear classifier keeps PyTorch's default initialisation.
    """
    self.block_1.apply(self.xavier_initialization)
    self.block_2.apply(self.xavier_initialization)

  def forward(self, x):
    """Run the two convolutional blocks and the classifier on a batch.

    Args:
      x: Image batch; the classifier size assumes (N, input_shape, 256, 256).

    Returns:
      Tensor of logits with ``output_shape`` values per sample.
    """
    out = self.block_1(x)
    out = self.block_2(out)
    out = self.classifier(out)

    return out

# Seed first so the freshly constructed weights are reproducible
torch.manual_seed(42)
model = FakeImageModel(
    input_shape=3,                  # input channels
    hidden_units=16,
    output_shape=len(class_names),  # one logit per class
)
model.init_weights()
model

In [None]:
class FakeImageModel(nn.Module):
  """Convolutional classifier with skip connections and strided downsampling.

  Layers:
    * ``block_1``: two 3x3 same-size convolutions, each followed by LeakyReLU.
    * ``block_2``: conv -> LeakyReLU -> conv (no trailing activation).
    * ``block_3``: LeakyReLU -> 2x2 stride-2 convolution (halves the spatial
      size) -> LeakyReLU.
    * ``classifier``: flatten + one linear layer.

  ``forward`` applies ``block_2`` followed by ``block_3`` three times, so the
  same weights are reused in every stage and the spatial size is divided by 8.

  Args:
    input_shape: Number of channels of the input images.
    hidden_units: Number of channels produced by every convolution.
    output_shape: Number of output logits.

  Note:
    The classifier's ``in_features`` is hard-coded to ``hidden_units*32*32``,
    which matches 256x256 inputs only (256 / 2 / 2 / 2 = 32).
  """

  def __init__(self, input_shape, hidden_units, output_shape):
    super().__init__()
    self.block_1 = nn.Sequential(
        nn.Conv2d(in_channels=input_shape, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
        nn.LeakyReLU(),
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
        nn.LeakyReLU(),
    )
    # No activation after the second convolution: the skip connection is added
    # first, and block_3 starts with the LeakyReLU.
    self.block_2 = nn.Sequential(
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
        nn.LeakyReLU(),
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
    )
    self.block_3 = nn.Sequential(
        nn.LeakyReLU(),
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=2, stride=2),
        nn.LeakyReLU(),
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_units*32*32, out_features=output_shape)
    )

  def xavier_initialization(self, m):
    """Initialise one module in place; only ``nn.Conv2d`` modules are touched.

    Convolution weights get Xavier-uniform values and biases are set to 0.01.
    Intended to be passed to ``nn.Module.apply``.

    Args:
      m: The module to (possibly) initialise.
    """
    if isinstance(m, nn.Conv2d):
      # Use the in-place `_` variant; the un-suffixed name is deprecated.
      nn.init.xavier_uniform_(m.weight)
      # Convolutions created with bias=False have no bias tensor to fill.
      if m.bias is not None:
        nn.init.constant_(m.bias, 0.01)

  def init_weights(self):
    """Apply ``xavier_initialization`` to the three convolutional blocks.

    The linear classifier keeps PyTorch's default initialisation.
    """
    self.block_1.apply(self.xavier_initialization)
    self.block_2.apply(self.xavier_initialization)
    self.block_3.apply(self.xavier_initialization)

  def forward(self, x):
    """Compute class logits for a batch of images.

    Args:
      x: Image batch; the classifier size assumes (N, input_shape, 256, 256).

    Returns:
      Tensor of logits with ``output_shape`` values per sample.
    """
    out = self.block_1(x)

    # Stage 1: skip connection around block_2, then downsample with block_3.
    residual = out
    out = self.block_2(out)
    out += residual
    out = self.block_3(out)

    # Stage 2: the same two blocks again, this time without a skip connection.
    # NOTE(review): stages 1 and 3 add the residual but this one does not —
    # confirm that the asymmetry is intentional.
    out = self.block_2(out)
    out = self.block_3(out)

    # Stage 3: skip connection around block_2, then the final downsample.
    residual = out
    out = self.block_2(out)
    out += residual
    out = self.block_3(out)

    out = self.classifier(out)

    return out

# Seed first so the freshly constructed weights are reproducible
torch.manual_seed(42)
model = FakeImageModel(
    input_shape=3,                  # input channels
    hidden_units=10,
    output_shape=len(class_names),  # one logit per class
)
model.init_weights()
model

# Training and Evaluation

In [None]:
def accuracy(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their label.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels, compared element-wise with ``y_true``.

    Returns:
        The number of matching positions divided by ``len(y_pred)``, times 100.
    """
    n_matches = torch.eq(y_true, y_pred).sum().item()
    return n_matches / len(y_pred) * 100

In [None]:
# Loss function and optimisation algorithm used by the training loop
loss_fn = nn.CrossEntropyLoss()
# NOTE(review): 0.1 is far above Adam's default learning rate (1e-3) — confirm it is intended
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.1)

In [None]:
# Training Loop
from tqdm.auto import tqdm
torch.manual_seed(42)
epochs = 10

# Batches are moved to wherever the model's parameters live, so the loop runs
# unchanged whether or not the model has been sent to the GPU.
model_device = next(model.parameters()).device

for epoch in tqdm(range(epochs)):
  print(f'Epoch: {epoch} -----')
  train_loss, train_acc = 0, 0

  # Training mode is set once per epoch; the evaluation pass at the end of the
  # previous epoch leaves the model in eval mode.
  model.train()
  for batch, (X, y) in enumerate(train_dataloader):
    X, y = X.to(model_device), y.to(model_device)

    # Forward pass and per-batch metrics
    y_pred = model(X)
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()
    train_acc += accuracy(y_true=y, y_pred=y_pred.argmax(dim=1))

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 400 == 0:
      print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")

  # Metrics are averaged over batches (not over individual samples)
  train_loss /= len(train_dataloader)
  train_acc /= len(train_dataloader)
  print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%\n")

  # Evaluate on the test set after every epoch, without tracking gradients
  model.eval()
  with torch.inference_mode():
    test_loss, test_acc = 0.0, 0.0

    for X, y in test_dataloader:
      X, y = X.to(model_device), y.to(model_device)
      test_pred = model(X)
      test_loss += loss_fn(test_pred, y).item()
      test_acc += accuracy(y_true=y, y_pred=test_pred.argmax(dim=1))

    test_loss /= len(test_dataloader)
    test_acc /= len(test_dataloader)
    print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

In [None]:
# Stand-alone evaluation of the current model on the test set
model.eval()
# Batches follow the model's parameters, so this works on CPU and GPU alike
model_device = next(model.parameters()).device
with torch.inference_mode():
  test_loss, test_acc = 0.0, 0.0
  for X, y in test_dataloader:
    X, y = X.to(model_device), y.to(model_device)
    test_pred = model(X)
    # .item() keeps the running loss a plain Python float instead of a tensor
    test_loss += loss_fn(test_pred, y).item()
    test_acc += accuracy(y_true=y, y_pred=test_pred.argmax(dim=1))

  # Metrics are averaged over batches (not over individual samples)
  test_loss /= len(test_dataloader)
  test_acc /= len(test_dataloader)
  print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

In [None]:
from pathlib import Path

# Checkpoints go into ./models, created on demand (missing parents included)
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Full path of the checkpoint file
MODEL_NAME = "fake-image-model.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# Persist the model's state dict rather than the module object itself
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model.state_dict(), MODEL_SAVE_PATH)

In [None]:
# Note: loading model will error if the shapes here aren't the same as the saved version
loaded_model = FakeImageModel(input_shape=3, hidden_units=10, output_shape=len(class_names))

# Load in the saved state_dict(). map_location remaps the stored tensors onto
# `device`, so a checkpoint written from a GPU run still loads on a CPU-only runtime.
loaded_model.load_state_dict(torch.load(f=MODEL_SAVE_PATH, map_location=device))

# Send model to the selected device (GPU when available, otherwise CPU)
loaded_model = loaded_model.to(device)

In [None]:
# Show the git working-tree status for the repository in the current directory
!git status