<a href="https://colab.research.google.com/github/skj092/Real-vs-Fake-image/blob/main/Fake_Image_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Downloading the Dataset

In [23]:
# %%bash
!mkdir ~/.kaggle

!chmod 600 /root/.kaggle/kaggle.json

!cp kaggle.json ~/.kaggle

!kaggle datasets download -d sophatvathana/casia-dataset

mkdir: cannot create directory ‘/root/.kaggle’: File exists
casia-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [2]:
!unzip -q /content/casia-dataset.zip

# Data Preprocessing

In [14]:
# converting image to ela image
from glob import glob 
from tqdm import tqdm
from PIL import Image, ImageChops, ImageEnhance
import os 

def convert_to_ela_image(image_path, quality=90):
    """Converts an image to an ELA image.
    :param image_path: Path to the image
    :param quality: Quality of the image to be saved
    :return: ELA image
    """
    # Save the image at the given quality
    temp_file = 'temp.jpg'
    im = Image.open(image_path)
    im.save(temp_file, 'JPEG', quality=quality)

    # Open the saved image and the original image
    saved = Image.open(temp_file)
    orignal = Image.open(image_path)

    # Find the absolute difference between the images
    diff = ImageChops.difference(orignal, saved)

    # Normalize the difference by multiplying with a scale factor and convert to grayscale
    extrema = diff.getextrema()
    max_diff = max([ex[1] for ex in extrema])
    scale = 255.0 / max_diff
    diff = ImageEnhance.Brightness(diff).enhance(scale)

    # Remove the temporary file
    os.remove(temp_file)

    return diff

In [19]:
# all iamges
images = glob('/content/CASIA2/*/*.jpg')
print('Total images: ', len(images))

Total images:  9418


In [22]:
# converting images to ela images
for image in tqdm(images):
    # converting image to ela image
    ela_image = convert_to_ela_image(image)
    # creating a new directory for ela images
    if not os.path.exists('/content/CASIA2_ELA'):
      os.mkdir('/content/CASIA2_ELA')
    ela_image.save('/content/CASIA2_ELA/' + image.split('/')[-1])

100%|██████████| 9418/9418 [03:19<00:00, 47.12it/s]


# Creating Custom PyTorch DataLoader

In [80]:
# importing the datasets 
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from glob import glob
from torchvision import transforms
from PIL import Image

# creating a class for the dataset
class CASIA2_ELA(Dataset):
    def __init__(self, images, transform=None):
        self.images = images
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = image.split("\\")[-1].split("_")[0]
        label = 1 if label == "Tp" else 0
        image = Image.open(image)
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.float32)

# testing the dataset
def test():
    all_images = glob("/content/CASIA2_ELA/*.jpg")
    print('total images: ', len(all_images))
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
    dataset = CASIA2_ELA(all_images, transform=transform)
    print(dataset[0][0].shape)
    print(dataset[0][1])

if __name__ == "__main__":
    test()

total images:  9418
torch.Size([3, 224, 224])
tensor(0.)


In [81]:
# from dataset import CASIA2_ELA
from torchvision import models
from torch import nn, optim
import torch 

device = device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

all_images = glob("/content/CASIA2_ELA/*.jpg")
print('total images: ', len(all_images))
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])
dataset = CASIA2_ELA(all_images, transform=transform)
# split the dataset into train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# create the dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

total images:  9418


**Testing Dataloader**

In [82]:
for xb, yb in train_loader:
  print(xb.shape, yb.shape)
  break 

torch.Size([32, 3, 224, 224]) torch.Size([32])


# Model

In [83]:
# create the model
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(512, 1)
model = model.to(device)

# Training Function

In [None]:
# train the model
def train(model, train_loader, test_loader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for batch in train_loader:
            images, labels = batch
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels.unsqueeze(1))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        print(f"Epoch: {epoch+1}/{epochs}.. Train loss: {train_loss:.3f}")

        model.eval()
        test_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                images, labels = batch
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels.unsqueeze(1))
                test_loss += loss.item()
        test_loss /= len(test_loader)
        print(f"Epoch: {epoch+1}/{epochs}.. Test loss: {test_loss:.3f}")
    
    return model

In [86]:
# define the loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# model = train(model, train_loader, test_loader, criterion, optimizer, epochs=10)

# # save the model
# torch.save(model.state_dict(), "model.pth")

# # load the model
# model.load_state_dict(torch.load("model.pth"))

**Testing training loop**

In [96]:
for xb, yb in train_loader:
  xb, yb = xb.to(device), yb.to(device)
  print(xb.shape, yb.shape)
  yb_ = model(xb)
  loss = criterion(yb_, yb.unsqueeze(1))
  loss.backward()
  batch_acc = accuracy_score(yb.cpu().detach().numpy(), yb_.cpu().detach().numpy().round())
  print('loss', loss)
  print('acc', batch_acc)
  break

torch.Size([32, 3, 224, 224]) torch.Size([32])
loss tensor(0., device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
acc 0.0


In [89]:
from sklearn.metrics import accuracy_score
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    train_loss, val_loss, train_acc, val_acc = [], [], [], []
    for epoch in range(num_epochs):
        model.train()
        for xb, yb in tqdm(train_loader):
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            yb_ = model(xb)
            loss = criterion(yb_, yb.unsqueeze(1))
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
            batch_acc = accuracy_score(yb.cpu().detach().numpy(), yb_.cpu().detach().numpy().round())
            train_acc.append(batch_acc)
        model.eval()
        with torch.no_grad():
            for xb, yb in tqdm(val_loader):
                xb, yb = xb.to(device), yb.to(device)
                yb_ = model(xb)
                loss = criterion(yb_, yb.unsqueeze(1))
                val_loss.append(loss.item())
                batch_acc = accuracy_score(yb.cpu().detach().numpy(), yb_.cpu().detach().numpy().round())
                val_acc.append(batch_acc)
        print(f'Epoch: {epoch+1}, Train Loss: {torch.tensor(train_loss).mean():.4f}, Train Accuracy: {torch.tensor(train_acc).mean():.4f}, Val Loss: {torch.tensor(val_loss).mean():.4f}, Val Accuracy: {torch.tensor(val_acc).mean():.4f}')
      

In [91]:
train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=10, device=device)

100%|██████████| 236/236 [00:53<00:00,  4.44it/s]
100%|██████████| 59/59 [00:10<00:00,  5.52it/s]


Epoch: 1, Train Loss: 0.0000, Train Accuracy: 0.0000, Val Loss: 0.0000, Val Accuracy: 0.0000


100%|██████████| 236/236 [00:51<00:00,  4.58it/s]
100%|██████████| 59/59 [00:09<00:00,  6.43it/s]


Epoch: 2, Train Loss: 0.0000, Train Accuracy: 0.0000, Val Loss: 0.0000, Val Accuracy: 0.0000


 59%|█████▉    | 139/236 [00:30<00:21,  4.62it/s]


KeyboardInterrupt: ignored