In [1]:
import os
import torch
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

import torch.nn as nn 
import torch.optim as optim
from torchvision import models

from sklearn.model_selection import train_test_split

In [5]:
#Path to image folder
current_working_dir = os.getcwd()
image_folder = f"{current_working_dir}/Manipulated"

In [6]:
# transforms.Compase([]) - function that chains multiple image transformations togetther 
# transforms.Resize(()) - resizes the input image to a fixed size 
# transforms.ToTensor() - this converst the image from PIL image/numpy array to a Pytorch Tensor

#So, the entire "transform" object will first resize to 256x256 pixels then convert it to a tensor with pixel values scalled between 0 and 1

transform = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.ToTensor(),
])

In [7]:
#gets the names of the subfolders

# for f in os.listdir(image_folder) - iterates from the folder and assgins f to all the items inside 
# if os.path.isdir(os.path.join(image_folder, f)) - so this is a boolean statement, saying make a path by joining the mainfolder and each item in the folder, we check to see if that joined path is a directory and if it is we return "f" to the list variable (so we get the name of the directory)

subfolders = [f for f in os.listdir(image_folder) if os.path.isdir(os.path.join(image_folder, f))]

In [8]:
print(subfolders)

['1a8jd2', '1a84zh', '1af2jv', '1aeur2', '1a5h1p', '1a5x44', '1acw36', '1a9tss', '1a3oag', '1a69n6', '1aa8xl', '1a4zdz', '1a41rr', '1aeqsl', '1a6upj', '1aafqb', '1ad1a0', '1a16mu', '1a9l4s', '1ac1g7', '1a1ogs', '1aczjh', '1a07yi', '1aa6sn', '1a4dqp']


In [10]:
original_images = []
manipulated_images = []
original_labels = []
manipulated_labels = []

for subfolder in subfolders:
    subfolder_path = os.path.join(image_folder, subfolder)

    #get the original image
    original_image_path = os.path.join(subfolder_path, f"{subfolder}_orig.jpg")

    if os.path.exists(original_image_path):
        original_image = Image.open(original_image_path)
        original_image_tensor = transform(original_image)

        original_images.append(original_image_tensor)
        original_labels.append(0) #Label 0 for the original images
    
        #Now Deal with the Manipulated Images
        manipulated_files = [f for f in os.listdir(subfolder_path) if f.endswith("_0.jpg")]

        #Processing the Manipulated Files
        for manipulated_file in manipulated_files:
            manipulated_file_path = os.path.join(subfolder_path, manipulated_file)

            #Open the image from the path and transform using function
            manipulated_image = Image.open(manipulated_file_path)
            manipulated_image_tensor = transform(manipulated_image)

            manipulated_images.append(manipulated_image_tensor)
            manipulated_labels.append(1)
    else:
        print(f"OG image not found in {subfolder}")


#Stack Tensors like a Matrix 
original_stacked = torch.stack(original_images)
manipulated_stacked = torch.stack(manipulated_images)


#Combine original and manipulated images into one tensor
all_the_images = torch.cat((original_stacked, manipulated_stacked), dim=0)

#Combine original and manipulated labels into one tensor
all_the_labels = torch.tensor(original_labels + manipulated_labels)

train_images, test_images, train_labels, test_labels = train_test_split(all_the_images,all_the_labels, test_size=0.2, random_state=23)

#Make Tensorflow Dataset - creating pairs (original tensor, manipulated tensor)
train_dataset = TensorDataset(train_images,train_labels)
test_dataset = TensorDataset(test_images,test_labels)

#Make DataLoader for batches and shuffling 
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=True)


In [11]:
#Load pretrained Vgg16 model 
model = models.vgg16(weights=True)



In [12]:
# Change the final layer to output 2 classes(manipulated and original)
model.classifier[6] = nn.Linear(model.classifier[6].in_features,2)

In [13]:
#Loss Function and Optimizer 

#Loss Function 
loss_function = nn.CrossEntropyLoss()

#Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [15]:
#Device either to use GPU or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#To use either GPU or CPU
model = model.to(device)

In [16]:
#Training 

total_iterations = 2

for iteration in range(total_iterations):
    #set model to training mode
    model.train() 
    
    #loss across all the batches in each iteration
    running_loss = 0.0
    #number of correction predictions in each iteration
    total_correct = 0
    #number of samples processed in each iteration
    total_processed = 0

    #DataLoader Loop
        # Go through dataloader, get the index of the current batch and the batch data(inputs, labels), and the training data in batches inputs(images) and labels(correct answer)
    for inputs, labels in train_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        #Model Training
        optimizer.zero_grad()  # parameter gradients to zero
        outputs = model(inputs) #forward pass
        max_scores, predicted = torch.max(outputs, 1) #get the predictions
        cal_loss = loss_function(outputs, labels) #calculate the loss
        cal_loss.backward() #backward pass
        optimizer.step() # optimize the weights

        #Updating the running loss, and accuracy
        running_loss += cal_loss.item()
        total_correct += (predicted == labels).sum().item()
        total_processed += labels.size(0)
    
    iteration_loss = running_loss / len(train_dataloader)
    iteration_accuracy = total_correct / total_processed

    print(f"Iteration: {iteration}")


Iteration: 0
Iteration: 1


In [17]:
#Test

model.eval()
number_of_correct = 0
total_test = 0

with torch.no_grad(): #disable gradient computation while testing
    for inputs, labels in test_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        max_scores, predicted = torch.max(outputs, 1)
        number_of_correct += (predicted == labels).sum().item()
        total_test += labels.size(0)

test_accuracy = number_of_correct/total_test
print(test_accuracy)

0.8181818181818182
