In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit

import glob
import os

import librosa
import librosa.display


import torch
from torch import nn
from torchvision import models, transforms, datasets
from torchvision.models import ResNet18_Weights

from time import time
from tqdm import tqdm

In [None]:
# --- Reproducibility -------------------------------------------------------
# Only NumPy's global RNG is seeded here.
seed = 12
np.random.seed(seed)

# --- Spectrogram image roots, one directory per dataset variant -------------
path_imgs1 = Path('./BinaryClassifiedSpectrograms/')
path_imgs2 = Path('./PitchShiftedBinaryClassifiedSpectrograms/')
path_imgs3 = Path('./TempoShiftedBinaryClassifiedSpectrograms/')
path_imgs4 = Path('./TempoAndPitchShiftedBinaryClassifiedSpectrograms/')

# --- Hyper-parameters -------------------------------------------------------
batch_size = 32

# NOTE(review): presumably the STFT settings used to generate the spectrograms;
# neither value is referenced anywhere else in the visible notebook - confirm.
hop_length, n_fft = 512, 2048

# --- Compute device: use the GPU when one is visible, otherwise the CPU ------
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(device)

In [None]:
# Force a full garbage-collection pass before the datasets are loaded.
# gc.collect() is the cell's last expression, so its return value (the number
# of unreachable objects found) is shown as the cell output.
import gc
gc.collect()

In [None]:
%%time

# Define transforms
def _tensor_then_normalize():
    """Build the preprocessing pipeline: tensor conversion, then per-channel normalization."""
    return transforms.Compose([
        # transforms.Resize(224),
        transforms.ToTensor(),

        # Resnet18_Weights.DEFAULT mean and std
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])


# Train and test pipelines are currently the same, but are kept as separate objects.
train_transforms = _tensor_then_normalize()
test_transforms = _tensor_then_normalize()


def _three_views(root):
    """Open one image folder three times: (train view, validation view, test view)."""
    return (
        datasets.ImageFolder(root, transform=train_transforms),
        datasets.ImageFolder(root, transform=test_transforms),
        datasets.ImageFolder(root, transform=test_transforms),
    )


# Load the data
train_dataset1, val_dataset1, test_dataset1 = _three_views(path_imgs1)
train_dataset2, val_dataset2, test_dataset2 = _three_views(path_imgs2)
train_dataset3, val_dataset3, test_dataset3 = _three_views(path_imgs3)
train_dataset4, val_dataset4, test_dataset4 = _three_views(path_imgs4)

# Seed torch once, then draw the four permutations in a fixed order (1, 2, 3, 4)
# so every split is reproducible.
torch.manual_seed(1)
num_train_samples1 = len(train_dataset1)
num_train_samples2 = len(train_dataset2)
num_train_samples3 = len(train_dataset3)
num_train_samples4 = len(train_dataset4)

# Permute the data
indices1 = torch.randperm(num_train_samples1)
indices2 = torch.randperm(num_train_samples2)
indices3 = torch.randperm(num_train_samples3)
indices4 = torch.randperm(num_train_samples4)

# Split the data into train / validation / test.
# `train_testval_split` is the held-out share (validation + test together).
train_testval_split = 0.2


def _cut_points(num_samples):
    """Return (held-out size, test size); the test size is half of the held-out size."""
    held_out = int(num_samples * train_testval_split)
    return held_out, int(held_out * 0.5)


train_split1, val_split1 = _cut_points(num_train_samples1)
train_split2, val_split2 = _cut_points(num_train_samples2)
train_split3, val_split3 = _cut_points(num_train_samples3)
train_split4, val_split4 = _cut_points(num_train_samples4)


def _carve(views, order, held_out, test_size):
    """Slice one permutation into (train, validation, test) subsets.

    The permutation is laid out as [test | validation | train]: the first
    `test_size` indices go to test, the indices up to `held_out` go to
    validation, and everything after `held_out` goes to train.
    """
    train_view, val_view, test_view = views
    make_subset = torch.utils.data.Subset
    return (
        make_subset(train_view, order[held_out:]),
        make_subset(val_view, order[test_size:held_out]),
        make_subset(test_view, order[:test_size]),
    )


train_subset1, val_subset1, test_subset1 = _carve(
    (train_dataset1, val_dataset1, test_dataset1), indices1, train_split1, val_split1)
train_subset2, val_subset2, test_subset2 = _carve(
    (train_dataset2, val_dataset2, test_dataset2), indices2, train_split2, val_split2)
train_subset3, val_subset3, test_subset3 = _carve(
    (train_dataset3, val_dataset3, test_dataset3), indices3, train_split3, val_split3)
train_subset4, val_subset4, test_subset4 = _carve(
    (train_dataset4, val_dataset4, test_dataset4), indices4, train_split4, val_split4)

print(f"Length of Train1:{len(train_subset1)}; Length of Val:{len(val_subset1)}; Length of Test:{len(test_subset1)}")
print(f"Length of Train2:{len(train_subset2)}; Length of Val:{len(val_subset2)}; Length of Test:{len(test_subset2)}")
print(f"Length of Train3:{len(train_subset3)}; Length of Val:{len(val_subset3)}; Length of Test:{len(test_subset3)}")
print(f"Length of Train4:{len(train_subset4)}; Length of Val:{len(val_subset4)}; Length of Test:{len(test_subset4)}")


# Make DataLoaders
def _shuffling_loader(subset):
    """Wrap a subset in a shuffling DataLoader that uses the notebook-wide batch size."""
    return torch.utils.data.DataLoader(dataset=subset, batch_size=batch_size, shuffle=True)


train_dataloader1 = _shuffling_loader(train_subset1)
val_dataloader1 = _shuffling_loader(val_subset1)

train_dataloader2 = _shuffling_loader(train_subset2)
val_dataloader2 = _shuffling_loader(val_subset2)

train_dataloader3 = _shuffling_loader(train_subset3)
val_dataloader3 = _shuffling_loader(val_subset3)

train_dataloader4 = _shuffling_loader(train_subset4)
val_dataloader4 = _shuffling_loader(val_subset4)

# Classes (folder names found by ImageFolder). Each train loader wraps a Subset
# of the matching train dataset, so these are the same lists the loaders expose.
classes1 = train_dataset1.classes
classes2 = train_dataset2.classes
classes3 = train_dataset3.classes
classes4 = train_dataset4.classes


In [None]:
# Start from a ResNet18 carrying the default pretrained weights.
resnet = models.resnet18(weights=ResNet18_Weights.DEFAULT)

# Freeze every backbone parameter in a single call.
resnet.requires_grad_(False)

# Replace the final fully connected layer with a fresh head sized to the
# dataset's class count. A newly built nn.Linear is trainable by default.
in_features = resnet.fc.in_features
fc = nn.Linear(in_features=in_features, out_features=len(classes2))
resnet.fc = fc

# Only parameters that still require gradients (the new head's weight and
# bias) are handed to the optimizer.
params_to_update = [
    tensor for _, tensor in resnet.named_parameters() if tensor.requires_grad
]

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params_to_update, lr=0.001)

In [None]:
def _evaluate(model, criterion, dataloader):
    """Run one gradient-free pass over `dataloader`.

    The model is switched to eval mode for the pass and put back into whatever
    mode it was in before, so calling this in the middle of a training epoch
    does not leave the model in eval mode.

    Returns:
        (loss_sum, correct, total): the sum of per-batch losses, the number of
        correctly classified samples and the number of samples seen.
    """
    was_training = model.training
    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for val_images, val_labels in dataloader:
            val_images = val_images.to(device)
            val_labels = val_labels.to(device)
            val_output = model(val_images)
            loss_sum += criterion(val_output, val_labels).item()
            correct += (val_output.argmax(dim=1) == val_labels).sum().item()
            total += val_labels.size(0)
    model.train(was_training)
    return loss_sum, correct, total


def train(model, criterion, optimizer, train_dataloader, test_dataloader, print_every,num_epoch):
    """Train `model`, validating and logging every `print_every` optimizer steps.

    Args:
        model: network to train; it is moved to the notebook-level `device`.
        criterion: loss called as `criterion(output, labels)`.
        optimizer: optimizer that must already hold the parameters to update.
        train_dataloader: iterable of `(images, labels)` training batches.
        test_dataloader: iterable of `(images, labels)` validation batches.
        print_every: positive int; a validation pass and a log line are
            produced whenever the global step count is a multiple of it.
        num_epoch: number of passes over `train_dataloader`.

    Returns:
        `(model, train_losses, val_losses)`. Both lists get one entry per
        logging step: the accumulated batch loss divided by the number of
        samples seen (the same normalisation for train and validation).

    Side effects:
        Prints progress and, after every epoch, pickles the whole model to
        `base_checkpoint_<validation accuracy>` in the working directory.
    """
    steps = 0
    train_losses, val_losses = [], []

    model.to(device)
    for epoch in tqdm(range(num_epoch)):
        running_loss = 0
        correct_train = 0
        total_train = 0
        # Validation accuracy of the most recent evaluation in THIS epoch.
        val_accuracy = None
        start_time = time()
        iter_time = time()

        model.train()
        for i, (images, labels) in enumerate(train_dataloader):
            steps += 1
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            output = model(images)
            loss = criterion(output, labels)

            # .item() keeps the counters as plain Python numbers instead of
            # accumulating tensors on the device.
            correct_train += (output.argmax(dim=1) == labels).sum().item()
            total_train += labels.size(0)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Logging
            if steps % print_every == 0:
                print(f'Epoch [{epoch + 1}]/[{num_epoch}]. Batch [{i + 1}]/[{len(train_dataloader)}].', end=' ')
                # running_loss is reset every epoch, so average over the
                # batches of this epoch, not over the cumulative step count.
                print(f'Train loss {running_loss / (i + 1):.3f}.', end=' ')
                print(f'Train acc {correct_train / total_train * 100:.3f}.', end=' ')

                val_loss, correct_val, total_val = _evaluate(model, criterion, test_dataloader)
                # max(..., 1) guards against an empty validation loader.
                val_accuracy = correct_val / max(total_val, 1) * 100

                print(f'Val loss {val_loss / max(len(test_dataloader), 1):.3f}. Val acc {val_accuracy:.3f}.', end=' ')
                print(f'Took {time() - iter_time:.3f} seconds')
                iter_time = time()

                train_losses.append(running_loss / total_train)
                val_losses.append(val_loss / max(total_val, 1))

        print(f'Epoch took {time() - start_time}')

        # If no logging step fell inside this epoch there is no fresh
        # validation accuracy to name the checkpoint with, so compute one.
        if val_accuracy is None:
            _, correct_val, total_val = _evaluate(model, criterion, test_dataloader)
            val_accuracy = correct_val / max(total_val, 1) * 100
        torch.save(model, f'base_checkpoint_{val_accuracy:.2f}')

    return model, train_losses, val_losses

In [None]:
# Restore the weights from the baseline training run.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
resnet.load_state_dict(
    torch.load('base_final_state_dict', map_location=device, weights_only=True)
)
resnet.eval()

In [None]:
# Logging cadence (in optimizer steps) and number of passes over the data.
print_every = 25
num_epoch = 20

# Train on the pitch-shifted spectrograms; arguments follow train()'s
# positional order: model, criterion, optimizer, train loader, validation
# loader, print_every, num_epoch.
resnet, train_losses2, val_losses2 = train(
    resnet,
    criterion,
    optimizer,
    train_dataloader2,
    val_dataloader2,
    print_every,
    num_epoch,
)

# One point per logging step for each curve.
for curve, caption in ((train_losses2, 'Training loss'), (val_losses2, 'Validation loss')):
    plt.plot(curve, label=caption)
plt.legend(frameon=False)
plt.show()


In [None]:
# Persist only the weights (state dict) of the pitch-shifted stage.
torch.save(obj=resnet.state_dict(), f='pitch_shifted_final_state_dict')

In [None]:
# Warm-start from the weights of the pitch-shifted stage.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
resnet.load_state_dict(
    torch.load('pitch_shifted_final_state_dict', map_location=device, weights_only=True)
)
resnet.eval()

# Freeze every parameter currently in the network (including the old head).
for parameter in resnet.parameters():
    parameter.requires_grad = False

# Number of input features of the last fully connected layer
in_features = resnet.fc.in_features

# Replace the last fully connected layer with a fresh, trainable head.
# NOTE(review): this discards the head trained in the previous stage - confirm
# that restarting the classifier from random weights is intended.
fc = nn.Linear(in_features=in_features, out_features=len(classes3))
resnet.fc = fc

# Collect the parameters that still require gradients (the new head only).
params_to_update = [param for param in resnet.parameters() if param.requires_grad]

# The optimizer from the previous stage holds references to the parameters of
# the head that was just replaced; the new layer is unknown to it and would
# never be updated. Build a new optimizer over the new parameters.
optimizer = torch.optim.Adam(params_to_update, lr=0.001)

In [None]:
# Logging cadence (in optimizer steps) and number of passes over the data.
print_every = 25
num_epoch = 20

# Train on the tempo-shifted spectrograms. The loss histories are bound to the
# stage-3 names that the plot below reads; binding them to the stage-2 names
# raised a NameError at plot time and overwrote the stage-2 curves.
resnet, train_losses3, val_losses3 = train(
    model=resnet,
    criterion=criterion,
    optimizer=optimizer,
    train_dataloader=train_dataloader3,
    test_dataloader=val_dataloader3,
    print_every=print_every,
    num_epoch=num_epoch
)


# One point per logging step for each curve.
plt.plot(train_losses3, label='Training loss')
plt.plot(val_losses3, label='Validation loss')
plt.legend(frameon=False)
plt.show()


In [None]:
# Persist only the weights (state dict) of the tempo-shifted stage.
torch.save(obj=resnet.state_dict(), f='tempo_shifted_final_state_dict')

In [None]:
# Warm-start from the weights of the tempo-shifted stage.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
resnet.load_state_dict(
    torch.load('tempo_shifted_final_state_dict', map_location=device, weights_only=True)
)
resnet.eval()

# Freeze every parameter currently in the network (including the old head).
for parameter in resnet.parameters():
    parameter.requires_grad = False

# Number of input features of the last fully connected layer
in_features = resnet.fc.in_features

# Replace the last fully connected layer with a fresh, trainable head.
# NOTE(review): this discards the head trained in the previous stage - confirm
# that restarting the classifier from random weights is intended.
fc = nn.Linear(in_features=in_features, out_features=len(classes4))
resnet.fc = fc

# Collect the parameters that still require gradients (the new head only).
params_to_update = [param for param in resnet.parameters() if param.requires_grad]

# The optimizer from the previous stage holds references to the parameters of
# the head that was just replaced; the new layer is unknown to it and would
# never be updated. Build a new optimizer over the new parameters.
optimizer = torch.optim.Adam(params_to_update, lr=0.001)

In [None]:
# Logging cadence (in optimizer steps) and number of passes over the data.
print_every = 25
num_epoch = 20

# Train on the tempo-and-pitch-shifted spectrograms. The loss histories are
# bound to the stage-4 names that the plot below reads; binding them to the
# stage-2 names raised a NameError at plot time and overwrote earlier curves.
resnet, train_losses4, val_losses4 = train(
    model=resnet,
    criterion=criterion,
    optimizer=optimizer,
    train_dataloader=train_dataloader4,
    test_dataloader=val_dataloader4,
    print_every=print_every,
    num_epoch=num_epoch
)


# One point per logging step for each curve.
plt.plot(train_losses4, label='Training loss')
plt.plot(val_losses4, label='Validation loss')
plt.legend(frameon=False)
plt.show()


In [None]:
# Persist only the weights (state dict) of the tempo-and-pitch-shifted stage.
torch.save(obj=resnet.state_dict(), f='tempo_and_pitch_shifted_final_state_dict')

In [None]:
# Restore the weights of the final (tempo-and-pitch-shifted) stage.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
resnet.load_state_dict(
    torch.load('tempo_and_pitch_shifted_final_state_dict', map_location=device, weights_only=True)
)
resnet.eval()

In [None]:
pred_classes = ["deepfake", "human"]

# Ground-truth and predicted class names, one entry per test sample.
y_test = []
y_pred = []

# The inputs are moved to `device` below, so the model has to be there too;
# this matters when the weights were loaded without running train() first.
resnet.to(device)
# Inference mode is set once, outside the loop.
resnet.eval()

# No gradients are needed for evaluation; skipping autograd bookkeeping saves
# memory and time on every forward pass.
with torch.no_grad():
    for img, label in test_subset4:
        # `img` is already a tensor produced by the dataset transform;
        # [None] adds the leading batch dimension the model expects.
        prediction = resnet(img.to(device)[None])

        # Index of the highest logit as a plain int, mapped to its class name.
        final_pred = classes4[prediction.argmax(dim=1).item()]

        print(classes4[label], final_pred)

        y_test.append(classes4[label])
        y_pred.append(final_pred)

In [None]:
# Element-wise comparison of true and predicted class names; the share of
# matches is reported as a percentage.
matches = np.array(y_test) == np.array(y_pred)
print("Accuracy:", (100 * matches.sum() / len(y_test)))

In [None]:
from sklearn.metrics import f1_score

# F1 score with "human" treated as the positive class; as the cell's last
# expression the value is shown as the cell output.
f1_score(y_true=y_test, y_pred=y_pred, pos_label="human")

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Rows are true classes, columns are predictions, both ordered
# ["deepfake", "human"], so "human" is the positive class.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred, labels=["deepfake", "human"])

# Unpack the 2x2 matrix row by row: first row is (tn, fp), second is (fn, tp).
(tn, fp), (fn, tp) = cm

cmd = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["deepfake", "human"])
cmd.plot()
plt.show()

# Text rendering of the same four cells.
print("True Negative: ", tn, end= '\t|\t')
print("False Positive: ", fp)
print("-" * 55)
print("False Negative: ", fn, end='\t|\t')
print("True Positive: ", tp)

In [None]:
# Totals of actually-positive and actually-negative samples, taken from the
# confusion-matrix cells.
actual_positives = tp + fn
actual_negatives = fp + tn

TPR = tp / actual_positives          # True Positive Rate / recall / sensitivity
FPR = fp / actual_negatives          # False Positive Rate / fall-out
PPV = tp / (tp + fp)                 # Positive Predictive Value / precision
Specificity = tn / actual_negatives  # True Negative Rate

# Printed in the order: TPR, FPR, PPV, Specificity.
for rate in (TPR, FPR, PPV, Specificity):
    print(rate)