In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix

# Seed PyTorch's global RNG first, then pick the compute device; the CUDA
# generator is only seeded when a GPU is actually present.
torch.manual_seed(55)
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.manual_seed(55)
else:
    device = 'cpu'


from sklearn.model_selection import train_test_split

class CSVDataset(Dataset):
    """Map-style dataset over a DataFrame whose last column is the label.

    Every column except the last is used as a feature vector; the last column
    is the target. Items come back as float32 tensors already allocated on
    ``device``, so consumers can feed them to the model without moving them.

    Args:
        data: pandas DataFrame of numeric feature columns followed by one
            label column.
        device: Device on which returned tensors are created. When omitted,
            CUDA is used if available and CPU otherwise.
    """

    def __init__(self, data, device=None):
        if device is None:
            # Same selection rule the notebook uses for its global device.
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = device
        # Cast to float32 once here rather than on every __getitem__ call.
        self.X = data.iloc[:, :-1].to_numpy(dtype="float32")
        self.y = data.iloc[:, -1].to_numpy(dtype="float32")

    def __len__(self):
        """Number of rows (samples) in the underlying frame."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for ``idx`` as float32 tensors on ``self.device``."""
        X = torch.tensor(self.X[idx], dtype=torch.float32, device=self.device)
        y = torch.tensor(self.y[idx], dtype=torch.float32, device=self.device)
        return X, y


# Load the full table from disk.
csv_file ="BALANCED-DATA.csv"
data = pd.read_csv(csv_file)

# 80/10/10 split: hold out 20 % first, then halve the hold-out into
# validation and test. Both splits use the same fixed random_state.
train_data, temp_data = train_test_split(data, test_size=0.2, shuffle=True, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, shuffle=True, random_state=42)

print(f"Train size: {len(train_data)}, Validation size: {len(val_data)}, Test size: {len(test_data)}")

# Wrap each split in a dataset (train, then validation, then test).
train_dataset, val_dataset, test_dataset = (
    CSVDataset(split) for split in (train_data, val_data, test_data)
)

batch_size = 128
# Only the training loader shuffles; evaluation loaders keep a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=batch_size)
    for split in (val_dataset, test_dataset)
)

In [3]:
from deepFake_model import DeepFakeDetectionModel

# Build the network and move it to the selected device in one expression.
model = DeepFakeDetectionModel(
    n_layers=5,
    n_hidden=256,
    bias=True,
    drop_out=0.1,
).to(device)

In [6]:
from torch import optim
import torch.nn as nn

# Binary cross-entropy on the model's outputs, optimised with AdamW.
criterion = nn.BCELoss()
lr = 1e-3  # initial learning rate handed to the optimizer
optimizer = optim.AdamW(params=model.parameters(), lr=lr)

In [None]:
num_epochs = 1_000
confusion_interval = 100  # compute train/validation confusion matrices every N epochs


def _batch_confusion(targets, outputs):
    """Return the 2x2 confusion matrix of one batch, thresholding outputs at 0.5.

    ``labels=[0, 1]`` pins the result to 2x2 even when a batch happens to
    contain a single class; without it sklearn returns a 1x1 matrix that
    silently broadcasts into all four cells of the running total.
    Rows are true labels, columns are predicted labels.
    """
    # Flatten so a (batch, 1) or a (batch,) output both line up with the targets.
    preds = (outputs.detach().reshape(-1) > 0.5).cpu().numpy().astype(int)
    # Assumes the labels are exactly 0 or 1 -- TODO confirm against the CSV.
    truth = targets.detach().cpu().numpy().astype(int)
    return confusion_matrix(truth, preds, labels=[0, 1])


# Legend for the matrices printed below. With labels ordered [0, 1] sklearn puts
# true classes on rows and predictions on columns, so (with 1 as the positive
# class) the layout is TN/FP on the first row and FN/TP on the second.
print("""
TN | FP
-------
FN | TP
""")

# num_epochs + 1 passes so that epoch == num_epochs also hits the reporting interval.
for epoch in range(num_epochs + 1):

    # Step schedule derived from the epoch index only. The global `lr` is left
    # untouched so re-running this cell starts from the base rate again.
    if epoch <= 200:
        epoch_lr = lr
    elif epoch < 500:
        epoch_lr = 0.0005
    else:
        epoch_lr = 0.0001
    for param_group in optimizer.param_groups:
        param_group['lr'] = epoch_lr

    report_confusion = epoch % confusion_interval == 0

    train_confusion_mat = np.zeros((2, 2), dtype=int)
    lossf = 0  # sum of the per-batch losses over this epoch (not averaged)
    model.train()
    for X, y in train_loader:

        outputs = model(X)
        # reshape(-1) rather than squeeze(): squeeze() turns a final batch of
        # size 1 into a 0-d tensor whose shape no longer matches y.
        loss = criterion(outputs.reshape(-1), y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if report_confusion:
            train_confusion_mat += _batch_confusion(y, outputs)

        lossf += loss.item()

    if report_confusion:
        val_confusion_mat = np.zeros((2, 2), dtype=int)
        model.eval()
        with torch.no_grad():
            for X, y in val_loader:
                outputs = model(X)
                val_confusion_mat += _batch_confusion(y, outputs)

    # Print the 0-based epoch index so the counter never exceeds num_epochs and
    # the reporting epochs show up as round numbers.
    print(f"Epoch [{epoch}/{num_epochs}], Loss: {lossf:.4f}")
    if report_confusion:
        print(f"""train confusion matrix : {train_confusion_mat[0]}   val confusion matrix:  {val_confusion_mat[0]}
                        {train_confusion_mat[1]}                        {val_confusion_mat[1]}""")
        print(f"train accuracy : {(train_confusion_mat[0,0]+train_confusion_mat[1,1]) / np.sum(train_confusion_mat):.3f} , validation accuray : {(val_confusion_mat[0,0]+val_confusion_mat[1,1]) / np.sum(val_confusion_mat):.3f}")
        


In [None]:
# Final evaluation on the held-out test split.
# Matrix layout: rows are true labels, columns are predictions, i.e.
# [[TN, FP], [FN, TP]] when label 1 is treated as the positive class.
test_confusion_mat = np.zeros((2, 2), dtype=int)
model.eval()
with torch.no_grad():
    for X, y in test_loader:
        outputs = model(X)
        # Flatten so a (batch, 1) or a (batch,) output both line up with y.
        preds = (outputs.reshape(-1) > 0.5).cpu().numpy().astype(int)
        # Assumes the labels are exactly 0 or 1 -- TODO confirm against the CSV.
        truth = y.cpu().numpy().astype(int)
        # labels=[0, 1] keeps every per-batch matrix 2x2; a single-class batch
        # would otherwise yield a 1x1 matrix that broadcasts into all four cells.
        test_confusion_mat += confusion_matrix(truth, preds, labels=[0, 1])


print(f"test confusion matrix : ")
print(test_confusion_mat)
print(f"test accuracy : {(test_confusion_mat[0,0]+test_confusion_mat[1,1]) / np.sum(test_confusion_mat):.3f}")


In [3]:

## batch size 128 , 3 layers , 128 hidden neurons , 0.1 dropout
# convergence at epoch 400 : 50 loss
# train accuracy : 0.934 , validation accuracy : 0.897 , test accuracy : 0.894


## batch size 128 , 3 layers , 256 hidden neurons , 0.2 dropout
# convergence at epoch 500 : 45 loss
# at epoch 1000 : train accuracy : 0.943 , validation accuracy : 0.929 , test accuracy : 0.921

## batch size 128 , 5 layers , 256 hidden neurons , 0.1 dropout
# convergence at epoch 600 : 9 loss
# train accuracy : 0.993 , validation accuracy : 0.941 , test accuracy : 0.939





In [12]:
# Hyperparameters stored next to the weights in the checkpoint.
args = dict(
    n_layers=5,
    n_hidden=256,
    bias=True,
    drop_out=0.1,
    epochs=1000,
)

In [14]:
# Bundle the trained weights with the hyperparameter dict and write them out.
checkpoint = {"model": model.state_dict(), "args": args}
torch.save(checkpoint, '/kaggle/working/deepFakeDetection.pth')