In [1]:
import torch
import cv2
import os
import pandas as pd

In [2]:
# Root of the DFDC dataset, relative to this notebook's working directory.
dataset_root = "../../dfdc_dataset"

# Training split: CSV index and the folder that holds its images.
train_csv_path = f"{dataset_root}/train.csv"
train_data_path = f"{dataset_root}/archive/train/"

# Evaluation split: its images are read from the "validation" folder.
test_csv_path = f"{dataset_root}/test.csv"
test_data_path = f"{dataset_root}/archive/validation/"

In [49]:
class DFDCDataset(torch.utils.data.Dataset):
    """Image-classification dataset backed by a CSV index and an image folder.

    The CSV must provide a ``path`` column (image file name, joined onto
    ``data_folder``) and a ``label`` column. The label ``"real"`` maps to
    class 0; every other label value maps to class 1.
    """

    def __init__(self, data_csv, data_folder, image_size=(225, 225)):
        """Load the CSV index.

        Args:
            data_csv: Path of the CSV file listing the samples.
            data_folder: Directory that the CSV's ``path`` values are joined onto.
            image_size: ``(width, height)`` handed to ``cv2.resize``; the
                default keeps the original 225x225 output.
        """
        self.data = pd.read_csv(data_csv)
        self.data_path = data_folder
        self.image_size = image_size

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, i):
        """Return ``(image, label)`` for the ``i``-th row of the index.

        Returns:
            image: float32 array in height x width x channel layout, RGB
                channel order, values scaled to [0, 1].
            label: ``0`` for ``"real"``, ``1`` otherwise.

        Raises:
            FileNotFoundError: the image file does not exist.
            ValueError: the file exists but OpenCV could not decode it.
        """
        # Positional access, so the lookup does not depend on the frame's index labels.
        rel_path = self.data["path"].iloc[i]
        label = 0 if self.data["label"].iloc[i] == "real" else 1

        img_path = os.path.join(self.data_path, rel_path)
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(img_path)
        if img is None:
            raise ValueError(f"Could not decode image: {img_path}")

        # OpenCV decodes to BGR; torchvision's pretrained weights expect RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Scale to [0, 1] first, then resize, as before. Casting to float32 only at
        # the end keeps the numerics identical to a later `.float()` on the batch
        # while halving the memory of each collated batch.
        img = cv2.resize(img / 255., self.image_size)

        return img.astype("float32"), label

In [50]:
# Wrap each split's CSV index and image folder in a dataset.
train_dataset = DFDCDataset(data_csv=train_csv_path, data_folder=train_data_path)
test_dataset = DFDCDataset(data_csv=test_csv_path, data_folder=test_data_path)

# Both loaders batch 32 samples at a time; only the training loader shuffles.
loader_batch_size = 32
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=loader_batch_size, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=loader_batch_size, shuffle=False
)

In [51]:
# Sanity check: (number of training samples, number of training batches per epoch).
(len(train_dataset), len(train_dataloader))

(93853, 2933)

## Train a model

In [52]:
from torchvision.models import resnet34
import torch.nn as nn
from tqdm import tqdm

In [53]:
# Start from torchvision's ResNet-34 with its pretrained weights loaded.
# NOTE(review): newer torchvision releases reportedly deprecate `pretrained=` in
# favour of `weights=` — confirm against the installed version before changing.
model = resnet34(pretrained=True)

In [54]:
# Swap the classifier head for a 2-way layer (class 0 = "real", class 1 = anything else).
# The input width is read from the head being replaced instead of hard-coding 512,
# so the cell stays correct if the backbone is changed and is safe to re-run.
model.fc = nn.Linear(model.fc.in_features, 2)

In [55]:
# Number of passes over the training data; each pass is followed by one evaluation pass.
epochs = 10

In [None]:
# Cross-entropy over the two output logits; targets are integer class ids (0 or 1).
loss_fn = nn.CrossEntropyLoss()

# Adam updates every parameter of the model, backbone and replaced head alike.
optimizer = torch.optim.Adam(params=model.parameters(), lr=3e-4)

In [None]:
def _run_epoch(model, dataloader, loss_fn, optimizer=None, description=""):
    """Run one pass over ``dataloader`` and return ``(mean_loss, accuracy_percent)``.

    Args:
        model: Network mapping a float NCHW image batch to per-class logits.
        dataloader: Yields ``(imgs, labels)`` batches; ``imgs`` is expected in
            NHWC layout, as produced by collating OpenCV-style images.
        loss_fn: Criterion called as ``loss_fn(preds, labels)``.
        optimizer: If given, the pass trains the model (backward + step).
            If ``None``, the pass only evaluates, with autograd disabled.
        description: Label shown on the progress bar.

    Returns:
        Tuple of the per-sample mean loss and the accuracy in percent.
    """
    is_training = optimizer is not None
    model.train(is_training)

    # Feed batches on whatever device the model lives on (no-op on CPU).
    device = next(model.parameters()).device

    loop = tqdm(dataloader, leave=False, position=0)
    loop.set_description(description)

    total_loss = 0.
    total_corr = 0
    total = 0

    # Evaluation does not need gradients; disabling them saves memory and time.
    with torch.set_grad_enabled(is_training):
        for imgs, labels in loop:
            # NHWC -> NCHW. permute keeps height and width in order, whereas
            # transpose(1, 3) would swap them and feed transposed images.
            imgs = imgs.float().permute(0, 3, 1, 2).to(device)
            labels = labels.to(device)

            if is_training:
                optimizer.zero_grad()

            preds = model(imgs)
            loss = loss_fn(preds, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            n_samples = len(imgs)
            corr_preds = (torch.argmax(preds, dim=-1) == labels).sum().item()

            # loss_fn returns a per-batch mean (default reduction), so weight it by
            # the batch size to make total_loss / total a true per-sample mean.
            total_loss += loss.item() * n_samples
            total_corr += corr_preds
            total += n_samples

            loop.set_postfix(loss=loss.item(), acc=corr_preds / n_samples * 100)

    # Guard against an empty loader so the summary does not divide by zero.
    denom = max(total, 1)
    return total_loss / denom, total_corr / denom * 100.


for e in range(epochs):
    # One optimisation pass over the training split.
    train_loss, train_acc = _run_epoch(
        model, train_dataloader, loss_fn, optimizer, description=f"Epoch {e+1}"
    )
    print(f"TRAIN EPOCH {e + 1} LOSS = {train_loss} ACC = {train_acc}")

    # One gradient-free evaluation pass over the test split.
    test_loss, test_acc = _run_epoch(
        model, test_dataloader, loss_fn, description=f"Epoch {e+1}"
    )
    print(f"TEST EPOCH {e + 1} LOSS = {test_loss} ACC = {test_acc}")
          

                                                                                                                       

ValueError: Using a target size (torch.Size([32])) that is different to the input size (torch.Size([32, 2])) is deprecated. Please ensure they have the same size.