In [None]:
"""
task 1: importing libraries, creating venv
task 2: face detection & alignment
task 3: local CNN branch
task 4: global ViT branch
task 5: facial region sub branch 
task 6: temporal ViT (do later)
task 7: reducing file size due to miscalculations (196gb -> 24.6gb)
task 8: seeing if audio makes sense with lip movement or time lag
task 9: fusion and classification

should have got facial subregion while face detection so as to reduce computation
removing noise from the images as we dont get face detected for every frame
doing def for getting features as that is very repitative
the cnn features and ViT are very heavy whcih doesnt make sense ig, need to improve on space complexity
reduce fps to 10frames/sec would be better as it is enough to capture the nuance (actuualy i just need to avgs acorss the all the frames)
adding voice lip reading to see if they are speaking gibirish or not
"""

'\ntask 1: importing libraries, creating venv\ntask 2: face detection & alignment\ntask 3: local CNN branch\ntask 4: global ViT branch\ntask 5: facial region sub branch\ntask 6: temporal ViT\ntask 7: fusion and classification\n'

## task 1: importing libraries

In [1]:
import numpy as np
import cv2
import os
import timm

from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.optim as optim
from torchvision import transforms
import torchvision.models as models
import torch.nn.functional as F


from facenet_pytorch import MTCNN
import torch.nn as nn
import mediapipe as mp

from torchvision.models import vit_b_16  
import torch.nn as nn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

  from .autonotebook import tqdm as notebook_tqdm


## task 2: face detection and alignent

In [None]:
"""2.5GB"""
def extract_faces(video_path, output_subdir, resize_dim=224):
    cap = cv2.VideoCapture(video_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) 
    frame_count = 0
    saved_count = 0
 
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        if frame_count % (fps // 10) != 0:
            frame_count += 1
            continue
        frame_count += 1

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(rgb_frame)
        boxes, _ = mtcnn.detect(img)

        if boxes is not None:
            for box in boxes:
                h, w, _ = rgb_frame.shape
                x1, y1, x2, y2 = map(int, box)

                x1 = max(0, x1)
                y1 = max(0, y1)
                x2 = min(w, x2)
                y2 = min(h, y2)

                if x2 > x1 and y2 > y1:
                    face_crop = rgb_frame[y1:y2, x1:x2]
                    face_crop = cv2.resize(face_crop, (resize_dim, resize_dim))
                    filename = os.path.join(output_subdir, f'face_{saved_count:04d}.jpg')
                    cv2.imwrite(filename, cv2.cvtColor(face_crop, cv2.COLOR_RGB2BGR))
                    saved_count += 1
                else:
                    print(f"skipping invalid box: {(x1, y1, x2, y2)}")

    cap.release()

In [None]:
def process_dataset(input_root, output_root):
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            video_dir = os.path.join(input_root, split, label)
            out_dir = os.path.join(output_root, split, label)
            os.makedirs(out_dir, exist_ok=True)

            video_files = os.listdir(video_dir)

            for video_file in tqdm(video_files, desc=f"{split}/{label}"):
                video_path = os.path.join(video_dir, video_file)
                output_subdir = os.path.join(out_dir, os.path.splitext(video_file)[0])
                os.makedirs(output_subdir, exist_ok=True)
                extract_faces(video_path, output_subdir)

In [None]:
mtcnn = MTCNN(keep_all=False)

input_root = 'deepfake_dataset' 
output_root = 'aligned_faces'  

# # Create output directory
# os.makedirs(output_root, exist_ok=True)
# process_dataset(input_root, output_root)

## task 3+4 local CNN & global ViT brach

In [None]:
transform_cnn = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], # https://docs.pytorch.org/vision/stable/models/generated/torchvision.models.resnet50.html
                         std=[0.229, 0.224, 0.225])
])
transform_vit = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5], 
        std=[0.5, 0.5, 0.5]
    )
])

# Load Pretrained Vision Transformer (e.g., ViT-B/16)
feature_extractor_vit = timm.create_model('vit_base_patch16_224', pretrained=True)
feature_extractor_vit.eval()

# Load pre-trained ResNet-50 and remove final layers
resnet = models.resnet50(pretrained=True)
feature_extractor_cnn = nn.Sequential(*list(resnet.children())[:-2])
feature_extractor_cnn.eval()

In [None]:
def generate_features(input_root, output_root, model, transform):
    os.makedirs(output_root, exist_ok=True)
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            save_dir = os.path.join(output_root, split, label)
            label_path = os.path.join(input_root, split, label)
            os.makedirs(save_dir, exist_ok=True)
            video_folders = os.listdir(label_path)
            
            for video in tqdm(video_folders, desc=f"{split}/{label}"):
                video_path = os.path.join(label_path, video)
                features = []

                for frame_name in sorted(os.listdir(video_path)):
                    frame_path = os.path.join(video_path, frame_name)
                    
                    img = Image.open(frame_path).convert('RGB')
                    input_tensor = transform(img).unsqueeze(0)

                    with torch.no_grad():
                        feature = model(input_tensor)  # shape: (1, 2048, 7, 7)
                    features.append(feature.squeeze(0)) 


                video_tensor = torch.stack(features)  # shape: (num_frames, 2048, 7, 7) 
                                        # update shape to (num_frames, 512, 7, 7), probably good enough to capture features
                # Save features
                save_path = os.path.join(save_dir, f"{video}.pt")
                torch.save(video_tensor, save_path)

generate_features("aligned_faces", "extracted_cnn_features",  feature_extractor_cnn, transform_cnn)  # for cnn
generate_features("aligned_faces", "extracted_ViT_features", feature_extractor_vit, transform_vit)  # for ViT

## task 5: facial region subranch

In [None]:
# Input and Output Paths
input_root = "aligned_faces"
output_root = {
    "left_eye": "extracted_left_eye",
    "right_eye": "extracted_right_eye",
    "mouth": "extracted_mouth"
}
stack_size = 10

# Create global region folders
for region_path in output_root.values():  
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            os.makedirs(os.path.join(region_path, split, label), exist_ok=True)
            
# Define facial landmarks
FACIAL_REGIONS = {
    "left_eye": [33, 133, 159, 145, 153, 154, 155, 133],
    "right_eye": [362, 263, 386, 374, 380, 381, 382, 263],
    "mouth": [78, 95, 88, 178, 87, 14, 317, 402, 318, 324]
}

# Face mesh detector
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True)

In [None]:
def crop_region(image, landmarks, region_indices):
    h, w, _ = image.shape
    points = [(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in region_indices]

    xs, ys = zip(*points)
    x_min, x_max = max(min(xs) - 10, 0), min(max(xs) + 10, w)
    y_min, y_max = max(min(ys) - 10, 0), min(max(ys) + 10, h)

    cropped = image[y_min:y_max, x_min:x_max]
    if cropped.size == 0:
        return None
    return cropped

In [None]:
def extract_facial_region():
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            input_path = os.path.join(input_root, split, label)
            video_folders = os.listdir(input_path)

            for video in tqdm(video_folders, desc=f"{split}/{label}"):
                video_path = os.path.join(input_path, video)
                region_buffers = {region: [] for region in FACIAL_REGIONS}
                last_landmarks = None

                for frame_file in os.listdir(video_path):
                    image = cv2.imread(os.path.join(video_path, frame_file))
                    if image is None:
                        continue

                    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    results = face_mesh.process(rgb)

                    if results.multi_face_landmarks:
                        landmarks = results.multi_face_landmarks[0].landmark
                        last_landmarks = landmarks
                    elif last_landmarks:
                        landmarks = last_landmarks
                    else:
                        continue

                    for region, indices in FACIAL_REGIONS.items():
                        cropped = crop_region(image, landmarks, indices)
                        if cropped is None:
                            continue
                        resized = cv2.resize(cropped, (64, 64))
                        tensor_img = torch.tensor(resized).permute(2, 0, 1).float() / 255.0
                        region_buffers[region].append(tensor_img)

                # Save the entire buffer once per region
                for region in FACIAL_REGIONS:
                    if len(region_buffers[region]) == 0:
                        continue
                    full_stack = torch.stack(region_buffers[region])  # [num_frames, 3, 64, 64]
                    save_name = f"{video}.pt"
                    save_path = os.path.join(output_root[region], split, label, save_name)
                    torch.save(full_stack, save_path)
# extract_facial_region()

train/real: 100%|██████████| 180/180 [09:09<00:00,  3.05s/it]
train/fake: 100%|██████████| 180/180 [11:58<00:00,  3.99s/it]
test/real: 100%|██████████| 21/21 [01:26<00:00,  4.13s/it]
test/fake: 100%|██████████| 20/20 [01:18<00:00,  3.94s/it]


## task 6: temporal ViT

In [None]:
# Configuration
data_root = "aligned_faces"

chunk_size = 30
stride = 10  # overlap for smoothness
frame_feature_dim = 768  # ViT base output dim
temporal_vit_heads = 8
temporal_vit_layers = 4


# Positional encoding function
def get_positional_encoding(seq_len, dim):
    position = torch.arange(0, seq_len).unsqueeze(1).float()
    div_term = torch.exp(torch.arange(0, dim, 2).float() * -(np.log(10000.0) / dim))
    pe = torch.zeros(seq_len, dim)
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe

# Temporal transformer setup
temporal_encoder_layer = nn.TransformerEncoderLayer(
    d_model=frame_feature_dim,
    nhead=temporal_vit_heads,
    dim_feedforward=1024,
    batch_first=True,
)
temporal_transformer = nn.TransformerEncoder(
    temporal_encoder_layer, num_layers=temporal_vit_layers
)    

In [None]:
# Remove classification head: keep only the feature extractor part
backbone = timm.create_model('vit_base_patch16_224', pretrained=True)
backbone.reset_classifier(0)  # Remove classification head
backbone.eval()

def extract_vit_features(x):
    with torch.no_grad():
        x = backbone(x)
    return x


# Image transform for ViT input
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

base_save_dir = "extracted_new_temporal_features"
os.makedirs(base_save_dir, exist_ok=True)   

In [None]:
for split in ['train', 'test']:
    for label in ['real', 'fake']:
        video_dir = os.path.join(data_root, split, label)
        videos = [v for v in os.listdir(video_dir) if os.path.isdir(os.path.join(video_dir, v))]

        save_dir = os.path.join(base_save_dir, split, label)
        os.makedirs(save_dir, exist_ok=True)

        for video in tqdm(videos, desc=f"{split}/{label}"):
        frame_dir = os.path.join(video_dir, video)
        frame_files = sorted([f for f in os.listdir(frame_dir)])

        # Extract features for all frames first (to avoid multiple disk reads per chunk)
        all_frame_feats = []
        for f in frame_files:
            img_path = os.path.join(frame_dir, f)
            img = Image.open(img_path).convert('RGB')
            img_tensor = transform(img).unsqueeze(0)
            feat = extract_vit_features(img_tensor)  # (1, 768)
            all_frame_feats.append(feat.squeeze(0))
        all_frame_feats = torch.stack(all_frame_feats)  # (num_frames, 768)

        num_frames = all_frame_feats.size(0)
        chunks = []
        for start_idx in range(0, num_frames - chunk_size + 1, stride):
            chunk_feats = all_frame_feats[start_idx:start_idx + chunk_size]  # (chunk_size, 768)
            pe = get_positional_encoding(chunk_size, frame_feature_dim)
            chunk_feats_pe = chunk_feats + pe
            chunk_feats_pe = chunk_feats_pe.unsqueeze(0)  # (1, chunk_size, 768)

            with torch.no_grad():
                temporal_out = temporal_transformer(chunk_feats_pe)  # (1, chunk_size, 768)

            chunks.append(temporal_out.squeeze(0))  # (chunk_size, 768)

        # Concatenate over all chunks to get (num_effective_frames, 768)
        chunks = torch.cat(chunks, dim=0)  # (num_frames_effectively_used, 768)

        # Add a depth dimension of size 9: (num_frames, 768, 9)
        chunks_3d = chunks.unsqueeze(-1).repeat(1, 1, 9)  # (N, 768, 9)

        save_path = os.path.join(save_dir, f"{video}.pt")
        torch.save(chunks_3d.cpu(), save_path)

train/real:   3%|▎         | 6/180 [10:24<5:01:45, 104.05s/it]


KeyboardInterrupt: 

## task 7: reducing feeature size

In [None]:
import os
import torch
from tqdm import tqdm
import torch
import torch.nn as nn

# CNN Compression: (2048, 7, 7) → (256, 7, 7)
cnn_reducer = nn.Conv2d(2048, 256, kernel_size=1)

# ViT Compression: (197, 768) → (197, 96)
vit_reducer = nn.Linear(768, 96)

# Source directories
cnn_dir = "extracted_cnn_features/test"
vit_dir = "extracted_vit_features/test"

# Target directories
cnn_new_dir = "extracted_cnn_new_features/test"
vit_new_dir = "extracted_vit_new_features/test"

# Make sure target directories exist
for label in ['real', 'fake']:
    os.makedirs(os.path.join(cnn_new_dir, label), exist_ok=True)
    os.makedirs(os.path.join(vit_new_dir, label), exist_ok=True)

# Set reducers to evaluation mode
cnn_reducer.eval()
vit_reducer.eval()

# Apply compression and save to new locations
def reducer():
    with torch.no_grad():
        for label in ['real', 'fake']:
            cnn_label_path = os.path.join(cnn_dir, label)
            vit_label_path = os.path.join(vit_dir, label)

            cnn_new_label_path = os.path.join(cnn_new_dir, label)
            vit_new_label_path = os.path.join(vit_new_dir, label)

            for fname in tqdm(os.listdir(cnn_label_path), desc=f"Compressing {label}"):
                if not fname.endswith(".pt"):
                    continue

                cnn_path = os.path.join(cnn_label_path, fname)
                vit_path = os.path.join(vit_label_path, fname)

                # Load features
                cnn_feat = torch.load(cnn_path)  # (T, 2048, 7, 7)
                vit_feat = torch.load(vit_path)  # (T, 197, 768)

                # Reduce CNN features
                cnn_feat_reduced = cnn_reducer(cnn_feat)  # (T, 256, 7, 7)

                # Reduce ViT features
                vit_feat_reduced = vit_reducer(vit_feat)  # (T, 197, 96)

                # Save to new directories
                torch.save(cnn_feat_reduced, os.path.join(cnn_new_label_path, fname))
                torch.save(vit_feat_reduced, os.path.join(vit_new_label_path, fname))
# reducer()


## task 8: train MLP & evaluate

In [None]:
cnn_dir = "extracted_cnn_new_features"
vit_dir = "extracted_vit_new_features"
left_eye_dir = "extracted_left_eye_features"
right_eye_dir = "extracted_right_eye_features"
mouth_dir = "extracted_mouth_features"
temporal_dir = "extracted_new_temporal_features"

split = 'train'
label_map = {'real': 0, 'fake': 1}
batch_size = 32
input_dim = (197 * 96) + (256 * 7 * 7) + (3 * 64 * 64 * 3) + (768 * 9)
num_epochs = 50
lr = 1e-6

In [None]:
def get_file_list(split):
    samples = []
    for label_str, label_val in label_map.items():
        cnn_path = os.path.join(cnn_dir, split, label_str)
        vit_path = os.path.join(vit_dir, split, label_str)
        left_eye_path = os.path.join(left_eye_dir, split, label_str)
        right_eye_path = os.path.join(right_eye_dir, split, label_str)
        mouth_path = os.path.join(mouth_dir, split, label_str)
        temporal_path = os.path.join(temporal_dir, split, label_str)

        
        for fname in os.listdir(cnn_path):
            if fname.endswith(".pt"):
                cnn_file = os.path.join(cnn_path, fname)
                vit_file = os.path.join(vit_path, fname)
                left_eye_file = os.path.join(left_eye_path, fname)
                right_eye_file = os.path.join(right_eye_path, fname)
                mouth_file = os.path.join(mouth_path, fname)
                temporal_file = os.path.join(temporal_path, fname)

                if os.path.exists(vit_file):
                    samples.append((cnn_file, vit_file, left_eye_file, right_eye_file, mouth_file, temporal_file, label_val))
    return samples

In [None]:
def batch_generator(file_list, batch_size):
    for i in range(0, len(file_list), batch_size):
        batch = file_list[i:i+batch_size]
        features = []
        labels = []
        for cnn_path, vit_path, left_eye_path, right_eye_path, mouth_path, temporal_path, label in batch:
            cnn_feat = torch.load(cnn_path)                 # (num_frames, 2048, 7, 7)
            vit_feat = torch.load(vit_path)                 # (num_frames, 197, 96)
            left_eye_feat = torch.load(left_eye_path)       # (num_frames, 3, 64, 64)
            right_eye_feat = torch.load(right_eye_path)     # (num_frames, 3, 64, 64)
            mouth_feat = torch.load(mouth_path)             # (num_frames, 3, 64, 64)
            temporal_feat = torch.load(temporal_path)             # (num_frames, 3, 64, 64)

            cnn_avg = cnn_feat.mean(dim=0)         # (2048, 7, 7)
            vit_avg = vit_feat.mean(dim=0)         # (197, 96)
            left_eye_avg = left_eye_feat.mean(dim=0)         # (64, 64)
            right_eye_avg = right_eye_feat.mean(dim=0)         # (64, 64)
            mouth_avg = mouth_feat.mean(dim=0)         # (64, 64)
            temporal_avg = temporal_feat.mean(dim=0)         # (64, 64)

            fused = torch.cat([cnn_avg.flatten(), vit_avg.flatten(), left_eye_avg.flatten(), right_eye_avg.flatten(), mouth_avg.flatten(), temporal_avg.flatten()], dim=0)
            features.append(fused)
            labels.append(label)

        features = torch.stack(features)
        features = F.batch_norm(features, running_mean=None, running_var=None, training=True)
        labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # shape (batch_size, 1)
        yield features, labels
        
file_list_train = get_file_list('train')
file_list_test = get_file_list('test')

In [None]:
model = nn.Sequential(
    nn.Linear(input_dim, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.1),

    nn.Linear(512, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(0.1),
    
    nn.Linear(128, 1),
    nn.Sigmoid()
)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for X_batch, y_batch in batch_generator(file_list_train, batch_size):
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * X_batch.size(0)
    
    avg_loss = total_loss / len(file_list_train)
    print(f"Epoch {epoch+1} complete. Average Loss: {avg_loss:.4f}")

Epoch 1 complete. Average Loss: 0.5275
Epoch 2 complete. Average Loss: 0.5211
Epoch 3 complete. Average Loss: 0.5197
Epoch 4 complete. Average Loss: 0.5217
Epoch 5 complete. Average Loss: 0.5099
Epoch 6 complete. Average Loss: 0.5119
Epoch 7 complete. Average Loss: 0.5151
Epoch 8 complete. Average Loss: 0.5081
Epoch 9 complete. Average Loss: 0.5136
Epoch 10 complete. Average Loss: 0.5135
Epoch 11 complete. Average Loss: 0.5114
Epoch 12 complete. Average Loss: 0.5119
Epoch 13 complete. Average Loss: 0.5108
Epoch 14 complete. Average Loss: 0.5091
Epoch 15 complete. Average Loss: 0.5017
Epoch 16 complete. Average Loss: 0.5108
Epoch 17 complete. Average Loss: 0.5053
Epoch 18 complete. Average Loss: 0.5016
Epoch 19 complete. Average Loss: 0.5077
Epoch 20 complete. Average Loss: 0.5010
Epoch 21 complete. Average Loss: 0.4954
Epoch 22 complete. Average Loss: 0.4988
Epoch 23 complete. Average Loss: 0.4971
Epoch 24 complete. Average Loss: 0.5014
Epoch 25 complete. Average Loss: 0.4925
Epoch 26 

In [None]:
torch.save(model.state_dict(), "model7.pth")

In [None]:
# model.eval()
# model.load_state_dict(torch.load("model5.pth"))
model.eval()
y_true = []
y_pred = []
y_prob = []

with torch.no_grad():
    for X_batch, y_batch in batch_generator(file_list_test, batch_size):
        outputs = model(X_batch)
        probs = outputs.squeeze(1).numpy()
        preds = (probs >= 0.5).astype(int)
        y_prob.extend(probs.tolist())
        y_pred.extend(preds.tolist())
        y_true.extend(y_batch.squeeze(1).numpy().tolist())

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
roc_auc = roc_auc_score(y_true, y_prob)
cm = confusion_matrix(y_true, y_pred)

print(f"Test Accuracy:  {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall:    {recall:.4f}")
print(f"Test F1-score:  {f1:.4f}")
print(f"Test ROC AUC:   {roc_auc:.4f}")
print(f"Confusion Matrix:\n   {cm}")


Test Accuracy:  0.5000
Test Precision: 0.5000
Test Recall:    0.6500
Test F1-score:  0.5652
Test ROC AUC:   0.5025
Confusion Matrix:
   [[ 7 13]
 [ 7 13]]


In [None]:
""""
model 3:

Test Accuracy:  0.4750
Test Precision: 0.4872
Test Recall:    0.9500
Test F1-score:  0.6441
Test ROC AUC:   0.4825
Confusion Matrix:
   [[ 0 20]
 [ 1 19]]

model 4:
Test Accuracy:  0.6000
Test Precision: 0.5588
Test Recall:    0.9500
Test F1-score:  0.7037
Test ROC AUC:   0.6825
Confusion Matrix:
   [[ 5 15]
 [ 1 19]]

 model 6:
 Test Accuracy:  0.5000
Test Precision: 0.5000
Test Recall:    0.6500
Test F1-score:  0.5652
Test ROC AUC:   0.4975
Confusion Matrix:
   [[ 7 13]
 [ 7 13]]
"""

In [5]:
import numpy as np

# Case 1
confidences1 = [
    0.5699, 0.5338, 0.5913, 0.5609, 0.5096, 0.5951, 0.5687, 0.4923, 0.5180, 0.4610,
    0.5308, 0.4856, 0.4915, 0.5454, 0.5254, 0.5849, 0.5008, 0.4538, 0.4192, 0.4103,
    0.4825, 0.5545, 0.4291, 0.5627, 0.5094, 0.5163, 0.4627, 0.4958, 0.5376, 0.6003,
    0.5290, 0.5471, 0.5184, 0.5077, 0.5442, 0.4670, 0.5532, 0.4784, 0.5144, 0.4241
]

# Case 2
confidences2 = [
    0.4792, 0.4165, 0.3935, 0.4888, 0.4869, 0.5575, 0.4369, 0.4190, 0.3915, 0.5161,
    0.4184, 0.4645, 0.3272, 0.4421, 0.5519, 0.4179, 0.4256, 0.3231, 0.3035, 0.3467,
    0.3666, 0.5159, 0.3462, 0.5549, 0.5471, 0.5165, 0.3977, 0.4066, 0.4282, 0.5677,
    0.4377, 0.4827, 0.4282, 0.5412, 0.5263, 0.4085, 0.5496, 0.4776, 0.5307, 0.4417
]

# Case 3 (example, replace with your actual third list if different)
confidences3 = [
    0.4792, 0.4165, 0.3935, 0.4888, 0.4869, 0.5575, 0.4369, 0.4190, 0.3915, 0.5161,
    0.4184, 0.4645, 0.3272, 0.4421, 0.5519, 0.4179, 0.4256, 0.3231, 0.3035, 0.3467,
    0.3666, 0.5159, 0.3462, 0.5549, 0.5471, 0.5165, 0.3977, 0.4066, 0.4282, 0.5677,
    0.4377, 0.4827, 0.4282, 0.5412, 0.5263, 0.4085, 0.5496, 0.4776, 0.5307, 0.4417
]
confidences4 = [
    0.5270, 0.4997, 0.4106, 0.5195, 0.5833, 0.5680, 0.4729, 0.5273, 0.5583, 0.6504,
    0.5328, 0.6440, 0.4677, 0.4743, 0.4959, 0.4630, 0.5865, 0.4870, 0.4978, 0.4622,
    0.5110, 0.6130, 0.4501, 0.4734, 0.5093, 0.6621, 0.5715, 0.5819, 0.4787, 0.5733,
    0.4888, 0.4677, 0.4752, 0.4618, 0.5163, 0.4736, 0.5532, 0.5404, 0.5659, 0.5239
]
for i, confs in enumerate([confidences1, confidences2, confidences3, confidences4], 1):
    if len(confs) == 0:
        print(f"Case {i}: No data")
        continue
    spread = np.std(np.array(confs) - 0.5)
    print(f"Case {i} - Standard deviation from 0.5: {spread:.4f}")

Case 1 - Standard deviation from 0.5: 0.0486
Case 2 - Standard deviation from 0.5: 0.0717
Case 3 - Standard deviation from 0.5: 0.0717
Case 4 - Standard deviation from 0.5: 0.0580
