In [None]:
"""
task 1: importing libraries, creating venv
task 2: face detection & alignment
task 3: local CNN branch
task 4: global ViT branch
task 5: facial region sub branch 
task 6: temporal ViT (do later)
task 7: reducing file size due to miscalculations (196gb -> 24.6gb)
task 8: seeing if audio makes sense with lip movement or time lag
task 9: fusion and classification

should have got facial subregion while face detection so as to reduce computation
removing noise from the images as we dont get face detected for every frame
doing def for getting features as that is very repitative
the cnn features and ViT are very heavy whcih doesnt make sense ig, need to improve on space req
adding voice lip reading to see if they are speaking gibirish or not
"""

'\ntask 1: importing libraries, creating venv\ntask 2: face detection & alignment\ntask 3: local CNN branch\ntask 4: global ViT branch\ntask 5: facial region sub branch\ntask 6: temporal ViT\ntask 7: fusion and classification\n'

## task 1: importing libraries

In [3]:
import numpy as np
import cv2
import os
import timm

from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.optim as optim
from torchvision import transforms
import torchvision.models as models
import torch.nn.functional as F


from facenet_pytorch import MTCNN
import torch.nn as nn
import mediapipe as mp
from torch.optim.lr_scheduler import StepLR

import torch.nn as nn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

  from .autonotebook import tqdm as notebook_tqdm


## task 2: face detection and alignent

In [None]:
def extract_faces(video_path, output_subdir, resize_dim=224):
    cap = cv2.VideoCapture(video_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) 
    frame_count = 0
    saved_count = 0
 
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        if frame_count % (fps // 10) != 0:
            frame_count += 1
            continue
        frame_count += 1

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(rgb_frame)
        boxes, _ = mtcnn.detect(img)

        if boxes is not None:
            for box in boxes:
                h, w, _ = rgb_frame.shape
                x1, y1, x2, y2 = map(int, box)

                x1 = max(0, x1)
                y1 = max(0, y1)
                x2 = min(w, x2)
                y2 = min(h, y2)

                if x2 > x1 and y2 > y1:
                    face_crop = rgb_frame[y1:y2, x1:x2]
                    face_crop = cv2.resize(face_crop, (resize_dim, resize_dim))
                    filename = os.path.join(output_subdir, f'face_{saved_count:04d}.jpg')
                    cv2.imwrite(filename, cv2.cvtColor(face_crop, cv2.COLOR_RGB2BGR))
                    saved_count += 1
                else:
                    print(f"skipping invalid box")

    cap.release()

In [None]:
def process_dataset(input_root, output_root):
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            video_dir = os.path.join(input_root, split, label)
            out_dir = os.path.join(output_root, split, label)
            os.makedirs(out_dir, exist_ok=True)

            video_files = os.listdir(video_dir)

            for video_file in tqdm(video_files, desc=f"{split}/{label}"):
                video_path = os.path.join(video_dir, video_file)
                output_subdir = os.path.join(out_dir, os.path.splitext(video_file)[0])
                os.makedirs(output_subdir, exist_ok=True)
                extract_faces(video_path, output_subdir)

In [None]:
mtcnn = MTCNN(keep_all=False)

input_root = 'deepfake_dataset' 
output_root = 'aligned_faces'  

# # Create output directory
# os.makedirs(output_root, exist_ok=True)
# process_dataset(input_root, output_root)

## task 3 local CNN brach

In [None]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], # https://docs.pytorch.org/vision/stable/models/generated/torchvision.models.resnet50.html
                         std=[0.229, 0.224, 0.225])  # the values for transform values are picked from the above link
])

# Load pre trained resnet 50 and remove final layers
resnet = models.resnet50(pretrained=True)
feature_extractor = nn.Sequential(*list(resnet.children())[:-2])
feature_extractor.eval()

input_root = 'aligned_faces'
output_root = 'extracted_cnn_features'
os.makedirs(output_root, exist_ok=True)

In [None]:
def generate_cnn_features():
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            save_dir = os.path.join(output_root, split, label)
            label_path = os.path.join(input_root, split, label)
            os.makedirs(save_dir, exist_ok=True)
            video_folders = os.listdir(label_path)
            
            for video in tqdm(video_folders, desc=f"{split}/{label}"):
                video_path = os.path.join(label_path, video)
                features = []

                for frame_name in sorted(os.listdir(video_path)):
                    frame_path = os.path.join(video_path, frame_name)
                    
                    img = Image.open(frame_path).convert('RGB')
                    input_tensor = transform(img).unsqueeze(0)

                    with torch.no_grad():
                        feature = feature_extractor(input_tensor)  # shape: (1, 2048, 7, 7)
                    features.append(feature.squeeze(0))

                if features:
                    video_tensor = torch.stack(features)  # shape: (num_frames, 2048, 7, 7) 
                                            # update shape to (num_frames, 512, 7, 7), probably good enough to capture features
                    save_path = os.path.join(save_dir, f"{video}.pt")
                    torch.save(video_tensor, save_path)
# generate_cnn_features()

train/real:   0%|          | 0/180 [00:00<?, ?it/s]

train/real: 100%|██████████| 180/180 [1:27:32<00:00, 29.18s/it]
train/fake: 100%|██████████| 180/180 [1:13:15<00:00, 24.42s/it]
test/real: 100%|██████████| 21/21 [09:01<00:00, 25.80s/it]
test/fake: 100%|██████████| 20/20 [08:08<00:00, 24.42s/it]


## task 4: global ViT ranch

In [None]:
model = timm.create_model('vit_base_patch16_224', pretrained=True)
model.eval().to("cpu")

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],  
        std=[0.5, 0.5, 0.5]
    )
])

input_root = 'aligned_faces'  
output_root = 'extracted_ViT_features'  
os.makedirs(output_root, exist_ok=True)

In [None]:
def generate_ViT_features():
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            label_input_path = os.path.join(input_root, split, label)
            label_output_path = os.path.join(output_root, split, label)

            video_folders = os.listdir(label_input_path)

            for video in tqdm(video_folders, desc=f"{split}/{label}"):
                video_input_path = os.path.join(label_input_path, video)
                os.makedirs(label_output_path, exist_ok=True)

                if not os.path.isdir(video_input_path):
                    continue

                video_features = []

                for frame_file in sorted(os.listdir(video_input_path)):
                    frame_path = os.path.join(video_input_path, frame_file)

                    img = Image.open(frame_path).convert('RGB')
                    input_tensor = transform(img).unsqueeze(0)

                    with torch.no_grad():
                        features = model.forward_features(input_tensor)  # [1, 197, 768]

                    video_features.append(features.squeeze(0).cpu())

                if video_features:
                    video_tensor = torch.stack(video_features)  # shape: [num_frames, 197, 768]
                    save_path = os.path.join(label_output_path, f"{video}.pt")

                    torch.save(video_tensor, save_path)
                    
# generate_ViT_features()

train/real:   0%|          | 0/180 [00:00<?, ?it/s]

train/real: 100%|██████████| 180/180 [3:11:12<00:00, 63.74s/it]   
train/fake: 100%|██████████| 180/180 [2:44:48<00:00, 54.94s/it]  
test/real: 100%|██████████| 21/21 [20:33<00:00, 58.72s/it] 
test/fake: 100%|██████████| 20/20 [32:14<00:00, 96.73s/it] 


## task 5: facial region subranch

In [None]:
input_root = "aligned_faces"
output_root = {
    "left_eye": "extracted_left_eye",
    "right_eye": "extracted_right_eye",
    "mouth": "extracted_mouth"
}
stack_size = 10

for region_path in output_root.values():  
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            os.makedirs(os.path.join(region_path, split, label), exist_ok=True)

FACIAL_REGIONS = {
    "left_eye": [33, 133, 159, 145, 153, 154, 155, 133],
    "right_eye": [362, 263, 386, 374, 380, 381, 382, 263],
    "mouth": [78, 95, 88, 178, 87, 14, 317, 402, 318, 324]
}

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True)

In [4]:
def crop_region(image, landmarks, region_indices):
    h, w, _ = image.shape
    points = [(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in region_indices]

    xs, ys = zip(*points)
    x_min, x_max = max(min(xs) - 10, 0), min(max(xs) + 10, w)
    y_min, y_max = max(min(ys) - 10, 0), min(max(ys) + 10, h)

    cropped = image[y_min:y_max, x_min:x_max]
    if cropped.size == 0:
        return None
    return cropped

In [None]:
def extract_facial_features():
    for split in ['train', 'test']:
        for label in ['real', 'fake']:
            input_path = os.path.join(input_root, split, label)
            video_folders = [v for v in os.listdir(input_path) if os.path.isdir(os.path.join(input_path, v))]

            for video in tqdm(video_folders, desc=f"{split}/{label}"):
                video_path = os.path.join(input_path, video)
                region_buffers = {region: [] for region in FACIAL_REGIONS}
                last_landmarks = None

                for frame_file in sorted(os.listdir(video_path)):
                    if not frame_file.lower().endswith('.jpg'):
                        continue
                    image = cv2.imread(os.path.join(video_path, frame_file))
                    if image is None:
                        continue

                    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    results = face_mesh.process(rgb)

                    if results.multi_face_landmarks:
                        landmarks = results.multi_face_landmarks[0].landmark
                        last_landmarks = landmarks
                    elif last_landmarks:
                        landmarks = last_landmarks
                    else:
                        continue

                    for region, indices in FACIAL_REGIONS.items():
                        cropped = crop_region(image, landmarks, indices)
                        if cropped is None:
                            continue
                        resized = cv2.resize(cropped, (64, 64))
                        tensor_img = torch.tensor(resized).permute(2, 0, 1).float() / 255.0
                        region_buffers[region].append(tensor_img)

                for region in FACIAL_REGIONS:
                    if len(region_buffers[region]) == 0:
                        continue
                    full_stack = torch.stack(region_buffers[region])  # [num_frames, 3, 64, 64]
                    save_name = f"{video}.pt"
                    save_path = os.path.join(output_root[region], split, label, save_name)
                    torch.save(full_stack, save_path)
                    
extract_facial_features()

train/real: 100%|██████████| 180/180 [09:09<00:00,  3.05s/it]
train/fake: 100%|██████████| 180/180 [11:58<00:00,  3.99s/it]
test/real: 100%|██████████| 21/21 [01:26<00:00,  4.13s/it]
test/fake: 100%|██████████| 20/20 [01:18<00:00,  3.94s/it]


## task 6: temporal ViT

In [None]:
# Configuration
data_root = "aligned_faces"

chunk_size = 30
stride = 10  
frame_feature_dim = 768 
temporal_vit_heads = 8
temporal_vit_layers = 4


def get_positional_encoding(seq_len, dim):
    position = torch.arange(0, seq_len).unsqueeze(1).float()
    div_term = torch.exp(torch.arange(0, dim, 2).float() * -(np.log(10000.0) / dim))
    pe = torch.zeros(seq_len, dim)
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe

temporal_encoder_layer = nn.TransformerEncoderLayer(
    d_model=frame_feature_dim,
    nhead=temporal_vit_heads,
    dim_feedforward=1024,
    batch_first=True,
)
temporal_transformer = nn.TransformerEncoder(
    temporal_encoder_layer, num_layers=temporal_vit_layers
)    

In [None]:
backbone = timm.create_model('vit_base_patch16_224', pretrained=True)
backbone.reset_classifier(0) 
backbone.eval()

def extract_vit_features(x):
    with torch.no_grad():
        x = backbone(x)
    return x

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

base_save_dir = "extracted_temporal_features"
os.makedirs(base_save_dir, exist_ok=True)   

In [None]:
for split in ['train', 'test']:
    for label in ['real', 'fake']:
        video_dir = os.path.join(data_root, split, label)
        videos = [v for v in os.listdir(video_dir) if os.path.isdir(os.path.join(video_dir, v))]

        save_dir = os.path.join(base_save_dir, split, label)
        os.makedirs(save_dir, exist_ok=True)

        for video in tqdm(videos, desc=f"{split}/{label}"):
        frame_dir = os.path.join(video_dir, video)
        frame_files = sorted([f for f in os.listdir(frame_dir)])

        all_frame_feats = []
        for f in frame_files:
            img_path = os.path.join(frame_dir, f)
            img = Image.open(img_path).convert('RGB')
            img_tensor = transform(img).unsqueeze(0)
            feat = extract_vit_features(img_tensor)  # (1, 768)
            all_frame_feats.append(feat.squeeze(0))
        all_frame_feats = torch.stack(all_frame_feats)  # (num_frames, 768)

        num_frames = all_frame_feats.size(0)
        chunks = []
        for start_idx in range(0, num_frames - chunk_size + 1, stride):
            chunk_feats = all_frame_feats[start_idx:start_idx + chunk_size]  
            pe = get_positional_encoding(chunk_size, frame_feature_dim)
            chunk_feats_pe = chunk_feats + pe
            chunk_feats_pe = chunk_feats_pe.unsqueeze(0) 

            with torch.no_grad():
                temporal_out = temporal_transformer(chunk_feats_pe) 

            chunks.append(temporal_out.squeeze(0))  

        chunks = torch.cat(chunks, dim=0) # remove this, get dim of (N,768*9) and then reshape to (N, 768, 9) insted of reapting values
        chunks_3d = chunks.unsqueeze(-1).repeat(1, 1, 9)  # (N, 768, 9)

        save_path = os.path.join(save_dir, f"{video}.pt")
        torch.save(chunks_3d.cpu(), save_path)

In [None]:
input_root = "extracted_temporal_features"
output_root = "extracted_new_temporal_features"
os.makedirs(output_root, exist_ok=True)

for split in ['train', 'test']:
    for label in ['real', 'fake']:
        input_dir = os.path.join(input_root, split, label)
        output_dir = os.path.join(output_root, split, label)
        os.makedirs(output_dir, exist_ok=True)

        for fname in tqdm(os.listdir(input_dir), desc=f"{split}/{label}"):
            if not fname.endswith(".pt"):
                continue

            input_path = os.path.join(input_dir, fname)
            output_path = os.path.join(output_dir, fname)

            data = torch.load(input_path)

            if data.ndim == 1:
                print(f"Skipping (1D) {fname}, shape: {data.shape}")
                continue

            if data.ndim == 2:
                # Expected shape: (num_frames, 768)
                expanded = data.unsqueeze(-1).repeat(1, 1, 9)  # (num_frames, 768, 9)
                torch.save(expanded, output_path)
            else:
                print(f"unexpected shape in {fname}: {data.shape}")


train/real:   2%|▏         | 4/180 [00:00<00:08, 19.83it/s]

Unexpected shape in 01__hugging_happy.pt: torch.Size([1260, 768, 9])
Unexpected shape in 01__kitchen_pan.pt: torch.Size([810, 768, 9])


train/real: 100%|██████████| 180/180 [00:09<00:00, 19.24it/s]
train/fake: 100%|██████████| 180/180 [00:07<00:00, 23.97it/s]
test/real: 100%|██████████| 20/20 [00:00<00:00, 22.97it/s]
test/fake: 100%|██████████| 20/20 [00:00<00:00, 24.73it/s]


## task 7: reducing feeature size

In [None]:
# CNN Compression: (2048, 7, 7) → (256, 7, 7)
cnn_reducer = nn.Conv2d(2048, 256, kernel_size=1)
# ViT Compression: (197, 768) → (197, 96)
vit_reducer = nn.Linear(768, 96)

cnn_dir = "extracted_cnn_features/test"
vit_dir = "extracted_vit_features/test"
cnn_new_dir = "extracted_cnn_new_features/test"
vit_new_dir = "extracted_vit_new_features/test"
for label in ['real', 'fake']:
    os.makedirs(os.path.join(cnn_new_dir, label), exist_ok=True)
    os.makedirs(os.path.join(vit_new_dir, label), exist_ok=True)


cnn_reducer.eval()
vit_reducer.eval()

def reducer():
    with torch.no_grad():
        for label in ['real', 'fake']:
            cnn_label_path = os.path.join(cnn_dir, label)
            vit_label_path = os.path.join(vit_dir, label)

            cnn_new_label_path = os.path.join(cnn_new_dir, label)
            vit_new_label_path = os.path.join(vit_new_dir, label)

            for fname in tqdm(os.listdir(cnn_label_path), desc=f"Compressing {label}"):
                if not fname.endswith(".pt"):
                    continue

                cnn_path = os.path.join(cnn_label_path, fname)
                vit_path = os.path.join(vit_label_path, fname)
                cnn_feat = torch.load(cnn_path)  # (T, 2048, 7, 7)
                vit_feat = torch.load(vit_path)  # (T, 197, 768)

                cnn_feat_reduced = cnn_reducer(cnn_feat)  # (T, 256, 7, 7)
                vit_feat_reduced = vit_reducer(vit_feat)  # (T, 197, 96)

                torch.save(cnn_feat_reduced, os.path.join(cnn_new_label_path, fname))
                torch.save(vit_feat_reduced, os.path.join(vit_new_label_path, fname))
# reducer()

## task 8: train MLP & evaluate

In [5]:
cnn_dir = "extracted_cnn_new_features"
vit_dir = "extracted_vit_new_features"
left_eye_dir = "extracted_left_eye_features"
right_eye_dir = "extracted_right_eye_features"
mouth_dir = "extracted_mouth_features"
temporal_dir = "extracted_new_temporal_features"

split = 'train'
label_map = {'real': 0, 'fake': 1}
batch_size = 180
input_dim = (197 * 96) + (256 * 7 * 7) + (3 * 64 * 64 * 3) + (768 * 9)
num_epochs = 20
lr = 1e-7

In [10]:
def get_file_list(split):
    samples = []
    for label_str, label_val in label_map.items():
        cnn_path = os.path.join(cnn_dir, split, label_str)
        vit_path = os.path.join(vit_dir, split, label_str)
        left_eye_path = os.path.join(left_eye_dir, split, label_str)
        right_eye_path = os.path.join(right_eye_dir, split, label_str)
        mouth_path = os.path.join(mouth_dir, split, label_str)
        temporal_path = os.path.join(temporal_dir, split, label_str)

        
        for fname in os.listdir(cnn_path):
            if fname.endswith(".pt"):
                cnn_file = os.path.join(cnn_path, fname)
                vit_file = os.path.join(vit_path, fname)
                left_eye_file = os.path.join(left_eye_path, fname)
                right_eye_file = os.path.join(right_eye_path, fname)
                mouth_file = os.path.join(mouth_path, fname)
                temporal_file = os.path.join(temporal_path, fname)

                if os.path.exists(vit_file):
                    samples.append((cnn_file, vit_file, left_eye_file, right_eye_file, mouth_file, temporal_file, label_val))
    return samples

In [12]:
def batch_generator(file_list, batch_size, scaler):
    for i in range(0, len(file_list), batch_size):
        batch = file_list[i:i+batch_size]
        features = []
        labels = []
        for cnn_path, vit_path, left_eye_path, right_eye_path, mouth_path, temporal_path, label in batch:
            cnn_feat = torch.load(cnn_path)                 # (num_frames, 2048, 7, 7)
            vit_feat = torch.load(vit_path)                 # (num_frames, 197, 96)
            left_eye_feat = torch.load(left_eye_path)       # (num_frames, 3, 64, 64)
            right_eye_feat = torch.load(right_eye_path)     # (num_frames, 3, 64, 64)
            mouth_feat = torch.load(mouth_path)             # (num_frames, 3, 64, 64)
            temporal_feat = torch.load(temporal_path)             # (num_frames, 768, 9)

            cnn_avg = cnn_feat.mean(dim=0)         # (2048, 7, 7)
            vit_avg = vit_feat.mean(dim=0)         # (197, 96)
            left_eye_avg = left_eye_feat.mean(dim=0)         # (64, 64)
            right_eye_avg = right_eye_feat.mean(dim=0)         # (64, 64)
            mouth_avg = mouth_feat.mean(dim=0)         # (64, 64)
            temporal_avg = temporal_feat.mean(dim=0)         # (768, 9)

            fused = torch.cat([cnn_avg.flatten(), vit_avg.flatten(), left_eye_avg.flatten(), right_eye_avg.flatten(), mouth_avg.flatten(), temporal_avg.flatten()], dim=0)
            features.append(fused)
            labels.append(label)

        features = torch.stack(features)
        if scaler is not None:
            features = torch.tensor(scaler.transform(features.numpy()), dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # shape (batch_size, 1)
        yield features, labels
        
file_list_train = get_file_list('train')
file_list_test = get_file_list('test')

all_train_features = []
for X_batch, _ in batch_generator(file_list_train, batch_size, scaler=None):
    all_train_features.append(X_batch)
all_train_features = torch.cat(all_train_features, dim=0).numpy()

scaler = StandardScaler()
scaler.fit(all_train_features)

In [6]:
model = nn.Sequential(
    nn.Linear(input_dim, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.1),

    nn.Linear(512, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(0.1),
    
    nn.Linear(128, 1),
    nn.Sigmoid()
)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
scheduler = StepLR(optimizer, step_size=10, gamma=0.75)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for X_batch, y_batch in batch_generator(file_list_train, batch_size, scaler=scaler):
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * X_batch.size(0)
    
    avg_loss = total_loss / len(file_list_train)
    print(f"epoch {epoch+1} complete. avg loss: {avg_loss:.4f}")
    scheduler.step() 

NameError: name 'batch_generator' is not defined

In [178]:
torch.save(model.state_dict(), "model9.pth")

In [13]:
def model_eval():    
    model.eval()
    y_true = []
    y_pred = []
    y_prob = []

    with torch.no_grad():
        for X_batch, y_batch in batch_generator(file_list_test, batch_size, scaler=scaler):
            outputs = model(X_batch)
            probs = outputs.squeeze(1).numpy()
            preds = (probs >= 0.5).astype(int)
            y_prob.extend(probs.tolist())
            y_pred.extend(preds.tolist())
            y_true.extend(y_batch.squeeze(1).numpy().tolist())

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    roc_auc = roc_auc_score(y_true, y_prob)
    cm = confusion_matrix(y_true, y_pred)

    print(f"acc:  {accuracy}")
    print(f"precision: {precision}")
    print(f"recall:    {recall}")
    print(f"f1 score:  {f1}")
    print(f"ROC AUC:   {roc_auc}")
    print(f"condusion matrix:\n   {cm}")

model.load_state_dict(torch.load("model7.pth"))
model_eval()
model.load_state_dict(torch.load("model8.pth"))
model_eval()

acc:  0.575
precision: 0.5428571428571428
recall:    0.95
f1 score:  0.6909090909090909
ROC AUC:   0.7025
condusion matrix:
   [[ 4 16]
 [ 1 19]]
acc:  0.65
precision: 0.75
recall:    0.45
f1 score:  0.5625
ROC AUC:   0.66
condusion matrix:
   [[17  3]
 [11  9]]


In [None]:
with torch.no_grad():
    for X_batch, y_batch in batch_generator(file_list_test, batch_size, scaler=scaler):
        outputs = model(X_batch)
        probs = outputs.squeeze(1).numpy()
        preds = (probs >= 0.5).astype(int)
        y_true_batch = y_batch.squeeze(1).numpy().astype(int)
        for prob, pred, true_label in zip(probs, preds, y_true_batch):
            print(f"true: {true_label} | pred: {pred} | confidence: {prob:.4f}")
        y_prob.extend(probs.tolist())
        y_pred.extend(preds.tolist())
        y_true.extend(y_true_batch.tolist())

True: 0 | Pred: 1 | Confidence: 0.5270
True: 0 | Pred: 0 | Confidence: 0.4997
True: 0 | Pred: 0 | Confidence: 0.4106
True: 0 | Pred: 1 | Confidence: 0.5195
True: 0 | Pred: 1 | Confidence: 0.5833
True: 0 | Pred: 1 | Confidence: 0.5680
True: 0 | Pred: 0 | Confidence: 0.4729
True: 0 | Pred: 1 | Confidence: 0.5273
True: 0 | Pred: 1 | Confidence: 0.5583
True: 0 | Pred: 1 | Confidence: 0.6504
True: 0 | Pred: 1 | Confidence: 0.5328
True: 0 | Pred: 1 | Confidence: 0.6440
True: 0 | Pred: 0 | Confidence: 0.4677
True: 0 | Pred: 0 | Confidence: 0.4743
True: 0 | Pred: 0 | Confidence: 0.4959
True: 0 | Pred: 0 | Confidence: 0.4630
True: 0 | Pred: 1 | Confidence: 0.5865
True: 0 | Pred: 0 | Confidence: 0.4870
True: 0 | Pred: 0 | Confidence: 0.4978
True: 0 | Pred: 0 | Confidence: 0.4622
True: 1 | Pred: 1 | Confidence: 0.5110
True: 1 | Pred: 1 | Confidence: 0.6130
True: 1 | Pred: 0 | Confidence: 0.4501
True: 1 | Pred: 0 | Confidence: 0.4734
True: 1 | Pred: 1 | Confidence: 0.5093
True: 1 | Pred: 1 | Confi

In [None]:
""""
model 4:
Test Accuracy:  0.6000
Test Precision: 0.5588
Test Recall:    0.9500
Test F1-score:  0.7037
Test ROC AUC:   0.6825
Confusion Matrix:
   [[ 5 15]
 [ 1 19]]


 model 7:
 Test Accuracy:  0.5750
Test Precision: 0.5429
Test Recall:    0.9500
Test F1-score:  0.6909
Test ROC AUC:   0.7025
Confusion Matrix:
   [[ 4 16]
 [ 1 19]]

model 8:
Test Accuracy:  0.6500
Test Precision: 0.7500
Test Recall:    0.4500
Test F1-score:  0.5625
Test ROC AUC:   0.6600
Confusion Matrix:
   [[17  3]
 [11  9]]

model 7 + model 8:
Test Accuracy:  0.6500
Test Precision: 0.6250
Test Recall:    0.7500
Test F1-score:  0.6818
Test ROC AUC:   0.7025
Confusion Matrix:
   [[11  9]
 [ 5 15]] 

"""

In [185]:
model7 = nn.Sequential(
    nn.Linear(input_dim, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(512, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(128, 1),
    nn.Sigmoid()
)
model8 = nn.Sequential(
    nn.Linear(input_dim, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(512, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(128, 1),
    nn.Sigmoid()
)
model7.load_state_dict(torch.load("model7.pth"))
model8.load_state_dict(torch.load("model8.pth"))
model7.eval()
model8.eval()

y_true, y_pred, y_prob = [], [], []

with torch.no_grad():
    for X_batch, y_batch in batch_generator(file_list_test, batch_size, scaler=scaler):
        prob7 = model7(X_batch).squeeze(1).numpy()
        prob8 = model8(X_batch).squeeze(1).numpy()
        avg_prob = (prob7 + prob8) / 2
        preds = (avg_prob >= 0.5).astype(int)
        y_prob.extend(avg_prob.tolist())
        y_pred.extend(preds.tolist())
        y_true.extend(y_batch.squeeze(1).numpy().tolist())
        print("Model7:", prob7[:5], "Model8:", prob8[:5], "Avg:", avg_prob[:5])
        
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
roc_auc = roc_auc_score(y_true, y_prob)
cm = confusion_matrix(y_true, y_pred)

print(f"acc:  {accuracy}")
print(f"precision: {precision}")
print(f"recall:    {recall}")
print(f"f1 score:  {f1}")
print(f"ROC AUC:   {roc_auc}")
print(f"condusion matrix:\n   {cm}")


Model7: [0.5967618  0.5357842  0.44540623 0.5943625  0.56813085] Model8: [0.47916403 0.416531   0.39346752 0.4887952  0.4869161 ] Avg: [0.5379629  0.4761576  0.41943687 0.5415788  0.52752346]
acc:  0.65
precision: 0.625
recall:    0.75
f1 score:  0.6818181818181818
ROC AUC:   0.7025
condusion matrix:
   [[11  9]
 [ 5 15]]
