In [1]:
import timm
from timm import create_model
import torch
import torch.nn as nn
import sys
import os
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchvision import datasets, transforms, models
from torchvision.models import resnet101
sys.path.append(os.path.abspath(".."))
from data.ImageDataset import ImageDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights
from torch_dct import dct_2d

In [3]:
def evaluate_model(model, dataloader, device):
    model.eval()  # Set model to evaluation mode
    y_true = []
    y_pred = []
    y_prob = []

    with torch.no_grad():  # Disable gradient computation for evaluation
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)  # Outputs should have shape (batch_size, 1)
            
            # Convert outputs to probabilities
            probs = outputs.squeeze(dim=1)  # Squeeze only the last dimension
            if probs.dim() == 0:  # If it became a scalar (batch_size=1), wrap it in a list
                probs = probs.unsqueeze(0)
            
            # Apply a threshold to get binary predictions (default 0.5 for binary classification)
            preds = (probs > 0.5).float()
            
            # Append results
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy().tolist())  # Ensure this is iterable
            y_prob.extend(probs.cpu().numpy().tolist())  # Ensure this is iterable

    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    auroc = roc_auc_score(y_true, y_prob)
    cm = confusion_matrix(y_true, y_pred)

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "auroc": auroc,
        "confusion_matrix": cm
    }

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))  # Normalize for ImageNet
])

train_dataset = ImageDataset(
        annotations_path="/home/ec2-user/CS230Project/data/annotations/train.json",
        images_dir="/home/ec2-user/CS230Project/data/train",
        transform=transform)

val_dataset = ImageDataset(
    annotations_path="/home/ec2-user/CS230Project/data/annotations/val.json",
    images_dir="/home/ec2-user/CS230Project/data/val",
    transform=transform)

test_dataset = ImageDataset(
    annotations_path="/home/ec2-user/CS230Project/data/annotations/test.json",
    images_dir="/home/ec2-user/CS230Project/data/test",
    transform=transform)

train_loader = DataLoader(train_dataset, batch_size=16, num_workers=7,shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=16, num_workers=7, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, num_workers=7, shuffle=False)

In [9]:
class ViTBinaryClassifier(nn.Module):
    def __init__(self, model_name="vit_base_patch16_224", pretrained=True, num_classes=1):
        super(ViTBinaryClassifier, self).__init__()
        self.vit = timm.create_model(model_name, pretrained=pretrained, drop_rate=0.6, attn_drop_rate=0.5)
        in_features = self.vit.head.in_features
        self.vit.head = nn.Sequential(
            nn.Linear(in_features, num_classes),
            nn.Sigmoid()  # Sigmoid for binary classification
        )

    def forward(self, x):
        return self.vit(x)

In [12]:
vit = ViTBinaryClassifier(model_name="vit_base_patch16_224", pretrained=False)
vit.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/ViT/ViT_6.pth"))
vit = vit.to(device)
vit.eval()
print(evaluate_model(vit, train_loader, device))
print("-" * 50)
print(evaluate_model(vit, val_loader, device))
print("-" * 50)
print(evaluate_model(vit, test_loader, device))


  vit.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/ViT/ViT_6.pth"))


{'accuracy': 0.9034628206000408, 'precision': np.float64(0.9188598267066159), 'recall': np.float64(0.9096885397041728), 'f1_score': np.float64(0.9142511834021553), 'auroc': np.float64(0.9642594781354227), 'confusion_matrix': array([[17146,  2004],
       [ 2253, 22694]])}
--------------------------------------------------
{'accuracy': 0.7721674876847291, 'precision': np.float64(0.7997997997997998), 'recall': np.float64(0.9115801483171706), 'f1_score': np.float64(0.852039456145028), 'auroc': np.float64(0.7803843855581976), 'confusion_matrix': array([[ 849, 1200],
       [ 465, 4794]])}
--------------------------------------------------
{'accuracy': 0.6992, 'precision': np.float64(0.7552880872047755), 'recall': np.float64(0.7952790872446539), 'f1_score': np.float64(0.7747678791307531), 'auroc': np.float64(0.7395690448781667), 'confusion_matrix': array([[ 8183,  7543],
       [ 5993, 23281]])}


In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.Resize((299, 299)),           
    transforms.ToTensor(),                    
    transforms.Normalize(                     
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    ),
])

train_dataset = ImageDataset(
        annotations_path="/home/ec2-user/CS230Project/data/annotations/train.json",
        images_dir="/home/ec2-user/CS230Project/data/train",
        transform=transform)

val_dataset = ImageDataset(
    annotations_path="/home/ec2-user/CS230Project/data/annotations/val.json",
    images_dir="/home/ec2-user/CS230Project/data/val",
    transform=transform)

test_dataset = ImageDataset(
    annotations_path="/home/ec2-user/CS230Project/data/annotations/test.json",
    images_dir="/home/ec2-user/CS230Project/data/test",
    transform=transform)

train_loader = DataLoader(train_dataset, batch_size=16, num_workers=7,shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=16, num_workers=7, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, num_workers=7, shuffle=False)

In [8]:
xception = timm.create_model('xception', pretrained=False)
xception.fc = nn.Sequential(
    nn.Linear(xception.fc.in_features, 512),
    nn.ReLU(),                             
    nn.Dropout(p=0.5),                     
    nn.Linear(512, 1),                     
    nn.Sigmoid()                          
)
xception.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/ExceptionNet/exception_net_9.pth"))
xception = xception.to(device)
xception.eval()
print(evaluate_model(xception, train_loader, device))
print("-" * 50)
print(evaluate_model(xception, val_loader, device))
print("-" * 50)
print(evaluate_model(xception, test_loader, device))




  model = create_fn(
  xception.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/ExceptionNet/exception_net_9.pth"))


{'accuracy': 0.9864163094995124, 'precision': np.float64(0.9845565992676325), 'recall': np.float64(0.9915420691866758), 'f1_score': np.float64(0.9880369874777816), 'auroc': np.float64(0.9996102368875802), 'confusion_matrix': array([[18762,   388],
       [  211, 24736]])}
--------------------------------------------------
{'accuracy': 0.8823207443897099, 'precision': np.float64(0.8781158672855424), 'recall': np.float64(0.9712873169804145), 'f1_score': np.float64(0.9223546406644998), 'auroc': np.float64(0.918908216651721), 'confusion_matrix': array([[1340,  709],
       [ 151, 5108]])}
--------------------------------------------------
{'accuracy': 0.8293333333333334, 'precision': np.float64(0.8306688717383315), 'recall': np.float64(0.9265218282434925), 'f1_score': np.float64(0.8759810095920938), 'auroc': np.float64(0.8826093660400853), 'confusion_matrix': array([[10197,  5529],
       [ 2151, 27123]])}


In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.Resize((299, 299)),           
    transforms.ToTensor(),                    
    transforms.Normalize(                     
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    ),
])

train_dataset = ImageDataset(
        annotations_path="/home/ec2-user/CS230Project/data/annotations/train.json",
        images_dir="/home/ec2-user/CS230Project/data/train",
        transform=transform)

val_dataset = ImageDataset(
    annotations_path="/home/ec2-user/CS230Project/data/annotations/val.json",
    images_dir="/home/ec2-user/CS230Project/data/val",
    transform=transform)

test_dataset = ImageDataset(
    annotations_path="/home/ec2-user/CS230Project/data/annotations/test.json",
    images_dir="/home/ec2-user/CS230Project/data/test",
    transform=transform)

train_loader = DataLoader(train_dataset, batch_size=8, num_workers=7,shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=8, num_workers=7, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, num_workers=7, shuffle=False)

In [None]:
efficientnet = efficientnet_b4(weights=EfficientNet_B4_Weights.DEFAULT)
efficientnet.classifier[1] = torch.nn.Linear(efficientnet.classifier[1].in_features, 1)
efficientnet.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/Efficientnet_b4/efficientnet_b4_10.pth"))
efficientnet.to(device)
efficientnet.eval()
print(evaluate_model(efficientnet, train_loader, device))
print("-" * 50)
print(evaluate_model(efficientnet, val_loader, device))
print("-" * 50)
print(evaluate_model(efficientnet, test_loader, device))


  efficientnet.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/Efficientnet_b4/efficientnet_b4_10.pth"))


{'accuracy': 0.913214050842461, 'precision': np.float64(0.9373395179325769), 'recall': np.float64(0.9072433559145389), 'f1_score': np.float64(0.9220459128592671), 'auroc': np.float64(0.9724941115373469), 'confusion_matrix': array([[17637,  1513],
       [ 2314, 22633]])}
--------------------------------------------------
{'accuracy': 0.926655719759168, 'precision': np.float64(0.9444758140410314), 'recall': np.float64(0.9541737972998668), 'f1_score': np.float64(0.9493000378357926), 'auroc': np.float64(0.9626251346665379), 'confusion_matrix': array([[1754,  295],
       [ 241, 5018]])}
--------------------------------------------------
{'accuracy': 0.8182222222222222, 'precision': np.float64(0.8668521739130435), 'recall': np.float64(0.851335656213705), 'f1_score': np.float64(0.85902385219909), 'auroc': np.float64(0.8906959099512541), 'confusion_matrix': array([[11898,  3828],
       [ 4352, 24922]])}


: 

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.Resize((224, 224)),           
    transforms.ToTensor(),                    
    transforms.Normalize(                     
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    ),
])

train_dataset = ImageDataset(
        annotations_path="/home/ec2-user/CS230Project/data/annotations/train.json",
        images_dir="/home/ec2-user/CS230Project/data/train",
        transform=transform)

val_dataset = ImageDataset(
    annotations_path="/home/ec2-user/CS230Project/data/annotations/val.json",
    images_dir="/home/ec2-user/CS230Project/data/val",
    transform=transform)

test_dataset = ImageDataset(
    annotations_path="/home/ec2-user/CS230Project/data/annotations/test.json",
    images_dir="/home/ec2-user/CS230Project/data/test",
    transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, num_workers=7,shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=64, num_workers=7, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, num_workers=7, shuffle=False)

In [10]:
swin = create_model('swin_base_patch4_window7_224', pretrained=False, num_classes=1)
swin.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/SwinTransformer/Swin_9.pth"))
swin.to(device)
swin.eval()
print(evaluate_model(swin, train_loader, device))
print("-" * 50)
print(evaluate_model(swin, val_loader, device))
print("-" * 50)
print(evaluate_model(swin, test_loader, device))


  swin.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/SwinTransformer/Swin_9.pth"))


{'accuracy': 0.9909971199854866, 'precision': np.float64(0.9971245747610562), 'recall': np.float64(0.9869322964685132), 'f1_score': np.float64(0.9920022562904168), 'auroc': np.float64(0.9995901179953197), 'confusion_matrix': array([[19079,    71],
       [  326, 24621]])}
--------------------------------------------------
{'accuracy': 0.8899835796387521, 'precision': np.float64(0.8985507246376812), 'recall': np.float64(0.9549343981745579), 'f1_score': np.float64(0.9258849557522124), 'auroc': np.float64(0.9229448951347992), 'confusion_matrix': array([[1482,  567],
       [ 237, 5022]])}
--------------------------------------------------
{'accuracy': 0.8388222222222222, 'precision': np.float64(0.8661744653962553), 'recall': np.float64(0.8896973423515748), 'f1_score': np.float64(0.8777783394840166), 'auroc': np.float64(0.9014806609404541), 'confusion_matrix': array([[11702,  4024],
       [ 3229, 26045]])}


In [11]:

class FFTResNet(nn.Module):
    def __init__(self, num_classes=1):
        super(FFTResNet, self).__init__()
        # Load a pretrained ResNet model
        self.resnet = models.resnet101(pretrained=True)

        # Modify the first convolutional layer to accept DCT input if needed
        self.resnet.conv1 = nn.Conv2d(
            6, 64, kernel_size=7, stride=2, padding=3, bias=False
        )  # Ensure it matches DCT input (3 channels)

        # Modify the output layer to match the number of classes
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(num_features, 128),  # Add an intermediate FC layer
            nn.ReLU(),
            nn.Linear(128, num_classes),  # Output layer
            nn.Sigmoid()  # For binary classification
        )

    def apply_fft_batch(self, x):
        assert len(x.shape) == 4, "Expected input tensor of shape (B, C, H, W)"
        real_parts = torch.stack([torch.real(torch.fft.fft2(x[:, c, :, :])) for c in range(x.shape[1])], dim=1)
        imag_parts = torch.stack([torch.imag(torch.fft.fft2(x[:, c, :, :])) for c in range(x.shape[1])], dim=1)
        # Concatenate real and imaginary parts along the channel dimension
        fft_images = torch.cat([real_parts, imag_parts], dim=1)  # (B, 6, H, W) if input has 3 channels
        return fft_images

    def forward(self, x):
        x = self.apply_fft_batch(x)
        return self.resnet(x)
    
fft = FFTResNet()
fft.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/FFTcnn/fft_cnn_3.pth"))
fft.to(device)
fft.eval()
print(evaluate_model(fft, train_loader, device))
print("-" * 50)
print(evaluate_model(fft, val_loader, device))
print("-" * 50)
print(evaluate_model(fft, test_loader, device))

  fft.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/FFTcnn/fft_cnn_3.pth"))


{'accuracy': 0.6767807333832233, 'precision': np.float64(0.79006184224802), 'recall': np.float64(0.5837976510201628), 'f1_score': np.float64(0.6714460247573822), 'auroc': np.float64(0.7656496943232447), 'confusion_matrix': array([[15280,  3870],
       [10383, 14564]])}
--------------------------------------------------
{'accuracy': 0.6375205254515599, 'precision': np.float64(0.7407749077490775), 'recall': np.float64(0.7634531279710972), 'f1_score': np.float64(0.7519430658301339), 'auroc': np.float64(0.5665849642496245), 'confusion_matrix': array([[ 644, 1405],
       [1244, 4015]])}
--------------------------------------------------
{'accuracy': 0.6066888888888889, 'precision': np.float64(0.6953718394490768), 'recall': np.float64(0.7036619525859125), 'f1_score': np.float64(0.6994923340747407), 'auroc': np.float64(0.6002211463927534), 'confusion_matrix': array([[ 6702,  9024],
       [ 8675, 20599]])}


In [None]:
class DCTResNet(nn.Module):
    def __init__(self, num_classes=1):
        super(DCTResNet, self).__init__()
        # Load a pretrained ResNet model
        self.resnet = models.resnet101(pretrained=True)

        # Modify the first convolutional layer to accept DCT input if needed
        self.resnet.conv1 = nn.Conv2d(
            3, 64, kernel_size=7, stride=2, padding=3, bias=False
        )  # Ensure it matches DCT input (3 channels)

        # Modify the output layer to match the number of classes
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(num_features, 128),  # Add an intermediate FC layer
            nn.ReLU(),
            nn.Linear(128, num_classes),  # Output layer
            nn.Sigmoid()  # For binary classification
        )

    def apply_dct_batch(self, x):
        """
        Applies DCT to a batch of images.
        x: Tensor of shape (B, C, H, W)
        """
        assert len(x.shape) == 4, "Expected input tensor of shape (B, C, H, W)"
        # Apply DCT to each channel of each image in the batch
        dct_images = torch.stack([dct_2d(x[:, c, :, :]) for c in range(x.shape[1])], dim=1)
        return dct_images

    def forward(self, x):
        # Apply DCT transformation
        x = self.apply_dct_batch(x)
        # Pass the DCT-transformed images through ResNet
        return self.resnet(x)
    
dct = DCTResNet()
dct.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/DCTcnn/dct_cnn_3.pth"))
dct.to(device)
dct.eval()
print(evaluate_model(dct, train_loader, device))
print("-" * 50)
print(evaluate_model(dct, val_loader, device))
print("-" * 50)
print(evaluate_model(dct, test_loader, device))   

  dct.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/DCTcnn/dct_cnn_3.pth"))


{'accuracy': 0.694945234369685, 'precision': np.float64(0.8322830548650055), 'recall': np.float64(0.5770633743536296), 'f1_score': np.float64(0.6815642458100558), 'auroc': np.float64(0.7986228485852147), 'confusion_matrix': array([[16249,  2901],
       [10551, 14396]])}
--------------------------------------------------
{'accuracy': 0.6223316912972086, 'precision': np.float64(0.7459161582365677), 'recall': np.float64(0.7206693287697281), 'f1_score': np.float64(0.7330754352030948), 'auroc': np.float64(0.5671825129358293), 'confusion_matrix': array([[ 758, 1291],
       [1469, 3790]])}
--------------------------------------------------
{'accuracy': 0.5996444444444444, 'precision': np.float64(0.7074060427413412), 'recall': np.float64(0.6558379449340712), 'f1_score': np.float64(0.6806466479951785), 'auroc': np.float64(0.612155259053833), 'confusion_matrix': array([[ 7785,  7941],
       [10075, 19199]])}


: 

In [6]:
resnet = resnet101(pretrained=False)  
resnet.fc = nn.Linear(resnet.fc.in_features, 1)
resnet.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/Resnet/Resnet_7.pth"))
resnet.to(device)
resnet.eval()
print(evaluate_model(resnet, train_loader, device))
print("-" * 50)
print(evaluate_model(resnet, val_loader, device))
print("-" * 50)
print(evaluate_model(resnet, test_loader, device))

  resnet.load_state_dict(torch.load("/home/ec2-user/CS230Project/code/models/saved-weights/Resnet/Resnet_7.pth"))


{'accuracy': 0.9721749778896523, 'precision': np.float64(0.9881863834691693), 'recall': np.float64(0.9623201186515412), 'f1_score': np.float64(0.9750817408257346), 'auroc': np.float64(0.9966842897543315), 'confusion_matrix': array([[18863,   287],
       [  940, 24007]])}
--------------------------------------------------
{'accuracy': 0.8674055829228243, 'precision': np.float64(0.8799149840595112), 'recall': np.float64(0.9446662863662293), 'f1_score': np.float64(0.9111416781292985), 'auroc': np.float64(0.9164588609677097), 'confusion_matrix': array([[1371,  678],
       [ 291, 4968]])}
--------------------------------------------------
{'accuracy': 0.7932222222222223, 'precision': np.float64(0.8400367809828696), 'recall': np.float64(0.8425906948145112), 'f1_score': np.float64(0.8413117997169023), 'auroc': np.float64(0.8563556651664677), 'confusion_matrix': array([[11029,  4697],
       [ 4608, 24666]])}
