In [1]:
import timm
import torch
import torch.nn as nn
import copy
from torch.utils.data import TensorDataset, DataLoader, random_split
from sklearn.metrics import confusion_matrix
from torchvision import transforms, datasets

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Record the library versions this notebook was run with, one per line.
print(timm.__version__, torch.__version__, sep="\n")

0.9.12
2.1.1


In [3]:
class CNN_5Layer(nn.Module):
    """Small CNN: four convolutional stages followed by one linear classifier.

    Every stage is ``Conv2d(3x3, padding=1) -> ReLU -> BatchNorm2d ->
    MaxPool2d(2) -> Dropout(0.5)``. The flattened feature map is fed to a single
    ``nn.Linear`` followed by ``nn.Softmax``, so ``forward`` returns class
    probabilities rather than raw logits.

    NOTE(review): the evaluation code in this file passes these probabilities to
    ``torch.nn.CrossEntropyLoss``, which applies log-softmax itself. The Softmax
    is left in place because removing it changes the model's outputs; confirm
    how the saved checkpoints were trained before touching it.

    Args:
        num_classes: Number of output classes (default 4).
        input_size: ``(height, width)`` of the images the model will receive.
            Only used to size the linear layer (default ``(224, 224)``).
    """

    def __init__(self, num_classes=4, input_size=(224, 224)):
        super().__init__()

        # (in_channels, out_channels) per stage. The layer order inside a stage
        # is fixed so the Sequential indices, and therefore the state_dict keys
        # (conv_layers.0, conv_layers.2, ...), stay the same as before.
        stage_channels = [(3, 32), (32, 64), (64, 64), (64, 64)]
        layers = []
        for in_channels, out_channels in stage_channels:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.BatchNorm2d(out_channels),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(0.5),
            ]
        self.conv_layers = nn.Sequential(*layers)

        # Only the shape of the probe matters, so zeros are used instead of
        # random values; this keeps the global RNG untouched by the probe.
        dummy_input = torch.zeros(1, 3, *input_size)
        conv_output_size = self._get_conv_output_size(dummy_input)

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(conv_output_size, num_classes, bias=True),
            nn.Softmax(dim=1)
        )

        # Not used by forward(); kept as an attribute in case code outside this
        # class reads ``model.l1_regularizer``.
        self.l1_regularizer = nn.L1Loss()

    def _get_conv_output_size(self, x):
        """Return the number of features per sample produced by ``conv_layers``.

        The probe runs in eval mode so it does not update the BatchNorm running
        statistics and does not apply dropout. The previous training flag of
        ``conv_layers`` is restored afterwards.
        """
        was_training = self.conv_layers.training
        self.conv_layers.eval()
        try:
            with torch.no_grad():
                conv_output = self.conv_layers(x)
        finally:
            self.conv_layers.train(was_training)
        return conv_output.view(x.size(0), -1).shape[1]

    def forward(self, x):
        """Run the conv stages and the classifier head; returns per-class probabilities."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [4]:
class CNN_6Layer(nn.Module):
    """Small CNN: five convolutional stages followed by one linear classifier.

    Every stage is ``Conv2d(3x3, padding=1) -> ReLU -> BatchNorm2d ->
    MaxPool2d(2) -> Dropout(0.5)``. The flattened feature map is fed to a single
    ``nn.Linear`` followed by ``nn.Softmax``, so ``forward`` returns class
    probabilities rather than raw logits.

    NOTE(review): the evaluation code in this file passes these probabilities to
    ``torch.nn.CrossEntropyLoss``, which applies log-softmax itself. The Softmax
    is left in place because removing it changes the model's outputs; confirm
    how the saved checkpoints were trained before touching it.

    Args:
        num_classes: Number of output classes (default 4).
        input_size: ``(height, width)`` of the images the model will receive.
            Only used to size the linear layer (default ``(224, 224)``).
    """

    def __init__(self, num_classes=4, input_size=(224, 224)):
        super().__init__()

        # (in_channels, out_channels) per stage. The layer order inside a stage
        # is fixed so the Sequential indices, and therefore the state_dict keys
        # (conv_layers.0, conv_layers.2, ...), stay the same as before.
        stage_channels = [(3, 32), (32, 64), (64, 64), (64, 64), (64, 64)]
        layers = []
        for in_channels, out_channels in stage_channels:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.BatchNorm2d(out_channels),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(0.5),
            ]
        self.conv_layers = nn.Sequential(*layers)

        # Only the shape of the probe matters, so zeros are used instead of
        # random values; this keeps the global RNG untouched by the probe.
        dummy_input = torch.zeros(1, 3, *input_size)
        conv_output_size = self._get_conv_output_size(dummy_input)

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(conv_output_size, num_classes, bias=True),
            nn.Softmax(dim=1)
        )

        # Not used by forward(); kept as an attribute in case code outside this
        # class reads ``model.l1_regularizer``.
        self.l1_regularizer = nn.L1Loss()

    def _get_conv_output_size(self, x):
        """Return the number of features per sample produced by ``conv_layers``.

        The probe runs in eval mode so it does not update the BatchNorm running
        statistics and does not apply dropout. The previous training flag of
        ``conv_layers`` is restored afterwards.
        """
        was_training = self.conv_layers.training
        self.conv_layers.eval()
        try:
            with torch.no_grad():
                conv_output = self.conv_layers(x)
        finally:
            self.conv_layers.train(was_training)
        return conv_output.view(x.size(0), -1).shape[1]

    def forward(self, x):
        """Run the conv stages and the classifier head; returns per-class probabilities."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [5]:
def create_model(model_name):
    """Build the architecture registered under ``model_name`` and load its weights.

    Supported names: ``'vit-tiny'``, ``'vit-small'``, ``'vit-base'``,
    ``'resnet-18'``, ``'resnet-34'``, ``'cnn-5-layer'``, ``'cnn-6-layer'``.
    The timm models are created with ``pretrained=False`` and four output
    classes; the weights are then read from the checkpoint file associated
    with the name and loaded onto the CPU.

    Args:
        model_name: One of the supported names listed above.

    Returns:
        The model with the checkpoint's state dict loaded.

    Raises:
        ValueError: If ``model_name`` is not a supported name. (Previously an
            unknown name fell through every branch and failed later with an
            ``UnboundLocalError``.)
    """
    experiments_root = '/Users/aravdhoot/Parkinson-Project/experiments/non-keyframe'

    # name -> (zero-argument builder, checkpoint path relative to experiments_root).
    # Builders are lambdas so that only the requested architecture is constructed.
    registry = {
        'vit-tiny': (
            lambda: timm.create_model('vit_tiny_patch16_224', pretrained=False, num_classes=4),
            'vit-tiny/vit-tiny-130-epochs-early-stopping-tiny.h5',
        ),
        'vit-small': (
            lambda: timm.create_model('vit_small_patch16_224', pretrained=False, num_classes=4),
            'vit-small/vit-small-130-epochs-early-stopping-small.h5',
        ),
        'vit-base': (
            lambda: timm.create_model('vit_base_patch16_224', pretrained=False, num_classes=4),
            'vit-base/vit-base-130-epochs-early-stopping-base.h5',
        ),
        'resnet-18': (
            lambda: timm.create_model('resnet18', pretrained=False, num_classes=4),
            'resnet-18/resnet-18-130-epochs-early-stopping-resnet18.h5',
        ),
        'resnet-34': (
            lambda: timm.create_model('resnet34', pretrained=False, num_classes=4),
            'resnet-34/resnet-34-130-epochs-early-stopping-resnet34.h5',
        ),
        'cnn-5-layer': (
            lambda: CNN_5Layer(),
            'cnn-5-layer/cnn-5-layer-130-epochs-early-stopping-with-regularization-5-layer',
        ),
        'cnn-6-layer': (
            lambda: CNN_6Layer(),
            'cnn-6-layer/cnn-6-layer-130-epochs-early-stopping-with-regularization-6-layer',
        ),
    }

    if model_name not in registry:
        supported = ', '.join(registry)
        raise ValueError(f"Unknown model_name {model_name!r}; expected one of: {supported}")

    build_model, relative_path = registry[model_name]
    model = build_model()
    model_path = f'{experiments_root}/{relative_path}'

    # NOTE(security): torch.load unpickles the file, so only load checkpoints
    # from a trusted source. Consider weights_only=True if the files contain
    # plain state dicts.
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)

    return model

In [6]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# ImageFolder assigns the integer label of each image from its sub-directory.
dataset = datasets.ImageFolder(
    root='/Users/aravdhoot/Parkinson-Project/non-keyframes/energy_images',
    transform=transform,
)

# 80% train / 10% validation; the test split takes whatever is left so the
# three sizes always add up to the dataset size.
total_size = len(dataset)
train_size = int(total_size * 0.8)
validation_size = int(total_size * 0.1)
test_size = total_size - train_size - validation_size

# A fixed seed makes the split the same on every run.
generator = torch.Generator().manual_seed(0)
train_dataset, validation_dataset, test_dataset = random_split(
    dataset,
    [train_size, validation_size, test_size],
    generator=generator,
)

# Data loaders: only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(validation_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [7]:
def _split_images_by_label(dataset, labels=(0, 1, 2, 3)):
    """Group the images of ``dataset`` by label in a single pass.

    Iterating the dataset loads and transforms every sample, so one pass is
    used here instead of one pass per class and per tensor.

    Args:
        dataset: Iterable of ``(image, label)`` pairs.
        labels: Labels to collect; samples with any other label are skipped.

    Returns:
        Dict mapping each requested label to the list of its images, in
        dataset order.
    """
    buckets = {label: [] for label in labels}
    for image, label in dataset:
        bucket = buckets.get(int(label))
        if bucket is not None:
            bucket.append(image)
    return buckets


_test_images_by_label = _split_images_by_label(test_dataset)

# torch.stack raises on an empty list, so every class must appear in the test
# split at least once (same requirement as before).
class_zero_images = torch.stack(_test_images_by_label[0], dim=0)
class_zero_labels = torch.full((len(_test_images_by_label[0]),), 0, dtype=torch.int64)
class_one_images = torch.stack(_test_images_by_label[1], dim=0)
class_one_labels = torch.full((len(_test_images_by_label[1]),), 1, dtype=torch.int64)
class_two_images = torch.stack(_test_images_by_label[2], dim=0)
class_two_labels = torch.full((len(_test_images_by_label[2]),), 2, dtype=torch.int64)
class_three_images = torch.stack(_test_images_by_label[3], dim=0)
class_three_labels = torch.full((len(_test_images_by_label[3]),), 3, dtype=torch.int64)

# The stacked tensors hold copies of the data; drop the per-image lists.
del _test_images_by_label

In [8]:
# One TensorDataset per class, pairing the stacked images with their labels.
dataset_zero = TensorDataset(class_zero_images, class_zero_labels)
dataset_one = TensorDataset(class_one_images, class_one_labels)
dataset_two = TensorDataset(class_two_images, class_two_labels)
dataset_three = TensorDataset(class_three_images, class_three_labels)

# The batch size equals the class size, so each loader yields its whole class
# as a single batch.
test_loader_zero = DataLoader(dataset_zero, batch_size=len(class_zero_labels), shuffle=False)
test_loader_one = DataLoader(dataset_one, batch_size=len(class_one_labels), shuffle=False)
test_loader_two = DataLoader(dataset_two, batch_size=len(class_two_labels), shuffle=False)
test_loader_three = DataLoader(dataset_three, batch_size=len(class_three_labels), shuffle=False)


In [9]:
def test(model_name):
    """Evaluate the saved ``model_name`` checkpoint on each per-class test loader.

    Loads the model through ``create_model``, moves it to CUDA when available
    (CPU otherwise), and runs it in eval mode without gradients over the
    module-level loaders ``test_loader_zero`` .. ``test_loader_three``.

    Printed output:
        * one line per batch with the class index, the batch loss and the
          running accuracy for that class;
        * the unweighted mean of the per-class accuracies;
        * the accuracy weighted by the number of samples in each class.

    Args:
        model_name: Name understood by ``create_model``.

    Raises:
        ValueError: If one of the class loaders yields no samples.
    """
    model = create_model(model_name)
    criterion = torch.nn.CrossEntropyLoss()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    model.to(device)
    model.eval()

    # Position in this list is the class index reported in the log lines.
    class_loaders = [test_loader_zero, test_loader_one, test_loader_two, test_loader_three]

    total_accuracy = 0
    total_accuracy_weighted = 0
    total_size = 0

    with torch.no_grad():
        for class_idx, loader in enumerate(class_loaders):
            class_correct = 0
            class_seen = 0

            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                loss = criterion(outputs, labels)
                predicted = outputs.argmax(dim=1)
                class_seen += labels.size(0)
                class_correct += (predicted == labels).sum().item()

                # The class index comes from the loader, not the batch index,
                # so the label stays right even if a loader yields several batches.
                print(f"Class {class_idx}, Loss: {loss.item():.6f}, Accuracy: {100 * class_correct / class_seen:.2f}%")

            if class_seen == 0:
                raise ValueError(f"Test loader for class {class_idx} yielded no samples")

            class_size = len(loader.dataset)
            total_accuracy += 100 * class_correct / class_seen
            # Same arithmetic as before (fraction * class size) so the printed
            # digits match earlier runs.
            total_accuracy_weighted += (class_correct / class_seen) * class_size
            total_size += class_size

    # The output strings are kept exactly as they were (including the spelling
    # "Averge") so new logs stay comparable with existing ones.
    print(f'Model — {model_name}')
    print(f'Average Accuracy — {total_accuracy/len(class_loaders)}')
    print(f'Averge Weighted Accuracy — {(total_accuracy_weighted/total_size) * 100}')

In [10]:
# Evaluate every saved checkpoint in turn; each call prints its own per-class
# and summary metrics.
models = [
    'vit-tiny',
    'vit-small',
    'vit-base',
    'resnet-18',
    'resnet-34',
    'cnn-5-layer',
    'cnn-6-layer',
]
for model_name in models:
    test(model_name)

Using device: cpu
Class 0, Loss: 0.005548, Accuracy: 100.00%
Class 1, Loss: 0.038003, Accuracy: 99.32%
Class 2, Loss: 0.010968, Accuracy: 99.83%
Class 3, Loss: 0.017585, Accuracy: 99.82%
Model — vit-tiny
Average Accuracy — 99.7420425513726
Averge Weighted Accuracy — 99.70674486803519
Using device: cpu
Class 0, Loss: 0.000335, Accuracy: 100.00%
Class 1, Loss: 0.023278, Accuracy: 99.77%
Class 2, Loss: 0.017844, Accuracy: 99.65%
Class 3, Loss: 0.012772, Accuracy: 99.82%
Model — vit-small
Average Accuracy — 99.81275987485468
Averge Weighted Accuracy — 99.76539589442815
Using device: cpu
Class 0, Loss: 0.025323, Accuracy: 98.31%
Class 1, Loss: 0.018540, Accuracy: 99.77%
Class 2, Loss: 0.005557, Accuracy: 99.83%
Class 3, Loss: 0.013028, Accuracy: 99.82%
Model — vit-base
Average Accuracy — 99.43220895421419
Averge Weighted Accuracy — 99.70674486803519
Using device: cpu
Class 0, Loss: 0.000183, Accuracy: 100.00%
Class 1, Loss: 0.015765, Accuracy: 99.77%
Class 2, Loss: 0.000141, Accuracy: 100.0