In [None]:
!git clone https://github.com/ails-lab/ntua-parkinson-dataset.git


Cloning into 'ntua-parkinson-dataset'...
remote: Enumerating objects: 42113, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 42113 (delta 0), reused 4 (delta 0), pack-reused 42104 (from 1)[K
Receiving objects: 100% (42113/42113), 2.14 GiB | 32.95 MiB/s, done.
Resolving deltas: 100% (6/6), done.
Updating files: 100% (44019/44019), done.


In [None]:
# Remove the .git folder to avoid ImageFolder errors
!rm -rf /content/ntua-parkinson-dataset/.git

# Now you can safely list and load your dataset folder
!ls /content/ntua-parkinson-dataset

# Remove all .ipynb_checkpoints folders recursively
!find /content/ntua-parkinson-dataset -type d -name ".ipynb_checkpoints" -exec rm -rf {} +

# Then continue with your dataset loading code...


'Non PD Patients'  'PD Patients'   README.md


In [None]:
import os
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Dataset root folder; ImageFolder uses each top-level subfolder
# ('Non PD Patients', 'PD Patients') as one class
base_dir = "/content/ntua-parkinson-dataset"

# One seed for the split and for PyTorch's RNG (new classifier head, batch shuffling),
# so repeated runs start from the same state
SEED = 42
torch.manual_seed(SEED)

# Data transforms matching VGG19 input requirements (224x224, ImageNet mean/std)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load dataset (ImageFolder reads classes from folder names)
dataset = datasets.ImageFolder(base_dir, transform=transform)
print("Classes found:", dataset.classes)

# Create train / val / test splits (80 / 10 / 10), stratified by class label.
# NOTE(review): the split is made per image. If one patient contributes several
# images (folder layout is not visible here — confirm), the same patient can end
# up in both train and test and inflate the reported scores; consider a
# patient-level (grouped) split.
indices = list(range(len(dataset)))
train_idx, holdout_idx = train_test_split(
    indices,
    test_size=0.2,
    stratify=dataset.targets,
    random_state=SEED,
)
# Halve the 20% hold-out into validation and test, again stratified
val_idx, test_idx = train_test_split(
    holdout_idx,
    test_size=0.5,
    stratify=[dataset.targets[i] for i in holdout_idx],
    random_state=SEED,
)

train_ds = Subset(dataset, train_idx)
val_ds = Subset(dataset, val_idx)
test_ds = Subset(dataset, test_idx)

# Only the training loader shuffles; evaluation order does not matter
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32)
test_loader = DataLoader(test_ds, batch_size=32)

# Load pretrained VGG19 model.
# `pretrained=True` is deprecated in torchvision; the explicit weights enum
# selects the same ImageNet checkpoint (requires torchvision >= 0.13).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)

# Replace the last classifier layer with one output per dataset class;
# sizes are read from the model/dataset instead of being hard-coded
model.classifier[6] = nn.Linear(model.classifier[6].in_features, len(dataset.classes))
model = model.to(device)

# Loss and optimizer (all layers are fine-tuned, not just the new head)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training function
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and report average metrics.

    Args:
        model: Module being trained; put into training mode here.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the samples the
        loader actually yielded.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    model.train()
    running_loss, correct, seen = 0.0, 0, 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        # Weight the batch loss by batch size so a smaller last batch is not over-counted
        running_loss += loss.item() * batch_size
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        seen += batch_size
    # Divide by the samples processed rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a sub-sampling sampler.
    return running_loss / seen, correct / seen

# Validation function
def eval_model(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module to evaluate; put into eval mode here.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the samples the
        loader actually yielded.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    model.eval()
    running_loss, correct, seen = 0.0, 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_size = inputs.size(0)
            # Weight the batch loss by batch size so a smaller last batch is not over-counted
            running_loss += loss.item() * batch_size
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            seen += batch_size
    # Divide by the samples processed rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a sub-sampling sampler.
    return running_loss / seen, correct / seen

# Fine-tune for a fixed number of epochs; after each one, report the
# training metrics alongside the metrics on the validation split
num_epochs = 5
for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(
        model=model,
        loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    val_loss, val_acc = eval_model(
        model=model,
        loader=val_loader,
        criterion=criterion,
        device=device,
    )

    epoch_summary = f"Epoch {epoch+1}/{num_epochs} — Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}"
    print(epoch_summary)

# Score the held-out test split: collect a prediction for every test image,
# then print per-class precision / recall / F1
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for batch_images, batch_targets in test_loader:
        logits = model(batch_images.to(device))
        # Predictions go back to the CPU so they can be turned into NumPy values
        all_preds.extend(logits.argmax(dim=1).cpu().numpy())
        # Labels were never moved to the device, so no .cpu() is needed
        all_labels.extend(batch_targets.numpy())

print("\nTest Classification Report:")
test_report = classification_report(all_labels, all_preds, target_names=dataset.classes)
print(test_report)


Classes found: ['Non PD Patients', 'PD Patients']


Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:05<00:00, 99.0MB/s]


Epoch 1/5 — Train Loss: 0.3784, Train Acc: 0.8259, Val Loss: 0.2235, Val Acc: 0.9048
Epoch 2/5 — Train Loss: 0.1816, Train Acc: 0.9223, Val Loss: 0.1789, Val Acc: 0.9237
Epoch 3/5 — Train Loss: 0.1153, Train Acc: 0.9539, Val Loss: 0.1159, Val Acc: 0.9566
Epoch 4/5 — Train Loss: 0.0817, Train Acc: 0.9687, Val Loss: 0.0790, Val Acc: 0.9727
Epoch 5/5 — Train Loss: 0.0564, Train Acc: 0.9791, Val Loss: 0.0683, Val Acc: 0.9755

Test Classification Report:
                 precision    recall  f1-score   support

Non PD Patients       0.96      0.96      0.96      1071
    PD Patients       0.99      0.99      0.99      3330

       accuracy                           0.98      4401
      macro avg       0.97      0.97      0.97      4401
   weighted avg       0.98      0.98      0.98      4401



In [None]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Dataset root folder; ImageFolder uses each top-level subfolder as one class
base_dir = "/content/ntua-parkinson-dataset"  # Change if needed

# One seed for the split and for PyTorch's RNG (new fc layer, batch shuffling),
# so repeated runs start from the same state
SEED = 42
torch.manual_seed(SEED)

# Transform for ResNet input (224x224, ImageNet mean/std)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load dataset
dataset = datasets.ImageFolder(base_dir, transform=transform)

# Stratified 80 / 10 / 10 split into train / val / test.
# NOTE(review): the split is made per image. If one patient contributes several
# images (folder layout is not visible here — confirm), the same patient can end
# up in both train and test and inflate the reported scores; consider a
# patient-level (grouped) split.
indices = list(range(len(dataset)))
train_idx, holdout_idx = train_test_split(
    indices,
    test_size=0.2,
    stratify=dataset.targets,
    random_state=SEED,
)
# Halve the 20% hold-out into validation and test, again stratified
val_idx, test_idx = train_test_split(
    holdout_idx,
    test_size=0.5,
    stratify=[dataset.targets[i] for i in holdout_idx],
    random_state=SEED,
)

train_ds = Subset(dataset, train_idx)
val_ds = Subset(dataset, val_idx)
test_ds = Subset(dataset, test_idx)

# Only the training loader shuffles; evaluation order does not matter
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32)
test_loader = DataLoader(test_ds, batch_size=32)

# Load pretrained ResNet50.
# `pretrained=True` is deprecated in torchvision; the explicit V1 weights enum
# selects the same ImageNet checkpoint (requires torchvision >= 0.13).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the final layer with one output per dataset class
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(dataset.classes))
model = model.to(device)

# Loss and optimizer (all layers are fine-tuned, not just the new head)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training function
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and report average metrics.

    Args:
        model: Module being trained; put into training mode here.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the samples the
        loader actually yielded.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    model.train()
    running_loss, correct, seen = 0.0, 0, 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        # Weight the batch loss by batch size so a smaller last batch is not over-counted
        running_loss += loss.item() * batch_size
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        seen += batch_size
    # Divide by the samples processed rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a sub-sampling sampler.
    return running_loss / seen, correct / seen

# Evaluation function
def eval_model(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module to evaluate; put into eval mode here.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the samples the
        loader actually yielded.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    model.eval()
    running_loss, correct, seen = 0.0, 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_size = inputs.size(0)
            # Weight the batch loss by batch size so a smaller last batch is not over-counted
            running_loss += loss.item() * batch_size
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            seen += batch_size
    # Divide by the samples processed rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a sub-sampling sampler.
    return running_loss / seen, correct / seen

# Fine-tune ResNet50 for a fixed number of epochs; after each one, report the
# training metrics followed by the metrics on the validation split
num_epochs = 5
for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(
        model=model,
        loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    val_loss, val_acc = eval_model(
        model=model,
        loader=val_loader,
        criterion=criterion,
        device=device,
    )
    # In the line below, the first "Acc" is training accuracy, the second validation accuracy
    epoch_summary = f"Epoch {epoch+1}/{num_epochs} — Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}"
    print(epoch_summary)

# Final check on the held-out test split: collect a prediction for every
# test image, then print per-class precision / recall / F1
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for batch_images, batch_targets in test_loader:
        logits = model(batch_images.to(device))
        # Predictions go back to the CPU so they can be turned into NumPy values
        all_preds.extend(logits.argmax(dim=1).cpu().numpy())
        # Labels were never moved to the device, so no .cpu() is needed
        all_labels.extend(batch_targets.numpy())

print("\nTest Classification Report:")
test_report = classification_report(all_labels, all_preds, target_names=dataset.classes)
print(test_report)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 164MB/s]


Epoch 1/5 — Train Loss: 0.2433, Acc: 0.8937, Val Loss: 0.1431, Acc: 0.9446
Epoch 2/5 — Train Loss: 0.0985, Acc: 0.9601, Val Loss: 0.1017, Acc: 0.9616
Epoch 3/5 — Train Loss: 0.0638, Acc: 0.9753, Val Loss: 0.0603, Acc: 0.9773
Epoch 4/5 — Train Loss: 0.0427, Acc: 0.9846, Val Loss: 0.0682, Acc: 0.9736
Epoch 5/5 — Train Loss: 0.0317, Acc: 0.9882, Val Loss: 0.0498, Acc: 0.9818

Test Classification Report:
                 precision    recall  f1-score   support

Non PD Patients       0.95      0.97      0.96      1071
    PD Patients       0.99      0.98      0.99      3330

       accuracy                           0.98      4401
      macro avg       0.97      0.98      0.97      4401
   weighted avg       0.98      0.98      0.98      4401

