In [56]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import timm

data_path = '/Users/rachelwang/Downloads/notes/models/csv/adherence_labeled_with_image.csv'
data = pd.read_csv(data_path)

In [57]:
data.head()

Unnamed: 0,pid,task,adherence,file,spectrogram
0,1f9475bb-f13b-4f68-969b-28f20455b3e7,Loudness,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
1,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-Breath-2,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
2,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-FiveBreaths-1,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
3,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-ThreeQuickBreaths-2,4.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
4,1f9475bb-f13b-4f68-969b-28f20455b3e7,Maximum-phonation-time-1,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...


In [51]:
# Count the occurrences of each adherence label
label_counts = data['adherence'].value_counts().sort_index()

# Print the counts for each label
print("Adherence label counts:")
print(label_counts)

Adherence label counts:
adherence
1.0     20
2.0     11
3.0     25
4.0     90
5.0    395
Name: count, dtype: int64


In [52]:
# Save the split dataframes to CSV files
train_csv_path = '/Users/rachelwang/Downloads/notes/models/train_data_adherence.csv'
val_csv_path = '/Users/rachelwang/Downloads/notes/models/val_data_adherence.csv'
test_csv_path = '/Users/rachelwang/Downloads/notes/models/test_data_adherence.csv'

# Load the split CSV files
train_df = pd.read_csv(train_csv_path)
val_df = pd.read_csv(val_csv_path)
test_df = pd.read_csv(test_csv_path)

In [53]:
# Define the dataset class
class SpectrogramDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx]['spectrogram']
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        label = self.dataframe.iloc[idx]['adherence'] - 1  # 1-based to 0-based label
        return image, label

In [54]:
# Define the transformations with augmentation
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(10),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [39]:
# Create datasets and dataloaders
train_dataset = SpectrogramDataset(train_df, transform=transform)
val_dataset = SpectrogramDataset(val_df, transform=transform)
test_dataset = SpectrogramDataset(test_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [40]:
# Load the EfficientNet model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=5)
model = model.to(device)

In [41]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [42]:
# Training loop
num_epochs = 50
best_model_path = 'efficientnet_best_model_adherence.pth'
best_val_loss = float('inf')
early_stop_patience = 10
early_stop_counter = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    print(f"Starting epoch {epoch + 1}/{num_epochs}")
    
    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device).long()  # Convert labels to LongTensor

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if batch_idx % 10 == 0:
            print(f"Batch {batch_idx}, Loss: {loss.item()}")

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).long()  # Convert labels to LongTensor

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * val_correct / val_total

    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
        torch.save(model.state_dict(), best_model_path)
    else:
        early_stop_counter += 1
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break

print("Training complete.")
print(f"Best model saved at: {best_model_path}")

Starting epoch 1/50
Batch 0, Loss: 4.330209732055664
Batch 10, Loss: 0.6061384677886963
Epoch [1/50], Loss: 2.1057, Accuracy: 52.47%
Validation Loss: 2.0706, Validation Accuracy: 54.63%
Starting epoch 2/50
Batch 0, Loss: 1.5679315328598022
Batch 10, Loss: 3.8740005493164062
Epoch [2/50], Loss: 2.0554, Accuracy: 66.98%
Validation Loss: 2.3078, Validation Accuracy: 68.52%
Starting epoch 3/50
Batch 0, Loss: 1.9588309526443481
Batch 10, Loss: 1.4499611854553223
Epoch [3/50], Loss: 1.5645, Accuracy: 64.51%
Validation Loss: 1.4315, Validation Accuracy: 67.59%
Starting epoch 4/50
Batch 0, Loss: 0.9025329947471619
Batch 10, Loss: 0.3136403262615204
Epoch [4/50], Loss: 0.8282, Accuracy: 73.15%
Validation Loss: 1.8324, Validation Accuracy: 62.04%
Starting epoch 5/50
Batch 0, Loss: 0.4891032576560974
Batch 10, Loss: 0.9837957620620728
Epoch [5/50], Loss: 0.9139, Accuracy: 74.69%
Validation Loss: 1.2059, Validation Accuracy: 77.78%
Starting epoch 6/50
Batch 0, Loss: 0.3931542634963989
Batch 10, Lo

In [43]:
# Load the saved model
model.load_state_dict(torch.load(best_model_path))

# Evaluate on test set
model.eval()
test_loss = 0.0
test_correct = 0
test_total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device).long()  # Convert labels to LongTensor

        outputs = model(images)
        loss = criterion(outputs, labels)

        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_loss /= len(test_loader)
test_accuracy = 100 * test_correct / test_total

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

Test Loss: 1.8237, Test Accuracy: 68.81%


In [64]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Cross-validation setup
full_dataset = SpectrogramDataset(data, transform=transform)
kf = KFold(n_splits=5, shuffle=True, random_state=42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kf.split(full_dataset)):
    print(f"Fold {fold + 1}")
    
    train_subset = Subset(full_dataset, train_idx)
    val_subset = Subset(full_dataset, val_idx)
    
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
    
    model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=5)
    model.load_state_dict(torch.load('/Users/rachelwang/Downloads/notes/models/efficientnet_best_model_adherence.pth'))
    model = model.to(device)
    
    # Evaluation on validation set
    model.eval()
    val_true = []
    val_pred = []
    
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).long()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            val_true.extend(labels.cpu().numpy())
            val_pred.extend(predicted.cpu().numpy())
    
    val_accuracy = 100 * accuracy_score(val_true, val_pred)
    fold_results.append(val_accuracy)
    print(f"Fold {fold + 1} Validation Accuracy: {val_accuracy:.2f}%")

Fold 1
Fold 1 Validation Accuracy: 78.90%
Fold 2
Fold 2 Validation Accuracy: 77.78%
Fold 3
Fold 3 Validation Accuracy: 75.93%
Fold 4
Fold 4 Validation Accuracy: 69.44%
Fold 5
Fold 5 Validation Accuracy: 80.56%


In [67]:
# Print cross-validation results
print("Cross-validation results:")
for i, accuracy in enumerate(fold_results):
    print(f"Fold {i + 1}: {accuracy:.2f}%")

print(f"Average cross-validation accuracy: {np.mean(fold_results):.2f}%")

Cross-validation results:
Fold 1: 78.90%
Fold 2: 77.78%
Fold 3: 75.93%
Fold 4: 69.44%
Fold 5: 80.56%
Average cross-validation accuracy: 76.52%
