In [None]:
# https://medium.com/@imabhi1216/fine-tuning-a-pre-trained-resnet-18-model-for-image-classification-on-custom-dataset-with-pytorch-02df12e83c2c

In [51]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder names of the two classes; reused for every split directory.
AI_CLASS = "ai"
HUMAN_CLASS = "human"

# Feature root: each label folder below it contains the mel-spectrogram images.
original_data_path = "/data/sg2121/fypdataset/dataset/features"

# Destination roots of the binary train / validation / test splits.
binary_train_dest, binary_val_dest, binary_test_dest = (
    os.path.join(original_data_path, split_dir)
    for split_dir in ("binary_train", "binary_val", "binary_test")
)

In [52]:
# Make sure every <split>/<label> directory exists; already-present ones are kept.
split_label_dirs = [
    os.path.join(split_root, class_label)
    for split_root in (binary_train_dest, binary_val_dest, binary_test_dest)
    for class_label in (AI_CLASS, HUMAN_CLASS)
]
for split_label_dir in split_label_dirs:
    os.makedirs(split_label_dir, exist_ok=True)

def gather_and_split(class_name, val_size=0.15, test_size=0.15):
    """Copy one class's mel-spectrogram PNGs into the train/val/test folders.

    The ``.png`` files found in ``<original_data_path>/<class_name>/Mel_Spectrogram``
    are split twice with ``train_test_split`` (``random_state=42``): first into
    train+val vs. test, then train+val into train vs. val. Each file is then
    copied to ``<split root>/<class_name>/``.

    Args:
        class_name: Name of the label folder to read from and to write into.
        val_size: Fraction of ALL files that should end up in the validation split.
        test_size: Fraction of ALL files that should end up in the test split.
    """
    mel_spec_dir = os.path.join(original_data_path, class_name, "Mel_Spectrogram")

    # os.listdir returns entries in arbitrary order, so sort the paths: otherwise
    # the seeded splits below could assign files differently from run to run.
    all_files = sorted(
        os.path.join(mel_spec_dir, f)
        for f in os.listdir(mel_spec_dir)
        if f.endswith(".png")
    )

    # Step 1: Train+Val and Test split
    train_val_files, test_files = train_test_split(all_files, test_size=test_size, random_state=42)

    # Step 2: Train and Val split
    # val_size is relative to the full set; rescale it to the remaining train+val part.
    val_fraction = val_size / (1 - test_size)
    train_files, val_files = train_test_split(train_val_files, test_size=val_fraction, random_state=42)

    for file_list, dest_root in [
        (train_files, binary_train_dest),
        (val_files, binary_val_dest),
        (test_files, binary_test_dest),
    ]:
        # Create the target folder here as well so the function does not depend
        # on another cell having created it first.
        dest_dir = os.path.join(dest_root, class_name)
        os.makedirs(dest_dir, exist_ok=True)
        for fpath in file_list:
            shutil.copy(fpath, os.path.join(dest_dir, os.path.basename(fpath)))

# Run for both classes
# NOTE: both calls below are commented out, so executing this cell copies no files.
# The print that follows runs unconditionally and therefore does not confirm that
# a split was performed in this session.
#gather_and_split(AI_CLASS)
#gather_and_split(HUMAN_CLASS)

print("Mel spectrograms split into train/val/test folders.")

Mel spectrograms split into train/val/test folders.


In [66]:
import torch
import torchvision.models as models

In [65]:
def get_model(num_classes=2):
    """Build a pre-trained ResNet-34 with a freshly initialised classification head.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer
            (defaults to 2, the number of classes in this dataset).

    Returns:
        The torchvision ResNet-34 model, loaded with its default pre-trained
        weights, whose final fully connected layer has been replaced by a new
        ``torch.nn.Linear`` with ``num_classes`` outputs.
    """
    # Load the pre-trained ResNet-34 model (torchvision default weights)
    model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)

    # Replace the last layer so its output size matches the number of classes
    model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
    return model

In [55]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

# Define the transformations to apply to the images:
# resize, centre-crop to 224, convert to a tensor, then normalise each channel
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the train, validation and test datasets.
# The split roots come from the binary_*_dest constants defined with the other
# paths, so each location is written down in exactly one place.
train_dataset = ImageFolder(binary_train_dest, transform=transform)
val_dataset = ImageFolder(binary_val_dest, transform=transform)
test_dataset = ImageFolder(binary_test_dest, transform=transform)

batch_size = 32

# Only the training loader shuffles; validation and test keep a fixed order
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

In [56]:
# Choose the compute device: MPS if available, otherwise CUDA, otherwise the CPU
if torch.backends.mps.is_available():
    device_name = "mps"
elif torch.cuda.is_available():
    device_name = "cuda"
else:
    device_name = "cpu"

device = torch.device(device_name)
print(device)

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the pass trains (zero_grad / backward / step on every
    batch); with ``optimizer=None`` it only evaluates, with gradients disabled.
    The caller decides whether the model is in train or eval mode.
    """
    is_training = optimizer is not None
    total_loss = 0.0
    total_correct = 0

    with torch.set_grad_enabled(is_training):
        for inputs, labels in loader:
            # Move inputs and labels to the device the model runs on
            inputs = inputs.to(device)
            labels = labels.to(device)

            if is_training:
                # Reset the gradients to zero before the backward pass
                optimizer.zero_grad()

            # Forward pass, predicted class (highest score) and loss
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            # loss.item() is the batch mean, so weight it by the batch size
            total_loss += loss.item() * inputs.size(0)
            # Accumulate as a Python int so the totals are plain numbers even
            # when the loader yields no batches
            total_correct += (preds == labels).sum().item()

    num_samples = len(loader.dataset)
    if num_samples == 0:
        # Nothing to average over; report NaN instead of dividing by zero
        return float("nan"), float("nan")
    return total_loss / num_samples, total_correct / num_samples


def train(model, train_loader, val_loader, criterion, optimizer, num_epochs, device=None):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    For every epoch the model is put in training mode and optimised over
    ``train_loader``, then put in evaluation mode and scored on ``val_loader``
    without gradients. One summary line per epoch is printed with the average
    loss and accuracy of both passes.

    Args:
        model: The ``torch.nn.Module`` to train (updated in place).
        train_loader: DataLoader yielding ``(inputs, labels)`` training batches.
        val_loader: DataLoader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of epochs to run.
        device: Device the batches are moved to. When ``None`` (default) it is
            taken from the model's first parameter, so the function does not
            depend on a global ``device`` variable; falls back to the CPU for
            a model without parameters.

    Returns:
        None. The model is left in evaluation mode after the last epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        # Training pass
        model.train()
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)

        # Validation pass (no optimizer -> no gradient computation)
        model.eval()
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        # Print the results for the current epoch
        print(f'Epoch [{epoch+1}/{num_epochs}], train loss: {train_loss:.4f}, train acc: {train_acc:.4f}, val loss: {val_loss:.4f}, val acc: {val_acc:.4f}')

cuda


In [73]:
model = get_model()

# Only the new classification head is handed to the optimizer below, so stop
# tracking gradients for the rest of the network. Otherwise every backward pass
# also computes backbone gradients that are never applied (and never zeroed).
model.requires_grad_(False)
model.fc.requires_grad_(True)

model = model.to(device)

# Define the loss function and optimizer
lr = 0.001
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.fc.parameters(), lr, momentum=0.9)
num_epochs = 5

train(model, train_loader, val_loader, criterion, optimizer, num_epochs)

Epoch [1/5], train loss: 0.7264, train acc: 0.5162, val loss: 0.6524, val acc: 0.6719
Epoch [2/5], train loss: 0.5761, train acc: 0.7223, val loss: 0.5370, val acc: 0.7656
Epoch [3/5], train loss: 0.4836, train acc: 0.7717, val loss: 0.4811, val acc: 0.7891
Epoch [4/5], train loss: 0.4134, train acc: 0.8279, val loss: 0.4421, val acc: 0.8203
Epoch [5/5], train loss: 0.4014, train acc: 0.8484, val loss: 0.4519, val acc: 0.7891


In [68]:
import time
from datetime import datetime
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

def evaluate_model(model, test_loader, device, hyperparams=None,
                   log_file="/data/sg2121/aimusicdetector/training_logfile.txt"):
    """Evaluate ``model`` on ``test_loader``, then print and log the metrics.

    The report contains per-class accuracy, precision, recall, F1 and false
    positive rate, the overall accuracy, and the average wall-clock time per
    sample. That time is measured around the whole evaluation loop, so it
    includes data loading and transfers, not only the forward pass.

    Args:
        model: The ``torch.nn.Module`` to evaluate; switched to eval mode.
        test_loader: DataLoader yielding ``(inputs, labels)`` batches whose
            ``dataset`` exposes a ``classes`` list indexed by label.
        device: Device the input batches are moved to before the forward pass.
        hyperparams: Optional dict of settings written at the top of the
            report; a placeholder line is logged when it is empty or ``None``.
        log_file: Path of the text file the report is appended to.

    Returns:
        The overall accuracy as computed by ``accuracy_score``.
    """
    class_names = test_loader.dataset.classes
    # Pass the full label set to every metric so the confusion matrix and the
    # per-class arrays always have one entry per class, even when a class is
    # missing from the data or there are more than two classes.
    class_indices = list(range(len(class_names)))

    model.eval()
    all_labels = []
    all_preds = []

    start_time = time.time()

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)

            # Collect plain Python ints; every metric below is derived from
            # these lists, so no per-sample tensor comparison is needed.
            all_labels.extend(labels.tolist())
            all_preds.extend(preds.tolist())

    end_time = time.time()
    total_inference_time = end_time - start_time
    avg_inference_time = total_inference_time / len(test_loader.dataset)

    overall_accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average=None, labels=class_indices
    )
    # Rows are true classes, columns are predicted classes
    cm = confusion_matrix(all_labels, all_preds, labels=class_indices)

    accuracy_per_class = {}
    fpr = {}
    for i, classname in enumerate(class_names):
        true_pos = cm[i, i]
        n_true = cm[i, :].sum()   # samples whose true class is i
        n_pred = cm[:, i].sum()   # samples predicted as class i

        # Per-class accuracy: correctly predicted share of the class's samples
        accuracy_per_class[classname] = true_pos / n_true if n_true > 0 else 0

        FP = n_pred - true_pos
        TN = cm.sum() - (n_true + n_pred - true_pos)
        fpr[classname] = FP / (FP + TN) if (FP + TN) > 0 else 0

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_lines = [f"===== Evaluation at {timestamp} =====\n"]

    # Log hyperparameters
    log_lines.append("Hyperparameters:")
    if hyperparams:
        for key, value in hyperparams.items():
            log_lines.append(f"{key}: {value}")
    else:
        log_lines.append("No hyperparameters provided.")
    log_lines.append("")

    # Log results
    log_lines.append("Accuracy per class:")
    for classname, acc in accuracy_per_class.items():
        log_lines.append(f"{classname}: {acc:.4f}")
    log_lines.append("\nPrecision, Recall, F1:")
    for i, classname in enumerate(class_names):
        log_lines.append(f"{classname} → Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")
    log_lines.append("\nFalse Positive Rate:")
    for classname, rate in fpr.items():
        log_lines.append(f"{classname}: {rate:.4f}")
    log_lines.append(f"\nOverall Accuracy: {overall_accuracy:.4f}")
    log_lines.append(f"Average Inference Time per Sample: {avg_inference_time:.6f} seconds")
    log_lines.append("=" * 40 + "\n\n")

    # Print and save
    report = "\n".join(log_lines)
    print(report)
    # The report contains a non-ASCII arrow, so write it with an explicit
    # encoding instead of relying on the platform default.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(report)

    return overall_accuracy


In [69]:
# Settings of the run trained above, recorded next to its test metrics
hyperparams = dict(
    batch_size=batch_size,
    learning_rate=lr,
    epochs=num_epochs,
    optimizer=optimizer,
    model=type(model).__name__,
)

# Score the trained model on the test split; as the last expression of the
# cell, the returned accuracy is also shown as the cell output.
evaluate_model(model, test_loader, device, hyperparams=hyperparams)


===== Evaluation at 2025-04-23 17:50:15 =====

Hyperparameters:
batch_size: 32
learning_rate: 0.001
epochs: 5
optimizer: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
model: ResNet

Accuracy per class:
ai: 0.7414
human: 0.8143

Precision, Recall, F1:
ai → Precision: 0.7679, Recall: 0.7414, F1: 0.7544
human → Precision: 0.7917, Recall: 0.8143, F1: 0.8028

False Positive Rate:
ai: 0.1857
human: 0.2586

Overall Accuracy: 0.7812
Average Inference Time per Sample: 0.013757 seconds




0.78125

In [None]:
import random
from itertools import product

# Define hyperparameter search space
learning_rates = [1e-3, 1e-4, 5e-4]
weight_decays = [1e-6, 1e-7, 1e-8]
epochs_list = [5, 10, 20]
num_trials = 10
search_seed = 42

# Generate hyperparameter combinations and randomly select num_trials of them.
# A dedicated seeded generator makes the selection reproducible without
# changing the global random state.
param_combinations = list(product(learning_rates, weight_decays, epochs_list))
hyperparam_trials = random.Random(search_seed).sample(
    param_combinations, min(num_trials, len(param_combinations))
)

# Run randomized search
best_model = None
best_acc = 0

for i, (lr, wd, epochs) in enumerate(hyperparam_trials):
    print(f"\n=== Trial {i+1}/{len(hyperparam_trials)}: LR={lr}, WD={wd}, Epochs={epochs} ===")

    model = get_model()
    # Only the classification head is optimised, so skip gradient tracking for
    # the rest of the network.
    model.requires_grad_(False)
    model.fc.requires_grad_(True)
    model = model.to(device)

    # Pass the sampled weight decay to the optimizer; without it, trials that
    # differ only in WD would be identical runs.
    optimizer = torch.optim.SGD(model.fc.parameters(), lr=lr, momentum=0.9, weight_decay=wd)

    train(model, train_loader, val_loader, criterion, optimizer, epochs)

    hyperparams = {
        "batch_size": batch_size,
        "learning_rate": lr,
        "epochs": epochs,
        "optimizer": optimizer,
        "model": model.__class__.__name__,
    }

    # Model selection uses the validation split, not the test split
    val_acc = evaluate_model(model, val_loader, device, hyperparams=hyperparams)
    if val_acc > best_acc:
        best_acc = val_acc
        best_model = model
        torch.save(model.state_dict(), 'best_model.pt')

print("\nBest validation accuracy:", best_acc)


=== Trial 1/10: LR=0.0005, WD=1e-08, Epochs=5 ===
