In [1]:
# https://medium.com/@imabhi1216/fine-tuning-a-pre-trained-resnet-18-model-for-image-classification-on-custom-dataset-with-pytorch-02df12e83c2c

In [1]:
# Prints a fixed string; presumably a leftover check that the kernel executes cells
# (nothing else in the visible notebook depends on it) — TODO confirm and remove.
print("HELLO")

HELLO


In [2]:
import os
import shutil
from sklearn.model_selection import train_test_split
import random

import torch
import torchvision.models as models

from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader

import time
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

import pandas as pd
import torch.nn.functional as F

from PIL import Image

from tqdm import tqdm

from itertools import product

In [3]:
def get_model(num_classes=2, pretrained=True):
    """Build a ResNet-34 classifier with a freshly initialised output layer.

    Args:
        num_classes: Size of the replacement ``fc`` layer's output.
            Defaults to 2, the value that was previously hard-coded.
        pretrained: When True (default) the torchvision default pretrained
            ResNet-34 weights are loaded; when False the backbone is created
            without pretrained weights (e.g. when a saved state dict is
            loaded immediately afterwards).

    Returns:
        The torchvision ResNet-34 module whose ``fc`` attribute has been
        replaced by ``torch.nn.Linear(in_features, num_classes)``.
    """
    # ResNet-34 backbone (the earlier comment said ResNet-18, which did not
    # match the model actually constructed).
    weights = models.ResNet34_Weights.DEFAULT if pretrained else None
    model = models.resnet34(weights=weights)

    # Swap the final fully connected layer for one sized to this task.
    model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
    return model

In [11]:
# Directory paths for the audio segments and their MFCC images
ai_segments_path = "/vol/bitbucket/sg2121/fypdataset/dataset_large2/normal_data/ai_segments"
human_segments_path = "/vol/bitbucket/sg2121/fypdataset/dataset_large2/normal_data/human"
ai_mfcc_path = "/vol/bitbucket/sg2121/fypdataset/dataset_large2/features/ai/MFCC"
human_mfcc_path = "/vol/bitbucket/sg2121/fypdataset/dataset_large2/features/human/MFCC"

ai_aug_segments_path = "/vol/bitbucket/sg2121/fypdataset/dataset_large2/normal_data/augmented_ai"
ai_aug_mfcc_path = "/vol/bitbucket/sg2121/fypdataset/dataset_large2/features/ai_aug/MFCC"

# (mfcc_png_path, label) pairs for every augmented-AI MFCC image on disk
ai_aug_test_files = []

# Loop through files in the directory. os.listdir returns entries in
# arbitrary order, so sort them to make the list deterministic.
for filename in sorted(os.listdir(ai_aug_mfcc_path)):
    if filename.endswith(".png"):
        full_path = os.path.join(ai_aug_mfcc_path, filename)
        # Label 0 == AI, matching process_file_paths (0 if is_ai else 1) and
        # evaluate_model ('ai' if label == 0). These files were previously
        # tagged 1, i.e. counted as human.
        ai_aug_test_files.append((full_path, 0))

# Helper: load a text file and return its lines with surrounding whitespace removed
def read_file_paths(file_name):
    """Return every line of ``file_name`` as a whitespace-stripped string.

    Blank lines are kept and show up as empty strings.
    """
    stripped_lines = []
    with open(file_name, 'r') as handle:
        for raw_line in handle:
            stripped_lines.append(raw_line.strip())
    return stripped_lines

# Load the train / validation / test segment lists (one path per line) in that order
train_files, val_files, test_files = (
    read_file_paths(split_list)
    for split_list in (
        '/vol/bitbucket/sg2121/fyp/aimusicdetector/train_test_split/bitbucket/augmented/train_files_w_aug.txt',
        '/vol/bitbucket/sg2121/fyp/aimusicdetector/train_test_split/bitbucket/augmented/val_files_w_aug.txt',
        '/vol/bitbucket/sg2121/fyp/aimusicdetector/train_test_split/bitbucket/augmented/test_files_w_aug.txt',
    )
)


# Map an audio segment path to the path of its MFCC image
def convert_to_mfcc_path(file_path, is_ai):
    """Translate a segment path into the matching MFCC PNG path.

    The segment must live under one of the known segment directories for the
    requested class (AI: plain or augmented; human: the human directory).
    Returns ``None`` when ``file_path`` does not start with any of them.
    """
    if is_ai:
        roots = (
            (ai_segments_path, ai_mfcc_path),
            (ai_aug_segments_path, ai_aug_mfcc_path),
        )
    else:
        roots = ((human_segments_path, human_mfcc_path),)

    for segment_root, mfcc_root in roots:
        if file_path.startswith(segment_root):
            # "<name>.mp3" -> "<name>-MFCC.png" inside the MFCC directory
            mfcc_name = os.path.basename(file_path).replace('.mp3', '-MFCC.png')
            return os.path.join(mfcc_root, mfcc_name)

    return None


# Turn segment paths into (mfcc_path, label) tuples
def process_file_paths(file_paths, is_ai):
    """Pair each converted MFCC path with its class label (0 for AI, 1 otherwise).

    Paths that ``convert_to_mfcc_path`` cannot map keep their slot, with
    ``None`` as the path.
    """
    label = 0 if is_ai else 1
    labelled = []
    for segment_path in file_paths:
        labelled.append((convert_to_mfcc_path(segment_path, is_ai), label))
    return labelled

# Label every split twice: once as AI and once as human. Entries that do not
# belong to the requested class come back with a None path.
ai_train_files, human_train_files = (
    process_file_paths(train_files, is_ai=flag) for flag in (True, False)
)

ai_val_files, human_val_files = (
    process_file_paths(val_files, is_ai=flag) for flag in (True, False)
)

ai_test_files, human_test_files = (
    process_file_paths(test_files, is_ai=flag) for flag in (True, False)
)

def clean(paths):
    """Drop every ``(path, label)`` pair whose path is ``None``; order is preserved."""
    kept = []
    for path, label in paths:
        if path is None:
            continue
        kept.append((path, label))
    return kept

# Merge the AI and human entries of each split, dropping unmapped (None) paths
train_files_combined = clean(ai_train_files) + clean(human_train_files)
val_files_combined = clean(ai_val_files) + clean(human_val_files)
test_files_combined = clean(ai_test_files) + clean(human_test_files)

# Shuffle the data. Seed first so the resulting order (and therefore the row
# order of any exported predictions) is the same on every run.
SHUFFLE_SEED = 42
random.seed(SHUFFLE_SEED)
random.shuffle(train_files_combined)
random.shuffle(val_files_combined)
random.shuffle(test_files_combined)
random.shuffle(ai_aug_test_files)

# Check the splits (the last line used to be labelled "Test set size" as well)
print(f"Training set size: {len(train_files_combined)}")
print(f"Validation set size: {len(val_files_combined)}")
print(f"Test set size: {len(test_files_combined)}")
print(f"AI-augmented test set size: {len(ai_aug_test_files)}")

Training set size: 37297
Validation set size: 7991
Test set size: 7995
Test set size: 14149


In [12]:
# Custom dataset class
class MFCCDataset(Dataset):
    """Dataset over ``(image_path, label)`` pairs.

    Each item is ``(image, label, filename)``: the image is opened with PIL,
    converted to RGB and passed through ``transform`` when one is given;
    ``filename`` is the base name of the image path.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, target = self.data[idx]
        picture = Image.open(img_path).convert('RGB')
        if self.transform:
            picture = self.transform(picture)
        return picture, target, os.path.basename(img_path)
        
# Per-channel mean / std passed to Normalize (the standard ImageNet statistics)
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Resize -> centre crop to 224 -> tensor -> normalise
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
transform = transforms.Compose(preprocessing_steps)


In [13]:
# Create datasets
train_dataset = MFCCDataset(train_files_combined, transform=transform)
val_dataset = MFCCDataset(val_files_combined, transform=transform)
test_dataset = MFCCDataset(test_files_combined, transform=transform)
ai_aug_test_dataset = MFCCDataset(ai_aug_test_files, transform=transform)
# Backward-compatible alias: the dataset used to be bound to this misspelled name.
ai_sug_test_dataset = ai_aug_test_dataset

# DataLoaders (only the training loader reshuffles each epoch)
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
ai_aug_test_loader = DataLoader(ai_aug_test_dataset, batch_size=batch_size, shuffle=False)

# Confirm sizes
print(f"Train set: {len(train_dataset)} samples")
print(f"Val set: {len(val_dataset)} samples")
print(f"Test set: {len(test_dataset)} samples")

Train set: 37297 samples
Val set: 7991 samples
Test set: 7995 samples


In [7]:
# Pick the compute device: Apple MPS first, then CUDA, otherwise the CPU
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one the model is put in eval mode and gradients are
    disabled. Both metrics are averaged over the samples actually seen, and
    are ``0.0`` when the loader yields nothing.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.set_grad_enabled(is_training):
        for inputs, labels, _filenames in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if is_training:
                # Clear gradients left over from the previous step
                optimizer.zero_grad()

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            # criterion returns a batch mean, so weight it by the batch size
            batch_size = inputs.size(0)
            loss_sum += loss.item() * batch_size
            # .item() keeps the counter a plain int, even if no batch is seen
            num_correct += (preds == labels).sum().item()
            num_seen += batch_size

    if num_seen == 0:
        return 0.0, 0.0
    return loss_sum / num_seen, num_correct / num_seen


def train(model, train_loader, val_loader, criterion, optimizer, num_epochs, device=None):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: Network to optimise; it is updated in place.
        train_loader: Iterable of ``(inputs, labels, filenames)`` training batches.
        val_loader: Iterable of ``(inputs, labels, filenames)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimiser stepped once per training batch.
        num_epochs: Number of epochs to run.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function no longer relies on a module-level ``device``.

    Returns:
        None. One summary line per epoch is printed. The model is left in
        eval mode after the final validation pass.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        # Print the results for the current epoch
        print(f'Epoch [{epoch+1}/{num_epochs}], train loss: {train_loss:.4f}, train acc: {train_acc:.4f}, val loss: {val_loss:.4f}, val acc: {val_acc:.4f}')

cuda


In [8]:
# Build the network and place it on the selected device
model = get_model().to(device)

# Loss function plus an SGD optimiser that updates only the parameters of model.fc
num_epochs = 5
lr = 0.001
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.fc.parameters(), lr=lr, momentum=0.9)

In [9]:
# Best hyperparameters from search; this optimiser covers every model parameter
num_epochs = 20
lr = 0.001
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=lr,
    weight_decay=1e-08,
    momentum=0.9,
)

In [10]:
# Fit the current model with the loss, optimiser and epoch count configured above
train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

Epoch [1/20], train loss: 0.2446, train acc: 0.8933, val loss: 0.1999, val acc: 0.9148
Epoch [2/20], train loss: 0.1677, train acc: 0.9312, val loss: 0.1773, val acc: 0.9233
Epoch [3/20], train loss: 0.1364, train acc: 0.9447, val loss: 0.1719, val acc: 0.9320
Epoch [4/20], train loss: 0.1159, train acc: 0.9529, val loss: 0.1641, val acc: 0.9338
Epoch [5/20], train loss: 0.0918, train acc: 0.9625, val loss: 0.1826, val acc: 0.9280
Epoch [6/20], train loss: 0.0734, train acc: 0.9707, val loss: 0.4614, val acc: 0.8771
Epoch [7/20], train loss: 0.0607, train acc: 0.9764, val loss: 0.3480, val acc: 0.8856
Epoch [8/20], train loss: 0.0465, train acc: 0.9830, val loss: 0.2671, val acc: 0.9204
Epoch [9/20], train loss: 0.0359, train acc: 0.9867, val loss: 0.2647, val acc: 0.9324
Epoch [10/20], train loss: 0.0279, train acc: 0.9903, val loss: 0.2160, val acc: 0.9373
Epoch [11/20], train loss: 0.0282, train acc: 0.9896, val loss: 0.2697, val acc: 0.9339
Epoch [12/20], train loss: 0.0205, train 

In [11]:
def evaluate_model(model, test_loader, device, hyperparams=None,
                   log_file="/vol/bitbucket/sg2121/fyp/aimusicdetector/music_cnn/large/mfcc/training_large_logfile.txt"):
    """Evaluate a two-class model, print a report and append it to a log file.

    Labels are interpreted as 0 == 'ai' and 1 == 'human'.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_loader: Iterable of ``(inputs, labels, filenames)`` batches.
        device: Device the input batches are moved to before the forward pass.
        hyperparams: Optional mapping written to the report, one
            ``key: value`` line per entry.
        log_file: File the report is appended to. Defaults to the path that
            was previously hard-coded inside the function.

    Returns:
        Overall accuracy as computed by ``accuracy_score``.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    class_names = ['ai', 'human']  # list index == integer label

    model.eval()
    all_labels = []
    all_preds = []

    # Time only the forward passes and the transfer of predictions to the host
    start_time = time.time()
    with torch.no_grad():
        for inputs, labels, _filenames in test_loader:
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)

            # One host transfer per batch instead of one per sample
            all_labels.extend(labels.tolist())
            all_preds.extend(preds.cpu().tolist())
    end_time = time.time()

    num_samples = len(all_labels)
    if num_samples == 0:
        raise ValueError("test_loader yielded no samples to evaluate")
    avg_inference_time = (end_time - start_time) / num_samples

    # Per-class accuracy, bucketed by the true label
    correct_pred = {classname: 0 for classname in class_names}
    total_pred = {classname: 0 for classname in class_names}
    for label, prediction in zip(all_labels, all_preds):
        classname = 'ai' if label == 0 else 'human'
        if label == prediction:
            correct_pred[classname] += 1
        total_pred[classname] += 1

    accuracy_per_class = {
        classname: correct_pred[classname] / total_pred[classname]
        if total_pred[classname] > 0 else 0
        for classname in class_names
    }

    overall_accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average=None, labels=[0, 1], zero_division=0
    )
    # Pin labels so the matrix is always 2x2, even when the data (e.g. an
    # AI-only set) contains a single class; otherwise the indexing below fails.
    cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])

    # One-vs-rest false positive rate per class
    fpr = {}
    for i, classname in enumerate(class_names):
        FP = cm[:, i].sum() - cm[i, i]
        TN = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fpr[classname] = FP / (FP + TN) if (FP + TN) > 0 else 0

    # Logging
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_lines = [f"===== Evaluation at {timestamp} =====\n"]

    if hyperparams:
        log_lines.append("Hyperparameters:")
        for key, value in hyperparams.items():
            log_lines.append(f"{key}: {value}")
    else:
        log_lines.append("No hyperparameters provided.")
    log_lines.append("")

    log_lines.append("Accuracy per class:")
    for classname, acc in accuracy_per_class.items():
        log_lines.append(f"{classname}: {acc:.4f}")
    log_lines.append("\nPrecision, Recall, F1:")
    for i, classname in enumerate(class_names):
        log_lines.append(f"{classname} → Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")
    log_lines.append("\nFalse Positive Rate:")
    for classname, rate in fpr.items():
        log_lines.append(f"{classname}: {rate:.4f}")
    log_lines.append(f"\nOverall Accuracy: {overall_accuracy:.4f}")
    log_lines.append(f"Average Inference Time per Sample: {avg_inference_time:.6f} seconds")
    log_lines.append("=" * 40 + "\n\n")

    report = "\n".join(log_lines)
    print(report)
    # The report contains a non-ASCII arrow, so do not depend on the locale encoding
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(report)

    return overall_accuracy


In [12]:
def collect_predictions(model, loader, device):
    """Run ``model`` over ``loader`` and return one record per sample.

    Args:
        model: Classifier producing two logits per sample (column 0 is
            reported as AI, column 1 as human).
        loader: Iterable of ``(inputs, labels, filenames)`` batches.
        device: Device the inputs are moved to for the forward pass.

    Returns:
        A list of dicts with keys ``filename``, ``prob_ai``, ``prob_human``,
        ``true_label`` and ``pred_label``.
    """
    model.eval()
    records = []
    with torch.no_grad():
        for inputs, labels, filenames in tqdm(loader):
            # Move the whole batch of probabilities to the host once, rather
            # than synchronising with the device for every .item() call.
            probs = F.softmax(model(inputs.to(device)), dim=1).cpu()
            pred_labels = probs.argmax(dim=1).tolist()
            for filename, prob_row, true_label, pred_label in zip(
                filenames, probs.tolist(), labels.tolist(), pred_labels
            ):
                records.append({
                    "filename": filename,
                    "prob_ai": prob_row[0],
                    "prob_human": prob_row[1],
                    "true_label": true_label,
                    "pred_label": pred_label,
                })
    return records


results = collect_predictions(model, test_loader, device)

# Convert the results to a DataFrame
df = pd.DataFrame(results)

# Save to CSV
df.to_csv("mfcc_test_large_with_aug_predictions.csv", index=False)

# Preview results
print(df.head())


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [03:34<00:00,  1.17it/s]


                               filename       prob_ai    prob_human  \
0     S641RN_segment_2_shifted-MFCC.png  1.000000e+00  5.368911e-13   
1                      H15498N-MFCC.png  9.106738e-01  8.932620e-02   
2                      H13630N-MFCC.png  7.589860e-08  9.999999e-01   
3  U1741RN_segment_2_stretched-MFCC.png  1.000000e+00  2.391614e-13   
4                      H12629N-MFCC.png  1.213010e-06  9.999988e-01   

   true_label  pred_label  
0           0           0  
1           1           0  
2           1           1  
3           0           0  
4           1           1  


In [13]:
# Persist only the parameters (state dict) of the current model
checkpoint_path = 'cur_model.pt'
torch.save(model.state_dict(), checkpoint_path)

In [14]:
# Settings recorded alongside the evaluation report
hyperparams = dict(
    batch_size=batch_size,
    learning_rate=lr,
    epochs=num_epochs,
    optimizer=optimizer,
    model=type(model).__name__,
)

# Score the current model on the held-out test split; the cell displays the returned accuracy
evaluate_model(model, test_loader, device, hyperparams=hyperparams)


===== Evaluation at 2025-05-28 01:24:33 =====

Hyperparameters:
batch_size: 32
learning_rate: 0.001
epochs: 20
optimizer: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 1e-08
)
model: ResNet

Accuracy per class:
ai: 0.9301
human: 0.9485

Precision, Recall, F1:
ai → Precision: 0.9534, Recall: 0.9301, F1: 0.9416
human → Precision: 0.9229, Recall: 0.9485, F1: 0.9355

False Positive Rate:
ai: 0.0515
human: 0.0699

Overall Accuracy: 0.9387
Average Inference Time per Sample: 0.014409 seconds




0.9387116948092558

HYPERPARAM SEARCH BELOW

In [23]:
# Define hyperparameter search space
learning_rates = [1e-3, 1e-4, 5e-4]
weight_decays = [1e-6, 1e-7, 1e-8]
epochs_list = [5, 10, 20]
num_trials = 10

# Generate hyperparameter combinations and randomly select num_trials of them
param_combinations = list(product(learning_rates, weight_decays, epochs_list))
random.shuffle(param_combinations)
hyperparam_trials = param_combinations[:num_trials]

# Run randomized search
best_model = None
best_acc = 0

# Trial-scoped names are used inside the loop so the search does not overwrite
# the notebook-level `model`, `optimizer`, `lr` and `hyperparams`.
for i, (trial_lr, trial_wd, trial_epochs) in enumerate(hyperparam_trials):
    print(f"\n=== Trial {i+1}/{len(hyperparam_trials)}: LR={trial_lr}, WD={trial_wd}, Epochs={trial_epochs} ===")

    # Every trial starts from a fresh model
    trial_model = get_model().to(device)

    trial_optimizer = torch.optim.SGD(
        trial_model.parameters(), lr=trial_lr, momentum=0.9, weight_decay=trial_wd
    )

    # Train the model
    train(trial_model, train_loader, val_loader, criterion, trial_optimizer, trial_epochs)

    trial_hyperparams = {
        "batch_size": batch_size,
        "learning_rate": trial_lr,
        "weight_decay": trial_wd,
        "epochs": trial_epochs,
        "optimizer": trial_optimizer,
        "model": trial_model.__class__.__name__,
    }

    # Model selection uses the validation split only
    val_acc = evaluate_model(trial_model, val_loader, device, hyperparams=trial_hyperparams)
    if val_acc > best_acc:
        best_acc = val_acc
        best_model = trial_model
        torch.save(trial_model.state_dict(), 'best_model.pt')

print("\nBest validation accuracy:", best_acc)


=== Trial 1/10: LR=0.0001, WD=1e-06, Epochs=5 ===
Epoch [1/5], train loss: 0.3265, train acc: 0.8677, val loss: 0.2543, val acc: 0.8953
Epoch [2/5], train loss: 0.2356, train acc: 0.9053, val loss: 0.2355, val acc: 0.9068
Epoch [3/5], train loss: 0.2022, train acc: 0.9183, val loss: 0.2194, val acc: 0.9109
Epoch [4/5], train loss: 0.1782, train acc: 0.9295, val loss: 0.2280, val acc: 0.9047
Epoch [5/5], train loss: 0.1552, train acc: 0.9394, val loss: 0.1999, val acc: 0.9224
===== Evaluation at 2025-05-23 16:59:53 =====

Hyperparameters:
batch_size: 32
learning_rate: 0.0001
epochs: 5
optimizer: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 1e-06
)
model: ResNet

Accuracy per class:
ai: 0.8105
human: 0.9560

Precision, Recall, F1:
ai → Precision: 0.8467, Recall: 0.8105, F1: 0.8282
human → Precision: 0.9439, Recall: 0.9560, F1: 0.9499

False P

In [16]:
# Rebuild the architecture and restore the checkpoint written by the hyperparameter search.
# NOTE(review): best_model.pt is whatever that search last saved; the recorded
# outputs show a much lower test accuracy for it than for the freshly trained
# model, so confirm the checkpoint matches the current data and labels.
model = get_model()
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state dict needs.
state_dict = torch.load("best_model.pt", map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

results = []
with torch.no_grad():
    for inputs, labels, filenames in tqdm(test_loader):
        # Bring each batch of probabilities to the host once instead of
        # calling .item() on device tensors for every sample.
        probs = F.softmax(model(inputs.to(device)), dim=1).cpu()
        pred_labels = probs.argmax(dim=1).tolist()
        for filename, prob_row, true_label, pred_label in zip(
            filenames, probs.tolist(), labels.tolist(), pred_labels
        ):
            results.append({
                "filename": filename,
                "prob_ai": prob_row[0],
                "prob_human": prob_row[1],
                "true_label": true_label,
                "pred_label": pred_label,
            })

# Convert the results to a DataFrame
df = pd.DataFrame(results)

# Save to CSV
df.to_csv("check.csv", index=False)

# Preview results
print(df.head())


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [04:27<00:00,  1.07s/it]

                               filename       prob_ai    prob_human  \
0             S627RN_segment_1-MFCC.png  9.999994e-01  5.364727e-07   
1  S4042RN_segment_2_stretched-MFCC.png  8.199579e-10  1.000000e+00   
2            S1453RN_segment_1-MFCC.png  9.990741e-01  9.258232e-04   
3  S4764RN_segment_1_stretched-MFCC.png  2.466736e-10  1.000000e+00   
4            S2694RN_segment_1-MFCC.png  9.999838e-01  1.616777e-05   

   true_label  pred_label  
0           0           0  
1           0           1  
2           0           0  
3           0           1  
4           0           0  





In [17]:
# Read the saved predictions back from disk
df = pd.read_csv("check.csv")

# Rows where the predicted label equals the true label
correct = df["true_label"].eq(df["pred_label"]).sum()

total = len(df)

print(f"Correct predictions: {correct}/{total} ({correct/total:.2%})")


Correct predictions: 5543/7995 (69.33%)
