In [None]:
# https://medium.com/@imabhi1216/fine-tuning-a-pre-trained-resnet-18-model-for-image-classification-on-custom-dataset-with-pytorch-02df12e83c2c

In [36]:
# Prints a fixed string; nothing later in the notebook reads this output.
# NOTE(review): looks like a leftover kernel smoke test — confirm it can be deleted.
print("HELLO")

HELLO


In [37]:
import os
import shutil
from sklearn.model_selection import train_test_split
import random

import torch
import torchvision.models as models

from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader

import time
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

import pandas as pd
import torch.nn.functional as F

from PIL import Image

from tqdm import tqdm

from itertools import product

In [38]:
def get_model(num_classes=2):
    """Build a pretrained ResNet-34 whose final layer is replaced for this task.

    Args:
        num_classes: Number of outputs of the new final ``fc`` layer.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        The torchvision ResNet-34 module with a freshly initialised
        ``torch.nn.Linear`` head. The model is not moved to any device here.
    """
    # Load ResNet-34 with torchvision's default pretrained weights
    # (the earlier comment said ResNet-18, which did not match the code).
    model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)

    # Swap the classification head, keeping the backbone's feature width
    model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
    return model

In [39]:
# Folders holding the audio segments and the matching PLP feature folders
_dataset_root = "/vol/bitbucket/sg2121/fypdataset/dataset_large2"
_segments_root = f"{_dataset_root}/normal_data"
_features_root = f"{_dataset_root}/features"

ai_segments_path = f"{_segments_root}/ai_segments"
human_segments_path = f"{_segments_root}/human"
ai_plp_path = f"{_features_root}/ai/PLP"
human_plp_path = f"{_features_root}/human/PLP"

# Augmented AI segments and their PLP feature folder
ai_aug_segments_path = f"{_segments_root}/augmented_ai"
ai_aug_plp_path = f"{_features_root}/ai_aug/PLP"

# (path, label) pairs for every PNG in the AI-augmented PLP folder.
# Label convention used by the rest of this notebook: 0 = AI, 1 = human
# (process_file_paths assigns 0 to AI files and evaluate_model reports
# label 0 as 'ai'). These files come from the `ai_aug` feature folder, so
# they get label 0; the previous value of 1 marked them as human.
# The directory listing is sorted so the list starts from a deterministic order.
ai_aug_test_files = [
    (os.path.join(ai_aug_plp_path, filename), 0)
    for filename in sorted(os.listdir(ai_aug_plp_path))
    if filename.endswith(".png")
]

# Load a newline-separated listing from a text file
def read_file_paths(file_name):
    """Return every line of ``file_name`` with surrounding whitespace stripped.

    Blank lines are kept and come back as empty strings.
    """
    stripped_lines = []
    with open(file_name, 'r') as handle:
        for raw_line in handle:
            stripped_lines.append(raw_line.strip())
    return stripped_lines

# Load the train / validation / test listings from their text files
_split_dir = '/vol/bitbucket/sg2121/fyp/aimusicdetector/train_test_split/bitbucket'
train_files = read_file_paths(f'{_split_dir}/train_files_large.txt')
val_files = read_file_paths(f'{_split_dir}/val_files_large.txt')
test_files = read_file_paths(f'{_split_dir}/test_files_large.txt')

# Map a segment file path to the path of its PLP image
def convert_to_plp_path(file_path, is_ai):
    """Translate a segment path into the corresponding PLP image path.

    The segment folder that ``file_path`` starts with decides which PLP folder
    is used. For ``is_ai=True`` the AI folder is tried first, then the
    augmented-AI folder; for ``is_ai=False`` only the human folder is tried.

    Returns:
        The PLP folder joined with the segment's basename, where ``.mp3`` is
        replaced by ``_plp.png``; ``None`` when no folder prefix matches.
    """
    if is_ai:
        folder_pairs = (
            (ai_segments_path, ai_plp_path),
            (ai_aug_segments_path, ai_aug_plp_path),
        )
    else:
        folder_pairs = ((human_segments_path, human_plp_path),)

    for segment_root, plp_root in folder_pairs:
        if file_path.startswith(segment_root):
            plp_name = os.path.basename(file_path).replace('.mp3', '_plp.png')
            return os.path.join(plp_root, plp_name)

    # Path does not belong to any folder of the requested class
    return None


# Turn segment paths into (plp_path, label) tuples; label is 0 for AI, 1 for human
def process_file_paths(file_paths, is_ai):
    """Pair the PLP path of every entry in ``file_paths`` with its class label.

    Entries whose path cannot be converted keep ``None`` as their path.
    """
    label = 0 if is_ai else 1
    labelled = []
    for segment_path in file_paths:
        labelled.append((convert_to_plp_path(segment_path, is_ai), label))
    return labelled

# Run every split through the AI conversion first, then the human conversion
ai_train_files, human_train_files = (
    process_file_paths(train_files, is_ai=flag) for flag in (True, False)
)

ai_val_files, human_val_files = (
    process_file_paths(val_files, is_ai=flag) for flag in (True, False)
)

ai_test_files, human_test_files = (
    process_file_paths(test_files, is_ai=flag) for flag in (True, False)
)

def clean(paths):
    """Return the (path, label) pairs of ``paths`` whose path is not None."""
    kept = []
    for path, label in paths:
        if path is None:
            continue
        kept.append((path, label))
    return kept

# Merge the AI and human entries of each split, dropping unresolved paths
train_files_combined = [*clean(ai_train_files), *clean(human_train_files)]
val_files_combined = [*clean(ai_val_files), *clean(human_val_files)]
test_files_combined = [*clean(ai_test_files), *clean(human_test_files)]

# Randomise the order of every list in place
for _split in (
    train_files_combined,
    val_files_combined,
    test_files_combined,
    ai_aug_test_files,
):
    random.shuffle(_split)

# Report how many samples each list holds
for _title, _split in (
    ("Training set", train_files_combined),
    ("Validation set", val_files_combined),
    ("Test set", test_files_combined),
    ("AI Aug Test set", ai_aug_test_files),
):
    print(f"{_title} size: {len(_split)}")

Training set size: 22736
Validation set size: 4871
Test set size: 4875
AI Aug Test set size: 14149


In [40]:
# Custom dataset class
class PLPDataset(Dataset):
    """Map-style dataset over ``(image_path, label)`` pairs.

    Indexing returns ``(image, label, filename)``: the image opened from
    ``image_path`` and converted to RGB (then passed through ``transform`` if
    one was given), the label exactly as stored, and the basename of the path.
    """

    def __init__(self, data, transform=None):
        """
        Args:
            data: Indexable, sized collection of ``(path, label)`` pairs.
            transform: Optional callable applied to the loaded image.
        """
        self.data = data
        self.transform = transform

    def __len__(self):
        """Number of ``(path, label)`` pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load item ``idx``.

        Raises:
            FileNotFoundError: If the image file does not exist. This used to
                be re-raised as ``IndexError``, but Python's fallback iteration
                protocol treats ``IndexError`` from ``__getitem__`` as "end of
                sequence", so ``for item in dataset`` would silently stop at
                the first missing file instead of failing.
        """
        path, label = self.data[idx]
        filename = os.path.basename(path)

        try:
            # Context manager releases the file handle once the RGB copy exists
            with Image.open(path) as raw_image:
                image = raw_image.convert('RGB')
        except FileNotFoundError as err:
            raise FileNotFoundError(f"{filename} not found (looked for {path})") from err

        if self.transform:
            image = self.transform(image)

        return image, label, filename
        
# Preprocessing applied to every image: resize, centre-crop to 224, convert to
# a tensor, then normalise each channel with the constants below
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
transform = transforms.Compose(_preprocessing_steps)


In [41]:
# Wrap every file list in a PLPDataset that shares the same preprocessing
train_dataset, val_dataset, test_dataset, ai_aug_test_dataset = (
    PLPDataset(files, transform=transform)
    for files in (
        train_files_combined,
        val_files_combined,
        test_files_combined,
        ai_aug_test_files,
    )
)

# Batch loaders: only the training loader reshuffles its samples
batch_size = 32
_fixed_order = dict(batch_size=batch_size, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, **_fixed_order)
test_loader = DataLoader(test_dataset, **_fixed_order)

ai_aug_test_loader = DataLoader(ai_aug_test_dataset, **_fixed_order)

# Report the size of each dataset
for _title, _dataset in (
    ("Train set", train_dataset),
    ("Val set", val_dataset),
    ("Test set", test_dataset),
    ("AI Aug Test set", ai_aug_test_dataset),
):
    print(f"{_title}: {len(_dataset)} samples")

Train set: 22736 samples
Val set: 4871 samples
Test set: 4875 samples
AI Aug Test set: 14149 samples


In [42]:
# Choose the compute device: MPS if available, otherwise CUDA, otherwise the CPU
if torch.backends.mps.is_available():
    _device_name = "mps"
elif torch.cuda.is_available():
    _device_name = "cuda"
else:
    _device_name = "cpu"
device = torch.device(_device_name)
print(device)

def _model_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device("cpu")


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Make one pass over ``loader``; weights are updated only if ``optimizer`` is given.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats, averaged over the samples
        actually seen. Both are 0.0 when the loader yields nothing.
    """
    is_training = optimizer is not None
    # train(True) enables training mode, train(False) is equivalent to eval()
    model.train(is_training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    # Gradients are only tracked while training
    with torch.set_grad_enabled(is_training):
        for inputs, labels, _filenames in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            # Accumulate plain Python numbers so no tensor is carried across batches
            batch_n = inputs.size(0)
            loss_sum += loss.item() * batch_n
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += batch_n

    if n_seen == 0:
        return 0.0, 0.0
    return loss_sum / n_seen, n_correct / n_seen


def train(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Batches are moved to the device the model's parameters live on, so the
    function no longer depends on a notebook-level ``device`` variable.

    Args:
        model: Module to optimise; it is left in evaluation mode on return.
        train_loader: Iterable of ``(inputs, labels, filenames)`` training batches.
        val_loader: Iterable of ``(inputs, labels, filenames)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. One summary line is printed per epoch.
    """
    device = _model_device(model)

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        # Print the results for the current epoch
        print(f'Epoch [{epoch+1}/{num_epochs}], train loss: {train_loss:.4f}, train acc: {train_acc:.4f}, val loss: {val_loss:.4f}, val acc: {val_acc:.4f}')

cuda


In [43]:
# Build the network and place it on the selected device
model = get_model().to(device)

# Loss, optimizer and epoch count; the optimizer only receives the parameters
# of the final fully-connected layer
lr = 0.001
num_epochs = 5
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.fc.parameters(), lr=lr, momentum=0.9)

In [44]:
# Hyperparameters: same learning rate and epoch count as the previous cell,
# with a small weight decay added to the optimizer
lr, num_epochs = 0.001, 5
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=model.fc.parameters(),
    lr=lr,
    momentum=0.9,
    weight_decay=1e-07,
)

In [27]:
# Fine-tune with the loss, optimizer and epoch count configured above
train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

Epoch [1/5], train loss: 0.4282, train acc: 0.8129, val loss: 0.3827, val acc: 0.8411
Epoch [2/5], train loss: 0.3974, train acc: 0.8338, val loss: 0.3818, val acc: 0.8407
Epoch [3/5], train loss: 0.3951, train acc: 0.8318, val loss: 0.3805, val acc: 0.8409
Epoch [4/5], train loss: 0.3899, train acc: 0.8354, val loss: 0.3856, val acc: 0.8436
Epoch [5/5], train loss: 0.3881, train acc: 0.8368, val loss: 0.3772, val acc: 0.8436


In [47]:
def evaluate_model(model, test_loader, device, hyperparams=None, log_file=None):
    """Evaluate a two-class model, print a metrics report and append it to a log file.

    Class index 0 is reported as 'ai' and class index 1 as 'human'.

    Args:
        model: Module called as ``model(inputs)`` returning per-class scores.
        test_loader: Iterable yielding ``(inputs, labels, filenames)`` batches.
        device: Device the inputs are moved to before the forward pass.
        hyperparams: Optional mapping written to the report as ``key: value`` lines.
        log_file: Path the report is appended to. Defaults to the training log
            path that was previously hard-coded.

    Returns:
        Overall accuracy as computed by ``accuracy_score``.
    """
    if log_file is None:
        log_file = "/vol/bitbucket/sg2121/fyp/aimusicdetector/music_cnn/large/plp/training_large_logfile.txt"

    class_names = ['ai', 'human']
    class_ids = [0, 1]

    model.eval()
    all_labels = []
    all_preds = []

    start_time = time.time()

    with torch.no_grad():
        for inputs, labels, filenames in test_loader:
            outputs = model(inputs.to(device))
            preds = outputs.argmax(dim=1)

            all_labels.extend(labels.tolist())
            all_preds.extend(preds.tolist())

    end_time = time.time()
    n_samples = len(all_labels)
    # Guard against an empty loader instead of dividing by zero
    avg_inference_time = (end_time - start_time) / n_samples if n_samples else 0.0

    overall_accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average=None, labels=class_ids
    )
    # Pin the label set so the matrix is always 2x2. Without it, an evaluation
    # set containing a single class yields a 1x1 matrix and the per-class
    # indexing below raises IndexError.
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

    accuracy_per_class = {}
    fpr = {}
    for i, classname in enumerate(class_names):
        row_total = cm[i, :].sum()   # samples whose true class is i
        col_total = cm[:, i].sum()   # samples predicted as class i
        accuracy_per_class[classname] = cm[i, i] / row_total if row_total > 0 else 0

        FP = col_total - cm[i, i]
        TN = cm.sum() - (row_total + col_total - cm[i, i])
        fpr[classname] = FP / (FP + TN) if (FP + TN) > 0 else 0

    # Logging
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_lines = [f"===== Evaluation at {timestamp} =====\n"]

    if hyperparams:
        log_lines.append("Hyperparameters:")
        for key, value in hyperparams.items():
            log_lines.append(f"{key}: {value}")
    else:
        log_lines.append("No hyperparameters provided.")
    log_lines.append("")

    log_lines.append("Accuracy per class:")
    for classname, acc in accuracy_per_class.items():
        log_lines.append(f"{classname}: {acc:.4f}")
    log_lines.append("\nPrecision, Recall, F1:")
    for i, classname in enumerate(class_names):
        log_lines.append(f"{classname} → Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")
    log_lines.append("\nFalse Positive Rate:")
    for classname, rate in fpr.items():
        log_lines.append(f"{classname}: {rate:.4f}")
    log_lines.append(f"\nOverall Accuracy: {overall_accuracy:.4f}")
    log_lines.append(f"Average Inference Time per Sample: {avg_inference_time:.6f} seconds")
    log_lines.append("=" * 40 + "\n\n")

    report = "\n".join(log_lines)
    print(report)
    # The report contains a non-ASCII arrow, so the encoding is set explicitly
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(report)

    return overall_accuracy


In [46]:
# Score the test loader with the current model, one record per sample
model.eval()
results = []

with torch.no_grad():
    for batch in tqdm(test_loader):
        inputs, labels, filenames = batch[0].to(device), batch[1].to(device), batch[2]

        outputs = model(inputs)
        probs = F.softmax(outputs, dim=1)
        pred_labels = probs.argmax(dim=1)

        # Convert each tensor to Python values once per batch
        for name, row, truth, guess in zip(
            filenames, probs.tolist(), labels.tolist(), pred_labels.tolist()
        ):
            results.append(
                dict(
                    filename=name,
                    prob_ai=row[0],
                    prob_human=row[1],
                    true_label=truth,
                    pred_label=guess,
                )
            )

# Tabulate the records
df = pd.DataFrame(results)

# Write them out as CSV
df.to_csv("plp_test_large_predictions.csv", index=False)

# Show the first rows
print(df.head())


  1%|▎                                          | 1/153 [00:03<08:40,  3.42s/it]


KeyboardInterrupt: 

In [None]:
# Write the current model's state dict to disk
_current_weights = model.state_dict()
torch.save(_current_weights, 'cur_model.pt')

In [30]:
# Settings recorded next to the metrics in the evaluation report
hyperparams = dict(
    batch_size=batch_size,
    learning_rate=lr,
    epochs=num_epochs,
    optimizer=optimizer,
    model=type(model).__name__,
)

evaluate_model(model, test_loader, device, hyperparams=hyperparams)


===== Evaluation at 2025-05-28 00:57:52 =====

Hyperparameters:
batch_size: 32
learning_rate: 0.001
epochs: 5
optimizer: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 1e-07
)
model: ResNet

Accuracy per class:
ai: 0.4538
human: 0.9509

Precision, Recall, F1:
ai → Precision: 0.7353, Recall: 0.4538, F1: 0.5612
human → Precision: 0.8529, Recall: 0.9509, F1: 0.8992

False Positive Rate:
ai: 0.0491
human: 0.5462

Overall Accuracy: 0.8361
Average Inference Time per Sample: 0.267669 seconds




0.8361025641025641

In [None]:
# Hyperparameter search

In [11]:
# Candidate values for each tuned hyperparameter
learning_rates = [1e-3, 1e-4, 5e-4]
weight_decays = [1e-6, 1e-7, 1e-8]
epochs_list = [5, 10, 20]

# Enumerate the full grid, shuffle it and keep the first 10 combinations
param_combinations = [
    (lr_value, wd_value, n_epochs)
    for lr_value in learning_rates
    for wd_value in weight_decays
    for n_epochs in epochs_list
]
random.shuffle(param_combinations)
hyperparam_trials = param_combinations[:10]

# Track the best model seen so far and its validation accuracy
best_model, best_acc = None, 0

for i, (lr, wd, epochs) in enumerate(hyperparam_trials):
    print(f"\n=== Trial {i+1}/10: LR={lr}, WD={wd}, Epochs={epochs} ===")

    # Start every trial from a fresh pretrained network on the compute device
    model = get_model().to(device)

    # SGD with momentum over all model parameters
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=wd)

    train(model, train_loader, val_loader, criterion, optimizer, epochs)

    hyperparams = dict(
        batch_size=batch_size,
        learning_rate=lr,
        epochs=epochs,
        optimizer=optimizer,
        model=type(model).__name__,
    )

    # Score on the validation loader; checkpoint whenever the accuracy improves
    val_acc = evaluate_model(model, val_loader, device, hyperparams=hyperparams)
    if val_acc > best_acc:
        best_acc, best_model = val_acc, model
        torch.save(model.state_dict(), 'best_model.pt')

print("\nBest validation accuracy:", best_acc)


=== Trial 1/10: LR=0.0001, WD=1e-08, Epochs=10 ===
Epoch [1/10], train loss: 0.5028, train acc: 0.7453, val loss: 0.4157, val acc: 0.8004
Epoch [2/10], train loss: 0.3842, train acc: 0.8239, val loss: 0.3754, val acc: 0.8299
Epoch [3/10], train loss: 0.3391, train acc: 0.8492, val loss: 0.3602, val acc: 0.8344
Epoch [4/10], train loss: 0.3021, train acc: 0.8678, val loss: 0.3448, val acc: 0.8416
Epoch [5/10], train loss: 0.2621, train acc: 0.8891, val loss: 0.3620, val acc: 0.8425
Epoch [6/10], train loss: 0.2333, train acc: 0.9015, val loss: 0.3576, val acc: 0.8416
Epoch [7/10], train loss: 0.1956, train acc: 0.9220, val loss: 0.3600, val acc: 0.8470
Epoch [8/10], train loss: 0.1630, train acc: 0.9381, val loss: 0.3674, val acc: 0.8422
Epoch [9/10], train loss: 0.1328, train acc: 0.9506, val loss: 0.4027, val acc: 0.8335
Epoch [10/10], train loss: 0.1041, train acc: 0.9655, val loss: 0.4312, val acc: 0.8353
===== Evaluation at 2025-05-12 23:16:39 =====

Hyperparameters:
batch_size: 3

In [48]:
# Rebuild the architecture and restore the weights saved in best_model.pt
model = get_model()
_best_state = torch.load("best_model.pt", map_location=device)
model.load_state_dict(_best_state)
model.to(device)
model.eval()
results = []

# NOTE(review): the CSV written below is named "...ai_aug_test..." but this
# loop reads test_loader, not ai_aug_test_loader — confirm which was intended.
for batch in tqdm(test_loader):
    inputs, labels, filenames = batch
    inputs, labels = inputs.to(device), labels.to(device)

    with torch.no_grad():
        outputs = model(inputs)
        probs = F.softmax(outputs, dim=1)
        pred_labels = torch.argmax(probs, dim=1)

    # One record per sample, in batch order
    results.extend(
        {
            "filename": fname,
            "prob_ai": p_ai,
            "prob_human": p_human,
            "true_label": truth,
            "pred_label": guess,
        }
        for fname, (p_ai, p_human), truth, guess in zip(
            filenames, probs.tolist(), labels.tolist(), pred_labels.tolist()
        )
    )

# Tabulate the records
df = pd.DataFrame(results)

# Write them out as CSV
df.to_csv("best_plp_ai_aug_test_predictions.csv", index=False)

# Show the first rows
print(df.head())


100%|█████████████████████████████████████████| 153/153 [06:07<00:00,  2.40s/it]

          filename   prob_ai  prob_human  true_label  pred_label
0  H20441N_plp.png  0.000301    0.999699           1           1
1    H441N_plp.png  0.000463    0.999537           1           1
2  H16310N_plp.png  0.000207    0.999793           1           1
3   H4960N_plp.png  0.014200    0.985800           1           1
4   H9761N_plp.png  0.620863    0.379137           1           0





In [49]:
# Reload the saved predictions
df = pd.read_csv("best_plp_ai_aug_test_predictions.csv")

# Number of rows, i.e. evaluated samples
total = df.shape[0]

# Rows whose predicted label equals the true label
correct = df["true_label"].eq(df["pred_label"]).sum()

print(f"Correct predictions: {correct}/{total} ({correct/total:.2%})")

Correct predictions: 4348/4875 (89.19%)
