In [68]:
print("HELLO")

HELLO


In [69]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import itertools
import json
from tqdm import tqdm

In [70]:
# Load the predictions from the CSV files
mel_spec_csv = "/vol/bitbucket/sg2121/fyp/aimusicdetector/music_cnn/large/mel-spec/mel-spec_test_large_with_aug_predictions.csv"
cqt_csv = "/vol/bitbucket/sg2121/fyp/aimusicdetector/music_cnn/large/cqt/cqt_test_large_with_aug_predictions.csv"
mfcc_csv = "/vol/bitbucket/sg2121/fyp/aimusicdetector/music_cnn/large/mfcc/mfcc_test_large_with_aug_predictions.csv"
plp_csv = "/vol/bitbucket/sg2121/fyp/aimusicdetector/music_cnn/large/plp/plp_test_large_with_aug_predictions.csv"
chrm_csv = "/vol/bitbucket/sg2121/fyp/aimusicdetector/music_cnn/large/chromagram/chromagram_test_large_with_aug_predictions.csv"
clean_lyrics_csv = "/vol/bitbucket/sg2121/fyp/aimusicdetector/lyric_detection/large/clean_lyrics_test_large_predictions.csv"

mel_spec_trans_csv = "/vol/bitbucket/sg2121/fyp/aimusicdetector/music_transformer/large/mel-spec/tensors_test_large_with_aug_predictions.csv"
mfcc_trans_csv = "/vol/bitbucket/sg2121/fyp/aimusicdetector/music_transformer/large/mfcc/tensors_test_large_with_aug_predictions.csv"

# Read the CSV files into pandas DataFrames
df_mel = pd.read_csv(mel_spec_csv)
df_clean_lyrics = pd.read_csv(clean_lyrics_csv)
df_mfcc = pd.read_csv(mfcc_csv)
df_plp = pd.read_csv(plp_csv)
df_cqt = pd.read_csv(cqt_csv)
df_chrm = pd.read_csv(chrm_csv)

df_mel_t = pd.read_csv(mel_spec_trans_csv)
df_mfcc_t = pd.read_csv(mfcc_trans_csv)

print(len(df_mel.index))

df_mel['base_filename'] = df_mel['filename'].str.replace(r'-Mel_Spectrogram\.png$', '', regex=True)
df_clean_lyrics['base_filename'] = df_clean_lyrics['filename'].str.replace(r'_lyrics\.txt$', '', regex=True)
df_mfcc['base_filename'] = df_mfcc['filename'].str.replace(r'-MFCC\.png$', '', regex=True)
df_plp['base_filename'] = df_plp['filename'].replace(r'_plp\.png$', '', regex=True)
df_cqt['base_filename'] = df_cqt['filename'].str.replace(r'-CQT\.png$', '', regex=True)
df_chrm['base_filename'] = df_chrm['filename'].str.replace(r'-Chromagram\.png$', '', regex=True)

df_mel_t['base_filename'] = df_mel_t['filename'].str.replace(r'.pt$', '', regex=True)
df_mfcc_t['base_filename'] = df_mfcc_t['filename'].str.replace(r'.pt$', '', regex=True)

print(len(df_mel.index))
print(len(df_mfcc_t.index))
print(len(df_clean_lyrics.index))

7995
7995
7995
2967


In [71]:
def rename_columns(df, suffix):
    return df.rename(columns={col: f"{col}{suffix}" for col in df.columns if col != 'base_filename'})

# Add suffixes to avoid column name clashes
df_mel = rename_columns(df_mel, '_mel')
df_clean_lyrics = rename_columns(df_clean_lyrics, '_lyrics')
df_mfcc = rename_columns(df_mfcc, '_mfcc')
df_plp = rename_columns(df_plp, '_plp')
df_cqt = rename_columns(df_cqt, '_cqt')
df_chrm = rename_columns(df_chrm, '_chrm')

df_mel_t = rename_columns(df_mel_t, '_melt')
df_mfcc_t = rename_columns(df_mfcc_t, '_mfcct')

merged_df = df_mel.copy()
# Merge sequentially on 'base_filename'
for df in [df_clean_lyrics, df_mfcc, df_cqt, df_mel_t, df_mfcc_t]: #, df_plp, df_chrm]
    print(df.columns)  # Check before merge
    assert 'base_filename' in df.columns
    merged_df = pd.merge(merged_df, df, on='base_filename', how='left')
    
print(df_mel_t.head())
#print(len(merged_df.index))

Index(['filename_lyrics', 'prob_ai_lyrics', 'prob_human_lyrics',
       'true_label_lyrics', 'pred_label_lyrics', 'base_filename'],
      dtype='object')
Index(['filename_mfcc', 'prob_ai_mfcc', 'prob_human_mfcc', 'true_label_mfcc',
       'pred_label_mfcc', 'base_filename'],
      dtype='object')
Index(['filename_cqt', 'prob_ai_cqt', 'prob_human_cqt', 'true_label_cqt',
       'pred_label_cqt', 'base_filename'],
      dtype='object')
Index(['filename_melt', 'prob_ai_melt', 'prob_human_melt', 'true_label_melt',
       'pred_label_melt', 'base_filename'],
      dtype='object')
Index(['filename_mfcct', 'prob_ai_mfcct', 'prob_human_mfcct',
       'true_label_mfcct', 'pred_label_mfcct', 'base_filename'],
      dtype='object')
                    filename_melt  prob_ai_melt  prob_human_melt  \
0                       H2859N.pt      0.000809         0.999191   
1  U1170RN_segment_1_stretched.pt      0.528127         0.471873   
2              S91RN_segment_2.pt      0.999980         0.000020  

In [72]:
def apply_weighted_ensemble(df, weights=None):
    # Find all prob columns for AI and Human
    ai_cols = [col for col in df.columns if col.startswith('prob_ai_')]
    human_cols = [col for col in df.columns if col.startswith('prob_human_')]
    
    assert len(ai_cols) == len(human_cols), "Mismatch in number of AI and Human columns"
    
    model_keys = [col.replace('prob_ai_', '') for col in ai_cols]
    
    # If no weights provided, use equal weighting
    if weights is None:
        weights = {key: 1 / len(model_keys) for key in model_keys}

    assert abs(sum(weights.values()) - 1.0) < 1e-6, "Weights must sum to 1"
    for key in model_keys:
        assert key in weights, f"Missing weight for model: {key}"

    def compute_weighted_prob(row, prob_prefix, weights, keys):
        total_weight = 0.0
        weighted_sum = 0.0
        for key in keys:
            col_name = f"{prob_prefix}_{key}"
            value = row.get(col_name)
            if pd.notna(value):
                weighted_sum += value * weights[key]
                total_weight += weights[key]
        return weighted_sum / total_weight if total_weight > 0 else np.nan
    
    # Apply to each row
    df['weighted_prob_ai'] = df.apply(lambda row: compute_weighted_prob(row, 'prob_ai', weights, model_keys), axis=1)
    df['weighted_prob_human'] = df.apply(lambda row: compute_weighted_prob(row, 'prob_human', weights, model_keys), axis=1)
    
    # Final prediction
    df['final_pred_label'] = df.apply(
        lambda row: 0 if row['weighted_prob_ai'] > row['weighted_prob_human'] else 1,
        axis=1
    )
    
    return df


In [73]:
merged_df = apply_weighted_ensemble(merged_df)
print(merged_df[['base_filename', 'weighted_prob_ai', 'weighted_prob_human', 'final_pred_label']].head())

custom_weights = {
    'mel': 0.00,
    'lyrics': 0.0,
    'mfcc': 0.0,
    #'plp': 0.0,
    'cqt': 0.0,
    #'chrm': 0.0,
    'melt': 0,
    'mfcct': 1.0
}

merged_df = apply_weighted_ensemble(merged_df, weights=custom_weights)

print(merged_df[['base_filename', 'weighted_prob_ai', 'weighted_prob_human', 'final_pred_label']].head())


                 base_filename  weighted_prob_ai  weighted_prob_human  \
0                        H279N          0.000050             0.999950   
1                       H8167N          0.005733             0.994267   
2            S4594RN_segment_1          0.999198             0.000802   
3   U524RN_segment_1_stretched          0.998923             0.001077   
4  U1301RN_segment_2_stretched          0.999481             0.000519   

   final_pred_label  
0                 1  
1                 1  
2                 0  
3                 0  
4                 0  
                 base_filename  weighted_prob_ai  weighted_prob_human  \
0                        H279N          0.000002             0.999998   
1                       H8167N          0.000002             0.999998   
2            S4594RN_segment_1          0.999999             0.000001   
3   U524RN_segment_1_stretched          0.999999             0.000001   
4  U1301RN_segment_2_stretched          0.999999             0.0

In [74]:
# Calculate accuracy, precision, recall based on the final prediction
y_true = merged_df['true_label_mel']
y_pred = merged_df['final_pred_label']

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

# Compute basic metrics
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average=None, labels=[0, 1])

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

fpr_ai = cm[1][0] / (cm[1][0] + cm[1][1])
fpr_human = cm[0][1] / (cm[0][1] + cm[0][0])

# Log the results
print(f"Ensembled Model Accuracy: {accuracy:.4f}")
for i, label in enumerate(['ai', 'human']):
    print(f"Precision for {label}: {precision[i]:.4f}")
    print(f"Recall for {label}: {recall[i]:.4f}")
    print(f"F1-score for {label}: {f1[i]:.4f}")
    print(f"False Positive Rate for {label}: {fpr_ai if label == 'ai' else fpr_human:.4f}")


# Save the ensembled results to a new CSV file
ensembled_csv_file = "ensembled_predictions.csv"
merged_df[['base_filename', 'weighted_prob_ai', 'weighted_prob_human', 'final_pred_label', 'true_label_mel']].to_csv(ensembled_csv_file, index=False)
merged_df.to_csv("full_csv.csv", index=False)

print(f"Ensembled results saved to {ensembled_csv_file}")

Ensembled Model Accuracy: 0.9660
Precision for ai: 0.9735
Recall for ai: 0.9621
F1-score for ai: 0.9678
False Positive Rate for ai: 0.0296
Precision for human: 0.9576
Recall for human: 0.9704
F1-score for human: 0.9640
False Positive Rate for human: 0.0379
Ensembled results saved to ensembled_predictions.csv


In [75]:
# Define step size and base modalities
modalities = ['mel', 'lyrics', 'mfcc', 'cqt', 'melt', 'mfcct'] #, 'plp', 'chrm']
step = 0.05

# Generate grid of weights summing to 1
def generate_weight_combinations(modalities, step=0.1):
    ranges = [np.arange(0, 1 + step, step) for _ in modalities]
    all_combinations = list(itertools.product(*ranges))
    valid_combinations = [
        combo for combo in all_combinations if abs(sum(combo) - 1.0) < 1e-6
    ]
    return [dict(zip(modalities, combo)) for combo in valid_combinations]

print("Generating weight combinations")

Generating weight combinations


In [76]:
# Prepare grid
weight_combinations = generate_weight_combinations(modalities, step=step)

print(len(weight_combinations))

53130


In [77]:
# Prepare ground truth
y_true = merged_df['true_label_mel'].values 

best_score = 0
best_weights = None
best_metrics = None

for weights in tqdm(weight_combinations, desc="Searching best weights"):
    
    df_copy = merged_df.copy()
    df_copy = apply_weighted_ensemble(df_copy, weights)

    y_pred = df_copy['final_pred_label'].values
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')

    score = accuracy 
    if score > best_score:
        best_score = score
        best_weights = weights
        best_metrics = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1
        }


# Show best weights and metrics
print("\nBest Ensemble Weights:")
for k, v in best_weights.items():
    print(f"{k}: {v:.2f}")
print("\nMetrics for Best Weights:")
for k, v in best_metrics.items():
    print(f"{k.capitalize()}: {v:.4f}")

Searching best weights: 100%|████████████████████████| 53130/53130 [3:18:47<00:00,  4.45it/s]


Best Ensemble Weights:
mel: 0.25
lyrics: 0.10
mfcc: 0.15
cqt: 0.05
melt: 0.25
mfcct: 0.20

Metrics for Best Weights:
Accuracy: 0.9846
Precision: 0.9842
Recall: 0.9851
F1: 0.9846





In [78]:
df_copy = merged_df.copy()
df_copy = apply_weighted_ensemble(df_copy, best_weights)

y_pred = df_copy['final_pred_label'].values
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')

# Confusion matrix: [[TN, FP], [FN, TP]]
cm = confusion_matrix(y_true, y_pred)
tn, fp, fn, tp = cm.ravel()

# Per-class metrics
per_class = {
    "AI": {
        "Precision": tn / (tn + fn) if (tn + fn) > 0 else 0,
        "Recall": tn / (tn + fp) if (tn + fp) > 0 else 0,
        "F1": (2 * tn) / (2 * tn + fn + fp) if (2 * tn + fn + fp) > 0 else 0,
        "Accuracy": (tn + tp) / (tn + fp + fn + tp),
        "FPR": fp / (fp + tn) if (fp + tn) > 0 else 0,
    },
    "Human": {
        "Precision": tp / (tp + fp) if (tp + fp) > 0 else 0,
        "Recall": tp / (tp + fn) if (tp + fn) > 0 else 0,
        "F1": (2 * tp) / (2 * tp + fp + fn) if (2 * tp + fp + fn) > 0 else 0,
        "Accuracy": (tn + tp) / (tn + fp + fn + tp),
        "FPR": fn / (fn + tp) if (fn + tp) > 0 else 0,
    }
}

score = accuracy 
best_score = score
best_metrics = {
    'accuracy': accuracy,
    'precision_macro': precision,
    'recall_macro': recall,
    'f1_macro': f1,
    'per_class': per_class
}

# Show best weights and metrics
print("\nBest Ensemble Weights:")
for k, v in best_weights.items():
    print(f"{k}: {v:.2f}")
print("\nBest Metrics:")
for k, v in best_metrics.items():
    if k != 'per_class':
        print(f"{k.replace('_', ' ').capitalize()}: {v:.4f}")
    else:
        print("\nPer-Class Metrics:")
        for cls, metrics in v.items():
            print(f"\nClass: {cls}")
            for metric_name, val in metrics.items():
                print(f"  {metric_name}: {val:.4f}")


Best Ensemble Weights:
mel: 0.25
lyrics: 0.10
mfcc: 0.15
cqt: 0.05
melt: 0.25
mfcct: 0.20

Best Metrics:
Accuracy: 0.9846
Precision macro: 0.9842
Recall macro: 0.9851
F1 macro: 0.9846

Per-Class Metrics:

Class: Human
  Precision: 0.9938
  Recall: 0.9772
  F1: 0.9854
  Accuracy: 0.9846
  FPR: 0.0228

Class: AI
  Precision: 0.9746
  Recall: 0.9931
  F1: 0.9837
  Accuracy: 0.9846
  FPR: 0.0069


In [79]:
# Save best weights to a JSON file
with open("best_ensemble_weights.json", "w") as f:
    json.dump(best_weights, f, indent=4)

print("Best weights saved to best_ensemble_weights.json")

merged_df = apply_weighted_ensemble(merged_df, weights=best_weights)

# Save only the key prediction outputs
ensembled_csv_file = "best_weights_ensembled_predictions.csv"
merged_df[['base_filename', 'weighted_prob_ai', 'weighted_prob_human', 'final_pred_label', 'true_label_mel']].to_csv(ensembled_csv_file, index=False)

# Save the full DataFrame
merged_df.to_csv("full_ensembled_output.csv", index=False)

print(f"Final ensembled predictions saved to {ensembled_csv_file}")
print(f"Full data (with all features) saved to full_ensembled_output.csv")


Best weights saved to best_ensemble_weights.json
Final ensembled predictions saved to best_weights_ensembled_predictions.csv
Full data (with all features) saved to full_ensembled_output.csv
