In [2]:
import pandas as pd

In [4]:
# Prediction CSVs for the music (mel-spectrogram) model and the lyric model
music_csv = "/data/sg2121/aimusicdetector/music_cnn/music_test_predictions.csv"
lyric_csv = "/data/sg2121/aimusicdetector/lyric_detection/lyrics_test_predictions.csv"

# Read each CSV and, in the same chain, derive `base_filename` by stripping the
# model-specific suffix from `filename`, so both frames share a common key
# (e.g. "H199N-Mel_Spectrogram.png" and "H199N_lyrics.txt" both become "H199N").
df_mel = pd.read_csv(music_csv).assign(
    base_filename=lambda frame: frame['filename'].str.replace(
        r'-Mel_Spectrogram\.png$', '', regex=True
    )
)
df_lyrics = pd.read_csv(lyric_csv).assign(
    base_filename=lambda frame: frame['filename'].str.replace(
        r'_lyrics\.txt$', '', regex=True
    )
)

# Preview the music-model predictions
df_mel.head()


Unnamed: 0,filename,prob_ai,prob_human,true_label,pred_label,base_filename
0,H199N-Mel_Spectrogram.png,0.247414,0.752586,1,1,H199N
1,H405N-Mel_Spectrogram.png,0.143636,0.856364,1,1,H405N
2,H396N-Mel_Spectrogram.png,0.194314,0.805686,1,1,H396N
3,U63RN_segment_2-Mel_Spectrogram.png,0.848963,0.151037,0,0,U63RN_segment_2
4,H361N-Mel_Spectrogram.png,0.042401,0.957599,1,1,H361N


In [7]:
# Join the two prediction tables on the shared `base_filename` key. Columns that
# exist in both frames get a `_mel` / `_lyrics` suffix. This is an inner join, so
# a file that appears in only one of the CSVs is dropped from the result.
# `validate='one_to_one'` makes pandas raise a MergeError if `base_filename` is
# duplicated on either side, instead of silently multiplying rows and skewing
# anything computed from the merged frame.
merged_df = pd.merge(
    df_mel,
    df_lyrics,
    on='base_filename',
    suffixes=('_mel', '_lyrics'),
    validate='one_to_one',
)
merged_df.head()

Unnamed: 0,filename_mel,prob_ai_mel,prob_human_mel,true_label_mel,pred_label_mel,base_filename,filename_lyrics,prob_ai_lyrics,prob_human_lyrics,true_label_lyrics,pred_label_lyrics
0,H199N-Mel_Spectrogram.png,0.247414,0.752586,1,1,H199N,H199N_lyrics.txt,0.138137,0.861863,1,1
1,H405N-Mel_Spectrogram.png,0.143636,0.856364,1,1,H405N,H405N_lyrics.txt,0.356109,0.643892,1,1
2,H396N-Mel_Spectrogram.png,0.194314,0.805686,1,1,H396N,H396N_lyrics.txt,0.021202,0.978798,1,1
3,U63RN_segment_2-Mel_Spectrogram.png,0.848963,0.151037,0,0,U63RN_segment_2,U63RN_segment_2_lyrics.txt,0.467536,0.532464,0,1
4,H361N-Mel_Spectrogram.png,0.042401,0.957599,1,1,H361N,H361N_lyrics.txt,0.345887,0.654113,1,1


In [9]:
# Ensemble weights: how much each model contributes to the combined probability
weight_model1 = 0.6 # music model
weight_model2 = 0.4 # lyric model

# Weighted average of the two models' probabilities, computed per class
merged_df['weighted_prob_ai'] = (merged_df['prob_ai_mel'] * weight_model1 +
                                  merged_df['prob_ai_lyrics'] * weight_model2)

merged_df['weighted_prob_human'] = (merged_df['prob_human_mel'] * weight_model1 +
                                     merged_df['prob_human_lyrics'] * weight_model2)

# Final label: 0 only when the weighted AI probability is strictly greater than
# the weighted human probability; ties (and comparisons involving NaN) give 1.
# This is computed column-wise rather than with a row-wise `apply`, which avoids
# a Python-level call per row and also works when the merge yields zero rows.
ai_wins = merged_df['weighted_prob_ai'] > merged_df['weighted_prob_human']
merged_df['final_pred_label'] = (~ai_wins).astype('int64')
merged_df.head()

Unnamed: 0,filename_mel,prob_ai_mel,prob_human_mel,true_label_mel,pred_label_mel,base_filename,filename_lyrics,prob_ai_lyrics,prob_human_lyrics,true_label_lyrics,pred_label_lyrics,weighted_prob_ai,weighted_prob_human,final_pred_label
0,H199N-Mel_Spectrogram.png,0.247414,0.752586,1,1,H199N,H199N_lyrics.txt,0.138137,0.861863,1,1,0.203703,0.796297,1
1,H405N-Mel_Spectrogram.png,0.143636,0.856364,1,1,H405N,H405N_lyrics.txt,0.356109,0.643892,1,1,0.228625,0.771375,1
2,H396N-Mel_Spectrogram.png,0.194314,0.805686,1,1,H396N,H396N_lyrics.txt,0.021202,0.978798,1,1,0.125069,0.874931,1
3,U63RN_segment_2-Mel_Spectrogram.png,0.848963,0.151037,0,0,U63RN_segment_2,U63RN_segment_2_lyrics.txt,0.467536,0.532464,0,1,0.696392,0.303608,0
4,H361N-Mel_Spectrogram.png,0.042401,0.957599,1,1,H361N,H361N_lyrics.txt,0.345887,0.654113,1,1,0.163796,0.836204,1


In [10]:

# Optional: Calculate accuracy, precision, recall, etc. based on the final prediction
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Both prediction files carry their own `true_label`. The metrics below use the
# music-side copy as ground truth, so stop here if the two copies disagree for
# any merged row -- otherwise the reported numbers would be ambiguous.
label_conflicts = merged_df['true_label_mel'] != merged_df['true_label_lyrics']
if label_conflicts.any():
    raise ValueError(
        f"{int(label_conflicts.sum())} merged rows have different true labels "
        "in the music and lyric prediction files"
    )

y_true = merged_df['true_label_mel']
y_pred = merged_df['final_pred_label']

accuracy = accuracy_score(y_true, y_pred)
# average=None returns one value per entry of `labels`, in that order, so index 0
# is label 0 and index 1 is label 1. The fourth return value (per-class support)
# is unused; it is bound to `_support` rather than `_` because IPython uses `_`
# for the most recent cell output.
precision, recall, f1, _support = precision_recall_fscore_support(y_true, y_pred, average=None, labels=[0, 1])

# Log the results (label 0 is reported as 'ai', label 1 as 'human')
print(f"Ensembled Model Accuracy: {accuracy:.4f}")
for i, label in enumerate(['ai', 'human']):
    print(f"Precision for {label}: {precision[i]:.4f}")
    print(f"Recall for {label}: {recall[i]:.4f}")
    print(f"F1-score for {label}: {f1[i]:.4f}")

# Optionally save the ensembled results to a new CSV file (written relative to
# the current working directory; the DataFrame index is not included)
ensembled_csv_file = "ensembled_predictions.csv"
merged_df[['base_filename', 'weighted_prob_ai', 'weighted_prob_human', 'final_pred_label']].to_csv(ensembled_csv_file, index=False)

print(f"Ensembled results saved to {ensembled_csv_file}")

Ensembled Model Accuracy: 0.9219
Precision for ai: 1.0000
Recall for ai: 0.8276
F1-score for ai: 0.9057
Precision for human: 0.8750
Recall for human: 1.0000
F1-score for human: 0.9333
Ensembled results saved to ensembled_predictions.csv
