In [6]:
from pathlib import Path
import librosa
import torch
import pandas as pd

# Eval with local model

In [7]:
from sonics import HFAudioClassifier
from sonics.utils.dataset import AudioDataset

model_time = 120
model = HFAudioClassifier.from_pretrained("awsaf49/sonics-spectttra-alpha-120s")

In [45]:
from gradio_client import Client, handle_file
def get_predictions_hf(audio_files, client: Client):
    
    results = []
    for file in audio_files:
        result = client.predict(
                audio_file=handle_file(file),
                model_type="SpecTTTra-α",
                duration="120s",
                api_name="/predict"
        )
        results.append(result["label"])
    return results

def get_predictions_local(audio_files) ->list[bool]:
    predictions = []
    for audio in audio_files:
        raw = model(audio["audio"][None,:])
        tf = torch.nn.functional.sigmoid(raw)>0.5 # if higher, then it's fake
        predictions.append(tf.item())
    return predictions

def run_experiment(local):
    if not local:
        client = Client("awsaf49/sonics-fake-song-detection")
    results = {}
    with torch.no_grad():
        for folder in Path("../data/examples").iterdir():
            print("running",folder.name)
            all_audio = list(folder.glob("*"))[:10]
            if local:
                dataset = AudioDataset(all_audio, labels=[0]*10, random_sampling=False, max_len=16000*model_time)
                result = get_predictions_local(dataset)
            else:
                result = get_predictions_hf(all_audio, client)
            results[folder.name] = result

    print(results)
    df = pd.DataFrame(results)
    print("True if fake")
    return df

In [46]:
run_experiment(local=False)

Loaded as API: https://awsaf49-sonics-fake-song-detection.hf.space ✔
running musicgen
running suno
running YuE
running real
running udio
{'musicgen': ['Real', 'Fake', 'Fake', 'Fake', 'Real', 'Fake', 'Fake', 'Fake', 'Fake', 'Real'], 'suno': ['Fake', 'Fake', 'Fake', 'Fake', 'Fake', 'Fake', 'Fake', 'Fake', 'Fake', 'Fake'], 'YuE': ['Real', 'Fake', 'Fake', 'Fake', 'Real', 'Real', 'Fake', 'Real', 'Fake', 'Fake'], 'real': ['Real', 'Real', 'Real', 'Real', 'Real', 'Real', 'Real', 'Real', 'Real', 'Real'], 'udio': ['Real', 'Real', 'Real', 'Real', 'Fake', 'Real', 'Fake', 'Real', 'Real', 'Real']}
True if fake


Unnamed: 0,musicgen,suno,YuE,real,udio
0,Real,Fake,Real,Real,Real
1,Fake,Fake,Fake,Real,Real
2,Fake,Fake,Fake,Real,Real
3,Fake,Fake,Fake,Real,Real
4,Real,Fake,Real,Real,Fake
5,Fake,Fake,Real,Real,Real
6,Fake,Fake,Fake,Real,Fake
7,Fake,Fake,Real,Real,Real
8,Fake,Fake,Fake,Real,Real
9,Real,Fake,Fake,Real,Real


In [47]:
run_experiment(local=True)

running musicgen
running suno
running YuE
running real
running udio
{'musicgen': [False, False, False, False, False, False, True, False, False, False], 'suno': [False, False, False, False, False, False, False, False, False, False], 'YuE': [False, False, False, False, False, False, False, False, False, False], 'real': [False, False, False, False, False, False, False, False, False, False], 'udio': [False, False, False, False, False, False, False, False, False, False]}
True if fake


Unnamed: 0,musicgen,suno,YuE,real,udio
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
5,False,False,False,False,False
6,True,False,False,False,False
7,False,False,False,False,False
8,False,False,False,False,False
9,False,False,False,False,False
