In [None]:
pip install -q transformers datasets torchaudio

In [None]:
import numpy as np
import pandas as pd
import requests
import io
from IPython.display import Audio, display
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
import re

In [None]:
# TODO: merge the datasets to get the Spotify links for the most similar and most different songs.

In [None]:
from transformers import AutoFeatureExtractor, HubertForSequenceClassification, Wav2Vec2Processor
from datasets import load_dataset
import torch

# DistilHuBERT is published on the Hugging Face Hub as "ntu-spml/distilhubert"
# and is loaded through the regular HuBERT classes; transformers does not export
# any DistilHuBERT* class.
MODEL_ID = "ntu-spml/distilhubert"


class _FeatureExtractorProcessor:
    """Expose a bare feature extractor through the processor-style interface.

    The DistilHuBERT checkpoint was pre-trained on audio only and ships a
    feature extractor but no tokenizer, so ``Wav2Vec2Processor.from_pretrained``
    cannot be used with it. This wrapper keeps the two things the rest of the
    notebook relies on: calling ``processor(...)`` on raw audio and reading
    ``processor.feature_extractor``.
    """

    def __init__(self, feature_extractor):
        self.feature_extractor = feature_extractor

    def __call__(self, *args, **kwargs):
        # Delegate unchanged so every keyword (sampling_rate, padding, ...) is
        # interpreted by the underlying feature extractor.
        return self.feature_extractor(*args, **kwargs)


processor = _FeatureExtractorProcessor(AutoFeatureExtractor.from_pretrained(MODEL_ID))
# num_labels=2 attaches a freshly initialised two-class classification head.
model = HubertForSequenceClassification.from_pretrained(MODEL_ID, num_labels=2)

In [None]:
def preprocess_function(examples):
    """Turn one dataset row into model-ready features.

    Meant to be used with ``Dataset.map`` in its default, non-batched mode.

    Args:
        examples: a single row whose ``"audio"`` entry is a mapping with the
            decoded waveform under ``"array"`` and its rate under
            ``"sampling_rate"``.

    Returns:
        A plain dict with one entry per feature produced by the module-level
        ``processor``, each with the processor's leading batch axis removed so
        the stored value describes exactly one example.
    """
    audio = examples["audio"]
    # No return_tensors / padding here: the row is stored in the dataset, and
    # batching plus padding happens later when batches are collated. Asking
    # for "pt" tensors would store every feature with an extra (1, ...) axis.
    features = processor(audio["array"], sampling_rate=audio["sampling_rate"])
    # The processor always answers with a batch (of size one here); unwrap it.
    return {name: values[0] for name, values in features.items()}

from datasets import Audio

# Placeholder path: replace with a dataset that has an "audio" column.
dataset = load_dataset("path/to/your/dataset")
# Decode every clip at the rate the feature extractor expects; the extractor
# raises if it is handed audio with a different sampling_rate.
dataset = dataset.cast_column(
    "audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate)
)
dataset = dataset.map(preprocess_function)


In [None]:
import inspect

from transformers import TrainingArguments, Trainer


def _first_supported_kwarg(target, *names):
    """Return the first of ``names`` that ``target`` accepts as a keyword.

    Falls back to the last candidate when none is found in the signature.
    """
    accepted = inspect.signature(target).parameters
    return next((name for name in names if name in accepted), names[-1])


# transformers renamed ``evaluation_strategy`` to ``eval_strategy`` and
# ``Trainer(tokenizer=...)`` to ``Trainer(processing_class=...)``. The install
# cell does not pin a version, so use whichever spelling this release accepts.
eval_strategy_kwarg = _first_supported_kwarg(
    TrainingArguments.__init__, "eval_strategy", "evaluation_strategy"
)
processing_kwarg = _first_supported_kwarg(
    Trainer.__init__, "processing_class", "tokenizer"
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    # Evaluate on the eval split at the end of every epoch.
    **{eval_strategy_kwarg: "epoch"},
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    # Handing over the feature extractor lets the Trainer pad each batch with it.
    **{processing_kwarg: processor.feature_extractor},
)

trainer.train()


In [None]:
def measure_similarity(file1, file2, model, processor):
    """Compare two audio files through the classifier's output logits.

    Each file is loaded with ``librosa`` (resampled to the rate the processor's
    feature extractor expects), run through ``model`` without gradients, and
    the Euclidean distance between the two logit vectors is returned.

    Args:
        file1: path of the first audio file.
        file2: path of the second audio file.
        model: a ``torch.nn.Module`` whose output exposes ``.logits``.
        processor: callable that turns a waveform into model inputs; its
            ``feature_extractor.sampling_rate`` (or its own ``sampling_rate``)
            is used as the loading rate, defaulting to 16 kHz if absent.

    Returns:
        The distance as a Python float. Despite the function name this is a
        distance: 0 means identical logits and larger values mean the model
        sees the clips as more different.
    """
    extractor = getattr(processor, "feature_extractor", processor)
    target_sr = getattr(extractor, "sampling_rate", 16000)
    # After training the model may live on an accelerator; inputs must follow.
    device = next(model.parameters()).device

    def _clip_logits(path):
        # Resample while loading so the processor never sees a mismatched rate.
        waveform, sr = librosa.load(path, sr=target_sr)
        inputs = processor(waveform, sampling_rate=sr, return_tensors="pt", padding=True)
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        return model(**inputs).logits

    # Dropout must be off, otherwise the same pair gives a different score on
    # every call. The previous mode is restored so training can continue.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits1 = _clip_logits(file1)
            logits2 = _clip_logits(file2)
    finally:
        model.train(was_training)

    euclidean_distance = torch.nn.functional.pairwise_distance(logits1, logits2)
    return euclidean_distance.item()

# measure_similarity returns a Euclidean distance, so smaller means more alike.
similarity_score = measure_similarity('audio1.wav', 'audio2.wav', model, processor)
print(f"Similarity score (Euclidean distance, lower = more similar): {similarity_score}")
