In [None]:
import os
import json
import torch
import torchaudio
import numpy as np
from tqdm import tqdm
from transformers import ASTFeatureExtractor, ASTModel
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed
import unicodedata
import sys

# Pull settings from a local .env file into the process environment
load_dotenv()

# Build the Pinecone client used by the cells below
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
PINECONE_ENVIRONMENT = os.environ.get('PINECONE_ENVIRONMENT')  # expected to be defined in the .env file
pc = Pinecone(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENVIRONMENT,
)

In [None]:
def populate_song_embeddings(indexName, generate_embedding, dimension=768):
    """Embed every song in ``metadata/popular_songs.json`` and upsert it into Pinecone.

    For each song entry whose audio file ``songs/<id>.mp3`` exists, the vector
    returned by ``generate_embedding`` is stored under the song's id together
    with its descriptive metadata. Songs without a local audio file are skipped.

    Args:
        indexName: Name of the Pinecone index to write to. When it does not
            exist yet it is created as a serverless cosine index (aws, us-east-1).
        generate_embedding: Callable that takes an audio file path and returns
            an array exposing ``tolist()``.
        dimension: Vector width used only when the index has to be created.
            Defaults to 768; pass the embedding function's real output width
            when it differs, otherwise upserts into the new index will be rejected.

    Raises:
        Exception: whatever a worker raised while embedding or upserting a song
            is re-raised when that worker's result is collected.
    """
    import time  # local import: only needed while waiting for a new index

    def process_song(song):
        audio_path = os.path.join('songs', f"{song['id']}.mp3")
        if not os.path.exists(audio_path):
            return  # no audio on disk for this entry, nothing to embed

        embedding = generate_embedding(audio_path)

        metadata = {
            'name': song['name'],
            'artist': song['artist'],
            'album': song['album'],
            'genre': song['genre'],
            'preview_url': song['preview_url'],
            'cover_image_url': song['cover_image_url'],
        }
        # Pinecone does not accept null metadata values; omit the key instead.
        metadata = {key: value for key, value in metadata.items() if value is not None}

        # Store in Pinecone
        index.upsert(vectors=[(song['id'], embedding.tolist(), metadata)])

    # Check if index exists, if not create it
    if indexName not in pc.list_indexes().names():
        pc.create_index(
            name=indexName,
            dimension=dimension,
            metric='cosine',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-east-1'  # Choose an appropriate region
            )
        )
        # A new index is not usable immediately; wait until it reports ready.
        while not pc.describe_index(indexName).status['ready']:
            time.sleep(1)

    index = pc.Index(indexName)

    # Load metadata (explicit UTF-8 so non-ASCII names decode the same on every platform)
    with open('metadata/popular_songs.json', 'r', encoding='utf-8') as f:
        all_songs = json.load(f)

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(process_song, song) for song in all_songs]

        for future in tqdm(as_completed(futures), total=len(futures), desc="Generating embeddings"):
            future.result()  # Raise exceptions if any

    print("All song embeddings generated and stored in Pinecone.")

In [None]:
def populate_artist_embedding(indexName, generate_embedding, dimension=768):
    """Store one averaged embedding per artist from ``metadata/artist_metadata.json``.

    The metadata file maps an artist name to a list of song entries. For every
    artist, the embeddings of the songs whose ``songs/<id>.mp3`` file exists are
    averaged element-wise and upserted under an ASCII id derived from the name.
    Artists with no local audio are skipped.

    Args:
        indexName: Name of the Pinecone index to write to. When it does not
            exist yet it is created as a serverless cosine index (aws, us-east-1).
        generate_embedding: Callable that takes an audio file path and returns
            an array; all arrays for one artist must share a shape.
        dimension: Vector width used only when the index has to be created.
            Defaults to 768; pass the embedding function's real output width
            when it differs.

    Raises:
        Exception: whatever a worker raised while embedding or upserting is
            re-raised when that worker's result is collected.
    """
    import hashlib  # local imports: only this function needs them
    import time

    def to_ascii(name):
        """Return an ASCII vector id for ``name`` that is stable across runs."""
        ascii_name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
        if ascii_name.strip():
            return ascii_name
        # Builtin hash() is salted per process, so it would yield a different id
        # on every run and duplicate the vector; use a content digest instead.
        digest = hashlib.sha1(name.encode('utf-8')).hexdigest()[:16]
        return f"artist_{digest}"

    def process_artist(artist, songs):
        embeddings = []
        for song in songs:
            audio_path = os.path.join('songs', f"{song['id']}.mp3")
            if os.path.exists(audio_path):
                embeddings.append(generate_embedding(audio_path))

        if not embeddings:
            return  # no audio available for this artist

        artist_embedding = np.mean(embeddings, axis=0)

        # Store in Pinecone. None entries are filtered out because list-valued
        # metadata may only contain strings; genres are sorted for a stable order.
        index.upsert(vectors=[(to_ascii(artist), artist_embedding.tolist(), {
            'name': artist,
            'songs': [song['name'] for song in songs if song['name'] is not None],
            'genres': sorted({song['genre'] for song in songs if song['genre'] is not None}),
        })])

    # Check if index exists, if not create it
    if indexName not in pc.list_indexes().names():
        pc.create_index(
            name=indexName,
            dimension=dimension,
            metric='cosine',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-east-1'  # Choose an appropriate region
            )
        )
        # A new index is not usable immediately; wait until it reports ready.
        while not pc.describe_index(indexName).status['ready']:
            time.sleep(1)

    index = pc.Index(indexName)

    # Load metadata (explicit UTF-8 so non-ASCII artist names decode the same everywhere)
    with open('metadata/artist_metadata.json', 'r', encoding='utf-8') as f:
        artist_songs = json.load(f)

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(process_artist, artist, songs) for artist, songs in artist_songs.items()]

        for future in tqdm(as_completed(futures), total=len(futures), desc="Generating embeddings"):
            future.result()  # Raise exceptions if any

    print("All artist embeddings generated and stored in Pinecone.")

In [None]:
def populate_genre_embedding(indexName, generate_embedding, dimension=768):
    """Store one averaged embedding per genre from ``metadata/genres_metadata.json``.

    The metadata file maps a genre name to a list of song entries. For every
    genre, the embeddings of the songs whose ``songs/<id>.mp3`` file exists are
    averaged element-wise and upserted under an ASCII id derived from the genre
    name, together with the mean ``popularity`` of those same songs. Genres with
    no local audio are skipped.

    Args:
        indexName: Name of the Pinecone index to write to. When it does not
            exist yet it is created as a serverless cosine index (aws, us-east-1).
        generate_embedding: Callable that takes an audio file path and returns
            an array; all arrays for one genre must share a shape.
        dimension: Vector width used only when the index has to be created.
            Defaults to 768; pass the embedding function's real output width
            when it differs.

    Raises:
        Exception: whatever a worker raised while embedding or upserting is
            re-raised when that worker's result is collected.
    """
    import hashlib  # local imports: only this function needs them
    import time

    def to_ascii(name):
        """Return an ASCII vector id for ``name`` that is stable across runs."""
        ascii_name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
        if ascii_name.strip():
            return ascii_name
        # Builtin hash() is salted per process, so it would yield a different id
        # on every run and duplicate the vector; use a content digest instead.
        digest = hashlib.sha1(name.encode('utf-8')).hexdigest()[:16]
        return f"placeholder_{digest}"

    def process_genre(genre, songs):
        embeddings = []
        total_popularity = 0

        for song in songs:
            audio_path = os.path.join('songs', f"{song['id']}.mp3")
            if not os.path.exists(audio_path):
                continue
            embeddings.append(generate_embedding(audio_path))
            # A missing or null popularity counts as 0 instead of raising TypeError.
            total_popularity += song.get('popularity') or 0

        if not embeddings:
            return  # no audio available for this genre

        genre_embedding = np.mean(embeddings, axis=0)
        # One embedding was appended per counted song, so the list length is the count.
        average_popularity = total_popularity / len(embeddings)

        # Store in Pinecone. None entries are filtered out because list-valued
        # metadata may only contain strings; artists are sorted for a stable order.
        index.upsert(vectors=[(to_ascii(genre), genre_embedding.tolist(), {
            'name': genre,
            'songs': [song['name'] for song in songs if song['name'] is not None],
            'artists': sorted({song['artist'] for song in songs if song['artist'] is not None}),
            'popularity': average_popularity
        })])

    # Check if index exists, if not create it
    if indexName not in pc.list_indexes().names():
        pc.create_index(
            name=indexName,
            dimension=dimension,
            metric='cosine',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-east-1'  # Choose an appropriate region
            )
        )
        # A new index is not usable immediately; wait until it reports ready.
        while not pc.describe_index(indexName).status['ready']:
            time.sleep(1)

    index = pc.Index(indexName)

    # Load metadata (explicit UTF-8 so non-ASCII names decode the same everywhere)
    with open('metadata/genres_metadata.json', 'r', encoding='utf-8') as f:
        genre_songs = json.load(f)

    with ThreadPoolExecutor() as executor:
        genre_futures = [executor.submit(process_genre, genre, songs) for genre, songs in genre_songs.items()]

        for future in tqdm(as_completed(genre_futures), total=len(genre_futures), desc="Generating embeddings"):
            future.result()  # Raise exceptions if any

    print("All genre embeddings generated and stored in Pinecone.")


In [None]:
# Load the pretrained AST checkpoint together with its matching feature extractor
feature_extractor = ASTFeatureExtractor.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)
model = ASTModel.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)
# Inference only: use the GPU when one is available and switch to evaluation mode
model = model.to('cuda' if torch.cuda.is_available() else 'cpu').eval()

def generate_AST_embedding(audio_path):
    """Return one mean-pooled AST embedding vector for the audio file at ``audio_path``.

    The file is loaded with torchaudio, resampled to 16 kHz when it was stored
    at another rate, and each channel is passed to the module-level
    ``feature_extractor`` / ``model`` as a separate batch item. The model's
    ``last_hidden_state`` is averaged over the token axis and then over the
    channels.

    Args:
        audio_path: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        numpy.ndarray: A 1-D vector (the model's hidden width), for mono and
        multi-channel input alike.
    """
    target_sample_rate = 16000  # the rate the feature extractor is told the audio has
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    waveform, original_sample_rate = torchaudio.load(audio_path)
    # The extractor is called with sampling_rate=16000, so the samples must really
    # be at that rate; audio stored at a different rate is resampled first.
    if original_sample_rate != target_sample_rate:
        waveform = torchaudio.functional.resample(waveform, original_sample_rate, target_sample_rate)

    # One 1-D array per channel. Keeping the channel axis (instead of squeeze())
    # gives mono files a batch of one, so the pooling below always ends in a vector.
    channels = [channel.numpy() for channel in waveform]

    inputs = feature_extractor(channels, sampling_rate=target_sample_rate, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}  # Move inputs to the model's device

    with torch.no_grad():
        outputs = model(**inputs)

    # (channels, tokens, hidden) -> mean over tokens -> mean over channels -> (hidden,)
    pooled = outputs.last_hidden_state.mean(dim=1).mean(dim=0)
    return pooled.cpu().numpy()  # Move back to CPU for further processing

# Fill the song-, artist- and genre-level indexes with AST embeddings, in that order
populate_song_embeddings(
    indexName='ast-song-embeddings',
    generate_embedding=generate_AST_embedding,
)
populate_artist_embedding(
    indexName='ast-artist-embeddings',
    generate_embedding=generate_AST_embedding,
)
populate_genre_embedding(
    indexName='ast-genre-embeddings',
    generate_embedding=generate_AST_embedding,
)

In [None]:
# Make the parent of the working directory importable so the project-local
# `model` package resolves.
src_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
if src_path not in sys.path:  # keep re-runs of this cell from piling up duplicate entries
    sys.path.append(src_path)

from model.audio_preprocessor import AudioFeatureExtractor
from model.custom_model import AudioTransformerModel

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE: `feature_extractor` and `model` are rebound here. Functions defined
# earlier that read these globals (e.g. generate_AST_embedding) will see the
# custom objects from this point on.
# Initialize audio feature extractor
feature_extractor = AudioFeatureExtractor()
model = AudioTransformerModel(
    patch_size=32,
    num_layers=4,
    num_heads=8,
    d_model=256,
    dim_feedforward=512
)
# map_location lets a checkpoint that was saved from a GPU session load on a
# CPU-only machine, matching the CPU fallback used for the model itself.
model.load_state_dict(torch.load('../model/audio_embedding_model.pth', map_location=device))
model.to(device)
model.eval()

def generate_custom_embedding(audio_path):
    """Return a single embedding vector for ``audio_path`` from the custom model.

    The module-level ``feature_extractor`` turns the file into features, the
    module-level ``model`` embeds them without gradient tracking, and the
    result is mean-pooled into one NumPy array on the CPU.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Extract audio features and add a leading axis before moving to the device
    features = feature_extractor(audio_path)
    batch = torch.tensor(features).unsqueeze(0).to(device)

    # NOTE(review): the extractor is described as splitting audio into chunks,
    # but after unsqueeze(0) the leading axis has length 1, so this loop runs
    # once over the whole extractor output - confirm that is intended.
    pooled = []
    with torch.no_grad():
        for item in batch:
            # Mean over the first axis of the model output gives one vector per item
            pooled.append(model(item).mean(dim=0))

    return torch.stack(pooled).mean(dim=0).squeeze().cpu().numpy()

# Fill the song-, artist- and genre-level indexes with custom-model embeddings.
# NOTE(review): the populate_* helpers create a missing index 768 wide by default,
# while this model is configured with d_model=256 - confirm the embedding width
# matches the index before running.
populate_song_embeddings(
    indexName='custom-song-embeddings',
    generate_embedding=generate_custom_embedding,
)
populate_artist_embedding(
    indexName='custom-artist-embeddings',
    generate_embedding=generate_custom_embedding,
)
populate_genre_embedding(
    indexName='custom-genre-embeddings',
    generate_embedding=generate_custom_embedding,
)