## ✅ Step 1: Install Required Libraries

In [1]:
! pip install -q pandas numpy faiss-cpu sentence-transformers spotipy


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.7/278.7 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## ✅ Step 2: Import Libraries

In [2]:
import os
from getpass import getpass

import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import yaml


## ✅ Step 3: Spotify API Setup

In [10]:
# Spotify API credentials must not live in the notebook source. They are read from
# the environment (using the variable names spotipy documents) and, when unset,
# requested interactively so the cell still works in a fresh session.
# NOTE(review): the client ID/secret that used to be hardcoded in this cell were
# exposed to anyone with the notebook and should be rotated in the Spotify dashboard.
Client_id = os.environ.get('SPOTIPY_CLIENT_ID') or getpass('Spotify client ID: ')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET') or getpass('Spotify client secret: ')

# Client-credentials flow: app-level access only, no user login involved
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=Client_id,
                                                           client_secret=client_secret))

## ✅ Step 4: Load Song Data (from CSV)

In [12]:
# Read the track table, discard rows with any missing value and repeated track IDs,
# then renumber the index from 0 so index labels and row positions agree.
df = (
    pd.read_csv("/content/Spotify_data.csv")  # Replace with your actual path
    .dropna()
    .drop_duplicates(subset='id')
    .reset_index(drop=True)
)

## ✅ Step 5: Create Feature Vectors

We combine audio features with text-based embeddings from the song ID.

In [None]:
# Audio columns that make up the numeric part of every track vector
features = [
    'danceability', 'energy', 'loudness', 'speechiness',
    'acousticness', 'instrumentalness', 'tempo',
]

# Numeric part: the selected columns as a float32 matrix, one row per track
# NOTE(review): these columns are used unscaled next to the embedding values —
# confirm whether wide-range columns (e.g. tempo) should be normalised before
# they are compared with an L2 distance.
audio_features = df[features].to_numpy().astype('float32')

# Text part: a sentence-transformer embedding of each track's ID string
model = SentenceTransformer('all-MiniLM-L6-v2')
id_embeddings = model.encode(df['id'].tolist(), show_progress_bar=True)

# Final vectors: audio columns first, ID embedding after, joined column-wise
combined_features = np.concatenate((audio_features, id_embeddings), axis=1)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/5333 [00:00<?, ?it/s]

## ✅ Step 6: Build FAISS Index

In [None]:
# Flat L2 index whose dimensionality is the width of the combined track vectors
dimension = combined_features.shape[-1]
index = faiss.IndexFlatL2(dimension)

# Add all track vectors in row order, so search results can be used as row positions
index.add(combined_features)

## ✅ Step 7: Recommendation Function

In [None]:
def recommend_similar_tracks(song_id, top_k=5):
    """Return up to ``top_k`` tracks from ``df`` that are closest to ``song_id``.

    The query vector is assembled like the indexed vectors: the track's audio
    feature columns followed by the embedding of its ID string. It is then
    looked up in the module-level FAISS ``index``.

    Args:
        song_id: Value of the ``id`` column identifying the query track.
        top_k: Maximum number of recommendations to return.

    Returns:
        DataFrame with rows of ``df`` in the order returned by the index search,
        with the query track removed. It holds fewer than ``top_k`` rows when
        the index does not contain enough other tracks.

    Raises:
        ValueError: If ``song_id`` does not occur in ``df['id']``.
    """
    # Look the track up first: an unknown ID would otherwise surface as an
    # opaque shape-mismatch error when the two vector parts are stacked.
    matches = df.loc[df['id'] == song_id, features]
    if matches.empty:
        raise ValueError(f"song_id {song_id!r} not found in the dataset")

    # Keep a single row so the audio part lines up with the single ID embedding
    input_audio = matches.iloc[[0]].values.astype('float32')

    # Get ID embedding
    input_id_embedding = model.encode([song_id])

    # Combine for search vector
    input_vector = np.hstack((input_audio, input_id_embedding)).astype('float32')

    # Request one extra neighbour because the query track itself is in the index
    _, indices = index.search(input_vector, top_k + 1)

    # FAISS fills missing neighbours with -1 when it holds fewer vectors than
    # requested; iloc would interpret -1 as "last row", so drop those slots.
    neighbour_rows = indices[0]
    neighbour_rows = neighbour_rows[neighbour_rows >= 0]

    # Return similar tracks (excluding the query track itself)
    results = df.iloc[neighbour_rows]
    return results[results['id'] != song_id].head(top_k)

## ✅ Step 8: Generate Recommendations

In [None]:
# Query with the track stored at row position 10; any ID from the dataset works
sample_song_id = df['id'].iloc[10]
recommendations = recommend_similar_tracks(sample_song_id, top_k=5)

# Show only the identifying columns of the recommended tracks
display_columns = ['id', 'name', 'artists', 'popularity']
print(recommendations[display_columns])
