<a href="https://colab.research.google.com/github/p1a2r3v4e5z6-creat/Music-Recommender/blob/main/Music_Recommender_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U langchain langchain-community chromadb sentence-transformers scikit-learn datasets numpy


In [4]:
# ===== STEP 1: Import Libraries =====
from datasets import load_dataset
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
import gradio as gr
import pandas as pd
import numpy as np
from collections import defaultdict
import re

# ===== STEP 2: Load Dataset from Hugging Face =====
print("📦 Loading dataset...")
dataset = load_dataset("maharshipandya/spotify-tracks-dataset")
df = dataset['train'].to_pandas()

# ===== STEP 3: Advanced Data Preparation =====
# Work on a reproducible (fixed-seed) subset. The request is capped at the
# table size because DataFrame.sample raises when asked for more rows than
# exist (sampling is without replacement by default).
SAMPLE_SIZE = 5000
df = df.sample(min(SAMPLE_SIZE, len(df)), random_state=42)

# Clean and normalize the text columns. Missing values are blanked first:
# astype(str) on its own turns NaN/None into the literal strings "nan"/"None",
# which would then end up in the embedded text and in the rendered results.
for col in ['track_name', 'artists', 'track_genre', 'album_name']:
    if col in df.columns:
        df[col] = df[col].fillna('').astype(str).str.strip()

df['popularity'] = df['popularity'].fillna(0).astype(int)

# Coerce audio features to numbers; unparseable or missing values become 0
audio_features = ['danceability', 'energy', 'valence', 'tempo', 'acousticness', 'instrumentalness']
for feat in audio_features:
    if feat in df.columns:
        df[feat] = pd.to_numeric(df[feat], errors='coerce').fillna(0)

# Fall back to the (sampled) row label when the dataset carries no track id
if 'track_id' not in df.columns:
    df['track_id'] = df.index

# Base text representation of each track (mood keywords are appended later)
df['text'] = (
    "Song: " + df['track_name'] +
    " Artist: " + df['artists'] +
    " Genre: " + df['track_genre']
)

# Add descriptive mood keywords based on audio features
def get_mood_keywords(row):
    """Build a space-separated string of mood words for one track.

    ``row`` is any mapping-like object supporting ``in`` and ``[]`` (a pandas
    row or a plain dict). Features absent from ``row`` contribute nothing.
    Returns an empty string when no threshold is crossed.
    """
    # (feature, upper threshold, phrase above it, lower threshold, phrase below it)
    # A lower threshold of None means the feature only has a "high" phrase.
    rules = (
        ('valence', 0.6, "happy upbeat cheerful", 0.4, "sad melancholic emotional"),
        ('energy', 0.7, "energetic intense powerful", 0.4, "calm relaxing peaceful"),
        ('danceability', 0.7, "danceable groovy rhythmic", None, None),
        ('acousticness', 0.6, "acoustic unplugged organic", None, None),
    )

    phrases = []
    for feature, high, high_phrase, low, low_phrase in rules:
        if feature not in row:
            continue
        level = row[feature]
        if level > high:
            phrases.append(high_phrase)
        elif low is not None and level < low:
            phrases.append(low_phrase)

    return " ".join(phrases)

df['mood_keywords'] = df.apply(get_mood_keywords, axis=1)
df['text_enhanced'] = df['text'] + " " + df['mood_keywords']

print(f"✅ Dataset ready: {len(df)} tracks with enhanced features")

# ===== STEP 4: Embeddings and Vector Store =====
print("🧠 Creating embeddings...")
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

# Materialize the texts once; both the vector store and the dense matrix use them
track_texts = df['text_enhanced'].tolist()

vectorstore = Chroma.from_texts(
    track_texts,
    embeddings,
    collection_name="spotify_music_enhanced"
)

# Precompute one embedding per track, row-aligned with df. A single batched
# embed_documents call replaces one embed_query call per track, so the model
# encodes in batches instead of being invoked once per text.
all_embeddings = np.asarray(embeddings.embed_documents(track_texts), dtype=float)

# Re-normalize to unit length so a dot product is a cosine similarity; a
# zero-length row is left as zeros rather than turned into NaNs.
row_norms = np.linalg.norm(all_embeddings, axis=1, keepdims=True)
row_norms[row_norms == 0] = 1.0
all_embeddings = all_embeddings / row_norms

print("✅ Embeddings ready!")

# ===== STEP 5: Enhanced Recommendation System =====

# Mood vocabulary. Each mood maps audio-feature names to inclusive
# (min, max) ranges, plus a 'keywords' list of trigger words for that mood.
# Insertion order is significant: mood detection walks the dict in order.
MOOD_MAPPINGS = {
    'sad': {
        'valence': (0, 0.4),
        'energy': (0, 0.5),
        'keywords': ['sad', 'melancholic', 'emotional', 'heartbreak'],
    },
    'happy': {
        'valence': (0.6, 1),
        'energy': (0.5, 1),
        'keywords': ['happy', 'upbeat', 'cheerful', 'joyful'],
    },
    'romantic': {
        'valence': (0.4, 0.7),
        'keywords': ['love', 'romantic', 'sweet', 'tender'],
    },
    'energetic': {
        'energy': (0.7, 1),
        'danceability': (0.6, 1),
        'keywords': ['energetic', 'powerful', 'intense'],
    },
    'calm': {
        'energy': (0, 0.4),
        'valence': (0.3, 0.7),
        'keywords': ['calm', 'peaceful', 'relaxing', 'chill'],
    },
    'party': {
        'danceability': (0.7, 1),
        'energy': (0.6, 1),
        'keywords': ['party', 'dance', 'club', 'groovy'],
    },
    'workout': {
        'energy': (0.75, 1),
        'tempo': (120, 180),
        'keywords': ['workout', 'gym', 'intense', 'powerful'],
    },
    'study': {
        'energy': (0, 0.5),
        'instrumentalness': (0.3, 1),
        'keywords': ['study', 'focus', 'instrumental', 'ambient'],
    },
    'lofi': {
        'energy': (0, 0.45),
        'acousticness': (0.3, 1),
        'keywords': ['lofi', 'chill', 'relaxing', 'lo-fi'],
    },
}

def extract_query_intent(query):
    """Extract mood, genre, and year hints from a free-text query.

    Args:
        query: The user's search text.

    Returns:
        A dict with keys:
          'mood'  - a key of MOOD_MAPPINGS, or None
          'genre' - a value from df['track_genre'], or None
          'year'  - the first 19xx/20xx number found, as int, or None
          'original_query' - ``query`` unchanged
    """
    query_lower = query.lower()

    # Mood: a mood that is named outright wins over keyword synonyms.
    # Checking names and keywords in one pass let a shared keyword of an
    # earlier mood shadow a later mood the user actually typed
    # (e.g. "chill lofi" resolved to 'calm' through the keyword 'chill').
    detected_mood = next(
        (mood for mood in MOOD_MAPPINGS if mood in query_lower),
        None,
    )
    if detected_mood is None:
        detected_mood = next(
            (
                mood
                for mood, info in MOOD_MAPPINGS.items()
                if any(kw in query_lower for kw in info.get('keywords', []))
            ),
            None,
        )

    # Genre: among all genres mentioned in the query, keep the longest one so
    # the most specific name wins (e.g. "k-pop" over "pop") regardless of the
    # order genres happen to appear in the data. Empty genre strings are
    # ignored because '' is a substring of every query.
    mentioned_genres = [
        genre
        for genre in df['track_genre'].unique().tolist()
        if genre and genre.lower() in query_lower
    ]
    detected_genre = max(mentioned_genres, key=len, default=None)

    # Year/era: first four-digit run starting with 19 or 20
    year_match = re.search(r'(19|20)\d{2}', query_lower)
    year_pref = int(year_match.group()) if year_match else None

    return {
        'mood': detected_mood,
        'genre': detected_genre,
        'year': year_pref,
        'original_query': query
    }

def filter_by_audio_features(intent, df_subset=None):
    """Keep only the tracks whose audio features fit the detected mood.

    ``intent['mood']`` selects a rule set from MOOD_MAPPINGS; every
    ``(min, max)`` rule whose feature is a column of the frame must hold
    (bounds inclusive). With no recognised mood, every row is kept.
    When ``df_subset`` is omitted, a copy of the global ``df`` is filtered.
    """
    candidates = df.copy() if df_subset is None else df_subset
    keep = pd.Series([True] * len(candidates), index=candidates.index)

    mood = intent['mood']
    if not mood or mood not in MOOD_MAPPINGS:
        return candidates[keep]

    for feature, bounds in MOOD_MAPPINGS[mood].items():
        # 'keywords' is text metadata, not a numeric range
        if feature == 'keywords':
            continue
        if not (isinstance(bounds, tuple) and len(bounds) == 2):
            continue
        if feature not in candidates.columns:
            continue
        lower, upper = bounds
        column = candidates[feature]
        keep &= (column >= lower) & (column <= upper)

    return candidates[keep]

def _score_tracks(semantic_scores, candidate_index, intent):
    """Return one composite score per row of ``df``; non-candidates get -inf."""
    positions = df.index.get_indexer(candidate_index)
    pool = df.iloc[positions]

    # Semantic similarity scaled by a popularity boost of 1 + popularity/200
    # (1.0-1.5 if popularity is on a 0-100 scale - TODO confirm).
    pool_scores = semantic_scores[positions] * (1 + pool['popularity'].to_numpy() / 200)

    # Genre boost: the detected genre appears in the track's genre
    if intent['genre']:
        genre_hit = (
            pool['track_genre'].str.lower()
            .str.contains(intent['genre'].lower(), regex=False, na=False)
            .to_numpy(dtype=bool)
        )
        pool_scores = np.where(genre_hit, pool_scores * 1.3, pool_scores)

    # Mood boost: any keyword of the detected mood appears in the track text
    if intent['mood']:
        mood_keywords = MOOD_MAPPINGS.get(intent['mood'], {}).get('keywords', [])
        if mood_keywords:
            track_text = pool['text_enhanced'].str.lower()
            mood_hit = np.zeros(len(pool), dtype=bool)
            for kw in mood_keywords:
                mood_hit |= track_text.str.contains(kw, regex=False, na=False).to_numpy(dtype=bool)
            pool_scores = np.where(mood_hit, pool_scores * 1.2, pool_scores)

    # -inf (not 0) for everything outside the candidate pool: cosine scores
    # can be negative, so a 0 default would let rejected tracks outrank, or
    # pad out, the real candidates.
    scores = np.full(len(df), -np.inf)
    scores[positions] = pool_scores
    return scores


def _select_diverse(ranked_positions, top_n, diversity_factor):
    """Pick up to ``top_n`` positions, limiting repeats of an artist or genre."""
    chosen = []
    deferred = []
    artist_count = defaultdict(int)
    genre_count = defaultdict(int)

    for pos in ranked_positions:
        if len(chosen) >= top_n:
            break

        row = df.iloc[pos]
        artist = row['artists']
        genre = row['track_genre']

        # Soft caps: at most 2 tracks per artist and 4 per genre. Once a cap
        # is hit, the track is skipped with probability ``diversity_factor``
        # so that a HIGHER factor yields MORE diversity.
        over_cap = artist_count[artist] >= 2 or genre_count[genre] >= 4
        if over_cap and np.random.random() < diversity_factor:
            deferred.append(pos)
            continue

        artist_count[artist] += 1
        genre_count[genre] += 1
        chosen.append(pos)

    # Backfill with the best skipped tracks so a strict diversity setting
    # does not return a short list when enough candidates exist.
    chosen.extend(deferred[:max(top_n - len(chosen), 0)])
    return chosen


def recommend_music(query, top_n=10, diversity_factor=0.7, return_intent=False):
    """Recommend tracks for a free-text query.

    Combines embedding similarity with mood-based audio-feature filtering,
    popularity/genre/mood boosts, and a randomized artist/genre diversity pass.

    Args:
        query: The user's search text.
        top_n: Maximum number of tracks to return.
        diversity_factor: Probability (0-1) of skipping a track whose artist
            already has 2 picks or whose genre already has 4. Higher values
            give a more varied list. Selection uses ``np.random`` and is
            therefore not deterministic unless the caller seeds it.
        return_intent: When True, also return the parsed intent dict.

    Returns:
        A list of dicts with keys "Track", "Artist", "Genre", "Album";
        or ``(results, intent)`` when ``return_intent`` is True.
    """
    # Parse query intent
    intent = extract_query_intent(query)

    # Embed the query once, expanded only with the detected mood and genre.
    # (Joining every string in ``intent`` also picked up 'original_query',
    # which embedded the query text twice.)
    hints = [intent[key] for key in ('mood', 'genre') if intent.get(key)]
    query_emb = np.asarray(embeddings.embed_query(" ".join([query, *hints])), dtype=float)
    query_norm = np.linalg.norm(query_emb)
    if query_norm:
        query_emb = query_emb / query_norm
    semantic_scores = np.dot(all_embeddings, query_emb)

    # Apply audio feature filtering; fall back to the full catalogue when the
    # mood filter leaves too few tracks to choose from
    filtered_df = filter_by_audio_features(intent)
    if len(filtered_df) < 5:
        filtered_df = df

    scores = _score_tracks(semantic_scores, filtered_df.index, intent)

    # Shortlist 3x the requested size, best first, dropping non-candidates
    shortlist_size = max(top_n, 0) * 3
    ranked = np.argsort(scores)[::-1][:shortlist_size]
    ranked = ranked[np.isfinite(scores[ranked])]

    selected = _select_diverse(ranked, top_n, diversity_factor)

    # Format results
    results = []
    for pos in selected:
        row = df.iloc[pos]
        results.append({
            "Track": row['track_name'],
            "Artist": row['artists'],
            "Genre": row['track_genre'],
            "Album": row.get('album_name', 'N/A'),
        })

    if return_intent:
        return results, intent
    return results

# ===== STEP 6: Premium Gradio Interface =====

def search_music(query):
    """Render recommendations for ``query`` as an HTML fragment for the UI.

    Args:
        query: The user's search text; ``None``, empty, or whitespace-only
            input yields a warning message instead of a search.

    Returns:
        An HTML string with a header (query plus detected mood/genre chips)
        and one card per recommended track, or a short plain-text message
        when the query is empty or nothing was found.
    """
    import html  # function-scope import keeps this block self-contained

    if not query or not query.strip():
        return "⚠️ Please enter a search query!"

    results, intent = recommend_music(query, top_n=15, return_intent=True)

    if not results:
        return "❌ No matches found. Try different keywords!"

    def esc(value):
        """HTML-escape a value after coercing it to text."""
        return html.escape(str(value))

    chip_style = "background: rgba(255,255,255,0.2); padding: 5px 15px; border-radius: 20px; margin: 5px;"

    # Collect fragments and join once instead of repeated string +=
    parts = ['<div style="padding: 20px;">']

    # Header section. The query is user input and track metadata comes from
    # the dataset, so both are escaped before being placed into markup.
    parts.append('<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 30px; border-radius: 15px; margin-bottom: 30px; color: white; text-align: center;">')
    parts.append('<h1 style="margin: 0; font-size: 2.5em; font-weight: 700;">🎵 Your Playlist</h1>')
    parts.append(f'<p style="font-size: 1.2em; margin: 10px 0 0 0; opacity: 0.9;">{esc(query)}</p>')

    if intent['mood'] or intent['genre']:
        parts.append('<div style="margin-top: 15px; font-size: 0.95em;">')
        if intent['mood']:
            parts.append(f'<span style="{chip_style}">💭 {esc(intent["mood"].title())}</span>')
        if intent['genre']:
            parts.append(f'<span style="{chip_style}">🎸 {esc(intent["genre"])}</span>')
        parts.append('</div>')

    parts.append('</div>')

    # Results grid
    parts.append('<div style="display: grid; gap: 15px;">')

    for i, r in enumerate(results, 1):
        track = esc(r['Track'])
        artist = esc(r['Artist'])
        album = esc(r['Album'])
        genre = esc(r['Genre'])
        parts.append(f'''
        <div style="background: white;
                    padding: 20px;
                    border-radius: 12px;
                    border-left: 5px solid #667eea;
                    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
                    transition: transform 0.2s;">
            <div style="display: flex; align-items: center; gap: 15px;">
                <div style="background: #667eea;
                            color: white;
                            width: 40px;
                            height: 40px;
                            border-radius: 50%;
                            display: flex;
                            align-items: center;
                            justify-content: center;
                            font-weight: bold;
                            font-size: 1.2em;
                            flex-shrink: 0;">
                    {i}
                </div>
                <div style="flex-grow: 1;">
                    <h3 style="margin: 0 0 8px 0; color: #1a202c; font-size: 1.3em; font-weight: 600;">{track}</h3>
                    <p style="margin: 0; color: #2d3748; font-size: 1em;">
                        <span style="font-weight: 600;">👤 {artist}</span>
                    </p>
                    <p style="margin: 5px 0 0 0; color: #4a5568; font-size: 0.9em;">
                        💿 {album} • 🎸 {genre}
                    </p>
                </div>
            </div>
        </div>
        ''')

    parts.append('</div>')
    parts.append(f'<p style="text-align: center; color: #718096; margin-top: 30px; font-size: 0.9em;">Found {len(results)} perfect matches for you ✨</p>')
    parts.append('</div>')

    return "".join(parts)

# Example queries
# One-click sample prompts; each is wrapped in its own single-item list
_example_prompts = (
    "sad romantic songs",
    "energetic workout music",
    "chill lofi for studying",
    "happy dance party hits",
    "calm acoustic evening music",
    "upbeat pop songs",
)
examples = [[prompt] for prompt in _example_prompts]

# Custom CSS for modern look.
# Passed to the Gradio interface through its `css` argument. It restyles the
# page container, text inputs (including focus and placeholder states), the
# primary button, and the example chips.
# NOTE(review): the `.input-container` and `#output-markdown` selectors have
# no matching elem_classes/elem_id in the visible code (only
# elem_id="search-input" is set) - confirm they still target something.
custom_css = """
.gradio-container {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
    max-width: 1200px !important;
    margin: auto !important;
}

.input-container {
    background: white;
    padding: 30px;
    border-radius: 15px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.07);
}

/* Fix search input visibility */
textarea, input[type="text"] {
    color: #1a202c !important;
    background: white !important;
    border: 2px solid #e2e8f0 !important;
    border-radius: 10px !important;
    font-size: 1.1em !important;
    padding: 12px !important;
}

textarea:focus, input[type="text"]:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
    outline: none !important;
}

textarea::placeholder, input[type="text"]::placeholder {
    color: #718096 !important;
    opacity: 1 !important;
}

button.primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    font-weight: 600 !important;
    padding: 12px 30px !important;
    border-radius: 8px !important;
    font-size: 1.05em !important;
}

button.primary:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(102, 126, 234, 0.3) !important;
}

.examples {
    margin-top: 20px !important;
}

.example-item {
    background: #f7fafc !important;
    border: 1px solid #e2e8f0 !important;
    border-radius: 8px !important;
    padding: 10px 15px !important;
    transition: all 0.2s !important;
}

.example-item:hover {
    background: #edf2f7 !important;
    border-color: #667eea !important;
}

#output-markdown {
    border: none !important;
    padding: 0 !important;
}
"""

# Build the UI: a single-line textbox feeds search_music, whose returned
# string is shown in an HTML output component.
iface = gr.Interface(
    fn=search_music,
    inputs=gr.Textbox(
        label="",
        placeholder="✨ What vibe are you looking for? (e.g., 'sad romantic songs', 'workout beats', 'chill lofi')",
        lines=1,
        elem_id="search-input"
    ),
    outputs=gr.HTML(label=""),
    title="🎧 AI Music Recommender",
    # Banner markup shown with the title
    description="""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #f5f7fa 0%, #e4e9f2 100%); border-radius: 10px; margin-bottom: 20px;">
        <p style="font-size: 1.15em; color: #4a5568; margin: 0; line-height: 1.6;">
            Discover your perfect soundtrack powered by AI 🚀<br>
            <span style="font-size: 0.95em; color: #718096;">Search by mood, genre, activity, or any combination!</span>
        </p>
    </div>
    """,
    # Sample prompts defined above in `examples`
    examples=examples,
    theme=gr.themes.Soft(
        primary_hue="purple",
        secondary_hue="blue",
    ),
    # Stylesheet defined above in `custom_css`
    css=custom_css,
    # NOTE(review): newer Gradio releases appear to rename this argument to
    # `flagging_mode` - confirm against the installed version.
    allow_flagging="never"
)

# Launch
print("\n🚀 Launching interface...")
# share=True: the captured run output shows a public *.gradio.live tunnel
# being opened for the local server on port 7860.
# debug=True: presumably keeps the call blocking until interrupted (the
# captured output ends with a keyboard interruption closing the server) -
# TODO confirm.
iface.launch(share=True, inline=False, debug=True)

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://b48cb2c716b12cd022.gradio.live


