In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
from sklearn.preprocessing import LabelEncoder
from implicit.als import AlternatingLeastSquares

# Load triplets
triplet_path = 'C:/Users/arnav/Downloads/Compressed/train_triplets.txt/train_triplets.txt'
#df = pd.read_csv(triplet_path, sep='\t', header=None, names=['user_id', 'song_id', 'play_count'])
df = pd.read_csv(triplet_path, sep='\t', engine='python', header=None, names=['user_id', 'song_id', 'play_count'])

# Load metadata for mapping song_id -> title/artist
metadata_df = pd.read_csv("songs_metadata.csv")  # OR extract from .h5 like in your earlier code
metadata_map = metadata_df.set_index('song_id')[['title', 'artist']].to_dict('index')

# Encode IDs
user_enc = LabelEncoder()
song_enc = LabelEncoder()
df['user_idx'] = user_enc.fit_transform(df['user_id'])
df['song_idx'] = song_enc.fit_transform(df['song_id'])

# Build sparse matrix
interaction = coo_matrix((df['play_count'], (df['user_idx'], df['song_idx'])))

# Train ALS model
model = AlternatingLeastSquares(factors=50, regularization=0.1, iterations=20)
model.fit(interaction.T.tocsr())


  from .autonotebook import tqdm as notebook_tqdm
  check_blas_config()
  check_blas_config()
100%|██████████| 20/20 [02:39<00:00,  7.96s/it]


In [7]:
# Load metadata for mapping song_id -> title/artist
import pandas as pd
import matplotlib.pyplot as plt
import os
import h5py

# Function to read .h5 file and extract features
def load_song_features(file_path):
    with h5py.File(file_path, 'r') as f:
        song_id = f['analysis']['songs']['track_id'][0].decode()
        tempo = f['analysis']['songs']['tempo'][0]
        loudness = f['analysis']['songs']['loudness'][0]
        key = f['analysis']['songs']['key'][0]
        artist = f['metadata']['songs']['artist_name'][0].decode()
        title = f['metadata']['songs']['title'][0].decode()
        return {
            'song_id': song_id,
            'tempo': tempo,
            'loudness': loudness,
            'key': key,
            'artist': artist,
            'title': title
        }

# Traverse MSD directory and read files
songs_data = []
base_dir = "C:/Users/arnav/Downloads/Compressed/millionsongsubset/MillionSongSubset"
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file.endswith(".h5"):
            song_info = load_song_features(os.path.join(root, file))
            songs_data.append(song_info)

metadata_df = pd.DataFrame(songs_data)
print(metadata_df.head())
#metadata_df = pd.read_csv("data/songs_metadata.csv")  # OR extract from .h5 like in your earlier code
metadata_map = metadata_df.set_index('song_id')[['title', 'artist']].to_dict('index')
print(metadata_map)


              song_id    tempo  loudness  key            artist  \
0  TRAAAAW128F429D538   92.198   -11.197    1            Casual   
1  TRAAABD128F429CF47  121.274    -9.843    6      The Box Tops   
2  TRAAADZ128F9348C2E  100.070    -9.689    8  Sonora Santanera   
3  TRAAAEF128F4273421  119.293    -9.013    0          Adam Ant   
4  TRAAAFD128F92F423A  129.738    -4.501    2               Gob   

              title  
0  I Didn't Mean To  
1         Soul Deep  
2   Amor De Cabaret  
3   Something Girls  
4    Face the Ashes  


In [9]:
def recommend_songs_for_user(user_raw_id, N=10):
    try:
        user_idx = user_enc.transform([user_raw_id])[0]
    except:
        return []

    recommendations = model.recommend(user_idx, interaction.tocsr()[user_idx], N=N)
    rec_list = []

    for song_idx, score in recommendations:
        song_id = song_enc.inverse_transform([song_idx])[0]
        meta = metadata_map.get(song_id, {"title": "Unknown", "artist": "Unknown"})
        rec_list.append({
            "song_id": song_id,
            "title": meta["title"],
            "artist": meta["artist"],
            "score": round(score, 2)
        })

    return rec_list

# Example:
print(recommend_songs_for_user("b80344d063b5ccb3212f76538f3d9e43d87dca9e"))


IndexError: index 732659 is out of bounds for axis 0 with size 384546

In [10]:
from fastapi import FastAPI
app = FastAPI()

@app.get("/recommend")
def recommend(user_id: str):
    return {"user_id": user_id, "recommendations": recommend_songs_for_user(user_id)}

# Run: uvicorn app:app --reload
