In [32]:
# Core tabular / numerical libraries used by every cell below.
import pandas as pd
import numpy as np
# Seed NumPy's legacy global RNG. The random sample-user pick made with
# np.random.randint in demonstrate_recommendations draws from this generator,
# so on a fresh kernel run top-to-bottom the same user is selected each time.
np.random.seed(10)

In [33]:
import sys
# Put the project's source directory at the front of the module search path.
# NOTE(review): presumably this is what makes the later
# `from constants.constants import ...` resolvable -- the path is relative, so
# it depends on the kernel's working directory; confirm against the repo layout.
project_root = '../../src/'
sys.path.insert(0, project_root)

In [34]:
# Load the listening-history table that feeds the SVD pipeline. The functions
# below read its 'user_id', 'uri', 'group_no' and audio-feature columns.
# NOTE(review): the path is relative to the kernel's working directory.
user_data = pd.read_csv('../../datasets/seven_day_listening_history.csv')
# Quick sanity check of the first few rows.
print(user_data.head())

FileNotFoundError: [Errno 2] No such file or directory: '../../datasets/seven_day_listening_history.csv.csv'

In [None]:
from scipy.sparse.linalg import svds

def create_user_item_matrix(df):
    """
    Step 1: Create the user-item matrix
    - Rows represent users
    - Columns represent songs (items)
    - Values represent the interaction score

    The interaction score of a row is the plain arithmetic mean of the
    feature columns listed below. No scaling is applied here, so columns on
    larger numeric scales contribute more to the mean; if the features are
    meant to be normalized, that has to happen before calling this function.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'user_id', 'uri' and every column named in
        ``feature_columns``. The frame is NOT modified.

    Returns
    -------
    pandas.DataFrame
        Pivot table indexed by 'user_id' with one column per 'uri'. Repeated
        (user_id, uri) pairs are averaged (pivot_table's default aggregation)
        and pairs that never occur in ``df`` are filled with 0.
    """
    feature_columns = [
        'danceability', 'energy', 'loudness', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration (ms)', 'spec_rate', 'labels'
    ]

    # Build the score on a small derived frame instead of writing a new
    # column into the caller's DataFrame, so repeated calls are side-effect
    # free and the input stays exactly as it was loaded.
    scored = df[['user_id', 'uri']].assign(
        interaction_score=df[feature_columns].mean(axis=1)
    )

    # fill_value=0 doubles as the "no interaction" marker for the cells of
    # user/song pairs that are absent from the data.
    user_item_matrix = scored.pivot_table(
        index='user_id',
        columns='uri',
        values='interaction_score',
        fill_value=0
    )

    return user_item_matrix

In [None]:

def perform_svd(matrix, k=10):
    """
    Step 2: Perform SVD on the matrix
    - Decompose matrix into U, Sigma, and V matrices
    - k is the number of latent factors

    Each row is centered on its own mean before the decomposition; the mean
    is taken over every entry of the row, including any zero-filled cells.

    Parameters
    ----------
    matrix : pandas.DataFrame
        2-D numeric table (its ``.values`` array is what gets decomposed).
    k : int, default 10
        Requested number of latent factors. ``svds`` needs
        ``k < min(matrix.shape)``; a larger request is reduced to the largest
        allowed value and a warning is emitted instead of failing.

    Returns
    -------
    U : numpy.ndarray, shape (n_rows, k)
    sigma : numpy.ndarray, shape (k, k)
        Singular values placed on the diagonal.
    Vt : numpy.ndarray, shape (k, n_cols)
    matrix_mean : numpy.ndarray, shape (n_rows,)
        Per-row means that were subtracted (needed to undo the centering).

    Raises
    ------
    ValueError
        If the matrix is too small for any truncated SVD (fewer than two
        rows or two columns).
    """
    import warnings

    # Convert to numpy array
    matrix_numpy = matrix.values

    # Largest factor count the truncated solver accepts for this shape.
    max_factors = min(matrix_numpy.shape) - 1
    if max_factors < 1:
        raise ValueError(
            f"Cannot run truncated SVD on a matrix of shape {matrix_numpy.shape}: "
            "at least 2 rows and 2 columns are required."
        )
    if k > max_factors:
        warnings.warn(
            f"k={k} is too large for a matrix of shape {matrix_numpy.shape}; "
            f"using k={max_factors} instead.",
            stacklevel=2,
        )
        k = max_factors

    # Center the matrix (subtract each row's mean)
    matrix_mean = np.mean(matrix_numpy, axis=1)
    matrix_centered = matrix_numpy - matrix_mean.reshape(-1, 1)

    # Perform SVD
    U, sigma, Vt = svds(matrix_centered, k=k)

    # Convert the 1-D vector of singular values to a diagonal matrix so the
    # factors can be multiplied back together directly.
    sigma = np.diag(sigma)

    return U, sigma, Vt, matrix_mean

def reconstruct_matrix(U, sigma, Vt, matrix_mean):
    """
    Step 3: Rebuild the prediction matrix from its SVD factors.

    Computes ``U . sigma . Vt`` and then shifts every row by the matching
    entry of ``matrix_mean``, which reverses the row-centering applied
    before the decomposition.

    Parameters
    ----------
    U, sigma, Vt : numpy.ndarray
        Factors whose shapes chain as (m, k), (k, k), (k, n).
    matrix_mean : numpy.ndarray
        One offset per row; reshaped to a column so it broadcasts across
        all columns of the product.

    Returns
    -------
    numpy.ndarray
        The (m, n) matrix of predicted scores.
    """
    # Column vector of per-row offsets
    row_offsets = matrix_mean.reshape(-1, 1)

    # Low-rank product, multiplied left to right
    predictions = np.dot(U, sigma).dot(Vt)

    # Restore the row means in place
    predictions += row_offsets

    return predictions

def get_recommendations(predictions, user_item_matrix, user_id, n_recommendations=5):
    """
    Step 4: Generate recommendations for a user
    - Find songs the user hasn't interacted with
    - Rank them by predicted score

    Parameters
    ----------
    predictions : numpy.ndarray
        Predicted scores, laid out like ``user_item_matrix`` (same row and
        column order).
    user_item_matrix : pandas.DataFrame
        Users on the index, songs on the columns. A stored value of exactly
        0 is interpreted as "the user has not interacted with this song".
    user_id : hashable
        Label of the user in ``user_item_matrix.index``.
    n_recommendations : int, default 5
        Maximum number of songs to return. Zero or a negative number yields
        an empty result; a number larger than the count of unlistened songs
        returns all of them.

    Returns
    -------
    recommended_songs : pandas.Index
        Column labels of the recommended songs, best first.
    scores : numpy.ndarray
        Predicted score of each recommended song, in the same order.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in the matrix index.
    """
    # Get user index (row position of the label)
    user_idx = user_item_matrix.index.get_loc(user_id)

    # Get song indices and names
    song_indices = np.arange(user_item_matrix.shape[1])
    song_names = user_item_matrix.columns

    # Get user's predictions
    user_predictions = predictions[user_idx]

    # Get indices of songs user hasn't interacted with (stored value == 0)
    user_songs = user_item_matrix.iloc[user_idx].values
    unlistened_songs = song_indices[user_songs == 0]

    # Get predictions for unlistened songs
    unlistened_predictions = user_predictions[unlistened_songs]

    # Get top N recommendations. Reverse first and then take the head:
    # slicing with [-n:] would return *everything* for n == 0 (because
    # -0 == 0) and an arbitrary tail for negative n.
    n_keep = max(n_recommendations, 0)
    top_n_idx = np.argsort(unlistened_predictions)[::-1][:n_keep]
    recommended_song_indices = unlistened_songs[top_n_idx]
    recommended_songs = song_names[recommended_song_indices]

    return recommended_songs, user_predictions[recommended_song_indices]

In [None]:
import os

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API credentials are read from the environment so that no secret is
# ever stored in the notebook or its version history. Export
# SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel.
# NOTE(review): a client id/secret pair used to be hardcoded in this cell and
# has therefore been exposed; it should be rotated in the Spotify developer
# dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not client_id or not client_secret:
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables before running this cell."
    )

# Authenticate with Spotify (client-credentials flow; no user login involved)
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Function to extract song name from Spotify URL
def get_song_names_from_url(song_urls):
    """
    Look up a human-readable "<title> by <artist>" label for each track.

    Parameters
    ----------
    song_urls : iterable of str
        Track references. For each one, the text after the last "/" and
        before any "?" is what gets sent to the Spotify API. Any iterable
        works (list, pandas Index/Series, generator); it is consumed once.

    Returns
    -------
    list of str
        One "<song name> by <first listed artist>" string per input, in
        input order. Empty input gives an empty list.

    Notes
    -----
    Uses the module-level ``sp`` Spotify client and performs one API request
    per track, so it needs network access and valid credentials.
    """
    song_names = []
    # Iterate over the values directly instead of indexing by position, so
    # the function does not depend on len() or integer indexing support.
    for song_url in song_urls:
        track_id = song_url.split("/")[-1].split("?")[0]  # Drop the URL path and any query string
        track_info = sp.track(track_id)  # Get track information
        song_name = track_info['name']  # Extract song name
        artist_name = track_info['artists'][0]['name']  # Only the first listed artist is used
        song_names.append(f"{song_name} by {artist_name}")
    return song_names



In [None]:
from constants.constants import group_mood_mapping

def main_svd_pipeline(df, n_factors=10):
    """
    Chain the three SVD steps and report progress on stdout.

    Builds the user-item matrix from ``df``, factorizes it with
    ``n_factors`` latent factors, and multiplies the factors back together
    to obtain predicted scores.

    Returns
    -------
    tuple
        ``(user_item_matrix, predictions)``.
    """
    # Step 1: user-item matrix
    print("Creating user-item matrix...")
    user_item_matrix = create_user_item_matrix(df)
    print(f"Matrix shape: {user_item_matrix.shape}")

    # Step 2: factorization
    print("\nPerforming SVD...")
    U, sigma, Vt, matrix_mean = perform_svd(user_item_matrix, k=n_factors)
    for label, factor in (("U", U), ("Sigma", sigma), ("V^T", Vt)):
        print(f"{label} shape: {factor.shape}")

    # Step 3: predicted scores
    print("\nGenerating predictions...")
    return user_item_matrix, reconstruct_matrix(U, sigma, Vt, matrix_mean)

# Example usage
def demonstrate_recommendations(df):
    """
    End-to-end demo: fit the SVD model on ``df`` and print recommendations
    for one randomly chosen user.

    The user is picked with ``np.random.randint`` (NumPy's global RNG). The
    user's group is read from the first matching row of ``df`` and printed
    together with its entry in ``group_mood_mapping``. Song identifiers are
    turned into readable names through the Spotify lookup helper.

    Returns
    -------
    tuple
        ``(recommended_songs, pred_scores)``: the readable song names and
        the predicted score for each.
    """
    # Fit the model
    user_item_matrix, predictions = main_svd_pipeline(df)

    # Pick a random row of the matrix and map it back to its user label
    row_position = np.random.randint(user_item_matrix.shape[0])
    sample_user = user_item_matrix.index[row_position]

    # Group of that user, taken from the first row belonging to them
    rows_of_user = df['user_id'] == sample_user
    group_no = df.loc[rows_of_user, 'group_no'].iloc[0]
    print(f"\nGetting recommendations for user {sample_user} from group {group_no} {group_mood_mapping[group_no]}...")

    # Rank unseen songs, then resolve their identifiers to names
    song_ids, pred_scores = get_recommendations(predictions, user_item_matrix, sample_user)
    recommended_songs = get_song_names_from_url(song_ids)
    print(f"\n {recommended_songs}")

    return recommended_songs, pred_scores

In [None]:
# Run the full demo on the loaded listening history. This fits the SVD model
# and then calls the Spotify API once per recommended song, so it needs
# network access and a configured Spotify client.
recommended_songs, pred_scores = demonstrate_recommendations(user_data)

Creating user-item matrix...
Matrix shape: (100, 83654)

Performing SVD...
U shape: (100, 10)
Sigma shape: (10, 10)
V^T shape: (10, 83654)

Generating predictions...

Getting recommendations for user 28 from group 3 Energetic...

 ['The Feeling (Of Warmth And Beauty) by Richard H. Kirk', 'Sad Songs (Say So Much) by Elton John', 'E Quella Notte Danzammo Dinanzi Alla Luna by Elica Mayres', 'My Dad And Who The F... Is Wifi by Kevin Hart', 'Beach Runner - Manuel Darquart Balearic Rerub by Pacific Coliseum']
