In [17]:
!pip install numpy pandas scikit-learn surprise



In [18]:
# Import necessary libraries
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate

# Section 1: Load Imaginary Data

In [19]:
def load_data():
    """
    Build the small hard-coded ratings table used to exercise the pipeline.

    Returns:
        df (DataFrame): One row per rating, with integer columns
            user_id, song_id and rating (15 rows, 5 users, 7 songs).
    """
    # Each record is a single (user_id, song_id, rating) observation.
    records = [
        (1, 101, 5), (1, 102, 4), (1, 103, 3),
        (2, 101, 5), (2, 104, 4), (2, 105, 2),
        (3, 102, 3), (3, 103, 2), (3, 106, 4),
        (4, 104, 5), (4, 105, 3), (4, 106, 4),
        (5, 101, 2), (5, 105, 5), (5, 107, 4),
    ]

    return pd.DataFrame(records, columns=['user_id', 'song_id', 'rating'])

# Section 2: Prepare the Data for the Surprise Library

In [20]:
def prepare_data(df, rating_scale=None):
    """
    Prepare the data for the Surprise library.

    Args:
        df (DataFrame): DataFrame containing the dataset with columns user_id, song_id, and rating.
        rating_scale (tuple, optional): (lowest, highest) rating that can be given.
            When omitted, the scale is inferred from the smallest and largest
            rating present in df. Pass it explicitly whenever the data does not
            happen to contain both extremes of the real scale, because the
            Reader is configured with exactly this range.

    Returns:
        data (Dataset): Dataset object ready to be used by the Surprise library.

    Raises:
        ValueError: If df contains no rows (no scale can be inferred and
            there is nothing to train on).
    """
    if df.empty:
        raise ValueError("Cannot prepare an empty ratings DataFrame.")

    # Fall back to the observed range only when the caller did not supply one.
    if rating_scale is None:
        rating_scale = (df['rating'].min(), df['rating'].max())

    # Define a Reader with the rating scale
    reader = Reader(rating_scale=rating_scale)

    # Surprise expects exactly three columns in the order user, item, rating.
    return Dataset.load_from_df(df[['user_id', 'song_id', 'rating']], reader)

# Section 3: Train the Model

In [21]:
def train_model(data):
    """
    Cross-validate an SVD model and then train it on the full dataset.

    Args:
        data (Dataset): Dataset object ready to be used by the Surprise library.

    Returns:
        algo (SVD): SVD model fitted on every rating in data.
    """
    # Initialize the SVD algorithm
    algo = SVD()

    # Evaluate first. Cross-validation fits the algorithm on each fold's
    # training portion, so it has to happen before the final fit; otherwise the
    # returned model could reflect only a subset of the ratings.
    cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

    # Final fit on all available ratings, so that recommendations use every
    # observation rather than a partial train split.
    algo.fit(data.build_full_trainset())

    return algo

# Section 4: Get Song Recommendations for a User

In [22]:
def get_recommendations(algo, user_id, df, num_recommendations=10):
    """
    Get song recommendations for a specific user.

    Args:
        algo (SVD): Trained SVD model (any object whose predict(user, item)
            returns something with an `est` attribute works).
        user_id (int): ID of the user for whom to get recommendations.
        df (DataFrame): DataFrame containing the dataset with columns user_id, song_id, and rating.
        num_recommendations (int): Maximum number of recommendations to return.
            Zero or a negative value yields an empty list.

    Returns:
        recommendations (list): List of (song_id, predicted_rating) tuples,
            highest predicted rating first. Songs with equal predictions keep
            the order in which they first appear in df.
    """
    # Songs the user has already rated; a set gives constant-time membership
    # checks instead of scanning an array once per candidate song.
    rated_songs = set(df.loc[df['user_id'] == user_id, 'song_id'])

    # unique() returns values in order of first appearance, which fixes the
    # tie-break order after the stable sort below.
    candidates = [song for song in df['song_id'].unique() if song not in rated_songs]

    # Predict a rating for every song the user has not rated yet
    scored = [(song_id, algo.predict(user_id, song_id).est) for song_id in candidates]

    # Highest predicted rating first (list.sort is stable)
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative count cannot turn into "all but the last k" slicing.
    return scored[:max(num_recommendations, 0)]

# Section 5: Main Function

In [23]:
def main():
    """
    Run the whole demo: build the ratings table, train the model, and print
    the top recommendations for one example user.
    """
    # Example user whose recommendations are shown
    user_id = 1

    # Ratings table -> Surprise dataset -> trained model
    ratings = load_data()
    model = train_model(prepare_data(ratings))

    # Rank the songs this user has not rated yet
    top_songs = get_recommendations(model, user_id, ratings)

    # Report the ranked songs, one per line
    print(f"Top recommendations for user {user_id}:")
    for song_id, score in top_songs:
        print(f"Song ID: {song_id}, Predicted Rating: {score:.2f}")
# Entry point: run the full pipeline when this code is executed directly
# (as a script or notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main()

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.2749  1.0600  1.1528  1.4611  1.4207  1.2739  0.1530  
MAE (testset)     1.2392  0.8555  0.8578  1.2689  1.3153  1.1073  0.2061  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Top recommendations for user 1:
Song ID: 106, Predicted Rating: 3.70
Song ID: 104, Predicted Rating: 3.56
Song ID: 107, Predicted Rating: 3.44
Song ID: 105, Predicted Rating: 3.44
