In [3]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505179 sha256=5615d9c69ec3682a16a8856c9bb31edbf266ad57be1ac4822def0ef0350b4dd2
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Install

In [4]:
import pandas as pd
import numpy as np
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [6]:
# Load data (sample structure: instrument_id, user_id, rating, review)
data = pd.read_csv("music_reviews.csv")

# Sentiment Analysis
sia = SentimentIntensityAnalyzer()
data['sentiment_score'] = data['review'].apply(lambda x: sia.polarity_scores(str(x))['compound'])

def normalize_sentiment(score):
    return (score + 1) / 2  # Convert [-1,1] range to [0,1]

data['normalized_sentiment'] = data['sentiment_score'].apply(normalize_sentiment)

In [7]:
# Collaborative Filtering (Matrix Factorization)
reader = Reader(rating_scale=(0, 5))
dataset = Dataset.load_from_df(data[['user_id', 'instrument_id', 'rating']], reader)
trainset, testset = train_test_split(dataset, test_size=0.2)

model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7889e44d5e90>

In [8]:
# Content-Based Filtering (TF-IDF on reviews)
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(data['review'].astype(str))
similarity_matrix = cosine_similarity(tfidf_matrix)

# Hybrid Recommendation Function
def hybrid_recommend(user_id, instrument_id, top_n=5):
    # Collaborative Filtering Score
    cf_score = model.predict(user_id, instrument_id).est

    # Content Similarity Score
    instrument_idx = data[data['instrument_id'] == instrument_id].index[0]
    content_scores = similarity_matrix[instrument_idx]

    # Sentiment Score
    sentiment_score = data.loc[instrument_idx, 'normalized_sentiment']

    # Hybrid Score (weighted sum)
    hybrid_score = 0.5 * cf_score + 0.3 * np.mean(content_scores) + 0.2 * sentiment_score

    # Recommend Top N instruments
    recommendations = data[['instrument_id']].copy()
    recommendations['score'] = hybrid_score
    return recommendations.sort_values(by='score', ascending=False).head(top_n)

# Example Recommendation
print(hybrid_recommend(user_id=123, instrument_id=456))

     instrument_id     score
0              616  1.672844
671            739  1.672844
658            791  1.672844
659            594  1.672844
660            565  1.672844
